OPML export/import of some proprietary FreshRSS attributes (#4342)

* OPML export/import of some proprietary FreshRSS attributes
#fix https://github.com/FreshRSS/FreshRSS/issues/4077
And one of the TODOs of https://github.com/FreshRSS/FreshRSS/pull/4220
XPath options, CSS Selector, and action filters

* Bump library patch version

* OPML namespace + documentation

* Add example
pull/4356/head
Alexandre Alapetite 2 years ago committed by GitHub
parent 9d1930d9ad
commit 4a87206f28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 5
      app/Models/BooleanSearch.php
  2. 12
      app/Models/Feed.php
  3. 4
      app/Models/FeedDAO.php
  4. 3
      app/Models/FilterAction.php
  5. 4
      app/Services/ExportService.php
  6. 31
      app/Services/ImportService.php
  7. 32
      app/views/helpers/export/opml.phtml
  8. 74
      docs/en/developers/OPML.md
  9. 31
      lib/lib_opml.php

@ -5,6 +5,7 @@
*/
class FreshRSS_BooleanSearch {
/** @var string */
private $raw_input = '';
private $searches = array();
@ -54,11 +55,11 @@ class FreshRSS_BooleanSearch {
return null;
}
public function __toString() {
public function __toString(): string {
return $this->getRawInput();
}
public function getRawInput() {
public function getRawInput(): string {
return $this->raw_input;
}
}

@ -245,7 +245,7 @@ class FreshRSS_Feed extends Minz_Model {
}
$this->url = $value;
}
public function _kind($value) {
public function _kind(int $value) {
$this->kind = $value;
}
public function _category($value) {
@ -567,8 +567,8 @@ class FreshRSS_Feed extends Minz_Model {
$feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl);
}
// Same naming conventions than https://github.com/RSS-Bridge/rss-bridge/wiki/XPathAbstract
// https://github.com/RSS-Bridge/rss-bridge/wiki/The-collectData-function
// Same naming conventions than https://rss-bridge.github.io/rss-bridge/Bridge_API/XPathAbstract.html
// https://rss-bridge.github.io/rss-bridge/Bridge_API/BridgeAbstract.html#collectdata
/** @var array<string,string> */
$xPathSettings = $this->attributes('xpath');
$xPathFeedTitle = $xPathSettings['feedTitle'] ?? '';
@ -758,7 +758,8 @@ class FreshRSS_Feed extends Minz_Model {
}
}
public function filtersAction(string $action) {
/** @return array<FreshRSS_BooleanSearch> */
public function filtersAction(string $action): array {
$action = trim($action);
if ($action == '') {
return array();
@ -775,6 +776,9 @@ class FreshRSS_Feed extends Minz_Model {
return $filters;
}
/**
* @param array<string> $filters
*/
public function _filtersAction(string $action, $filters) {
$action = trim($action);
if ($action == '' || !is_array($filters)) {

@ -104,6 +104,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
'website' => $feed->website(),
'description' => $feed->description(),
'lastUpdate' => 0,
'pathEntries' => $feed->pathEntries(),
'httpAuth' => $feed->httpAuth(),
'attributes' => $feed->attributes(),
);
@ -384,6 +385,9 @@ SQL;
return false;
}
/**
* @return array<FreshRSS_Feed>
*/
public function listByCategory(int $cat): array {
$sql = 'SELECT * FROM `_feed` WHERE category=?';
$stm = $this->pdo->prepare($sql);

@ -2,6 +2,7 @@
class FreshRSS_FilterAction {
/** @var FreshRSS_BooleanSearch */
private $booleanSearch = null;
private $actions = null;
@ -33,7 +34,7 @@ class FreshRSS_FilterAction {
'actions' => $this->actions,
);
}
return '';
return [];
}
public static function fromJSON($json) {

@ -19,6 +19,10 @@ class FreshRSS_Export_Service {
/** @var FreshRSS_TagDAO */
private $tag_dao;
const FRSS_NAMESPACE = 'https://freshrss.org/opml';
const TYPE_HTML_XPATH = 'HTML+XPath';
const TYPE_RSS_ATOM = 'rss';
/**
* Initialize the service for the given user.
*

@ -148,6 +148,37 @@ class FreshRSS_Import_Service {
$feed->_website($website);
$feed->_description($description);
switch ($feed_elt['type'] ?? '') {
case FreshRSS_Export_Service::TYPE_HTML_XPATH:
$feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH);
break;
case FreshRSS_Export_Service::TYPE_RSS_ATOM:
default:
$feed->_kind(FreshRSS_Feed::KIND_RSS);
break;
}
$xPathSettings = [];
foreach ($feed_elt as $key => $value) {
if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? '') === FreshRSS_Export_Service::FRSS_NAMESPACE) {
switch ($key) {
case 'cssFullContent': $feed->_pathEntries($value['value']); break;
case 'filtersActionRead': $feed->_filtersAction('read', preg_split('/[\n\r]+/', $value['value'])); break;
case 'xPathItem': $xPathSettings['item'] = $value['value']; break;
case 'xPathItemTitle': $xPathSettings['itemTitle'] = $value['value']; break;
case 'xPathItemContent': $xPathSettings['itemContent'] = $value['value']; break;
case 'xPathItemUri': $xPathSettings['itemUri'] = $value['value']; break;
case 'xPathItemAuthor': $xPathSettings['itemAuthor'] = $value['value']; break;
case 'xPathItemTimestamp': $xPathSettings['itemTimestamp'] = $value['value']; break;
case 'xPathItemThumbnail': $xPathSettings['itemThumbnail'] = $value['value']; break;
case 'xPathItemCategories': $xPathSettings['itemCategories'] = $value['value']; break;
}
}
}
if (!empty($xPathSettings)) {
$feed->_attributes('xpath', $xPathSettings);
}
// Call the extension hook
$feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed);
if ($feed != null) {

@ -15,14 +15,40 @@ foreach ($this->categories as $key => $cat) {
'@outlines' => array()
);
/** @var FreshRSS_Feed $feed */
foreach ($cat['feeds'] as $feed) {
$opml_array['body'][$key]['@outlines'][] = array(
$outline = [
'text' => htmlspecialchars_decode($feed->name(), ENT_QUOTES),
'type' => 'rss',
'type' => FreshRSS_Export_Service::TYPE_RSS_ATOM,
'xmlUrl' => htmlspecialchars_decode($feed->url(), ENT_QUOTES),
'htmlUrl' => htmlspecialchars_decode($feed->website(), ENT_QUOTES),
'description' => htmlspecialchars_decode($feed->description(), ENT_QUOTES),
);
];
if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH) {
$outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH;
/** @var array<string,string> */
$xPathSettings = $feed->attributes('xpath');
$outline['frss:xPathItem'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['item'] ?? null];
$outline['frss:xPathItemTitle'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTitle'] ?? null];
$outline['frss:xPathItemContent'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemContent'] ?? null];
$outline['frss:xPathItemUri'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemUri'] ?? null];
$outline['frss:xPathItemAuthor'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemAuthor'] ?? null];
$outline['frss:xPathItemTimestamp'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTimestamp'] ?? null];
$outline['frss:xPathItemThumbnail'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemThumbnail'] ?? null];
$outline['frss:xPathItemCategories'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemCategories'] ?? null];
}
if (!empty($feed->filtersAction('read'))) {
$filters = '';
foreach ($feed->filtersAction('read') as $filterRead) {
$filters .= $filterRead->getRawInput() . "\n";
}
$filters = trim($filters);
$outline['frss:filtersActionRead'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $filters];
}
if ($feed->pathEntries() != '') {
$outline['frss:cssFullContent'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $feed->pathEntries()];
}
$opml_array['body'][$key]['@outlines'][] = $outline;
}
}

@ -0,0 +1,74 @@
# OPML in FreshRSS
FreshRSS supports the [OPML](https://en.wikipedia.org/wiki/OPML) format to export and import lists of RSS/Atom feeds in a standard way, compatible with several other RSS aggregators.
However, FreshRSS also supports several additional features not covered by the basic OPML specification.
Luckily, the [OPML specification](http://opml.org/spec2.opml) allows extensions:
> *An OPML file may contain elements and attributes not described on this page, only if those elements are defined in a namespace.*
and:
> *OPML can also be extended by the addition of new values for the type attribute.*
## FreshRSS OPML extension
FreshRSS uses the XML namespace <https://freshrss.org/opml> to export/import extended information not covered by the basic OPML specification.
The list of the custom FreshRSS attributes can be seen in [the source code](https://github.com/FreshRSS/FreshRSS/blob/edge/app/views/helpers/export/opml.phtml), and here is an overview:
### HTML+XPath
* `<outline type="HTML+XPath" ...`: Additional type of source, which is not RSS/Atom, but HTML Web Scraping using [XPath](https://www.w3.org/TR/xpath-10/) 1.0.
> ℹ [XPath 1.0](https://en.wikipedia.org/wiki/XPath) is a standard query language, which FreshRSS supports to enable [Web scraping](https://en.wikipedia.org/wiki/Web_scraping).
The following attributes use naming conventions similar to those of [RSS-Bridge](https://rss-bridge.github.io/rss-bridge/Bridge_API/XPathAbstract.html).
* `frss:xPathItem`: XPath expression for extracting the feed items from the source page.
* Example: `//div[@class="news-item"]`
* `frss:xPathItemTitle`: XPath expression for extracting an item’s title from the item context.
* Example: `descendant::h2`
* `frss:xPathItemContent`: XPath expression for extracting an item’s content from the item context.
* Example: `.`
* `frss:xPathItemUri`: XPath expression for extracting an item link from the item context.
* Example: `descendant::a/@href`
* `frss:xPathItemAuthor`: XPath expression for extracting an item author from the item context.
* Example: `"Anonymous"`
* `frss:xPathItemTimestamp`: XPath expression for extracting an item timestamp from the item context. The result will be parsed by [`strtotime()`](https://php.net/strtotime).
* `frss:xPathItemThumbnail`: XPath expression for extracting an item’s thumbnail (image) URL from the item context.
* Example: `descendant::img/@src`
* `frss:xPathItemCategories`: XPath expression for extracting a list of categories (tags) from the item context.
### Miscellaneous
* `frss:cssFullContent`: [CSS Selector](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors) to enable the download and extraction of the matching HTML section of each article’s Web address.
* Example: `div.main`
* `frss:filtersActionRead`: List (separated by a new line) of search queries to automatically mark a new article as read.
### Example
```xml
<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
<head>
<title>FreshRSS OPML extension example</title>
</head>
<body>
<outline xmlns:frss="https://freshrss.org/opml"
text="Example"
type="HTML+XPath"
xmlUrl="https://www.example.net/page.html"
htmlUrl="https://www.example.net/page.html"
description="Example of Web scraping"
frss:xPathItem="//a[contains(@href, '/interesting/')]/ancestor::article"
frss:xPathItemTitle="descendant::h2"
frss:xPathItemContent="."
frss:xPathItemUri="descendant::a[string-length(@href)&gt;0]/@href"
frss:xPathItemThumbnail="descendant::img/@src"
frss:cssFullContent="article"
frss:filtersActionRead="intitle:⚡ OR intitle:🔥&#10;something"
/>
</body>
</opml>
```

@ -12,7 +12,7 @@
*
* @author Marien Fressinaud <dev@marienfressinaud.fr>
* @link https://github.com/marienfressinaud/lib_opml
* @version 0.2-FreshRSS~1.5.1
* @version 0.2-FreshRSS~1.20.0
* @license public domain
*
* Usages:
@ -91,8 +91,20 @@ function libopml_parse_outline($outline_xml, $strict = true) {
// An outline may contain any kind of attributes but "text" attribute is
// required !
$text_is_present = false;
foreach ($outline_xml->attributes() as $key => $value) {
$outline[$key] = (string)$value;
$elem = dom_import_simplexml($outline_xml);
/** @var DOMAttr $attr */
foreach ($elem->attributes as $attr) {
$key = $attr->localName;
if ($attr->namespaceURI == '') {
$outline[$key] = $attr->value;
} else {
$outline[$key] = [
'namespace' => $attr->namespaceURI,
'value' => $attr->value,
];
}
if ($key === 'text') {
$text_is_present = true;
@ -257,17 +269,22 @@ function libopml_render_outline($parent_elt, $outline, $strict) {
foreach ($value as $outline_child) {
libopml_render_outline($outline_elt, $outline_child, $strict);
}
} elseif (is_array($value)) {
} elseif (is_array($value) && !isset($value['namespace'])) {
throw new LibOPML_Exception(
'Type of outline elements cannot be array: ' . $key
'Type of outline elements cannot be array (except for providing a namespace): ' . $key
);
} else {
// Detect text attribute is present, that's good :)
if ($key === 'text') {
$text_is_present = true;
}
$outline_elt->addAttribute($key, $value);
if (is_array($value)) {
if (!empty($value['namespace']) && !empty($value['value'])) {
$outline_elt->addAttribute($key, $value['value'], $value['namespace']);
}
} else {
$outline_elt->addAttribute($key, $value);
}
}
}

Loading…
Cancel
Save