diff --git a/app/Models/BooleanSearch.php b/app/Models/BooleanSearch.php index e2f99d524..774e42073 100644 --- a/app/Models/BooleanSearch.php +++ b/app/Models/BooleanSearch.php @@ -5,6 +5,7 @@ */ class FreshRSS_BooleanSearch { + /** @var string */ private $raw_input = ''; private $searches = array(); @@ -54,11 +55,11 @@ class FreshRSS_BooleanSearch { return null; } - public function __toString() { + public function __toString(): string { return $this->getRawInput(); } - public function getRawInput() { + public function getRawInput(): string { return $this->raw_input; } } diff --git a/app/Models/Feed.php b/app/Models/Feed.php index e0d26046b..b5b6fdfd8 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -245,7 +245,7 @@ class FreshRSS_Feed extends Minz_Model { } $this->url = $value; } - public function _kind($value) { + public function _kind(int $value) { $this->kind = $value; } public function _category($value) { @@ -567,8 +567,8 @@ class FreshRSS_Feed extends Minz_Model { $feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl); } - // Same naming conventions than https://github.com/RSS-Bridge/rss-bridge/wiki/XPathAbstract - // https://github.com/RSS-Bridge/rss-bridge/wiki/The-collectData-function + // Same naming conventions than https://rss-bridge.github.io/rss-bridge/Bridge_API/XPathAbstract.html + // https://rss-bridge.github.io/rss-bridge/Bridge_API/BridgeAbstract.html#collectdata /** @var array */ $xPathSettings = $this->attributes('xpath'); $xPathFeedTitle = $xPathSettings['feedTitle'] ?? 
''; @@ -758,7 +758,8 @@ class FreshRSS_Feed extends Minz_Model { } } - public function filtersAction(string $action) { + /** @return array */ + public function filtersAction(string $action): array { $action = trim($action); if ($action == '') { return array(); @@ -775,6 +776,9 @@ class FreshRSS_Feed extends Minz_Model { return $filters; } + /** + * @param array $filters + */ public function _filtersAction(string $action, $filters) { $action = trim($action); if ($action == '' || !is_array($filters)) { diff --git a/app/Models/FeedDAO.php b/app/Models/FeedDAO.php index c4a0b1429..89e667813 100644 --- a/app/Models/FeedDAO.php +++ b/app/Models/FeedDAO.php @@ -104,6 +104,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { 'website' => $feed->website(), 'description' => $feed->description(), 'lastUpdate' => 0, + 'pathEntries' => $feed->pathEntries(), 'httpAuth' => $feed->httpAuth(), 'attributes' => $feed->attributes(), ); @@ -384,6 +385,9 @@ SQL; return false; } + /** + * @return array + */ public function listByCategory(int $cat): array { $sql = 'SELECT * FROM `_feed` WHERE category=?'; $stm = $this->pdo->prepare($sql); diff --git a/app/Models/FilterAction.php b/app/Models/FilterAction.php index 23a45d14e..394b573a4 100644 --- a/app/Models/FilterAction.php +++ b/app/Models/FilterAction.php @@ -2,6 +2,7 @@ class FreshRSS_FilterAction { + /** @var FreshRSS_BooleanSearch */ private $booleanSearch = null; private $actions = null; @@ -33,7 +34,7 @@ class FreshRSS_FilterAction { 'actions' => $this->actions, ); } - return ''; + return []; } public static function fromJSON($json) { diff --git a/app/Services/ExportService.php b/app/Services/ExportService.php index 7069ccec1..a80f20ce4 100644 --- a/app/Services/ExportService.php +++ b/app/Services/ExportService.php @@ -19,6 +19,10 @@ class FreshRSS_Export_Service { /** @var FreshRSS_TagDAO */ private $tag_dao; + const FRSS_NAMESPACE = 'https://freshrss.org/opml'; + const TYPE_HTML_XPATH = 
'HTML+XPath'; + const TYPE_RSS_ATOM = 'rss'; + /** * Initialize the service for the given user. * diff --git a/app/Services/ImportService.php b/app/Services/ImportService.php index 7d1bb1c7c..7e7cccfdb 100644 --- a/app/Services/ImportService.php +++ b/app/Services/ImportService.php @@ -148,6 +148,37 @@ class FreshRSS_Import_Service { $feed->_website($website); $feed->_description($description); + switch ($feed_elt['type'] ?? '') { + case FreshRSS_Export_Service::TYPE_HTML_XPATH: + $feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH); + break; + case FreshRSS_Export_Service::TYPE_RSS_ATOM: + default: + $feed->_kind(FreshRSS_Feed::KIND_RSS); + break; + } + + $xPathSettings = []; + foreach ($feed_elt as $key => $value) { + if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? '') === FreshRSS_Export_Service::FRSS_NAMESPACE) { + switch ($key) { + case 'cssFullContent': $feed->_pathEntries($value['value']); break; + case 'filtersActionRead': $feed->_filtersAction('read', preg_split('/[\n\r]+/', $value['value'])); break; + case 'xPathItem': $xPathSettings['item'] = $value['value']; break; + case 'xPathItemTitle': $xPathSettings['itemTitle'] = $value['value']; break; + case 'xPathItemContent': $xPathSettings['itemContent'] = $value['value']; break; + case 'xPathItemUri': $xPathSettings['itemUri'] = $value['value']; break; + case 'xPathItemAuthor': $xPathSettings['itemAuthor'] = $value['value']; break; + case 'xPathItemTimestamp': $xPathSettings['itemTimestamp'] = $value['value']; break; + case 'xPathItemThumbnail': $xPathSettings['itemThumbnail'] = $value['value']; break; + case 'xPathItemCategories': $xPathSettings['itemCategories'] = $value['value']; break; + } + } + } + if (!empty($xPathSettings)) { + $feed->_attributes('xpath', $xPathSettings); + } + // Call the extension hook $feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed); if ($feed != null) { diff --git a/app/views/helpers/export/opml.phtml b/app/views/helpers/export/opml.phtml 
index 971dc1445..458ea0e58 100644 --- a/app/views/helpers/export/opml.phtml +++ b/app/views/helpers/export/opml.phtml @@ -15,14 +15,40 @@ foreach ($this->categories as $key => $cat) { '@outlines' => array() ); + /** @var FreshRSS_Feed $feed */ foreach ($cat['feeds'] as $feed) { - $opml_array['body'][$key]['@outlines'][] = array( + $outline = [ 'text' => htmlspecialchars_decode($feed->name(), ENT_QUOTES), - 'type' => 'rss', + 'type' => FreshRSS_Export_Service::TYPE_RSS_ATOM, 'xmlUrl' => htmlspecialchars_decode($feed->url(), ENT_QUOTES), 'htmlUrl' => htmlspecialchars_decode($feed->website(), ENT_QUOTES), 'description' => htmlspecialchars_decode($feed->description(), ENT_QUOTES), - ); + ]; + if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH) { + $outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH; + /** @var array */ + $xPathSettings = $feed->attributes('xpath'); + $outline['frss:xPathItem'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['item'] ?? null]; + $outline['frss:xPathItemTitle'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTitle'] ?? null]; + $outline['frss:xPathItemContent'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemContent'] ?? null]; + $outline['frss:xPathItemUri'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemUri'] ?? null]; + $outline['frss:xPathItemAuthor'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemAuthor'] ?? null]; + $outline['frss:xPathItemTimestamp'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTimestamp'] ?? null]; + $outline['frss:xPathItemThumbnail'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemThumbnail'] ?? 
null]; + $outline['frss:xPathItemCategories'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemCategories'] ?? null]; + } + if (!empty($feed->filtersAction('read'))) { + $filters = ''; + foreach ($feed->filtersAction('read') as $filterRead) { + $filters .= $filterRead->getRawInput() . "\n"; + } + $filters = trim($filters); + $outline['frss:filtersActionRead'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $filters]; + } + if ($feed->pathEntries() != '') { + $outline['frss:cssFullContent'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $feed->pathEntries()]; + } + $opml_array['body'][$key]['@outlines'][] = $outline; } } diff --git a/docs/en/developers/OPML.md b/docs/en/developers/OPML.md new file mode 100644 index 000000000..59a59a748 --- /dev/null +++ b/docs/en/developers/OPML.md @@ -0,0 +1,74 @@ +# OPML in FreshRSS + +FreshRSS supports the [OPML](https://en.wikipedia.org/wiki/OPML) format to export and import lists of RSS/Atom feeds in a standard way, compatible with several other RSS aggregators. + +However, FreshRSS also supports several additional features not covered by the basic OPML specification. +Luckily, the [OPML specification](http://opml.org/spec2.opml) allows extensions: + +> *An OPML file may contain elements and attributes not described on this page, only if those elements are defined in a namespace.* + +and: + +> *OPML can also be extended by the addition of new values for the type attribute.* + +## FreshRSS OPML extension + +FreshRSS uses the XML namespace <https://freshrss.org/opml> to export/import extended information not covered by the basic OPML specification. 
+ +The list of the custom FreshRSS attributes can be seen in [the source code](https://github.com/FreshRSS/FreshRSS/blob/edge/app/views/helpers/export/opml.phtml), and here is an overview: + +### HTML+XPath + +* `<outline type="HTML+XPath" ...`: Additional type of source, which is not an RSS/Atom feed but an HTML Web page scraped with XPath. + +> ℹ️ [XPath 1.0](https://en.wikipedia.org/wiki/XPath) is a standard query language, which FreshRSS supports to enable [Web scraping](https://en.wikipedia.org/wiki/Web_scraping). + +The following attributes use naming conventions similar to those of [RSS-Bridge](https://rss-bridge.github.io/rss-bridge/Bridge_API/XPathAbstract.html). + +* `frss:xPathItem`: XPath expression for extracting the feed items from the source page. + * Example: `//div[@class="news-item"]` +* `frss:xPathItemTitle`: XPath expression for extracting an item’s title from the item context. + * Example: `descendant::h2` +* `frss:xPathItemContent`: XPath expression for extracting an item’s content from the item context. + * Example: `.` +* `frss:xPathItemUri`: XPath expression for extracting an item link from the item context. + * Example: `descendant::a/@href` +* `frss:xPathItemAuthor`: XPath expression for extracting an item author from the item context. + * Example: `"Anonymous"` +* `frss:xPathItemTimestamp`: XPath expression for extracting an item timestamp from the item context. The result will be parsed by [`strtotime()`](https://php.net/strtotime). +* `frss:xPathItemThumbnail`: XPath expression for extracting an item’s thumbnail (image) URL from the item context. + * Example: `descendant::img/@src` +* `frss:xPathItemCategories`: XPath expression for extracting a list of categories (tags) from the item context. + +### Miscellaneous + +* `frss:cssFullContent`: [CSS Selector](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors) to enable the download and extraction of the matching HTML section of each article’s Web address. + * Example: `div.main` +* `frss:filtersActionRead`: List (separated by a new line) of search queries to automatically mark a new article as read. 
+ +### Example + +```xml + + + + FreshRSS OPML extension example + + + + + +``` diff --git a/lib/lib_opml.php b/lib/lib_opml.php index b62f988c4..04b747a05 100644 --- a/lib/lib_opml.php +++ b/lib/lib_opml.php @@ -12,7 +12,7 @@ * * @author Marien Fressinaud * @link https://github.com/marienfressinaud/lib_opml - * @version 0.2-FreshRSS~1.5.1 + * @version 0.2-FreshRSS~1.20.0 * @license public domain * * Usages: @@ -91,8 +91,20 @@ function libopml_parse_outline($outline_xml, $strict = true) { // An outline may contain any kind of attributes but "text" attribute is // required ! $text_is_present = false; - foreach ($outline_xml->attributes() as $key => $value) { - $outline[$key] = (string)$value; + + $elem = dom_import_simplexml($outline_xml); + /** @var DOMAttr $attr */ + foreach ($elem->attributes as $attr) { + $key = $attr->localName; + + if ($attr->namespaceURI == '') { + $outline[$key] = $attr->value; + } else { + $outline[$key] = [ + 'namespace' => $attr->namespaceURI, + 'value' => $attr->value, + ]; + } if ($key === 'text') { $text_is_present = true; @@ -257,17 +269,22 @@ function libopml_render_outline($parent_elt, $outline, $strict) { foreach ($value as $outline_child) { libopml_render_outline($outline_elt, $outline_child, $strict); } - } elseif (is_array($value)) { + } elseif (is_array($value) && !isset($value['namespace'])) { throw new LibOPML_Exception( - 'Type of outline elements cannot be array: ' . $key + 'Type of outline elements cannot be array (except for providing a namespace): ' . $key ); } else { // Detect text attribute is present, that's good :) if ($key === 'text') { $text_is_present = true; } - - $outline_elt->addAttribute($key, $value); + if (is_array($value)) { + if (!empty($value['namespace']) && !empty($value['value'])) { + $outline_elt->addAttribute($key, $value['value'], $value['namespace']); + } + } else { + $outline_elt->addAttribute($key, $value); + } } }