From f7f5dba795e162fc9981cf900fd3c2b073fd9dcc Mon Sep 17 00:00:00 2001 From: "Daniel J. Summers" Date: Fri, 31 May 2024 16:00:04 -0400 Subject: [PATCH] Split out feed parsing from document WIP - still moving things around... --- src/composer.json | 3 +- src/composer.lock | 5 +- src/lib/Domain/Feed.php | 72 ------ src/lib/Domain/Item.php | 6 +- src/lib/Feed.php | 315 ++++------------------- src/lib/ParsedFeed.php | 252 ++++++++++++++++++ src/lib/{FeedItem.php => ParsedItem.php} | 30 +-- src/public/bookmark.php | 22 +- src/public/feed/items.php | 2 +- src/public/user/log-on.php | 1 + src/util/refresh.php | 10 +- src/util/user.php | 49 ++-- 12 files changed, 371 insertions(+), 396 deletions(-) delete mode 100644 src/lib/Domain/Feed.php create mode 100644 src/lib/ParsedFeed.php rename src/lib/{FeedItem.php => ParsedItem.php} (71%) diff --git a/src/composer.json b/src/composer.json index 96ef946..62c4ceb 100644 --- a/src/composer.json +++ b/src/composer.json @@ -15,7 +15,8 @@ "bit-badger/documents-sqlite": "dev-conversion", "ext-sqlite3": "*", "ext-dom": "*", - "ext-curl": "*" + "ext-curl": "*", + "ext-readline": "*" }, "autoload": { "psr-4": { diff --git a/src/composer.lock b/src/composer.lock index f31827f..8767b95 100644 --- a/src/composer.lock +++ b/src/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "6919c5b5b8f417396276d24c8f8edbde", + "content-hash": "029a3af4ce4e5cc5488c1ca634a8af61", "packages": [ { "name": "bit-badger/documents-common", @@ -44,7 +44,8 @@ "platform": { "ext-sqlite3": "*", "ext-dom": "*", - "ext-curl": "*" + "ext-curl": "*", + "ext-readline": "*" }, "platform-dev": [], "plugin-api-version": "2.6.0" diff --git a/src/lib/Domain/Feed.php b/src/lib/Domain/Feed.php deleted file mode 100644 index bbf7734..0000000 --- a/src/lib/Domain/Feed.php +++ /dev/null @@ -1,72 +0,0 @@ -updated_on) ? 
null : new DateTimeImmutable($this->updated_on); - } - - /** - * The date/time this feed was last checked - * - * @return DateTimeImmutable|null The last checked date, or null if it is not set - * @throws Exception If the date/time is an invalid format - */ - public function checkedOn(): ?DateTimeImmutable - { - return is_null($this->checked_on) ? null : new DateTimeImmutable($this->checked_on); - } - - /** - * Create a document from the parsed feed - * - * @param FeedParsed $feed The parsed feed - * @return static The document constructed from the parsed feed - */ - public static function fromParsed(FeedParsed $feed): static - { - $it = new static(); - $it->user_id = $_SESSION[Key::USER_ID]; - $it->url = $feed->url; - $it->title = $feed->title; - $it->updated_on = $feed->updatedOn; - $it->checked_on = Data::formatDate('now'); - - return $it; - } -} diff --git a/src/lib/Domain/Item.php b/src/lib/Domain/Item.php index e718350..48a88fa 100644 --- a/src/lib/Domain/Item.php +++ b/src/lib/Domain/Item.php @@ -1,7 +1,7 @@ feed_id = $feedId; diff --git a/src/lib/Feed.php b/src/lib/Feed.php index 799f568..9589d03 100644 --- a/src/lib/Feed.php +++ b/src/lib/Feed.php @@ -12,44 +12,16 @@ use BitBadger\Documents\SQLite\Exists; use BitBadger\Documents\SQLite\Find; use BitBadger\Documents\SQLite\Patch; use DateTimeInterface; -use DOMDocument; -use DOMElement; -use DOMException; -use DOMNode; -use FeedReaderCentral\Domain\Feed as FeedDocument; use FeedReaderCentral\Domain\Item; use FeedReaderCentral\Domain\Table; use SQLite3; /** - * Feed retrieval, parsing, and manipulation + * An RSS or Atom feed */ -class Feed { - - /** @var string The URL for the feed */ - public string $url = ''; - - /** @var string The title of the feed */ - public string $title = ''; - - /** @var ?string When the feed was last updated */ - public ?string $updatedOn = null; - - /** @var FeedItem[] The items contained in the feed */ - public array $items = []; - - /** @var string The XML namespace for Atom 
feeds */ - public const string ATOM_NS = 'http://www.w3.org/2005/Atom'; - - /** @var string The XML namespace for the `` tag that allows HTML content in a feed */ - public const string CONTENT_NS = 'http://purl.org/rss/1.0/modules/content/'; - - /** @var string The XML namespace for XHTML */ - public const string XHTML_NS = 'http://www.w3.org/1999/xhtml'; - - /** @var string The user agent for Feed Reader Central's refresh requests */ - private const string USER_AGENT = - 'FeedReaderCentral/' . FRC_VERSION . ' +https://bitbadger.solutions/open-source/feed-reader-central'; +class Feed +{ + // ***** CONSTANTS ***** /** @var int Do not purge items */ public const int PURGE_NONE = 0; @@ -63,231 +35,57 @@ class Feed { /** @var int Purge items in number greater than the specified number of items to keep */ public const int PURGE_BY_COUNT = 3; - /** - * When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them - * - * @param int $errno The error level encountered - * @param string $errstr The text of the error encountered - * @return bool False, to delegate to the next error handler in the chain - * @throws DOMException If the error is a warning - */ - private static function xmlParseError(int $errno, string $errstr): bool { - if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXML()') > 0) { - throw new DOMException($errstr, $errno); - } - return false; - } + // ***** PROPERTIES ***** + + /** @var int The ID of the feed */ + public int $id = 0; + + /** @var int The ID of the user to whom this subscription belongs */ + public int $user_id = 0; + + /** @var string The URL of the feed */ + public string $url = ''; + + /** @var string|null The title of this feed */ + public ?string $title = null; + + /** @var string|null The date/time items in this feed were last updated */ + public ?string $updated_on = null; + + /** @var string|null The date/time this feed was last checked */ + public ?string $checked_on = 
null; + + // ***** STATIC FUNCTIONS ***** /** - * Parse a feed into an XML tree + * Create a document from the parsed feed * - * @param string $content The feed's RSS content - * @return array|DOMDocument[]|string[] ['ok' => feed] if successful, ['error' => message] if not + * @param ParsedFeed $parsed The parsed feed + * @return static The document constructed from the parsed feed */ - public static function parseFeed(string $content): array { - set_error_handler(self::xmlParseError(...)); - try { - $feed = new DOMDocument(); - $feed->loadXML($content); - return ['ok' => $feed]; - } catch (DOMException $ex) { - return ['error' => $ex->getMessage()]; - } finally { - restore_error_handler(); - } - } + public static function fromParsed(ParsedFeed $parsed): static + { + $it = new static(); + $it->user_id = $_SESSION[Key::USER_ID]; + $it->url = $parsed->url; + $it->title = $parsed->title; + $it->updated_on = $parsed->updatedOn; + $it->checked_on = Data::formatDate('now'); - /** - * Get the value of a child element by its tag name for an RSS feed - * - * @param DOMNode $element The parent element - * @param string $tagName The name of the tag whose value should be obtained - * @return string The value of the element (or "[element] not found" if that element does not exist) - */ - public static function rssValue(DOMNode $element, string $tagName): string { - $tags = $element->getElementsByTagName($tagName); - return $tags->length == 0 ? "$tagName not found" : $tags->item(0)->textContent; - } - - /** - * Extract items from an RSS feed - * - * @param DOMDocument $xml The XML received from the feed - * @param string $url The actual URL for the feed - * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not - */ - private static function fromRSS(DOMDocument $xml, string $url): array { - $channel = $xml->getElementsByTagName('channel')->item(0); - if (!($channel instanceof DOMElement)) { - $type = $channel?->nodeType ?? 
-1; - return ['error' => "Channel element not found ($type)"]; - } - - // The Atom namespace provides a lastBuildDate, which contains the last time an item in the feed was updated; if - // that is not present, use the pubDate element instead - if (($updatedOn = self::rssValue($channel, 'lastBuildDate')) == 'lastBuildDate not found') { - if (($updatedOn = self::rssValue($channel, 'pubDate')) == 'pubDate not found') { - $updatedOn = null; - } - } - - $feed = new static(); - $feed->title = self::rssValue($channel, 'title'); - $feed->url = $url; - $feed->updatedOn = Data::formatDate($updatedOn); - foreach ($channel->getElementsByTagName('item') as $item) $feed->items[] = FeedItem::fromRSS($item); - - return ['ok' => $feed]; - } - - /** - * Get an attribute value from a DOM node - * - * @param DOMNode $node The node with an attribute value to obtain - * @param string $name The name of the attribute whose value should be obtained - * @return string The attribute value if it exists, an empty string if not - */ - private static function attrValue(DOMNode $node, string $name): string { - return ($node->hasAttributes() ? $node->attributes->getNamedItem($name)?->value : null) ?? ''; - - } - /** - * Get the value of a child element by its tag name for an Atom feed - * - * (Atom feeds can have type attributes on nearly any value. For our purposes, types "text" and "html" will work as - * regular string values; for "xhtml", though, we will need to get the `
` and extract its contents instead.) - * - * @param DOMNode $element The parent element - * @param string $tagName The name of the tag whose value should be obtained - * @return string The value of the element (or "[element] not found" if that element does not exist) - */ - public static function atomValue(DOMNode $element, string $tagName): string { - $tags = $element->getElementsByTagName($tagName); - if ($tags->length == 0) return "$tagName not found"; - $tag = $tags->item(0); - if (!($tag instanceof DOMElement)) return $tag->textContent; - if (self::attrValue($tag, 'type') == 'xhtml') { - $div = $tag->getElementsByTagNameNS(Feed::XHTML_NS, 'div'); - if ($div->length == 0) return "-- invalid XHTML content --"; - return $div->item(0)->textContent; - } - return $tag->textContent; - } - - /** - * Extract items from an Atom feed - * - * @param DOMDocument $xml The XML received from the feed - * @param string $url The actual URL for the feed - * @return array|Feed[] ['ok' => feed] - */ - private static function fromAtom(DOMDocument $xml, string $url): array { - $root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0); - if (($updatedOn = self::atomValue($root, 'updated')) == 'pubDate not found') $updatedOn = null; - - $feed = new Feed(); - $feed->title = self::atomValue($root, 'title'); - $feed->url = $url; - $feed->updatedOn = Data::formatDate($updatedOn); - foreach ($root->getElementsByTagName('entry') as $entry) $feed->items[] = FeedItem::fromAtom($entry); - - return ['ok' => $feed]; - } - - /** - * Retrieve a document (http/https) - * - * @param string $url The URL of the document to retrieve - * @return array ['content' => document content, 'error' => error message, 'code' => HTTP response code, - * 'url' => effective URL] - */ - private static function retrieveDocument(string $url): array { - $docReq = curl_init($url); - curl_setopt($docReq, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($docReq, CURLOPT_RETURNTRANSFER, true); - curl_setopt($docReq, 
CURLOPT_CONNECTTIMEOUT, 5); - curl_setopt($docReq, CURLOPT_TIMEOUT, 15); - curl_setopt($docReq, CURLOPT_USERAGENT, self::USER_AGENT); - - $result = [ - 'content' => curl_exec($docReq), - 'error' => curl_error($docReq), - 'code' => curl_getinfo($docReq, CURLINFO_RESPONSE_CODE), - 'url' => curl_getinfo($docReq, CURLINFO_EFFECTIVE_URL) - ]; - - curl_close($docReq); - return $result; - } - - /** - * Derive a feed URL from an HTML document - * - * @param string $content The HTML document content from which to derive a feed URL - * @return array|string[] ['ok' => feed URL] if successful, ['error' => message] if not - */ - private static function deriveFeedFromHTML(string $content): array { - $html = new DOMDocument(); - $html->loadHTML(substr($content, 0, strpos($content, '') + 7)); - $headTags = $html->getElementsByTagName('head'); - if ($headTags->length < 1) return ['error' => 'Cannot find feed at this URL']; - $head = $headTags->item(0); - foreach ($head->getElementsByTagName('link') as $link) { - if (self::attrValue($link, 'rel') == 'alternate') { - $type = self::attrValue($link, 'type'); - if ($type == 'application/rss+xml' || $type == 'application/atom+xml') { - return ['ok' => self::attrValue($link, 'href')]; - } - } - } - return ['error' => 'Cannot find feed at this URL']; - } - - /** - * Retrieve the feed - * - * @param string $url The URL of the feed to retrieve - * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not - */ - public static function retrieveFeed(string $url): array { - $doc = self::retrieveDocument($url); - - if ($doc['error'] != '') return ['error' => $doc['error']]; - if ($doc['code'] != 200) { - return ['error' => "Prospective feed URL $url returned HTTP Code {$doc['code']}: {$doc['content']}"]; - } - - $start = strtolower(strlen($doc['content']) >= 9 ? 
substr($doc['content'], 0, 9) : $doc['content']); - if ($start == ' $derivedURL['error']]; - $feedURL = $derivedURL['ok']; - if (!str_starts_with($feedURL, 'http')) { - // Relative URL; feed should be retrieved in the context of the original URL - $original = parse_url($url); - $port = key_exists('port', $original) ? ":{$original['port']}" : ''; - $feedURL = $original['scheme'] . '://' . $original['host'] . $port . $feedURL; - } - $doc = self::retrieveDocument($feedURL); - } - - $parsed = self::parseFeed($doc['content']); - if (key_exists('error', $parsed)) return ['error' => $parsed['error']]; - - $extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0 - ? self::fromAtom(...) : self::fromRSS(...); - return $extract($parsed['ok'], $doc['url']); + return $it; } /** * Update a feed's items * * @param int $feedId The ID of the feed to which these items belong - * @param Feed $feed The extracted Atom or RSS feed items + * @param ParsedFeed $parsed The extracted Atom or RSS feed items * @param DateTimeInterface $lastChecked When this feed was last checked (only new items will be added) * @return array ['ok' => true] if successful, ['error' => message] if not */ - public static function updateItems(int $feedId, Feed $feed, DateTimeInterface $lastChecked, SQLite3 $db): array { + public static function updateItems(int $feedId, ParsedFeed $parsed, DateTimeInterface $lastChecked, + SQLite3 $db): array + { $results = array_map(function ($item) use ($db, $feedId) { try { @@ -305,7 +103,7 @@ class Feed { } catch (DocumentException $ex) { return ['error' => "$ex"]; } - }, array_filter($feed->items, + }, array_filter($parsed->items, fn($it) => date_create_immutable($it->updatedOn ?? $it->publishedOn) >= $lastChecked)); $errors = array_map(fn($it) => $it['error'], array_filter($results, fn($it) => array_key_exists('error', $it))); return sizeof($errors) > 0 ? 
['error' => implode("\n", $errors)] : ['ok' => true]; @@ -318,7 +116,8 @@ class Feed { * @param SQLite3 $db The database connection on which items should be purged * @return array|string[]|true[] ['ok' => true] if purging was successful, ['error' => message] if not */ - private static function purgeItems(int $feedId, SQLite3 $db): array { + private static function purgeItems(int $feedId, SQLite3 $db): array + { if (!array_search(PURGE_TYPE, [self::PURGE_READ, self::PURGE_BY_DAYS, self::PURGE_BY_COUNT])) { return ['error' => 'Unrecognized purge type ' . PURGE_TYPE]; } @@ -362,12 +161,12 @@ class Feed { * @return array|string[]|true[] ['ok' => true] if successful, ['error' => message] if not */ public static function refreshFeed(int $feedId, string $url, SQLite3 $db): array { - $feedRetrieval = self::retrieveFeed($url); + $feedRetrieval = ParsedFeed::retrieve($url); if (key_exists('error', $feedRetrieval)) return $feedRetrieval; $feed = $feedRetrieval['ok']; try { - $feedDoc = Find::byId(Table::FEED, $feedId, FeedDocument::class); + $feedDoc = Find::byId(Table::FEED, $feedId, self::class); if (!$feedDoc) return ['error' => 'Could not derive date last checked for feed']; $lastChecked = date_create_immutable($feedDoc->checked_on ?? 
WWW_EPOCH); @@ -395,7 +194,7 @@ class Feed { * @return array ['ok' => feedId] if successful, ['error' => message] if not */ public static function add(string $url, SQLite3 $db): array { - $feedExtract = self::retrieveFeed($url); + $feedExtract = ParsedFeed::retrieve($url); if (key_exists('error', $feedExtract)) return $feedExtract; $feed = $feedExtract['ok']; @@ -406,9 +205,9 @@ class Feed { return ['error' => "Already subscribed to feed $feed->url"]; } - Document::insert(Table::FEED, FeedDocument::fromParsed($feed), $db); + Document::insert(Table::FEED, self::fromParsed($feed), $db); - $doc = Find::firstByFields(Table::FEED, $fields, FeedDocument::class); + $doc = Find::firstByFields(Table::FEED, $fields, self::class); if (!$doc) return ['error' => 'Could not retrieve inserted feed']; } catch (DocumentException $ex) { return ['error' => "$ex"]; @@ -423,12 +222,12 @@ class Feed { /** * Update an RSS feed * - * @param FeedDocument $existing The existing RSS feed + * @param Feed $existing The existing feed * @param string $url The URL with which the existing feed should be modified * @param SQLite3 $db The database connection on which to execute the update * @return bool[]|string[] [ 'ok' => true ] if successful, [ 'error' => message ] if not */ - public static function update(FeedDocument $existing, string $url, SQLite3 $db): array { + public static function update(Feed $existing, string $url, SQLite3 $db): array { try { Patch::byFields(Table::FEED, [Field::EQ(Configuration::idField(), $existing->id), Field::EQ('user_id', $_SESSION[Key::USER_ID])], @@ -444,14 +243,14 @@ class Feed { * Retrieve all feeds, optionally for a specific user * * @param int $user The ID of the user whose feeds should be retrieved (optional, defaults to all feeds) - * @return DocumentList A list of feeds + * @return DocumentList A list of feeds * @throws DocumentException If any is encountered */ public static function retrieveAll(int $user = 0): DocumentList { return $user == 0 - ? 
Find::all(Table::FEED, FeedDocument::class) - : Find::byFields(Table::FEED, [Field::EQ('user_id', $user)], FeedDocument::class); + ? Find::all(Table::FEED, self::class) + : Find::byFields(Table::FEED, [Field::EQ('user_id', $user)], self::class); } /** @@ -482,11 +281,11 @@ class Feed { * Retrieve a feed by its ID for the current user * * @param int $feedId The ID of the feed to retrieve - * @return FeedDocument|false The data for the feed if found, false if not found + * @return static|false The data for the feed if found, false if not found * @throws DocumentException If any is encountered */ - public static function retrieveById(int $feedId): FeedDocument|false { - $doc = Find::byId(Table::FEED, $feedId, FeedDocument::class); + public static function retrieveById(int $feedId): static|false { + $doc = Find::byId(Table::FEED, $feedId, self::class); return $doc && $doc->user_id == $_SESSION[Key::USER_ID] ? $doc : false; } } diff --git a/src/lib/ParsedFeed.php b/src/lib/ParsedFeed.php new file mode 100644 index 0000000..ee57981 --- /dev/null +++ b/src/lib/ParsedFeed.php @@ -0,0 +1,252 @@ +` tag that allows HTML content in a feed */ + public const string CONTENT_NS = 'http://purl.org/rss/1.0/modules/content/'; + + /** @var string The XML namespace for XHTML */ + public const string XHTML_NS = 'http://www.w3.org/1999/xhtml'; + + /** @var string The user agent for Feed Reader Central's refresh requests */ + private const string USER_AGENT = + 'FeedReaderCentral/' . FRC_VERSION . 
' +https://bitbadger.solutions/open-source/feed-reader-central'; + + /** + * When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them + * + * @param int $errno The error level encountered + * @param string $errstr The text of the error encountered + * @return bool False, to delegate to the next error handler in the chain + * @throws DOMException If the error is a warning + */ + private static function xmlParseError(int $errno, string $errstr): bool { + if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXML()') > 0) { + throw new DOMException($errstr, $errno); + } + return false; + } + + /** + * Parse a feed into an XML tree + * + * @param string $content The feed's XML (RSS or Atom) content + * @return array|DOMDocument[]|string[] ['ok' => feed] if successful, ['error' => message] if not + */ + public static function parseFeed(string $content): array { + set_error_handler(self::xmlParseError(...)); + try { + $feed = new DOMDocument(); + $feed->loadXML($content); + return ['ok' => $feed]; + } catch (DOMException $ex) { + return ['error' => $ex->getMessage()]; + } finally { + restore_error_handler(); + } + } + + /** + * Get the value of a child element by its tag name for an RSS feed + * + * @param DOMNode $element The parent element + * @param string $tagName The name of the tag whose value should be obtained + * @return string The value of the element (or "[element] not found" if that element does not exist) + */ + public static function rssValue(DOMNode $element, string $tagName): string { + $tags = $element->getElementsByTagName($tagName); + return $tags->length == 0 ? 
"$tagName not found" : $tags->item(0)->textContent; + } + + /** + * Extract items from an RSS feed + * + * @param DOMDocument $xml The XML received from the feed + * @param string $url The actual URL for the feed + * @return array|ParsedFeed[]|string[] ['ok' => feed] if successful, ['error' => message] if not + */ + private static function fromRSS(DOMDocument $xml, string $url): array { + $channel = $xml->getElementsByTagName('channel')->item(0); + if (!($channel instanceof DOMElement)) { + $type = $channel?->nodeType ?? -1; + return ['error' => "Channel element not found ($type)"]; + } + + // RSS 2.0 channels may provide a lastBuildDate, which contains the last time an item in the feed was updated; if + // that is not present, use the pubDate element instead + if (($updatedOn = self::rssValue($channel, 'lastBuildDate')) == 'lastBuildDate not found') { + if (($updatedOn = self::rssValue($channel, 'pubDate')) == 'pubDate not found') { + $updatedOn = null; + } + } + + $feed = new static(); + $feed->title = self::rssValue($channel, 'title'); + $feed->url = $url; + $feed->updatedOn = Data::formatDate($updatedOn); + foreach ($channel->getElementsByTagName('item') as $item) $feed->items[] = ParsedItem::fromRSS($item); + + return ['ok' => $feed]; + } + + /** + * Get an attribute value from a DOM node + * + * @param DOMNode $node The node with an attribute value to obtain + * @param string $name The name of the attribute whose value should be obtained + * @return string The attribute value if it exists, an empty string if not + */ + private static function attrValue(DOMNode $node, string $name): string { + return ($node->hasAttributes() ? $node->attributes->getNamedItem($name)?->value : null) ?? ''; + + } + /** + * Get the value of a child element by its tag name for an Atom feed + * + * (Atom feeds can have type attributes on nearly any value. For our purposes, types "text" and "html" will work as + * regular string values; for "xhtml", though, we will need to get the `
` and extract its contents instead.) + * + * @param DOMNode $element The parent element + * @param string $tagName The name of the tag whose value should be obtained + * @return string The value of the element (or "[element] not found" if that element does not exist) + */ + public static function atomValue(DOMNode $element, string $tagName): string { + $tags = $element->getElementsByTagName($tagName); + if ($tags->length == 0) return "$tagName not found"; + $tag = $tags->item(0); + if (!($tag instanceof DOMElement)) return $tag->textContent; + if (self::attrValue($tag, 'type') == 'xhtml') { + $div = $tag->getElementsByTagNameNS(self::XHTML_NS, 'div'); + if ($div->length == 0) return "-- invalid XHTML content --"; + return $div->item(0)->textContent; + } + return $tag->textContent; + } + + /** + * Extract items from an Atom feed + * + * @param DOMDocument $xml The XML received from the feed + * @param string $url The actual URL for the feed + * @return array|ParsedFeed[] ['ok' => feed] + */ + private static function fromAtom(DOMDocument $xml, string $url): array { + $root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0); + if (($updatedOn = self::atomValue($root, 'updated')) == 'updated not found') $updatedOn = null; + + $feed = new static(); + $feed->title = self::atomValue($root, 'title'); + $feed->url = $url; + $feed->updatedOn = Data::formatDate($updatedOn); + foreach ($root->getElementsByTagName('entry') as $entry) $feed->items[] = ParsedItem::fromAtom($entry); + + return ['ok' => $feed]; + } + + /** + * Retrieve a document (http/https) + * + * @param string $url The URL of the document to retrieve + * @return array ['content' => document content, 'error' => error message, 'code' => HTTP response code, + * 'url' => effective URL] + */ + private static function retrieveDocument(string $url): array { + $docReq = curl_init($url); + curl_setopt($docReq, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($docReq, CURLOPT_RETURNTRANSFER, true); + 
curl_setopt($docReq, CURLOPT_CONNECTTIMEOUT, 5); + curl_setopt($docReq, CURLOPT_TIMEOUT, 15); + curl_setopt($docReq, CURLOPT_USERAGENT, self::USER_AGENT); + + $result = [ + 'content' => curl_exec($docReq), + 'error' => curl_error($docReq), + 'code' => curl_getinfo($docReq, CURLINFO_RESPONSE_CODE), + 'url' => curl_getinfo($docReq, CURLINFO_EFFECTIVE_URL) + ]; + + curl_close($docReq); + return $result; + } + + /** + * Derive a feed URL from an HTML document + * + * @param string $content The HTML document content from which to derive a feed URL + * @return array|string[] ['ok' => feed URL] if successful, ['error' => message] if not + */ + private static function deriveFeedFromHTML(string $content): array { + $html = new DOMDocument(); + $html->loadHTML(substr($content, 0, strpos($content, '') + 7)); + $headTags = $html->getElementsByTagName('head'); + if ($headTags->length < 1) return ['error' => 'Cannot find feed at this URL']; + $head = $headTags->item(0); + foreach ($head->getElementsByTagName('link') as $link) { + if (self::attrValue($link, 'rel') == 'alternate') { + $type = self::attrValue($link, 'type'); + if ($type == 'application/rss+xml' || $type == 'application/atom+xml') { + return ['ok' => self::attrValue($link, 'href')]; + } + } + } + return ['error' => 'Cannot find feed at this URL']; + } + + /** + * Retrieve the feed + * + * @param string $url The URL of the feed to retrieve + * @return array|ParsedFeed[]|string[] ['ok' => feed] if successful, ['error' => message] if not + */ + public static function retrieve(string $url): array { + $doc = self::retrieveDocument($url); + + if ($doc['error'] != '') return ['error' => $doc['error']]; + if ($doc['code'] != 200) { + return ['error' => "Prospective feed URL $url returned HTTP Code {$doc['code']}: {$doc['content']}"]; + } + + $start = strtolower(strlen($doc['content']) >= 9 ? 
substr($doc['content'], 0, 9) : $doc['content']); + if ($start == ' $derivedURL['error']]; + $feedURL = $derivedURL['ok']; + if (!str_starts_with($feedURL, 'http')) { + // Relative URL; feed should be retrieved in the context of the original URL + $original = parse_url($url); + $port = key_exists('port', $original) ? ":{$original['port']}" : ''; + $feedURL = $original['scheme'] . '://' . $original['host'] . $port . $feedURL; + } + $doc = self::retrieveDocument($feedURL); + } + + $parsed = self::parseFeed($doc['content']); + if (key_exists('error', $parsed)) return ['error' => $parsed['error']]; + + $extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0 + ? self::fromAtom(...) : self::fromRSS(...); + return $extract($parsed['ok'], $doc['url']); + } +} diff --git a/src/lib/FeedItem.php b/src/lib/ParsedItem.php similarity index 71% rename from src/lib/FeedItem.php rename to src/lib/ParsedItem.php index 1b2cd95..09ad6a6 100644 --- a/src/lib/FeedItem.php +++ b/src/lib/ParsedItem.php @@ -6,8 +6,8 @@ use DOMNode; /** * Information for a feed item */ -class FeedItem { - +class ParsedItem +{ /** @var string The title of the feed item */ public string $title = ''; @@ -50,7 +50,7 @@ class FeedItem { */ public static function fromAtom(DOMNode $node): static { - $guid = Feed::atomValue($node, 'id'); + $guid = ParsedFeed::atomValue($node, 'id'); $link = ''; foreach ($node->getElementsByTagName('link') as $linkElt) { if ($linkElt->hasAttributes()) { @@ -65,11 +65,11 @@ class FeedItem { $item = new static(); $item->guid = $guid; - $item->title = Feed::atomValue($node, 'title'); + $item->title = ParsedFeed::atomValue($node, 'title'); $item->link = $link; - $item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published')); - $item->updatedOn = Data::formatDate(Feed::atomValue($node, 'updated')); - $item->content = Feed::atomValue($node, 'content'); + $item->publishedOn = Data::formatDate(ParsedFeed::atomValue($node, 'published')); + 
$item->updatedOn = Data::formatDate(ParsedFeed::atomValue($node, 'updated')); + $item->content = ParsedFeed::atomValue($node, 'content'); return $item; } @@ -82,19 +82,19 @@ class FeedItem { */ public static function fromRSS(DOMNode $node): static { - $itemGuid = Feed::rssValue($node, 'guid'); - $updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated'); - $encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded'); + $itemGuid = ParsedFeed::rssValue($node, 'guid'); + $updNodes = $node->getElementsByTagNameNS(ParsedFeed::ATOM_NS, 'updated'); + $encNodes = $node->getElementsByTagNameNS(ParsedFeed::CONTENT_NS, 'encoded'); $item = new static(); - $item->guid = $itemGuid == 'guid not found' ? Feed::rssValue($node, 'link') : $itemGuid; - $item->title = Feed::rssValue($node, 'title'); - $item->link = Feed::rssValue($node, 'link'); - $item->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate')); + $item->guid = $itemGuid == 'guid not found' ? ParsedFeed::rssValue($node, 'link') : $itemGuid; + $item->title = ParsedFeed::rssValue($node, 'title'); + $item->link = ParsedFeed::rssValue($node, 'link'); + $item->publishedOn = Data::formatDate(ParsedFeed::rssValue($node, 'pubDate')); $item->updatedOn = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null); $item->content = $encNodes->length > 0 ? $encNodes->item(0)->textContent - : Feed::rssValue($node, 'description'); + : ParsedFeed::rssValue($node, 'description'); return $item; } diff --git a/src/public/bookmark.php b/src/public/bookmark.php index 13e11ce..b0ec25f 100644 --- a/src/public/bookmark.php +++ b/src/public/bookmark.php @@ -6,8 +6,11 @@ * This will display a button which will either add or remove a bookmark for a given item. 
*/ +use BitBadger\Documents\DocumentException; +use BitBadger\Documents\SQLite\Find; use BitBadger\Documents\SQLite\Patch; use FeedReaderCentral\Data; +use FeedReaderCentral\Domain\Item; use FeedReaderCentral\Domain\Table; use FeedReaderCentral\Key; use FeedReaderCentral\Security; @@ -36,21 +39,18 @@ if (key_exists('action', $_GET)) { $flag = 0; } if (isset($flag)) { - Patch::byId(Table::ITEM, $id, ['is_bookmarked' => $flag], $db); -// $update = $db->prepare('UPDATE item SET is_bookmarked = :flag WHERE id = :id'); -// $update->bindValue(':id', $id); -// $update->bindValue(':flag', $flag); -// if (!$update->execute()) die(Data::error($db)['error']); + try { + Patch::byId(Table::ITEM, $id, ['is_bookmarked' => $flag], $db); + } catch (DocumentException $ex) { + add_error("$ex"); + } } } -$bookQuery = $db->prepare('SELECT id, is_bookmarked FROM item WHERE id = :id'); -$bookQuery->bindValue(':id', $id); -$bookResult = $bookQuery->execute(); -$bookmark = $bookResult ? $bookResult->fetchArray(SQLITE3_ASSOC) : ['id' => $id, 'is_bookmarked' => 0]; +if (!$item = Find::byId(Table::ITEM, $id, Item::class)) not_found(); -$action = $bookmark['is_bookmarked'] ? 'remove' : 'add'; -$icon = $bookmark['is_bookmarked'] ? 'added' : 'add'; ?> +$action = $item->isBookmarked() ? 'remove' : 'add'; +$icon = $item->isBookmarked() ? 'added' : 'add'; ?>