` tag
     *
     * @param DOMNode $node The XML node from which a feed item should be constructed
     * @return FeedItem A feed item constructed from the given node
     */
    public static function fromAtom(DOMNode $node): FeedItem {
        $guid = Feed::atomValue($node, 'id');

        // Find the entry's "alternate" link. Per RFC 4287 (section 4.2.7.2), a <link> with no "rel" attribute must
        // be treated as rel="alternate"; links with no "href" cannot be used and are skipped.
        $link = '';
        foreach ($node->getElementsByTagName('link') as $linkElt) {
            if (!($linkElt instanceof DOMElement) || !$linkElt->hasAttribute('href')) continue;
            $rel = $linkElt->getAttribute('rel');
            if ($rel === '' || $rel === 'alternate') {
                $link = $linkElt->getAttribute('href');
                break;
            }
        }
        // Many feeds use the permalink as the entry ID; fall back to it if no usable link was found
        if ($link == '' && str_starts_with($guid, 'http')) $link = $guid;

        $item = new FeedItem();
        $item->guid        = $guid;
        $item->title       = Feed::atomValue($node, 'title');
        $item->link        = $link;
        $item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published'));
        $item->updatedOn   = Data::formatDate(Feed::atomValue($node, 'updated'));
        $item->content     = Feed::atomValue($node, 'content');

        return $item;
    }

    /**
     * Construct a feed item from an RSS feed's `<item>` tag
     *
     * @param DOMNode $node The XML node from which a feed item should be constructed
     * @return FeedItem A feed item constructed from the given node
     */
    public static function fromRSS(DOMNode $node): FeedItem {
        $itemGuid = Feed::rssValue($node, 'guid');
        $updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated');
        $encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded');

        $item = new FeedItem();
        // Items without a <guid> are identified by their link instead
        $item->guid        = $itemGuid == 'guid not found' ? Feed::rssValue($node, 'link') : $itemGuid;
        $item->title       = Feed::rssValue($node, 'title');
        $item->link        = Feed::rssValue($node, 'link');
        $item->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate'));
        $item->updatedOn   = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null);
        // Prefer the content-namespace <encoded> element when present; otherwise use the <description>
        $item->content     = $encNodes->length > 0
            ? $encNodes->item(0)->textContent
            : Feed::rssValue($node, 'description');

        return $item;
    }
}

/**
 * Feed retrieval, parsing, and manipulation
 */
class Feed {

    /** @var string The URL for the feed */
    public string $url = '';

    /** @var string The title of the feed */
    public string $title = '';

    /** @var ?string When the feed was last updated */
    public ?string $updatedOn = null;

    /** @var FeedItem[] The items contained in the feed */
    public array $items = [];

    /** @var string The XML namespace for Atom feeds */
    public const string ATOM_NS = 'http://www.w3.org/2005/Atom';

    /** @var string The XML namespace for the `<content:encoded>` tag that allows HTML content in a feed */
    public const string CONTENT_NS = 'http://purl.org/rss/1.0/modules/content/';

    /** @var string The XML namespace for XHTML */
    public const string XHTML_NS = 'http://www.w3.org/1999/xhtml';

    /**
     * When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them
     *
     * @param int $errno The error level encountered
     * @param string $errstr The text of the error encountered
     * @return bool False, to delegate to the next error handler in the chain
     * @throws DOMException If the error is a warning raised by DOMDocument::loadXML()
     */
    private static function xmlParseError(int $errno, string $errstr): bool {
        // PHP prefixes these warnings with the method name ("DOMDocument::loadXML(): ..."); match it without regard
        // to case so the check does not depend on the exact capitalization of that prefix
        if ($errno == E_WARNING && stripos($errstr, 'DOMDocument::loadXML()') !== false) {
            throw new DOMException($errstr, $errno);
        }
        return false;
    }

    /**
     * Parse a feed into an XML tree
     *
     * @param string $content The feed's RSS content
     * @return array|DOMDocument[]|string[] ['ok' => feed] if successful, ['error' => message] if not
     */
    public static function parseFeed(string $content): array {
        // loadXML() raises a ValueError (not a DOMException) for an empty string, so reject that case up front
        if (trim($content) === '') return ['error' => 'Feed content is empty'];

        set_error_handler(self::xmlParseError(...));
        try {
            $feed = new DOMDocument();
            // loadXML() also signals failure through its return value; do not hand back an unusable document
            if (!$feed->loadXML($content)) return ['error' => 'Feed content could not be parsed as XML'];
            return ['ok' => $feed];
        } catch (DOMException $ex) {
            return ['error' => $ex->getMessage()];
        } finally {
            restore_error_handler();
        }
    }

    /**
     * Get the value of a child element by its tag name for an RSS feed
     *
     * @param DOMNode $element The parent element
     * @param string $tagName The name of the tag whose value should be obtained
     * @return string The value of the element (or "[element] not found" if that element does not exist)
     */
    public static function rssValue(DOMNode $element, string $tagName): string {
        $tags = $element->getElementsByTagName($tagName);
        return $tags->length == 0 ? "$tagName not found" : $tags->item(0)->textContent;
    }

    /**
     * Extract items from an RSS feed
     *
     * @param DOMDocument $xml The XML received from the feed
     * @param string $url The actual URL for the feed
     * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
     */
    private static function fromRSS(DOMDocument $xml, string $url): array {
        $channel = $xml->getElementsByTagName('channel')->item(0);
        if (!($channel instanceof DOMElement)) {
            return ['error' => 'Channel element not found'];
        }

        // An RSS channel may provide a lastBuildDate, which contains the last time the feed's content changed; if
        // that is not present, use the pubDate element instead
        $updatedOn = self::rssValue($channel, 'lastBuildDate');
        if ($updatedOn == 'lastBuildDate not found') {
            $updatedOn = self::rssValue($channel, 'pubDate');
            if ($updatedOn == 'pubDate not found') $updatedOn = null;
        }

        $feed = new Feed();
        $feed->title     = self::rssValue($channel, 'title');
        $feed->url       = $url;
        $feed->updatedOn = Data::formatDate($updatedOn);
        foreach ($channel->getElementsByTagName('item') as $item) $feed->items[] = FeedItem::fromRSS($item);

        return ['ok' => $feed];
    }

    /**
     * Get the value of a child element by its tag name for an Atom feed
     *
     * (Atom feeds can have type attributes on nearly any value. For our purposes, types "text" and "html" will work as
     * regular string values; for "xhtml", though, we will need to get the `<div>` and extract its contents instead.)
     *
     * @param DOMNode $element The parent element
     * @param string $tagName The name of the tag whose value should be obtained
     * @return string The value of the element (or "[element] not found" if that element does not exist)
     */
    public static function atomValue(DOMNode $element, string $tagName): string {
        $tags = $element->getElementsByTagName($tagName);
        if ($tags->length == 0) return "$tagName not found";

        $tag = $tags->item(0);
        if (!($tag instanceof DOMElement)) return $tag->textContent;

        // Compare the attribute's string value; comparing the DOMAttr node itself to a string never matches
        if ($tag->getAttribute('type') === 'xhtml') {
            $div = $tag->getElementsByTagNameNS(Feed::XHTML_NS, 'div');
            if ($div->length == 0) return "-- invalid XHTML content --";
            return $div->item(0)->textContent;
        }
        return $tag->textContent;
    }

    /**
     * Extract items from an Atom feed
     *
     * @param DOMDocument $xml The XML received from the feed
     * @param string $url The actual URL for the feed
     * @return array|Feed[] ['ok' => feed]
     */
    private static function fromAtom(DOMDocument $xml, string $url): array {
        $root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);

        // atomValue() reports a missing element as "[tag] not found"; translate that to "no date"
        $updatedOn = self::atomValue($root, 'updated');
        if ($updatedOn == 'updated not found') $updatedOn = null;

        $feed = new Feed();
        $feed->title     = self::atomValue($root, 'title');
        $feed->url       = $url;
        $feed->updatedOn = Data::formatDate($updatedOn);
        foreach ($root->getElementsByTagName('entry') as $entry) $feed->items[] = FeedItem::fromAtom($entry);

        return ['ok' => $feed];
    }

    /**
     * Retrieve the feed
     *
     * @param string $url The URL of the feed to retrieve
     * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
     */
    public static function retrieveFeed(string $url): array {
        $feedReq = curl_init($url);
        curl_setopt($feedReq, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($feedReq, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($feedReq, CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($feedReq, CURLOPT_TIMEOUT, 15);

        $feedContent = curl_exec($feedReq);

        $result = [];
        $error  = curl_error($feedReq);
        $code   = curl_getinfo($feedReq, CURLINFO_RESPONSE_CODE);
        if ($error) {
            $result['error'] = $error;
        } elseif ($code == 200) {
            $parsed = self::parseFeed($feedContent);
            if (array_key_exists('error', $parsed)) {
                $result['error'] = $parsed['error'];
            } else {
                // A document containing an Atom-namespaced <feed> element is Atom; anything else is handled as RSS
                $extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0
                    ? self::fromAtom(...)
                    : self::fromRSS(...);
                // Redirects are followed, so record the URL that actually served the feed
                $result = $extract($parsed['ok'], curl_getinfo($feedReq, CURLINFO_EFFECTIVE_URL));
            }
        } else {
            $result['error'] = "Prospective feed URL $url returned HTTP Code $code: $feedContent";
        }

        curl_close($feedReq);
        return $result;
    }

    /**
     * Update a feed item
     *
     * @param int $itemId The ID of the item to be updated
     * @param FeedItem $item The item to be updated
     * @param SQLite3 $db A database connection to use for the update
     */
    private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): void {
        // An updated item is marked unread again (is_read = 0)
        $query = $db->prepare(<<<'SQL'
            UPDATE item
               SET title        = :title,
                   published_on = :published,
                   updated_on   = :updated,
                   content      = :content,
                   is_read      = 0
             WHERE id = :id
            SQL);
        $query->bindValue(':title', $item->title);
        $query->bindValue(':published', $item->publishedOn);
        $query->bindValue(':updated', $item->updatedOn);
        $query->bindValue(':content', $item->content);
        $query->bindValue(':id', $itemId);
        $query->execute();
    }

    /**
     * Add a feed item
     *
     * @param int $feedId The ID of the feed to which the item should be added
     * @param FeedItem $item The item to be added
     * @param SQLite3 $db A database connection to use for the addition
     */
    private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): void {
        $query = $db->prepare(<<<'SQL'
            INSERT INTO item (
                feed_id, item_guid, item_link, title, published_on, updated_on, content
            ) VALUES (
                :feed, :guid, :link, :title, :published, :updated, :content
            )
            SQL);
        $query->bindValue(':feed', $feedId);
        $query->bindValue(':guid', $item->guid);
        $query->bindValue(':link', $item->link);
        $query->bindValue(':title', $item->title);
        $query->bindValue(':published', $item->publishedOn);
        $query->bindValue(':updated', $item->updatedOn);
        $query->bindValue(':content', $item->content);
        $query->execute();
    }

    /**
     * Update a feed's items
     *
     * @param int $feedId The ID of the feed to which these items belong
     * @param Feed $feed The extracted Atom or RSS feed items
     * @param SQLite3 $db A database connection to use for the update
     * @return array ['ok' => true] if successful, ['error' => message] if not
     */
    public static function updateItems(int $feedId, Feed $feed, SQLite3 $db): array {
        try {
            foreach ($feed->items as $item) {
                $existsQuery = $db->prepare(
                    'SELECT id, published_on, updated_on FROM item WHERE feed_id = :feed AND item_guid = :guid');
                $existsQuery->bindValue(':feed', $feedId);
                $existsQuery->bindValue(':guid', $item->guid);
                $exists = $existsQuery->execute();
                if (!$exists) throw new Exception($db->lastErrorMsg());

                $existing = $exists->fetchArray(SQLITE3_ASSOC);
                if (!$existing) {
                    self::addItem($feedId, $item, $db);
                } elseif ($existing['published_on'] != $item->publishedOn
                        || ($existing['updated_on'] ?? '') != ($item->updatedOn ?? '')) {
                    // Only rewrite (and mark unread) items whose published or updated date has changed
                    self::updateItem($existing['id'], $item, $db);
                }
            }
        } catch (Exception $ex) {
            return ['error' => $ex->getMessage()];
        }
        return ['ok' => true];
    }

    /**
     * Refresh a feed
     *
     * @param string $url The URL of the feed to be refreshed
     * @param SQLite3 $db A database connection to use to refresh the feed
     * @return array|string[]|true[] ['ok' => true] if successful, ['error' => message] if not
     */
    private static function refreshFeed(string $url, SQLite3 $db): array {
        $feedQuery = $db->prepare('SELECT id FROM feed WHERE url = :url AND user_id = :user');
        $feedQuery->bindValue(':url', $url);
        $feedQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
        $feedResult = $feedQuery->execute();
        // fetchArray() returns false when no row matches; treat that the same as a failed query
        $feedRow = $feedResult ? $feedResult->fetchArray(SQLITE3_NUM) : false;
        if (!$feedRow) return ['error' => "No feed for URL $url found"];
        $feedId = $feedRow[0];

        $feedExtract = self::retrieveFeed($url);
        if (array_key_exists('error', $feedExtract)) return $feedExtract;

        $feed = $feedExtract['ok'];
        $itemUpdate = self::updateItems($feedId, $feed, $db);
        if (array_key_exists('error', $itemUpdate)) return $itemUpdate;

        // If the request was redirected, store the URL that actually served the feed
        $urlUpdate  = $url == $feed->url ? '' : ', url = :url';
        $feedUpdate = $db->prepare(<<<SQL
            UPDATE feed
               SET title      = :title,
                   updated_on = :updated,
                   checked_on = :checked
                   $urlUpdate
             WHERE id = :id
            SQL);
        $feedUpdate->bindValue(':title', $feed->title);
        $feedUpdate->bindValue(':updated', $feed->updatedOn);
        $feedUpdate->bindValue(':checked', Data::formatDate('now'));
        $feedUpdate->bindValue(':id', $feedId);
        if ($urlUpdate != '') $feedUpdate->bindValue(':url', $feed->url);
        $feedUpdate->execute();

        return ['ok' => true];
    }

    /**
     * Add an RSS feed
     *
     * @param string $url The URL of the RSS feed to add
     * @param SQLite3 $db A database connection to use to add the feed
     * @return array ['ok' => feedId] if successful, ['error' => message] if not
     */
    public static function add(string $url, SQLite3 $db): array {
        $feedExtract = self::retrieveFeed($url);
        if (array_key_exists('error', $feedExtract)) return $feedExtract;

        $feed = $feedExtract['ok'];
        $query = $db->prepare(<<<'SQL'
            INSERT INTO feed (user_id, url, title, updated_on, checked_on)
            VALUES (:user, :url, :title, :updated, :checked)
            SQL);
        $query->bindValue(':user', $_SESSION[Key::USER_ID]);
        $query->bindValue(':url', $feed->url);
        $query->bindValue(':title', $feed->title);
        $query->bindValue(':updated', $feed->updatedOn);
        $query->bindValue(':checked', Data::formatDate('now'));
        $result = $query->execute();

        $feedId = $result ? $db->lastInsertRowID() : -1;
        if ($feedId < 0) return ['error' => $db->lastErrorMsg()];

        $result = self::updateItems($feedId, $feed, $db);
        if (array_key_exists('error', $result)) return $result;

        return ['ok' => $feedId];
    }

    /**
     * Update an RSS feed
     *
     * @param array $existing The existing RSS feed
     * @param string $url The URL with which the existing feed should be modified
     * @param SQLite3 $db The database connection on which to execute the update
     * @return bool[]|string[] [ 'ok' => true ] if successful, [ 'error' => message ] if not
     */
    public static function update(array $existing, string $url, SQLite3 $db): array {
        $query = $db->prepare('UPDATE feed SET url = :url WHERE id = :id AND user_id = :user');
        $query->bindValue(':url', $url);
        $query->bindValue(':id', $existing['id']);
        $query->bindValue(':user', $_SESSION[Key::USER_ID]);
        $query->execute();

        return self::refreshFeed($url, $db);
    }

    /**
     * Refresh all feeds
     *
     * @param SQLite3 $db The database connection to use for refreshing feeds
     * @return array|true[] ['ok' => true] if successful, ['error' => message] if not (may have multiple error lines)
     */
    public static function refreshAll(SQLite3 $db): array {
        $query = $db->prepare('SELECT url FROM feed WHERE user_id = :user');
        $query->bindValue(':user', $_SESSION[Key::USER_ID]);
        $result = $query->execute();
        if (!$result) return ['error' => $db->lastErrorMsg()];

        // Read every URL before refreshing anything: refreshFeed() updates the feed table this query reads from.
        // A user with no feeds yields an empty list, which is a successful (if trivial) refresh.
        $urls = [];
        while ($row = $result->fetchArray(SQLITE3_NUM)) $urls[] = $row[0];

        $errors = [];
        foreach ($urls as $url) {
            $updateResult = self::refreshFeed($url, $db);
            if (array_key_exists('error', $updateResult)) $errors[] = $updateResult['error'];
        }
        return sizeof($errors) == 0 ? ['ok' => true] : ['error' => implode("\n", $errors)];
    }
}