` tag that allows HTML content in a feed */
public const string CONTENT_NS = 'http://purl.org/rss/1.0/modules/content/';
/** @var string The XML namespace for XHTML */
public const string XHTML_NS = 'http://www.w3.org/1999/xhtml';
/**
* When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them
*
* @param int $errno The error level encountered
* @param string $errstr The text of the error encountered
* @return bool False, to delegate to the next error handler in the chain
* @throws DOMException If the error is a warning
*/
private static function xmlParseError(int $errno, string $errstr): bool {
if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXML()') > 0) {
throw new DOMException($errstr, $errno);
}
return false;
}
/**
* Parse a feed into an XML tree
*
* @param string $content The feed's RSS content
* @return array|DOMDocument[]|string[] ['ok' => feed] if successful, ['error' => message] if not
*/
public static function parseFeed(string $content): array {
set_error_handler(self::xmlParseError(...));
try {
$feed = new DOMDocument();
$feed->loadXML($content);
return ['ok' => $feed];
} catch (DOMException $ex) {
return ['error' => $ex->getMessage()];
} finally {
restore_error_handler();
}
}
/**
* Get the value of a child element by its tag name for an RSS feed
*
* @param DOMNode $element The parent element
* @param string $tagName The name of the tag whose value should be obtained
* @return string The value of the element (or "[element] not found" if that element does not exist)
*/
public static function rssValue(DOMNode $element, string $tagName): string {
$tags = $element->getElementsByTagName($tagName);
return $tags->length == 0 ? "$tagName not found" : $tags->item(0)->textContent;
}
/**
* Extract items from an RSS feed
*
* @param DOMDocument $xml The XML received from the feed
* @param string $url The actual URL for the feed
* @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
*/
private static function fromRSS(DOMDocument $xml, string $url): array {
$channel = $xml->getElementsByTagName('channel')->item(0);
if (!($channel instanceof DOMElement)) {
return ['error' => "Channel element not found ($channel->nodeType)"];
}
// The Atom namespace provides a lastBuildDate, which contains the last time an item in the feed was updated; if
// that is not present, use the pubDate element instead
$updatedOn = self::rssValue($channel, 'lastBuildDate');
if ($updatedOn == 'lastBuildDate not found') {
$updatedOn = self::rssValue($channel, 'pubDate');
if ($updatedOn == 'pubDate not found') $updatedOn = null;
}
$feed = new Feed();
$feed->title = self::rssValue($channel, 'title');
$feed->url = $url;
$feed->updatedOn = Data::formatDate($updatedOn);
foreach ($channel->getElementsByTagName('item') as $item) $feed->items[] = FeedItem::fromRSS($item);
return ['ok' => $feed];
}
/**
* Get an attribute value from a DOM node
*
* @param DOMNode $node The node with an attribute value to obtain
* @param string $name The name of the attribute whose value should be obtained
* @return string The attribute value if it exists, an empty string if not
*/
private static function attrValue(DOMNode $node, string $name): string {
return ($node->hasAttributes() ? $node->attributes->getNamedItem($name)?->value : null) ?? '';
}
/**
* Get the value of a child element by its tag name for an Atom feed
*
* (Atom feeds can have type attributes on nearly any value. For our purposes, types "text" and "html" will work as
* regular string values; for "xhtml", though, we will need to get the `
` and extract its contents instead.)
*
* @param DOMNode $element The parent element
* @param string $tagName The name of the tag whose value should be obtained
* @return string The value of the element (or "[element] not found" if that element does not exist)
*/
public static function atomValue(DOMNode $element, string $tagName): string {
$tags = $element->getElementsByTagName($tagName);
if ($tags->length == 0) return "$tagName not found";
$tag = $tags->item(0);
if (!($tag instanceof DOMElement)) return $tag->textContent;
if (self::attrValue($tag, 'type') == 'xhtml') {
$div = $tag->getElementsByTagNameNS(Feed::XHTML_NS, 'div');
if ($div->length == 0) return "-- invalid XHTML content --";
return $div->item(0)->textContent;
}
return $tag->textContent;
}
/**
* Extract items from an Atom feed
*
* @param DOMDocument $xml The XML received from the feed
* @param string $url The actual URL for the feed
* @return array|Feed[] ['ok' => feed]
*/
private static function fromAtom(DOMDocument $xml, string $url): array {
$root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);
$updatedOn = self::atomValue($root, 'updated');
if ($updatedOn == 'pubDate not found') $updatedOn = null;
$feed = new Feed();
$feed->title = self::atomValue($root, 'title');
$feed->url = $url;
$feed->updatedOn = Data::formatDate($updatedOn);
foreach ($root->getElementsByTagName('entry') as $entry) $feed->items[] = FeedItem::fromAtom($entry);
return ['ok' => $feed];
}
/**
* Retrieve a document (http/https)
*
* @param string $url The URL of the document to retrieve
* @return array ['content' => document content, 'error' => error message, 'code' => HTTP response code,
* 'url' => effective URL]
*/
private static function retrieveDocument(string $url): array {
$docReq = curl_init($url);
curl_setopt($docReq, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($docReq, CURLOPT_RETURNTRANSFER, true);
curl_setopt($docReq, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($docReq, CURLOPT_TIMEOUT, 15);
$result = [
'content' => curl_exec($docReq),
'error' => curl_error($docReq),
'code' => curl_getinfo($docReq, CURLINFO_RESPONSE_CODE),
'url' => curl_getinfo($docReq, CURLINFO_EFFECTIVE_URL)
];
curl_close($docReq);
return $result;
}
/**
* Derive a feed URL from an HTML document
*
* @param string $content The HTML document content from which to derive a feed URL
* @return array|string[] ['ok' => feed URL] if successful, ['error' => message] if not
*/
private static function deriveFeedFromHTML(string $content): array {
$html = new DOMDocument();
$html->loadHTML(substr($content, 0, strpos($content, '') + 7));
$headTags = $html->getElementsByTagName('head');
if ($headTags->length < 1) return ['error' => 'Cannot find feed at this URL'];
$head = $headTags->item(0);
foreach ($head->getElementsByTagName('link') as $link) {
if (self::attrValue($link, 'rel') == 'alternate') {
$type = self::attrValue($link, 'type');
if ($type == 'application/rss+xml' || $type == 'application/atom+xml') {
return ['ok' => self::attrValue($link, 'href')];
}
}
}
return ['error' => 'Cannot find feed at this URL'];
}
/**
* Retrieve the feed
*
* @param string $url The URL of the feed to retrieve
* @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
*/
public static function retrieveFeed(string $url): array {
$doc = self::retrieveDocument($url);
if ($doc['error'] != '') return ['error' => $doc['error']];
if ($doc['code'] != 200) {
return ['error' => "Prospective feed URL $url returned HTTP Code {$doc['code']}: {$doc['content']}"];
}
$start = strtolower(strlen($doc['content']) >= 9 ? substr($doc['content'], 0, 9) : $doc['content']);
if ($start == ' $derivedURL['error']];
$feedURL = $derivedURL['ok'];
if (!str_starts_with($feedURL, 'http')) {
// Relative URL; feed should be retrieved in the context of the original URL
$original = parse_url($url);
$port = array_key_exists('port', $original) ? ":{$original['port']}" : '';
$feedURL = "{$original['scheme']}://{$original['host']}$port$feedURL";
}
$doc = self::retrieveDocument($feedURL);
}
$parsed = self::parseFeed($doc['content']);
if (array_key_exists('error', $parsed)) return ['error' => $parsed['error']];
$extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0
? self::fromAtom(...) : self::fromRSS(...);
return $extract($parsed['ok'], $doc['url']);
}
/**
* Update a feed item
*
* @param int $itemId The ID of the item to be updated
* @param FeedItem $item The item to be updated
* @param SQLite3 $db A database connection to use for the update
*/
private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): void {
$query = $db->prepare(<<<'SQL'
UPDATE item
SET title = :title,
published_on = :published,
updated_on = :updated,
content = :content,
is_read = 0
WHERE id = :id
SQL);
$query->bindValue(':title', $item->title);
$query->bindValue(':published', $item->publishedOn);
$query->bindValue(':updated', $item->updatedOn);
$query->bindValue(':content', $item->content);
$query->bindValue(':id', $itemId);
$query->execute();
}
/**
* Add a feed item
*
* @param int $feedId The ID of the feed to which the item should be added
* @param FeedItem $item The item to be added
* @param SQLite3 $db A database connection to use for the addition
*/
private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): void {
$query = $db->prepare(<<<'SQL'
INSERT INTO item (
feed_id, item_guid, item_link, title, published_on, updated_on, content
) VALUES (
:feed, :guid, :link, :title, :published, :updated, :content
)
SQL);
$query->bindValue(':feed', $feedId);
$query->bindValue(':guid', $item->guid);
$query->bindValue(':link', $item->link);
$query->bindValue(':title', $item->title);
$query->bindValue(':published', $item->publishedOn);
$query->bindValue(':updated', $item->updatedOn);
$query->bindValue(':content', $item->content);
$query->execute();
}
/**
* Update a feed's items
*
* @param int $feedId The ID of the feed to which these items belong
* @param Feed $feed The extracted Atom or RSS feed items
* @return array ['ok' => true] if successful, ['error' => message] if not
*/
public static function updateItems(int $feedId, Feed $feed, SQLite3 $db): array {
try {
foreach ($feed->items as $item) {
$existsQuery = $db->prepare(
'SELECT id, published_on, updated_on FROM item WHERE feed_id = :feed AND item_guid = :guid');
$existsQuery->bindValue(':feed', $feedId);
$existsQuery->bindValue(':guid', $item->guid);
$exists = $existsQuery->execute();
if ($exists) {
$existing = $exists->fetchArray(SQLITE3_ASSOC);
if ($existing) {
if ( $existing['published_on'] != $item->publishedOn
|| ($existing['updated_on'] ?? '') != ($item->updatedOn ?? '')) {
self::updateItem($existing['id'], $item, $db);
}
} else {
self::addItem($feedId, $item, $db);
}
} else {
throw new Exception($db->lastErrorMsg());
}
}
} catch (Exception $ex) {
return ['error' => $ex->getMessage()];
}
return ['ok', true];
}
/**
* Refresh a feed
*
* @param string $url The URL of the feed to be refreshed
* @param SQLite3 $db A database connection to use to refresh the feed
* @return array|string[]|true[] ['ok' => true] if successful, ['error' => message] if not
*/
private static function refreshFeed(string $url, SQLite3 $db): array {
$feedQuery = $db->prepare('SELECT id FROM feed WHERE url = :url AND user_id = :user');
$feedQuery->bindValue(':url', $url);
$feedQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
$feedResult = $feedQuery->execute();
$feedId = $feedResult ? $feedResult->fetchArray(SQLITE3_NUM)[0] : -1;
if ($feedId < 0) return ['error' => "No feed for URL $url found"];
$feedExtract = self::retrieveFeed($url);
if (array_key_exists('error', $feedExtract)) return $feedExtract;
$feed = $feedExtract['ok'];
$itemUpdate = self::updateItems($feedId, $feed, $db);
if (array_key_exists('error', $itemUpdate)) return $itemUpdate;
$urlUpdate = $url == $feed->url ? '' : ', url = :url';
$feedUpdate = $db->prepare(<<bindValue(':title', $feed->title);
$feedUpdate->bindValue(':updated', $feed->updatedOn);
$feedUpdate->bindValue(':checked', Data::formatDate('now'));
$feedUpdate->bindValue(':id', $feedId);
if ($urlUpdate != '') $feedUpdate->bindValue(':url', $feed->url);
$feedUpdate->execute();
return ['ok' => true];
}
/**
* Add an RSS feed
*
* @param string $url The URL of the RSS feed to add
* @return array ['ok' => feedId] if successful, ['error' => message] if not
*/
public static function add(string $url, SQLite3 $db): array {
$feedExtract = self::retrieveFeed($url);
if (array_key_exists('error', $feedExtract)) return $feedExtract;
$feed = $feedExtract['ok'];
$existsQuery = $db->prepare('SELECT COUNT(*) FROM feed WHERE user_id = :user AND url = :url');
$existsQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
$existsQuery->bindValue(':url', $feed->url);
$existsResult = $existsQuery->execute();
if (!$existsResult) return ['error' => 'SQLite error: ' . $db->lastErrorMsg()];
$exists = $existsResult->fetchArray(SQLITE3_NUM);
if ($exists[0] != 0) return ['error' => "Already subscribed to feed $feed->url"];
$query = $db->prepare(<<<'SQL'
INSERT INTO feed (user_id, url, title, updated_on, checked_on)
VALUES (:user, :url, :title, :updated, :checked)
SQL);
$query->bindValue(':user', $_SESSION[Key::USER_ID]);
$query->bindValue(':url', $feed->url);
$query->bindValue(':title', $feed->title);
$query->bindValue(':updated', $feed->updatedOn);
$query->bindValue(':checked', Data::formatDate('now'));
$result = $query->execute();
$feedId = $result ? $db->lastInsertRowID() : -1;
if ($feedId < 0) return ['error' => $db->lastErrorMsg()];
$result = self::updateItems($feedId, $feed, $db);
if (array_key_exists('error', $result)) return $result;
return ['ok' => $feedId];
}
/**
* Update an RSS feed
*
* @param array $existing The existing RSS feed
* @param string $url The URL with which the existing feed should be modified
* @param SQLite3 $db The database connection on which to execute the update
* @return bool[]|string[] [ 'ok' => true ] if successful, [ 'error' => message ] if not
*/
public static function update(array $existing, string $url, SQLite3 $db): array {
$query = $db->prepare('UPDATE feed SET url = :url WHERE id = :id AND user_id = :user');
$query->bindValue(':url', $url);
$query->bindValue(':id', $existing['id']);
$query->bindValue(':user', $_SESSION[Key::USER_ID]);
$query->execute();
return self::refreshFeed($url, $db);
}
/**
* Refresh all feeds
*
* @param SQLite3 $db The database connection to use for refreshing feeds
* @return array|true[] ['ok => true] if successful, ['error' => message] if not (may have multiple error lines)
*/
public static function refreshAll(SQLite3 $db): array {
$query = $db->prepare('SELECT url FROM feed WHERE user_id = :user');
$query->bindValue(':user', $_SESSION[Key::USER_ID]);
$result = $query->execute();
$url = $result ? $result->fetchArray(SQLITE3_NUM) : false;
if ($url) {
$errors = array();
while ($url) {
$updateResult = self::refreshFeed($url[0], $db);
if (array_key_exists('error', $updateResult)) $errors[] = $updateResult['error'];
$url = $result->fetchArray(SQLITE3_NUM);
}
return sizeof($errors) == 0 ? ['ok' => true] : ['error' => implode("\n", $errors)];
}
return ['error' => $db->lastErrorMsg()];
}
}