463 lines
18 KiB
PHP
463 lines
18 KiB
PHP
<?php
|
|
|
|
/**
 * Information for a feed item
 *
 * Instances are built from a single Atom `<entry>` or RSS `<item>` element via the static factory methods below.
 */
class FeedItem {

    /** @var string The title of the feed item */
    public string $title = '';

    /** @var string The unique ID for the feed item */
    public string $guid = '';

    /** @var string The link to the original content */
    public string $link = '';

    /** @var string When this item was published */
    public string $publishedOn = '';

    /** @var ?string When this item was last updated */
    public ?string $updatedOn = null;

    /** @var string The content for the item */
    public string $content = '';

    /**
     * Format a date extracted from a feed, treating the "[element] not found" placeholder as an absent value
     *
     * `Feed::atomValue()` and `Feed::rssValue()` return "[element] not found" when the element is missing; that
     * placeholder is not a date and should never be handed to the date formatter.
     *
     * @param string $value The value returned by `Feed::atomValue()` or `Feed::rssValue()`
     * @param string $tagName The tag name that was requested (used to recognize the placeholder)
     * @return ?string The result of `Data::formatDate()` for a present value, or null if the element was missing
     */
    private static function feedDate(string $value, string $tagName): ?string {
        return $value === "$tagName not found" ? null : Data::formatDate($value);
    }

    /**
     * Construct a feed item from an Atom feed's `<entry>` tag
     *
     * The item's link is the `href` of the first "alternate" `<link>`; if none is found and the entry's ID looks like
     * a URL, the ID is used as the link instead.
     *
     * @param DOMNode $node The XML node from which a feed item should be constructed
     * @return FeedItem A feed item constructed from the given node
     */
    public static function fromAtom(DOMNode $node): FeedItem {
        $guid = Feed::atomValue($node, 'id');
        $link = '';
        foreach ($node->getElementsByTagName('link') as $linkElt) {
            if (!($linkElt instanceof DOMElement)) continue;
            // Per RFC 4287, a link with no "rel" attribute is interpreted as rel="alternate"
            $rel  = $linkElt->hasAttribute('rel') ? $linkElt->getAttribute('rel') : 'alternate';
            $href = $linkElt->getAttribute('href'); // '' when the attribute is absent
            if ($rel === 'alternate' && $href !== '') {
                $link = $href;
                break;
            }
        }
        if ($link === '' && str_starts_with($guid, 'http')) $link = $guid;

        $item = new FeedItem();
        $item->guid    = $guid;
        $item->title   = Feed::atomValue($node, 'title');
        $item->link    = $link;
        // NOTE(review): Data::formatDate() is defined elsewhere; the `?? ''` keeps the non-nullable property valid
        //               should it return null (e.g., `<published>` is optional in Atom) - confirm against Data
        $item->publishedOn = self::feedDate(Feed::atomValue($node, 'published'), 'published') ?? '';
        $item->updatedOn   = self::feedDate(Feed::atomValue($node, 'updated'), 'updated');
        $item->content     = Feed::atomValue($node, 'content');

        return $item;
    }

    /**
     * Construct a feed item from an RSS feed's `<item>` tag
     *
     * The GUID falls back to the item's link when no `<guid>` is present; the updated date comes from an Atom
     * `<updated>` element if the item has one, and `<content:encoded>` is preferred over `<description>` for content.
     *
     * @param DOMNode $node The XML node from which a feed item should be constructed
     * @return FeedItem A feed item constructed from the given node
     */
    public static function fromRSS(DOMNode $node): FeedItem {
        $itemGuid = Feed::rssValue($node, 'guid');
        $itemLink = Feed::rssValue($node, 'link');
        $updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated');
        $encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded');

        $item = new FeedItem();
        $item->guid  = $itemGuid === 'guid not found' ? $itemLink : $itemGuid;
        $item->title = Feed::rssValue($node, 'title');
        $item->link  = $itemLink;
        // NOTE(review): see fromAtom() regarding the `?? ''` fallback for a null formatted date
        $item->publishedOn = self::feedDate(Feed::rssValue($node, 'pubDate'), 'pubDate') ?? '';
        $item->updatedOn   = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null);
        $item->content     = $encNodes->length > 0
            ? $encNodes->item(0)->textContent
            : Feed::rssValue($node, 'description');

        return $item;
    }
}
|
|
|
|
/**
 * Feed retrieval, parsing, and manipulation
 *
 * Most methods return a result array: `['ok' => value]` on success or `['error' => message]` on failure. Callers
 * distinguish the two by checking for the `error` key.
 */
class Feed {

    /** @var string The URL for the feed */
    public string $url = '';

    /** @var string The title of the feed */
    public string $title = '';

    /** @var ?string When the feed was last updated */
    public ?string $updatedOn = null;

    /** @var FeedItem[] The items contained in the feed */
    public array $items = [];

    /** @var string The XML namespace for Atom feeds */
    public const string ATOM_NS = 'http://www.w3.org/2005/Atom';

    /** @var string The XML namespace for the `<content:encoded>` tag that allows HTML content in a feed */
    public const string CONTENT_NS = 'http://purl.org/rss/1.0/modules/content/';

    /** @var string The XML namespace for XHTML */
    public const string XHTML_NS = 'http://www.w3.org/1999/xhtml';

    /**
     * When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them
     *
     * @param int $errno The error level encountered
     * @param string $errstr The text of the error encountered
     * @return bool False, to delegate to the next error handler in the chain
     * @throws DOMException If the error is a warning raised by `DOMDocument::loadXML()`
     */
    private static function xmlParseError(int $errno, string $errstr): bool {
        // The method is spelled "loadXML"; match case-insensitively so the spelling in the message cannot matter
        if ($errno === E_WARNING && stripos($errstr, 'DOMDocument::loadXML()') !== false) {
            throw new DOMException($errstr, $errno);
        }
        return false;
    }

    /**
     * Parse a feed into an XML tree
     *
     * @param string $content The feed's XML (RSS or Atom) content
     * @return array|DOMDocument[]|string[] ['ok' => feed] if successful, ['error' => message] if not
     */
    public static function parseFeed(string $content): array {
        // loadXML() rejects an empty string with a ValueError rather than a warning; report it as a normal error
        if ($content === '') return ['error' => 'The feed content was empty'];

        set_error_handler(self::xmlParseError(...));
        try {
            $feed = new DOMDocument();
            // LIBXML_NONET: this is untrusted remote content; do not let the parser fetch anything over the network
            if (!$feed->loadXML($content, LIBXML_NONET)) {
                // Reached only if parsing failed without the handler above raising an exception
                return ['error' => 'The feed content could not be parsed as XML'];
            }
            return ['ok' => $feed];
        } catch (DOMException $ex) {
            return ['error' => $ex->getMessage()];
        } finally {
            restore_error_handler();
        }
    }

    /**
     * Get the value of a child element by its tag name for an RSS feed
     *
     * The first element with the given name anywhere beneath `$element` (in document order) is used.
     *
     * @param DOMNode $element The parent element
     * @param string $tagName The name of the tag whose value should be obtained
     * @return string The value of the element (or "[element] not found" if that element does not exist)
     */
    public static function rssValue(DOMNode $element, string $tagName): string {
        $first = $element->getElementsByTagName($tagName)->item(0);
        return $first === null ? "$tagName not found" : $first->textContent;
    }

    /**
     * Extract items from an RSS feed
     *
     * @param DOMDocument $xml The XML received from the feed
     * @param string $url The actual URL for the feed
     * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
     */
    private static function fromRSS(DOMDocument $xml, string $url): array {
        $channel = $xml->getElementsByTagName('channel')->item(0);
        if (!($channel instanceof DOMElement)) {
            return ['error' => 'Channel element not found'];
        }

        // RSS channels may provide a lastBuildDate element for the last time the feed's content changed; if that is
        // not present, use the pubDate element instead
        $updatedOn = self::rssValue($channel, 'lastBuildDate');
        if ($updatedOn === 'lastBuildDate not found') {
            $updatedOn = self::rssValue($channel, 'pubDate');
            if ($updatedOn === 'pubDate not found') $updatedOn = null;
        }

        $feed = new Feed();
        $feed->title     = self::rssValue($channel, 'title');
        $feed->url       = $url;
        $feed->updatedOn = Data::formatDate($updatedOn);
        foreach ($channel->getElementsByTagName('item') as $item) $feed->items[] = FeedItem::fromRSS($item);

        return ['ok' => $feed];
    }

    /**
     * Get the value of a child element by its tag name for an Atom feed
     *
     * (Atom feeds can have type attributes on nearly any value. For our purposes, types "text" and "html" will work as
     * regular string values; for "xhtml", though, we will need to get the `<div>` and extract its contents instead.)
     *
     * @param DOMNode $element The parent element
     * @param string $tagName The name of the tag whose value should be obtained
     * @return string The value of the element (or "[element] not found" if that element does not exist)
     */
    public static function atomValue(DOMNode $element, string $tagName): string {
        $tag = $element->getElementsByTagName($tagName)->item(0);
        if ($tag === null) return "$tagName not found";

        // Compare the attribute's string value; comparing the attribute node itself to a string never matches
        if ($tag instanceof DOMElement && $tag->getAttribute('type') === 'xhtml') {
            $div = $tag->getElementsByTagNameNS(self::XHTML_NS, 'div')->item(0);
            // NOTE(review): textContent yields the text only, discarding the XHTML markup inside the <div>
            return $div === null ? "-- invalid XHTML content --" : $div->textContent;
        }
        return $tag->textContent;
    }

    /**
     * Extract items from an Atom feed
     *
     * @param DOMDocument $xml The XML received from the feed
     * @param string $url The actual URL for the feed
     * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
     */
    private static function fromAtom(DOMDocument $xml, string $url): array {
        $root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);
        if (!($root instanceof DOMElement)) {
            return ['error' => 'Atom feed element not found'];
        }

        // atomValue() reports a missing element as "[element] not found"; the element requested here is "updated"
        $updatedOn = self::atomValue($root, 'updated');
        if ($updatedOn === 'updated not found') $updatedOn = null;

        $feed = new Feed();
        $feed->title     = self::atomValue($root, 'title');
        $feed->url       = $url;
        $feed->updatedOn = Data::formatDate($updatedOn);
        foreach ($root->getElementsByTagName('entry') as $entry) $feed->items[] = FeedItem::fromAtom($entry);

        return ['ok' => $feed];
    }

    /**
     * Retrieve the feed
     *
     * Redirects are followed; the URL recorded on the returned feed is the final (effective) URL, which may differ
     * from the one requested.
     *
     * @param string $url The URL of the feed to retrieve
     * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
     */
    public static function retrieveFeed(string $url): array {
        $feedReq = curl_init($url);
        curl_setopt($feedReq, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($feedReq, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($feedReq, CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($feedReq, CURLOPT_TIMEOUT, 15);
        // Only an HTTP 200 response is accepted below, so restrict the request (and any redirects) to HTTP(S);
        // this keeps a user-supplied URL from reaching other protocol handlers such as file://
        curl_setopt($feedReq, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($feedReq, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

        $feedContent = curl_exec($feedReq);

        $result = [];
        $error  = curl_error($feedReq);
        $code   = curl_getinfo($feedReq, CURLINFO_RESPONSE_CODE);
        if ($error) {
            $result['error'] = $error;
        } elseif ($code == 200) {
            $parsed = self::parseFeed($feedContent);
            if (array_key_exists('error', $parsed)) {
                $result['error'] = $parsed['error'];
            } else {
                // A document containing an Atom-namespaced <feed> element is Atom; anything else is treated as RSS
                $extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0
                    ? self::fromAtom(...) : self::fromRSS(...);
                $result = $extract($parsed['ok'], curl_getinfo($feedReq, CURLINFO_EFFECTIVE_URL));
            }
        } else {
            $result['error'] = "Prospective feed URL $url returned HTTP Code $code: $feedContent";
        }

        curl_close($feedReq);
        return $result;
    }

    /**
     * Update a feed item
     *
     * Updating an item also marks it as unread.
     *
     * @param int $itemId The ID of the item to be updated
     * @param FeedItem $item The item to be updated
     * @param SQLite3 $db A database connection to use for the update
     * @throws Exception If the statement cannot be prepared or executed
     */
    private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): void {
        $query = $db->prepare(<<<'SQL'
            UPDATE item
               SET title        = :title,
                   published_on = :published,
                   updated_on   = :updated,
                   content      = :content,
                   is_read      = 0
             WHERE id = :id
            SQL);
        if ($query === false) throw new Exception($db->lastErrorMsg());
        $query->bindValue(':title', $item->title);
        $query->bindValue(':published', $item->publishedOn);
        $query->bindValue(':updated', $item->updatedOn);
        $query->bindValue(':content', $item->content);
        $query->bindValue(':id', $itemId);
        if ($query->execute() === false) throw new Exception($db->lastErrorMsg());
    }

    /**
     * Add a feed item
     *
     * @param int $feedId The ID of the feed to which the item should be added
     * @param FeedItem $item The item to be added
     * @param SQLite3 $db A database connection to use for the addition
     * @throws Exception If the statement cannot be prepared or executed
     */
    private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): void {
        $query = $db->prepare(<<<'SQL'
            INSERT INTO item (
                feed_id, item_guid, item_link, title, published_on, updated_on, content
            ) VALUES (
                :feed, :guid, :link, :title, :published, :updated, :content
            )
            SQL);
        if ($query === false) throw new Exception($db->lastErrorMsg());
        $query->bindValue(':feed', $feedId);
        $query->bindValue(':guid', $item->guid);
        $query->bindValue(':link', $item->link);
        $query->bindValue(':title', $item->title);
        $query->bindValue(':published', $item->publishedOn);
        $query->bindValue(':updated', $item->updatedOn);
        $query->bindValue(':content', $item->content);
        if ($query->execute() === false) throw new Exception($db->lastErrorMsg());
    }

    /**
     * Update a feed's items
     *
     * Each item is matched to a stored item by feed ID and GUID. A matching item is rewritten only when its published
     * or updated date differs from what is stored; an item with no match is inserted.
     *
     * @param int $feedId The ID of the feed to which these items belong
     * @param Feed $feed The extracted Atom or RSS feed items
     * @param SQLite3 $db A database connection to use for the updates
     * @return array ['ok' => true] if successful, ['error' => message] if not
     */
    public static function updateItems(int $feedId, Feed $feed, SQLite3 $db): array {
        try {
            foreach ($feed->items as $item) {
                $existsQuery = $db->prepare(
                    'SELECT id, published_on, updated_on FROM item WHERE feed_id = :feed AND item_guid = :guid');
                if ($existsQuery === false) throw new Exception($db->lastErrorMsg());
                $existsQuery->bindValue(':feed', $feedId);
                $existsQuery->bindValue(':guid', $item->guid);
                $exists = $existsQuery->execute();
                if ($exists === false) throw new Exception($db->lastErrorMsg());

                $existing = $exists->fetchArray(SQLITE3_ASSOC);
                if ($existing === false) {
                    self::addItem($feedId, $item, $db);
                    continue;
                }
                // Null and '' are both "no date"; normalize before comparing so they do not register as a change
                $datesDiffer = ($existing['published_on'] ?? '') !== $item->publishedOn
                    || ($existing['updated_on'] ?? '') !== ($item->updatedOn ?? '');
                if ($datesDiffer) self::updateItem($existing['id'], $item, $db);
            }
        } catch (Exception $ex) {
            return ['error' => $ex->getMessage()];
        }
        return ['ok' => true];
    }

    /**
     * Refresh a feed
     *
     * Looks up the current user's feed by URL, retrieves it, synchronizes its items, then updates the feed's own
     * title and dates (and its URL, if the retrieval was redirected to a different one).
     *
     * @param string $url The URL of the feed to be refreshed
     * @param SQLite3 $db A database connection to use to refresh the feed
     * @return array|string[]|true[] ['ok' => true] if successful, ['error' => message] if not
     */
    private static function refreshFeed(string $url, SQLite3 $db): array {
        $feedQuery = $db->prepare('SELECT id FROM feed WHERE url = :url AND user_id = :user');
        if ($feedQuery === false) return ['error' => $db->lastErrorMsg()];
        $feedQuery->bindValue(':url', $url);
        $feedQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
        $feedResult = $feedQuery->execute();
        // fetchArray() returns false when there is no matching row; do not index into it
        $feedRow = $feedResult ? $feedResult->fetchArray(SQLITE3_NUM) : false;
        if (!$feedRow) return ['error' => "No feed for URL $url found"];
        $feedId = (int) $feedRow[0];

        $feedExtract = self::retrieveFeed($url);
        if (array_key_exists('error', $feedExtract)) return $feedExtract;

        $feed       = $feedExtract['ok'];
        $itemUpdate = self::updateItems($feedId, $feed, $db);
        if (array_key_exists('error', $itemUpdate)) return $itemUpdate;

        // Only touch the URL when following redirects landed somewhere other than the stored URL
        $urlUpdate  = $url == $feed->url ? '' : ', url = :url';
        $feedUpdate = $db->prepare(<<<SQL
            UPDATE feed
               SET title      = :title,
                   updated_on = :updated,
                   checked_on = :checked
                   $urlUpdate
             WHERE id = :id
            SQL);
        if ($feedUpdate === false) return ['error' => $db->lastErrorMsg()];
        $feedUpdate->bindValue(':title', $feed->title);
        $feedUpdate->bindValue(':updated', $feed->updatedOn);
        $feedUpdate->bindValue(':checked', Data::formatDate('now'));
        $feedUpdate->bindValue(':id', $feedId);
        if ($urlUpdate != '') $feedUpdate->bindValue(':url', $feed->url);
        if ($feedUpdate->execute() === false) return ['error' => $db->lastErrorMsg()];

        return ['ok' => true];
    }

    /**
     * Add an RSS feed
     *
     * The feed is retrieved first; it is stored (for the current user) only if retrieval and parsing succeed, and its
     * items are then added.
     *
     * @param string $url The URL of the RSS feed to add
     * @param SQLite3 $db A database connection to use to add the feed
     * @return array ['ok' => feedId] if successful, ['error' => message] if not
     */
    public static function add(string $url, SQLite3 $db): array {
        $feedExtract = self::retrieveFeed($url);
        if (array_key_exists('error', $feedExtract)) return $feedExtract;

        $feed  = $feedExtract['ok'];
        $query = $db->prepare(<<<'SQL'
            INSERT INTO feed (user_id, url, title, updated_on, checked_on)
            VALUES (:user, :url, :title, :updated, :checked)
            SQL);
        if ($query === false) return ['error' => $db->lastErrorMsg()];
        $query->bindValue(':user', $_SESSION[Key::USER_ID]);
        $query->bindValue(':url', $feed->url);
        $query->bindValue(':title', $feed->title);
        $query->bindValue(':updated', $feed->updatedOn);
        $query->bindValue(':checked', Data::formatDate('now'));
        $result = $query->execute();

        $feedId = $result ? $db->lastInsertRowID() : -1;
        if ($feedId < 0) return ['error' => $db->lastErrorMsg()];

        $result = self::updateItems($feedId, $feed, $db);
        if (array_key_exists('error', $result)) return $result;

        return ['ok' => $feedId];
    }

    /**
     * Update an RSS feed
     *
     * Stores the new URL for the current user's feed, then refreshes the feed from that URL.
     *
     * @param array $existing The existing RSS feed (its `id` key is used)
     * @param string $url The URL with which the existing feed should be modified
     * @param SQLite3 $db The database connection on which to execute the update
     * @return bool[]|string[] [ 'ok' => true ] if successful, [ 'error' => message ] if not
     */
    public static function update(array $existing, string $url, SQLite3 $db): array {
        $query = $db->prepare('UPDATE feed SET url = :url WHERE id = :id AND user_id = :user');
        if ($query === false) return ['error' => $db->lastErrorMsg()];
        $query->bindValue(':url', $url);
        $query->bindValue(':id', $existing['id']);
        $query->bindValue(':user', $_SESSION[Key::USER_ID]);
        // If the URL could not be stored, the refresh below would look up a feed that is not there
        if ($query->execute() === false) return ['error' => $db->lastErrorMsg()];

        return self::refreshFeed($url, $db);
    }

    /**
     * Refresh all feeds
     *
     * Every feed belonging to the current user is refreshed; a failure on one feed does not stop the others.
     *
     * @param SQLite3 $db The database connection to use for refreshing feeds
     * @return array|true[] ['ok' => true] if successful, ['error' => message] if not (may have multiple error lines)
     */
    public static function refreshAll(SQLite3 $db): array {
        $query = $db->prepare('SELECT url FROM feed WHERE user_id = :user');
        if ($query === false) return ['error' => $db->lastErrorMsg()];
        $query->bindValue(':user', $_SESSION[Key::USER_ID]);
        $result = $query->execute();
        if ($result === false) return ['error' => $db->lastErrorMsg()];

        // Read every URL before refreshing; refreshing writes to the feed table, which this query is reading
        $urls = [];
        while ($row = $result->fetchArray(SQLITE3_NUM)) $urls[] = $row[0];
        $result->finalize();

        // A user with no feeds has nothing to refresh; that is not an error
        $errors = [];
        foreach ($urls as $url) {
            $updateResult = self::refreshFeed($url, $db);
            if (array_key_exists('error', $updateResult)) $errors[] = $updateResult['error'];
        }
        return count($errors) === 0 ? ['ok' => true] : ['error' => implode("\n", $errors)];
    }
}
|