diff --git a/src/lib/Feed.php b/src/lib/Feed.php
index 3f2cbbc..7224812 100644
--- a/src/lib/Feed.php
+++ b/src/lib/Feed.php
@@ -1,15 +1,55 @@
` tag that allows HTML content in a feed */
public const string CONTENT_NS = 'http://purl.org/rss/1.0/modules/content/';
+ /** @var string The XML namespace for XHTML */
+ public const string XHTML_NS = 'http://www.w3.org/1999/xhtml';
+
/**
* When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them
*
@@ -45,24 +85,135 @@ class Feed {
}
/**
- * Get the value of a child element by its tag name
+ * Get the value of a child element by its tag name for an RSS feed
*
* @param DOMElement $element The parent element
* @param string $tagName The name of the tag whose value should be obtained
* @return string The value of the element (or "[element] not found" if that element does not exist)
*/
- private static function eltValue(DOMElement $element, string $tagName): string {
+ private static function rssValue(DOMElement $element, string $tagName): string {
$tags = $element->getElementsByTagName($tagName);
return $tags->length == 0 ? "$tagName not found" : $tags->item(0)->textContent;
}
+ /**
+ * Extract items from an RSS feed
+ *
+ * @param DOMDocument $xml The XML received from the feed
+ * @param string $url The actual URL for the feed
+ * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
+ */
+    private static function fromRSS(DOMDocument $xml, string $url): array {
+        $channel = $xml->getElementsByTagName('channel')->item(0);
+        if (!($channel instanceof DOMElement)) {
+            return ['error' => "Channel element not found ({$channel?->nodeType})"];
+        }
+
+        $feed = new Feed();
+        $feed->title = self::rssValue($channel, 'title');
+        $feed->url = $url;
+
+        // An RSS channel may carry a lastBuildDate (the last time the channel's content changed); if that is not
+        // present, use the pubDate element instead (rssValue() reports a missing tag as "[tag] not found")
+        $feed->updatedOn = self::rssValue($channel, 'lastBuildDate');
+        if ($feed->updatedOn == 'lastBuildDate not found') {
+            $feed->updatedOn = self::rssValue($channel, 'pubDate');
+            if ($feed->updatedOn == 'pubDate not found') $feed->updatedOn = null;
+        }
+        $feed->updatedOn = Data::formatDate($feed->updatedOn);
+
+        foreach ($channel->getElementsByTagName('item') as $xmlItem) {
+            $itemGuid = self::rssValue($xmlItem, 'guid');
+            $updNodes = $xmlItem->getElementsByTagNameNS(self::ATOM_NS, 'updated');
+            $encNodes = $xmlItem->getElementsByTagNameNS(self::CONTENT_NS, 'encoded');
+            $item = new FeedItem();
+            $item->guid = $itemGuid == 'guid not found' ? self::rssValue($xmlItem, 'link') : $itemGuid;
+            $item->title = self::rssValue($xmlItem, 'title');
+            $item->link = self::rssValue($xmlItem, 'link');
+            $item->publishedOn = Data::formatDate(self::rssValue($xmlItem, 'pubDate'));
+            $item->updatedOn = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null);
+            $item->content = $encNodes->length > 0
+                ? $encNodes->item(0)->textContent
+                : self::rssValue($xmlItem, 'description');
+            $feed->items[] = $item;
+        }
+
+        return ['ok' => $feed];
+    }
+
+ /**
+ * Get the value of a child element by its tag name for an Atom feed
+ *
+ * (Atom feeds can have type attributes on nearly any value. For our purposes, types "text" and "html" will work as
+ * regular string values; for "xhtml", though, we will need to get the `<div>` and extract its contents instead.)
+ *
+ * @param DOMElement $element The parent element
+ * @param string $tagName The name of the tag whose value should be obtained
+ * @return string The value of the element (or "[element] not found" if that element does not exist)
+ */
+    private static function atomValue(DOMElement $element, string $tagName): string {
+        $tags = $element->getElementsByTagName($tagName);
+        if ($tags->length == 0) return "$tagName not found";
+        $tag = $tags->item(0);
+        if (!($tag instanceof DOMElement)) return $tag->textContent;
+        if ($tag->getAttribute('type') === 'xhtml') { // getAttribute() returns '' when no type is given
+            $div = $tag->getElementsByTagNameNS(Feed::XHTML_NS, 'div');
+            if ($div->length == 0) return "-- invalid XHTML content --";
+            return $div->item(0)->textContent;
+        }
+        return $tag->textContent;
+    }
+
+ /**
+ * Extract items from an Atom feed
+ *
+ * @param DOMDocument $xml The XML received from the feed
+ * @param string $url The actual URL for the feed
+ * @return array|Feed[] ['ok' => feed]
+ */
+    private static function fromAtom(DOMDocument $xml, string $url): array {
+        /** @var DOMElement $root */
+        $root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);
+        $feed = new Feed();
+        $feed->title = self::atomValue($root, 'title');
+        $feed->url = $url;
+
+        $feed->updatedOn = self::atomValue($root, 'updated');
+        if ($feed->updatedOn == 'updated not found') $feed->updatedOn = null; // atomValue()'s missing-tag text
+        $feed->updatedOn = Data::formatDate($feed->updatedOn);
+
+        foreach ($root->getElementsByTagName('entry') as $xmlItem) {
+            $guid = self::atomValue($xmlItem, 'id');
+            $link = '';
+            foreach ($xmlItem->getElementsByTagName('link') as $linkElt) {
+                // A link with no rel attribute counts as rel="alternate"; getAttribute() gives '' when absent
+                $rel = $linkElt->getAttribute('rel');
+                $href = $linkElt->getAttribute('href');
+                if ($href !== '' && ($rel === '' || $rel === 'alternate')) {
+                    $link = $href;
+                    break;
+                }
+            }
+            if ($link == '' && str_starts_with($guid, 'http')) $link = $guid;
+
+            $item = new FeedItem();
+            $item->guid = $guid;
+            $item->title = self::atomValue($xmlItem, 'title');
+            $item->link = $link;
+            $item->publishedOn = Data::formatDate(self::atomValue($xmlItem, 'published'));
+            $item->updatedOn = Data::formatDate(self::atomValue($xmlItem, 'updated'));
+            $item->content = self::atomValue($xmlItem, 'content');
+            $feed->items[] = $item;
+        }
+
+        return ['ok' => $feed];
+    }
+
/**
* Retrieve the feed
*
- * @param string $url
- * @return array|DOMDocument[]|string[]|DOMElement[]
- * ['ok' => feedXml, 'url' => actualUrl, 'channel' => channel, 'updated' => updatedDate] if successful,
- * ['error' => message] if not
+ * @param string $url The URL of the feed to retrieve
+ * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
*/
public static function retrieveFeed(string $url): array {
$feedReq = curl_init($url);
@@ -83,26 +234,9 @@ class Feed {
if (array_key_exists('error', $parsed)) {
$result['error'] = $parsed['error'];
} else {
- $result['ok'] = $parsed['ok'];
- $result['url'] = curl_getinfo($feedReq, CURLINFO_EFFECTIVE_URL);
-
- $channel = $result['ok']->getElementsByTagName('channel')->item(0);
- if ($channel instanceof DOMElement) {
- $result['channel'] = $channel;
- } else {
- return ['error' => "Channel element not found ($channel->nodeType)"];
- }
-
- // In Atom feeds, lastBuildDate contains the last time an item in the feed was updated; if that is not
- // present, use the pubDate element instead
- $updated = self::eltValue($channel, 'lastBuildDate');
- if ($updated == 'lastBuildDate not found') {
- $updated = self::eltValue($channel, 'pubDate');
- if ($updated == 'pubDate not found') $updated = null;
- }
- $result['updated'] = Data::formatDate($updated);
- return $result;
-
+ $extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0
+ ? self::fromAtom(...) : self::fromRSS(...);
+ $result = $extract($parsed['ok'], curl_getinfo($feedReq, CURLINFO_EFFECTIVE_URL));
}
} else {
$result['error'] = "Prospective feed URL $url returned HTTP Code $code: $feedContent";
@@ -112,35 +246,14 @@ class Feed {
return $result;
}
- /**
- * Extract the fields we need to keep from the feed
- *
- * @param DOMElement $item The item from the feed
- * @return array The fields for the item as an associative array
- */
- private static function itemFields(DOMElement $item): array {
- $itemGuid = self::eltValue($item, 'guid');
- $updNodes = $item->getElementsByTagNameNS(self::ATOM_NS, 'updated');
- $encNodes = $item->getElementsByTagNameNS(self::CONTENT_NS, 'encoded');
- return [
- 'guid' => $itemGuid == 'guid not found' ? self::eltValue($item, 'link') : $itemGuid,
- 'title' => self::eltValue($item, 'title'),
- 'link' => self::eltValue($item, 'link'),
- 'published' => Data::formatDate(self::eltValue($item, 'pubDate')),
- 'updated' => Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null),
- 'content' => $encNodes->length > 0 ? $encNodes->item(0)->textContent
- : self::eltValue($item, 'description')
- ];
- }
-
/**
* Update a feed item
*
* @param int $itemId The ID of the item to be updated
- * @param array $item The fields from the updated item
+ * @param FeedItem $item The item to be updated
* @param SQLite3 $db A database connection to use for the update
*/
- private static function updateItem(int $itemId, array $item, SQLite3 $db): void {
+ private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): void {
$query = $db->prepare(<<<'SQL'
UPDATE item
SET title = :title,
@@ -150,10 +263,10 @@ class Feed {
is_read = 0
WHERE id = :id
SQL);
- $query->bindValue(':title', $item['title']);
- $query->bindValue(':published', $item['published']);
- $query->bindValue(':updated', $item['updated']);
- $query->bindValue(':content', $item['content']);
+ $query->bindValue(':title', $item->title);
+ $query->bindValue(':published', $item->publishedOn);
+ $query->bindValue(':updated', $item->updatedOn);
+ $query->bindValue(':content', $item->content);
$query->bindValue(':id', $itemId);
$query->execute();
}
@@ -162,10 +275,10 @@ class Feed {
* Add a feed item
*
* @param int $feedId The ID of the feed to which the item should be added
- * @param array $item The fields for the item
+ * @param FeedItem $item The item to be added
* @param SQLite3 $db A database connection to use for the addition
*/
- private static function addItem(int $feedId, array $item, SQLite3 $db): void {
+ private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): void {
$query = $db->prepare(<<<'SQL'
INSERT INTO item (
feed_id, item_guid, item_link, title, published_on, updated_on, content
@@ -174,12 +287,12 @@ class Feed {
)
SQL);
$query->bindValue(':feed', $feedId);
- $query->bindValue(':guid', $item['guid']);
- $query->bindValue(':link', $item['link']);
- $query->bindValue(':title', $item['title']);
- $query->bindValue(':published', $item['published']);
- $query->bindValue(':updated', $item['updated']);
- $query->bindValue(':content', $item['content']);
+ $query->bindValue(':guid', $item->guid);
+ $query->bindValue(':link', $item->link);
+ $query->bindValue(':title', $item->title);
+ $query->bindValue(':published', $item->publishedOn);
+ $query->bindValue(':updated', $item->updatedOn);
+ $query->bindValue(':content', $item->content);
$query->execute();
}
@@ -187,23 +300,22 @@ class Feed {
* Update a feed's items
*
* @param int $feedId The ID of the feed to which these items belong
- * @param DOMElement $channel The RSS feed items
+ * @param Feed $feed The extracted Atom or RSS feed items
* @return array ['ok' => true] if successful, ['error' => message] if not
*/
- public static function updateItems(int $feedId, DOMElement $channel, SQLite3 $db): array {
+ public static function updateItems(int $feedId, Feed $feed, SQLite3 $db): array {
try {
- foreach ($channel->getElementsByTagName('item') as $rawItem) {
- $item = self::itemFields($rawItem);
+ foreach ($feed->items as $item) {
$existsQuery = $db->prepare(
'SELECT id, published_on, updated_on FROM item WHERE feed_id = :feed AND item_guid = :guid');
$existsQuery->bindValue(':feed', $feedId);
- $existsQuery->bindValue(':guid', $item['guid']);
+ $existsQuery->bindValue(':guid', $item->guid);
$exists = $existsQuery->execute();
if ($exists) {
$existing = $exists->fetchArray(SQLITE3_ASSOC);
if ($existing) {
- if ( $existing['published_on'] != $item['published']
- || $existing['updated_on'] ?? '' != $item['updated'] ?? '') {
+ if ( $existing['published_on'] != $item->publishedOn
+ || ($existing['updated_on'] ?? '') != ($item->updatedOn ?? '')) {
self::updateItem($existing['id'], $item, $db);
}
} else {
@@ -234,13 +346,14 @@ class Feed {
$feedId = $feedResult ? $feedResult->fetchArray(SQLITE3_NUM)[0] : -1;
if ($feedId < 0) return ['error' => "No feed for URL $url found"];
- $feed = self::retrieveFeed($url);
- if (array_key_exists('error', $feed)) return $feed;
+ $feedExtract = self::retrieveFeed($url);
+ if (array_key_exists('error', $feedExtract)) return $feedExtract;
- $itemUpdate = self::updateItems($feedId, $feed['channel'], $db);
+ $feed = $feedExtract['ok'];
+ $itemUpdate = self::updateItems($feedId, $feed, $db);
if (array_key_exists('error', $itemUpdate)) return $itemUpdate;
- $urlUpdate = $url == $feed['url'] ? '' : ', url = :url';
+ $urlUpdate = $url == $feed->url ? '' : ', url = :url';
$feedUpdate = $db->prepare(<<
bindValue(':title', self::eltValue($feed['channel'], 'title'));
- $feedUpdate->bindValue(':updated', $feed['updated']);
+ $feedUpdate->bindValue(':title', $feed->title);
+ $feedUpdate->bindValue(':updated', $feed->updatedOn);
$feedUpdate->bindValue(':checked', Data::formatDate('now'));
$feedUpdate->bindValue(':id', $feedId);
- if ($urlUpdate != '') $feedUpdate->bindValue(':url', $feed['url']);
+ if ($urlUpdate != '') $feedUpdate->bindValue(':url', $feed->url);
$feedUpdate->execute();
return ['ok' => true];
@@ -266,24 +379,25 @@ class Feed {
* @return array ['ok' => feedId] if successful, ['error' => message] if not
*/
public static function add(string $url, SQLite3 $db): array {
- $feed = self::retrieveFeed($url);
- if (array_key_exists('error', $feed)) return $feed;
+ $feedExtract = self::retrieveFeed($url);
+ if (array_key_exists('error', $feedExtract)) return $feedExtract;
+ $feed = $feedExtract['ok'];
$query = $db->prepare(<<<'SQL'
INSERT INTO feed (user_id, url, title, updated_on, checked_on)
VALUES (:user, :url, :title, :updated, :checked)
SQL);
$query->bindValue(':user', $_REQUEST[Key::USER_ID]);
- $query->bindValue(':url', $feed['url']);
- $query->bindValue(':title', self::eltValue($feed['channel'], 'title'));
- $query->bindValue(':updated', $feed['updated']);
+ $query->bindValue(':url', $feed->url);
+ $query->bindValue(':title', $feed->title);
+ $query->bindValue(':updated', $feed->updatedOn);
$query->bindValue(':checked', Data::formatDate('now'));
$result = $query->execute();
$feedId = $result ? $db->lastInsertRowID() : -1;
if ($feedId < 0) return ['error' => $db->lastErrorMsg()];
- $result = self::updateItems($feedId, $feed['channel'], $db);
+ $result = self::updateItems($feedId, $feed, $db);
if (array_key_exists('error', $result)) return $result;
return ['ok' => $feedId];
@@ -294,6 +408,7 @@ class Feed {
*
* @param array $existing The existing RSS feed
* @param string $url The URL with which the existing feed should be modified
+ * @param SQLite3 $db The database connection on which to execute the update
* @return bool[]|string[] [ 'ok' => true ] if successful, [ 'error' => message ] if not
*/
public static function update(array $existing, string $url, SQLite3 $db): array {
@@ -307,7 +422,9 @@ class Feed {
}
/**
- * @param SQLite3 $db
+ * Refresh all feeds
+ *
+ * @param SQLite3 $db The database connection to use for refreshing feeds
* @return array|true[] ['ok => true] if successful, ['error' => message] if not (may have multiple error lines)
*/
public static function refreshAll(SQLite3 $db): array {
diff --git a/src/public/index.php b/src/public/index.php
index 6bcb14f..91bbc16 100644
--- a/src/public/index.php
+++ b/src/public/index.php
@@ -36,7 +36,7 @@ page_head('Welcome'); ?>
if ($item) {
while ($item) { ?>
>=$item['item_title']?>
- =$item['feed_title']?>
=date_time($item['as_of'])?>
=date_time($item['as_of'])?>fetchArray(SQLITE3_ASSOC);
}
} else { ?>