Compare commits

...

6 Commits

Author SHA1 Message Date
93377ffa0e Merge pull request 'Alpha 5' (#20) from add-via-html into main
Reviewed-on: #20
2024-04-30 23:46:36 +00:00
b14399deb8 Add no purge, manual delete options (#12)
- Add htmx, hx attributes
- Only add/update items since last check
- Move messages to session to persist across redirects
- Polish styles a bit (still WIP)
2024-04-30 18:51:09 -04:00
d8ba178c55 Add pruning support (#12) 2024-04-29 23:01:49 -04:00
473dded4f9 Add docs for scheduled refresh (#11)
- Add user agent for feed requests
2024-04-28 21:16:42 -04:00
10638101d3 Add refresh utility script (#11) 2024-04-28 14:09:53 -04:00
1826ecd588 Derive feed from HTML (#10)
- Prevent duplicate feed subscriptions
- Move FeedItem to its own file
2024-04-27 22:05:39 -04:00
15 changed files with 563 additions and 202 deletions

View File

@ -42,4 +42,13 @@ Data is stored under the `/src/data` directory, and the default database name is
### Date/Time Format
The default format for dates and times look like "May 28, 2023 at 3:15pm". Changing the string there will alter the display on the main page and when reading an item. Any [supported PHP date or time token](https://www.php.net/manual/en/datetime.format.php) is supported.
The default format for dates and times look like "May 28, 2023 at 3:15pm". Changing the string there will alter the display on the main page and when reading an item. Any [supported PHP date or time token](https://www.php.net/manual/en/datetime.format.php) is supported.
### Item Purging
Feed Reader Central tries to keep the database tidy by purging items that have been read and are no longer required. There are four variants:
- `Feed::PURGE_NONE` does no purging (items have a "Delete" button, so they may be deleted manually)
- `Feed::PURGE_READ` purges non-bookmarked read items for a feed whenever it is refreshed. This is the most aggressive purging strategy, but it is also the only one that will not purge unread items.
- `Feed::PURGE_BY_DAYS` purges non-bookmarked items that are older than `PURGE_NUMBER` days old. This is the default value, and `PURGE_NUMBER`'s default value is 30; items will be kept for 30 days, read or unread.
- `Feed::PURGE_BY_COUNT` purges items to preserve at most `PURGE_NUMBER` non-bookmarked items for each feed.

View File

@ -1,7 +1,7 @@
<?php
/** The current Feed Reader Central version */
const FRC_VERSION = '1.0.0-alpha4';
const FRC_VERSION = '1.0.0-alpha5';
spl_autoload_register(function ($class) {
$file = implode(DIRECTORY_SEPARATOR, [__DIR__, 'lib', "$class.php"]);
@ -15,3 +15,6 @@ spl_autoload_register(function ($class) {
require 'user-config.php';
Data::ensureDb();
/** @var string The date the world wide web was created */
const WWW_EPOCH = '1993-04-30T00:00:00+00:00';

View File

@ -98,4 +98,14 @@ class Data {
if (is_null($dbConn)) $db->close();
}
}
/**
* Return the last SQLite error message as a result array
*
* @param SQLite3 $db The database connection on which the error has occurred
* @return string[] ['error' => message] for last SQLite error message
*/
public static function error(SQLite3 $db): array {
return ['error' => 'SQLite error: ' . $db->lastErrorMsg()];
}
}

View File

@ -1,84 +1,5 @@
<?php
/**
* Information for a feed item
*/
class FeedItem {
/** @var string The title of the feed item */
public string $title = '';
/** @var string The unique ID for the feed item */
public string $guid = '';
/** @var string The link to the original content */
public string $link = '';
/** @var string When this item was published */
public string $publishedOn = '';
/** @var ?string When this item was last updated */
public ?string $updatedOn = null;
/** @var string The content for the item */
public string $content = '';
/**
* Construct a feed item from an Atom feed's `<entry>` tag
*
* @param DOMNode $node The XML node from which a feed item should be constructed
* @return FeedItem A feed item constructed from the given node
*/
public static function fromAtom(DOMNode $node): FeedItem {
$guid = Feed::atomValue($node, 'id');
$link = '';
foreach ($node->getElementsByTagName('link') as $linkElt) {
if ($linkElt->hasAttributes()) {
$relAttr = $linkElt->attributes->getNamedItem('rel');
if ($relAttr && $relAttr->value == 'alternate') {
$link = $linkElt->attributes->getNamedItem('href')->value;
break;
}
}
}
if ($link == '' && str_starts_with($guid, 'http')) $link = $guid;
$item = new FeedItem();
$item->guid = $guid;
$item->title = Feed::atomValue($node, 'title');
$item->link = $link;
$item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published'));
$item->updatedOn = Data::formatDate(Feed::atomValue($node, 'updated'));
$item->content = Feed::atomValue($node, 'content');
return $item;
}
/**
* Construct a feed item from an RSS feed's `<item>` tag
*
* @param DOMNode $node The XML node from which a feed item should be constructed
* @return FeedItem A feed item constructed from the given node
*/
public static function fromRSS(DOMNode $node): FeedItem {
$itemGuid = Feed::rssValue($node, 'guid');
$updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated');
$encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded');
$item = new FeedItem();
$item->guid = $itemGuid == 'guid not found' ? Feed::rssValue($node, 'link') : $itemGuid;
$item->title = Feed::rssValue($node, 'title');
$item->link = Feed::rssValue($node, 'link');
$item->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate'));
$item->updatedOn = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null);
$item->content = $encNodes->length > 0
? $encNodes->item(0)->textContent
: Feed::rssValue($node, 'description');
return $item;
}
}
/**
* Feed retrieval, parsing, and manipulation
*/
@ -105,6 +26,22 @@ class Feed {
/** @var string The XML namespace for XHTML */
public const string XHTML_NS = 'http://www.w3.org/1999/xhtml';
/** @var string The user agent for Feed Reader Central's refresh requests */
private const string USER_AGENT =
'FeedReaderCentral/' . FRC_VERSION . ' +https://bitbadger.solutions/open-source/feed-reader-central';
/** @var int Do not purge items */
public const int PURGE_NONE = 0;
/** @var int Purge all read items (will not purge unread items) */
public const int PURGE_READ = 1;
/** @var int Purge items older than the specified number of days */
public const int PURGE_BY_DAYS = 2;
/** @var int Purge items in number greater than the specified number of items to keep */
public const int PURGE_BY_COUNT = 3;
/**
* When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them
*
@ -114,7 +51,7 @@ class Feed {
* @throws DOMException If the error is a warning
*/
private static function xmlParseError(int $errno, string $errstr): bool {
if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXml()') > 0) {
if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXML()') > 0) {
throw new DOMException($errstr, $errno);
}
return false;
@ -161,15 +98,16 @@ class Feed {
private static function fromRSS(DOMDocument $xml, string $url): array {
$channel = $xml->getElementsByTagName('channel')->item(0);
if (!($channel instanceof DOMElement)) {
return ['error' => "Channel element not found ($channel->nodeType)"];
$type = $channel?->nodeType ?? -1;
return ['error' => "Channel element not found ($type)"];
}
// The Atom namespace provides a lastBuildDate, which contains the last time an item in the feed was updated; if
// that is not present, use the pubDate element instead
$updatedOn = self::rssValue($channel, 'lastBuildDate');
if ($updatedOn == 'lastBuildDate not found') {
$updatedOn = self::rssValue($channel, 'pubDate');
if ($updatedOn == 'pubDate not found') $updatedOn = null;
if (($updatedOn = self::rssValue($channel, 'lastBuildDate')) == 'lastBuildDate not found') {
if (($updatedOn = self::rssValue($channel, 'pubDate')) == 'pubDate not found') {
$updatedOn = null;
}
}
$feed = new Feed();
@ -181,6 +119,17 @@ class Feed {
return ['ok' => $feed];
}
/**
* Get an attribute value from a DOM node
*
* @param DOMNode $node The node with an attribute value to obtain
* @param string $name The name of the attribute whose value should be obtained
* @return string The attribute value if it exists, an empty string if not
*/
private static function attrValue(DOMNode $node, string $name): string {
return ($node->hasAttributes() ? $node->attributes->getNamedItem($name)?->value : null) ?? '';
}
/**
* Get the value of a child element by its tag name for an Atom feed
*
@ -196,7 +145,7 @@ class Feed {
if ($tags->length == 0) return "$tagName not found";
$tag = $tags->item(0);
if (!($tag instanceof DOMElement)) return $tag->textContent;
if ($tag->hasAttributes() && $tag->attributes->getNamedItem('type') == 'xhtml') {
if (self::attrValue($tag, 'type') == 'xhtml') {
$div = $tag->getElementsByTagNameNS(Feed::XHTML_NS, 'div');
if ($div->length == 0) return "-- invalid XHTML content --";
return $div->item(0)->textContent;
@ -212,9 +161,8 @@ class Feed {
* @return array|Feed[] ['ok' => feed]
*/
private static function fromAtom(DOMDocument $xml, string $url): array {
$root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);
$updatedOn = self::atomValue($root, 'updated');
if ($updatedOn == 'pubDate not found') $updatedOn = null;
$root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);
if (($updatedOn = self::atomValue($root, 'updated')) == 'pubDate not found') $updatedOn = null;
$feed = new Feed();
$feed->title = self::atomValue($root, 'title');
@ -225,6 +173,55 @@ class Feed {
return ['ok' => $feed];
}
/**
* Retrieve a document (http/https)
*
* @param string $url The URL of the document to retrieve
* @return array ['content' => document content, 'error' => error message, 'code' => HTTP response code,
* 'url' => effective URL]
*/
private static function retrieveDocument(string $url): array {
$docReq = curl_init($url);
curl_setopt($docReq, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($docReq, CURLOPT_RETURNTRANSFER, true);
curl_setopt($docReq, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($docReq, CURLOPT_TIMEOUT, 15);
curl_setopt($docReq, CURLOPT_USERAGENT, self::USER_AGENT);
$result = [
'content' => curl_exec($docReq),
'error' => curl_error($docReq),
'code' => curl_getinfo($docReq, CURLINFO_RESPONSE_CODE),
'url' => curl_getinfo($docReq, CURLINFO_EFFECTIVE_URL)
];
curl_close($docReq);
return $result;
}
/**
* Derive a feed URL from an HTML document
*
* @param string $content The HTML document content from which to derive a feed URL
* @return array|string[] ['ok' => feed URL] if successful, ['error' => message] if not
*/
private static function deriveFeedFromHTML(string $content): array {
$html = new DOMDocument();
$html->loadHTML(substr($content, 0, strpos($content, '</head>') + 7));
$headTags = $html->getElementsByTagName('head');
if ($headTags->length < 1) return ['error' => 'Cannot find feed at this URL'];
$head = $headTags->item(0);
foreach ($head->getElementsByTagName('link') as $link) {
if (self::attrValue($link, 'rel') == 'alternate') {
$type = self::attrValue($link, 'type');
if ($type == 'application/rss+xml' || $type == 'application/atom+xml') {
return ['ok' => self::attrValue($link, 'href')];
}
}
}
return ['error' => 'Cannot find feed at this URL'];
}
/**
* Retrieve the feed
*
@ -232,34 +229,33 @@ class Feed {
* @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
*/
public static function retrieveFeed(string $url): array {
$feedReq = curl_init($url);
curl_setopt($feedReq, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($feedReq, CURLOPT_RETURNTRANSFER, true);
curl_setopt($feedReq, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($feedReq, CURLOPT_TIMEOUT, 15);
$doc = self::retrieveDocument($url);
$feedContent = curl_exec($feedReq);
$result = array();
$error = curl_error($feedReq);
$code = curl_getinfo($feedReq, CURLINFO_RESPONSE_CODE);
if ($error) {
$result['error'] = $error;
} elseif ($code == 200) {
$parsed = self::parseFeed($feedContent);
if (array_key_exists('error', $parsed)) {
$result['error'] = $parsed['error'];
} else {
$extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0
? self::fromAtom(...) : self::fromRSS(...);
$result = $extract($parsed['ok'], curl_getinfo($feedReq, CURLINFO_EFFECTIVE_URL));
}
} else {
$result['error'] = "Prospective feed URL $url returned HTTP Code $code: $feedContent";
if ($doc['error'] != '') return ['error' => $doc['error']];
if ($doc['code'] != 200) {
return ['error' => "Prospective feed URL $url returned HTTP Code {$doc['code']}: {$doc['content']}"];
}
curl_close($feedReq);
return $result;
$start = strtolower(strlen($doc['content']) >= 9 ? substr($doc['content'], 0, 9) : $doc['content']);
if ($start == '<!doctype' || str_starts_with($start, '<html')) {
$derivedURL = self::deriveFeedFromHTML($doc['content']);
if (array_key_exists('error', $derivedURL)) return ['error' => $derivedURL['error']];
$feedURL = $derivedURL['ok'];
if (!str_starts_with($feedURL, 'http')) {
// Relative URL; feed should be retrieved in the context of the original URL
$original = parse_url($url);
$port = array_key_exists('port', $original) ? ":{$original['port']}" : '';
$feedURL = $original['scheme'] . '://' . $original['host'] . $port . $feedURL;
}
$doc = self::retrieveDocument($feedURL);
}
$parsed = self::parseFeed($doc['content']);
if (array_key_exists('error', $parsed)) return ['error' => $parsed['error']];
$extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0
? self::fromAtom(...) : self::fromRSS(...);
return $extract($parsed['ok'], $doc['url']);
}
/**
@ -268,8 +264,9 @@ class Feed {
* @param int $itemId The ID of the item to be updated
* @param FeedItem $item The item to be updated
* @param SQLite3 $db A database connection to use for the update
* @return bool|SQLite3Result The result if the update is successful, false if it failed
*/
private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): void {
private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): bool|SQLite3Result {
$query = $db->prepare(<<<'SQL'
UPDATE item
SET title = :title,
@ -284,7 +281,7 @@ class Feed {
$query->bindValue(':updated', $item->updatedOn);
$query->bindValue(':content', $item->content);
$query->bindValue(':id', $itemId);
$query->execute();
return $query->execute();
}
/**
@ -293,8 +290,9 @@ class Feed {
* @param int $feedId The ID of the feed to which the item should be added
* @param FeedItem $item The item to be added
* @param SQLite3 $db A database connection to use for the addition
* @return bool|SQLite3Result The result if the update is successful, false if it failed
*/
private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): void {
private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): bool|SQLite3Result {
$query = $db->prepare(<<<'SQL'
INSERT INTO item (
feed_id, item_guid, item_link, title, published_on, updated_on, content
@ -309,7 +307,7 @@ class Feed {
$query->bindValue(':published', $item->publishedOn);
$query->bindValue(':updated', $item->updatedOn);
$query->bindValue(':content', $item->content);
$query->execute();
return $query->execute();
}
/**
@ -317,56 +315,90 @@ class Feed {
*
* @param int $feedId The ID of the feed to which these items belong
* @param Feed $feed The extracted Atom or RSS feed items
* @param DateTimeInterface $lastChecked When this feed was last checked (only new items will be added)
* @return array ['ok' => true] if successful, ['error' => message] if not
*/
public static function updateItems(int $feedId, Feed $feed, SQLite3 $db): array {
try {
foreach ($feed->items as $item) {
public static function updateItems(int $feedId, Feed $feed, DateTimeInterface $lastChecked, SQLite3 $db): array {
$results =
array_map(function ($item) use ($db, $feedId) {
$existsQuery = $db->prepare(
'SELECT id, published_on, updated_on FROM item WHERE feed_id = :feed AND item_guid = :guid');
$existsQuery->bindValue(':feed', $feedId);
$existsQuery->bindValue(':guid', $item->guid);
$exists = $existsQuery->execute();
if ($exists) {
$existing = $exists->fetchArray(SQLITE3_ASSOC);
if ($existing) {
if ($exists = $existsQuery->execute()) {
if ($existing = $exists->fetchArray(SQLITE3_ASSOC)) {
if ( $existing['published_on'] != $item->publishedOn
|| ($existing['updated_on'] ?? '') != ($item->updatedOn ?? '')) {
self::updateItem($existing['id'], $item, $db);
if (!self::updateItem($existing['id'], $item, $db)) return Data::error($db);
}
} else {
self::addItem($feedId, $item, $db);
if (!self::addItem($feedId, $item, $db)) return Data::error($db);
}
} else {
throw new Exception($db->lastErrorMsg());
return Data::error($db);
}
return ['ok' => true];
}, array_filter($feed->items,
fn($it) => date_create_immutable($it->updatedOn ?? $it->publishedOn) >= $lastChecked));
$errors = array_map(fn($it) => $it['error'], array_filter($results, fn($it) => array_key_exists('error', $it)));
return sizeof($errors) > 0 ? ['error' => implode("\n", $errors)] : ['ok' => true];
}
/**
* Purge items for a feed
*
* @param int $feedId The ID of the feed to be purged
* @param SQLite3 $db The database connection on which items should be purged
* @return array|string[]|true[] ['ok' => true] if purging was successful, ['error' => message] if not
*/
private static function purgeItems(int $feedId, SQLite3 $db): array {
if (!array_search(PURGE_TYPE, [self::PURGE_READ, self::PURGE_BY_DAYS, self::PURGE_BY_COUNT])) {
return ['error' => 'Unrecognized purge type ' . PURGE_TYPE];
}
try {
$sql = match (PURGE_TYPE) {
self::PURGE_READ => 'AND is_read = 1',
self::PURGE_BY_DAYS => 'AND date(coalesce(updated_on, published_on)) < date(:oldest)',
self::PURGE_BY_COUNT => 'AND id IN (SELECT id FROM item WHERE feed_id = :feed
ORDER BY date(coalesce(updated_on, published_on)) DESC
LIMIT -1 OFFSET :keep)'
};
$purge = $db->prepare("DELETE FROM item WHERE feed_id = :feed AND is_bookmarked = 0 $sql");
$purge->bindValue(':feed', $feedId);
if (PURGE_TYPE == self::PURGE_BY_DAYS) {
$purge->bindValue(':oldest', Data::formatDate('-' . PURGE_NUMBER . ' day'));
} elseif (PURGE_TYPE == self::PURGE_BY_COUNT) {
$purge->bindValue(':keep', PURGE_NUMBER);
}
return $purge->execute() ? ['ok' => true] : Data::error($db);
} catch (Exception $ex) {
return ['error' => $ex->getMessage()];
}
return ['ok', true];
}
/**
* Refresh a feed
*
* @param int $feedId The ID of the feed to be refreshed
* @param string $url The URL of the feed to be refreshed
* @param SQLite3 $db A database connection to use to refresh the feed
* @return array|string[]|true[] ['ok' => true] if successful, ['error' => message] if not
*/
private static function refreshFeed(string $url, SQLite3 $db): array {
$feedQuery = $db->prepare('SELECT id FROM feed WHERE url = :url AND user_id = :user');
$feedQuery->bindValue(':url', $url);
$feedQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
$feedResult = $feedQuery->execute();
$feedId = $feedResult ? $feedResult->fetchArray(SQLITE3_NUM)[0] : -1;
if ($feedId < 0) return ['error' => "No feed for URL $url found"];
public static function refreshFeed(int $feedId, string $url, SQLite3 $db): array {
$feedRetrieval = self::retrieveFeed($url);
if (array_key_exists('error', $feedRetrieval)) return $feedRetrieval;
$feed = $feedRetrieval['ok'];
$feedExtract = self::retrieveFeed($url);
if (array_key_exists('error', $feedExtract)) return $feedExtract;
$lastCheckedQuery = $db->prepare('SELECT checked_on FROM feed where id = :id');
$lastCheckedQuery->bindValue(':id', $feedId);
if (!($lastCheckedResult = $lastCheckedQuery->execute())) return Data::error($db);
if (!($lastChecked = date_create_immutable($lastCheckedResult->fetchArray(SQLITE3_NUM)[0] ?? WWW_EPOCH))) {
return ['error' => 'Could not derive date last checked for feed'];
}
$feed = $feedExtract['ok'];
$itemUpdate = self::updateItems($feedId, $feed, $db);
$itemUpdate = self::updateItems($feedId, $feed, $lastChecked, $db);
if (array_key_exists('error', $itemUpdate)) return $itemUpdate;
$urlUpdate = $url == $feed->url ? '' : ', url = :url';
@ -383,9 +415,9 @@ class Feed {
$feedUpdate->bindValue(':checked', Data::formatDate('now'));
$feedUpdate->bindValue(':id', $feedId);
if ($urlUpdate != '') $feedUpdate->bindValue(':url', $feed->url);
$feedUpdate->execute();
if (!$feedUpdate->execute()) return Data::error($db);
return ['ok' => true];
return PURGE_TYPE == self::PURGE_NONE ? ['ok' => true] : self::purgeItems($feedId, $db);
}
/**
@ -398,22 +430,30 @@ class Feed {
$feedExtract = self::retrieveFeed($url);
if (array_key_exists('error', $feedExtract)) return $feedExtract;
$feed = $feedExtract['ok'];
$feed = $feedExtract['ok'];
$existsQuery = $db->prepare('SELECT COUNT(*) FROM feed WHERE user_id = :user AND url = :url');
$existsQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
$existsQuery->bindValue(':url', $feed->url);
if (!($exists = $existsQuery->execute())) return Data::error($db);
if ($exists->fetchArray(SQLITE3_NUM)[0] > 0) return ['error' => "Already subscribed to feed $feed->url"];
$query = $db->prepare(<<<'SQL'
INSERT INTO feed (user_id, url, title, updated_on, checked_on)
VALUES (:user, :url, :title, :updated, :checked)
INSERT INTO feed (
user_id, url, title, updated_on, checked_on
) VALUES (
:user, :url, :title, :updated, :checked
)
SQL);
$query->bindValue(':user', $_SESSION[Key::USER_ID]);
$query->bindValue(':url', $feed->url);
$query->bindValue(':title', $feed->title);
$query->bindValue(':updated', $feed->updatedOn);
$query->bindValue(':checked', Data::formatDate('now'));
$result = $query->execute();
if (!$query->execute()) return Data::error($db);
$feedId = $result ? $db->lastInsertRowID() : -1;
if ($feedId < 0) return ['error' => $db->lastErrorMsg()];
$result = self::updateItems($feedId, $feed, $db);
$feedId = $db->lastInsertRowID();
$result = self::updateItems($feedId, $feed, date_create_immutable(WWW_EPOCH), $db);
if (array_key_exists('error', $result)) return $result;
return ['ok' => $feedId];
@ -432,31 +472,46 @@ class Feed {
$query->bindValue(':url', $url);
$query->bindValue(':id', $existing['id']);
$query->bindValue(':user', $_SESSION[Key::USER_ID]);
$query->execute();
if (!$query->execute()) return Data::error($db);
return self::refreshFeed($url, $db);
return self::refreshFeed($existing['id'], $url, $db);
}
/**
* Retrieve all feeds, optionally for a specific user
*
* @param SQLite3 $db The database connection to use to retrieve the feeds
* @param int $user The ID of the user whose feeds should be retrieved (optional, defaults to all feeds)
* @return array An array of arrays with ['id', 'url', 'email'] keys
*/
public static function retrieveAll(SQLite3 $db, int $user = 0): array {
$extraSQL = $user > 0 ? ' WHERE u.id = :user' : '';
$query = $db->prepare(
"SELECT f.id, f.url, u.email FROM feed f INNER JOIN frc_user u ON u.id = f.user_id$extraSQL");
if ($user > 0) $query->bindValue(':user', $user);
if (!($result = $query->execute())) return Data::error($db);
$feeds = [];
while ($feed = $result->fetchArray(SQLITE3_ASSOC)) $feeds[] = $feed;
return $feeds;
}
/**
* Refresh all feeds
*
* @param SQLite3 $db The database connection to use for refreshing feeds
* @return array|true[] ['ok => true] if successful, ['error' => message] if not (may have multiple error lines)
* @return array|true[]|string[] ['ok' => true] if successful,
* ['error' => message] if not (may have multiple error lines)
*/
public static function refreshAll(SQLite3 $db): array {
$query = $db->prepare('SELECT url FROM feed WHERE user_id = :user');
$query->bindValue(':user', $_SESSION[Key::USER_ID]);
$result = $query->execute();
$url = $result ? $result->fetchArray(SQLITE3_NUM) : false;
if ($url) {
$errors = array();
while ($url) {
$updateResult = self::refreshFeed($url[0], $db);
if (array_key_exists('error', $updateResult)) $errors[] = $updateResult['error'];
$url = $result->fetchArray(SQLITE3_NUM);
}
return sizeof($errors) == 0 ? ['ok' => true] : ['error' => implode("\n", $errors)];
}
return ['error' => $db->lastErrorMsg()];
$feeds = self::retrieveAll($db, $_SESSION[Key::USER_ID]);
if (array_key_exists('error', $feeds)) return $feeds;
$errors = [];
array_walk($feeds, function ($feed) use ($db, &$errors) {
$result = self::refreshFeed($feed['id'], $feed['url'], $db);
if (array_key_exists('error', $result)) $errors[] = $result['error'];
});
return sizeof($errors) == 0 ? ['ok' => true] : ['error' => implode("\n", $errors)];
}
}

80
src/lib/FeedItem.php Normal file
View File

@ -0,0 +1,80 @@
<?php
/**
* Information for a feed item
*/
class FeedItem {
/** @var string The title of the feed item */
public string $title = '';
/** @var string The unique ID for the feed item */
public string $guid = '';
/** @var string The link to the original content */
public string $link = '';
/** @var string When this item was published */
public string $publishedOn = '';
/** @var ?string When this item was last updated */
public ?string $updatedOn = null;
/** @var string The content for the item */
public string $content = '';
/**
* Construct a feed item from an Atom feed's `<entry>` tag
*
* @param DOMNode $node The XML node from which a feed item should be constructed
* @return FeedItem A feed item constructed from the given node
*/
public static function fromAtom(DOMNode $node): FeedItem {
$guid = Feed::atomValue($node, 'id');
$link = '';
foreach ($node->getElementsByTagName('link') as $linkElt) {
if ($linkElt->hasAttributes()) {
$relAttr = $linkElt->attributes->getNamedItem('rel');
if ($relAttr && $relAttr->value == 'alternate') {
$link = $linkElt->attributes->getNamedItem('href')->value;
break;
}
}
}
if ($link == '' && str_starts_with($guid, 'http')) $link = $guid;
$item = new FeedItem();
$item->guid = $guid;
$item->title = Feed::atomValue($node, 'title');
$item->link = $link;
$item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published'));
$item->updatedOn = Data::formatDate(Feed::atomValue($node, 'updated'));
$item->content = Feed::atomValue($node, 'content');
return $item;
}
/**
* Construct a feed item from an RSS feed's `<item>` tag
*
* @param DOMNode $node The XML node from which a feed item should be constructed
* @return FeedItem A feed item constructed from the given node
*/
public static function fromRSS(DOMNode $node): FeedItem {
$itemGuid = Feed::rssValue($node, 'guid');
$updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated');
$encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded');
$item = new FeedItem();
$item->guid = $itemGuid == 'guid not found' ? Feed::rssValue($node, 'link') : $itemGuid;
$item->title = Feed::rssValue($node, 'title');
$item->link = Feed::rssValue($node, 'link');
$item->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate'));
$item->updatedOn = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null);
$item->content = $encNodes->length > 0
? $encNodes->item(0)->textContent
: Feed::rssValue($node, 'description');
return $item;
}
}

1
src/public/assets/htmx.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -43,6 +43,38 @@ header {
main {
padding: 0 .5rem;
.refresh, .loading {
font-style: italic;
font-size: .9rem;
}
.htmx-request .refresh {
display: none;
}
.loading {
display: none;
}
.htmx-request .loading {
display: inline;
}
.user_messages {
display: flex;
flex-flow: column;
justify-content: center;
}
.user_message {
margin: .25rem auto;
border: solid 1px navy;
border-radius: .5rem;
background-color: rgba(255, 255, 255, .75);
padding: .25rem;
}
.user_messages + h1 {
margin-top: .25rem;
}
.item_heading {
margin-bottom: 0;
}
@ -78,7 +110,7 @@ input[type=url],
input[type=text],
input[type=email],
input[type=password] {
width: 50%;
width: 40%;
font-size: 1rem;
padding: .25rem;
border-radius: .25rem;

View File

@ -10,6 +10,7 @@ page_head('Documentation'); ?>
<p><a href=./the-cli>About the CLI</a> provides orientation on Feed Reader Central&rsquo;s command line interface
<p><a href=./security-modes>Configuring Security Modes</a> describes the three security modes and how to manage each
of them
<p><a href=./refresh-feeds>Refresh Feeds</a> has instructions on how feeds can be refreshed on a schedule
</article><?php
page_foot();
$db->close();

View File

@ -0,0 +1,52 @@
<?php
include '../../start.php';
$db = Data::getConnection();
Security::verifyUser($db, redirectIfAnonymous: false);
page_head('Refresh Feeds | Documentation'); ?>
<h1>Refresh Feeds</h1>
<p class=back-link><a href=./>&lang;&lang; Documentation Home</a>
<article class=docs>
<h2>Manual Feed Refresh</h2>
<p>Next to the &ldquo;Your Unread Items&rdquo; heading on the main page, there is a link labeled &ldquo;Refresh All
Feeds&rdquo;. Clicking this link will reload the main page once the feeds have been refreshed. Depending on the
number and size of feeds, this may take a bit of time; each feed is refreshed individually.
<h2>Automatic Refreshing</h2>
<p>The <code>refresh</code> utility script will perform this refresh from the CLI. As it runs, it will list the
feeds as it processes them, and if it encounters any errors, that is noted as well. This process can be
automated via <code>cron</code> on Linux or Mac systems. This is most easily implemented by writing a small
shell script to provide some environment settings, then telling <code>cron</code> to run that script.
<pre class=item_content>
#!/bin/bash
exec 1> >(logger -t feed-reader-central) 2>&1
cd /path/to/frc
php-cli util/refresh.php all</pre>
<p>Save this (<code>frc-refresh.sh</code> might be a good name) and be sure it is executable
(<code>chmod +x ./frc-refresh.sh</code>). Before we put it in crontab, though, let&rsquo;s understand what each
line does:
<ul>
<li>Line 1 tells the operating system to use the <code>bash</code> shell.
<li>Line 2 directs all output to the system log (<code>/var/log/syslog</code>), labeling each entry with
<code>feed-reader-central</code>. This lets you review the output for its runs in a log that is already
maintained and rotated by the operating system.
<li>Line 3 changes the current directory to the one where Feed Reader Central is installed; modify it for where
you have installed it. Since we are setting up for a <a href=./the-cli>CLI execution</a>, this should place
us one directory up from <code>/public</code>.
<li>Line 4 executes the refresh script.
</ul>
<p>Finally, we are ready to add this to our crontab. Enter <code>crontab -e</code> to edit the file, then add a row
at the bottom that looks like this:
<pre class=item_content>
0 */6 * * * /path/to/job/frc-refresh.sh</pre>
<p>The items before the path specify when it should run. This example will run at the top of the hour every six
hours. Crontab schedules can be tricky to create; a full treatment is outside the scope of this documentation.
However, <a href=https://crontab.guru/#0_*/6_*_*_* target=_blank rel=noopener title="Crontab.guru">this site</a>
lets you put values in each position and it translates that to words; this lets you see if what you put is what
you meant.
<p>This should not require many resources; the majority of its time will be spent waiting for the websites to return
their feeds so it can process them. However, if you want it to yield to everything else happening on the server,
add <code>nice -n 1</code> (with a trailing space) before the path to the script.
</article><?php
page_foot();
$db->close();

View File

@ -32,13 +32,17 @@ $query->bindValue(':userId', $_SESSION[Key::USER_ID]);
$result = $query->execute();
$item = $result ? $result->fetchArray(SQLITE3_ASSOC) : false;
page_head('Welcome'); ?>
<h1>Your Unread Items &nbsp; <a href=/?refresh><small><small><em>(Refresh All Feeds)</em></small></small></a></h1>
page_head('Your Unread Items'); ?>
<h1>
Your Unread Items &nbsp;
<a class=refresh href=/?refresh hx-get=/?refresh hx-indicator="closest h1">(Refresh All Feeds)</a>
<span class=loading>Refreshing&hellip;</span>
</h1>
<article><?php
if ($item) {
while ($item) { ?>
<p><a href=/item?id=<?=$item['id']?>><?=strip_tags($item['item_title'])?></a><br>
<?=htmlentities($item['feed_title'])?><br><small><em><?=date_time($item['as_of'])?></em></small><?php
<p><a href=/item?id=<?=$item['id']?> hx-get=/item?id=<?=$item['id']?>><?=strip_tags($item['item_title'])?></a>
<br><?=htmlentities($item['feed_title'])?><br><small><em><?=date_time($item['as_of'])?></em></small><?php
$item = $result->fetchArray(SQLITE3_ASSOC);
}
} else { ?>

View File

@ -30,6 +30,26 @@ if ($_SERVER['REQUEST_METHOD'] == 'POST') {
frc_redirect('/');
}
if ($_SERVER['REQUEST_METHOD'] == 'DELETE') {
$deleteQuery = $db->prepare(<<<'SQL'
DELETE FROM item
WHERE id IN (
SELECT item.id
FROM item INNER JOIN feed ON feed.id = item.feed_id
WHERE item.id = :id
AND feed.user_id = :user)
SQL);
$deleteQuery->bindValue(':id', $_GET['id']);
$deleteQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
if ($deleteQuery->execute()) {
add_info('Item deleted');
} else {
add_error(Data::error($db)['error']);
}
$db->close();
frc_redirect('/');
}
$query = $db->prepare(<<<'SQL'
SELECT item.title AS item_title, item.item_link, item.published_on, item.updated_on, item.content,
feed.title AS feed_title
@ -61,10 +81,11 @@ page_head(htmlentities("{$item['item_title']} | {$item['feed_title']}")); ?>
</div>
<article>
<div class=item_content><?=str_replace('<a ', '<a target=_blank rel=noopener ', $item['content'])?></div>
<form class=action_buttons action=/item method=POST>
<form class=action_buttons action=/item method=POST hx-post=/item>
<input type=hidden name=id value=<?=$_GET['id']?>>
<a href="/">Done</a>
<a href=/ hx-get="/">Done</a>
<button type=submit>Keep as New</button>
<button type=button hx-delete=/item>Delete</button>
</form>
</article><?php
page_foot();

View File

@ -18,7 +18,7 @@ $isSingle = SECURITY_MODEL == Security::SINGLE_USER_WITH_PASSWORD;
page_head('Log On'); ?>
<h1>Log On</h1>
<article>
<form method=POST action=/user/log-on hx-post=/user/log-on><?php
<form method=POST action=/user/log-on><?php
if (($_GET['returnTo'] ?? '') != '') { ?>
<input type=hidden name=returnTo value="<?=$_GET['returnTo']?>"><?php
}

View File

@ -16,8 +16,8 @@ session_start([
* @param string $message The message itself
*/
function add_message(string $level, string $message): void {
if (!array_key_exists(Key::USER_MSG, $_REQUEST)) $_REQUEST[Key::USER_MSG] = array();
$_REQUEST[Key::USER_MSG][] = ['level' => $level, 'message' => $message];
if (!array_key_exists(Key::USER_MSG, $_SESSION)) $_SESSION[Key::USER_MSG] = array();
$_SESSION[Key::USER_MSG][] = ['level' => $level, 'message' => $message];
}
/**
@ -38,49 +38,75 @@ function add_info(string $message): void {
add_message('INFO', $message);
}
/** @var bool $is_htmx True if this request was initiated by htmx, false if not */
$is_htmx = array_key_exists('HTTP_HX_REQUEST', $_SERVER)
&& !array_key_exists('HTTP_HX_HISTORY_RESTORE_REQUEST', $_SERVER);
/**
* Render the page title
* @param string $title The title of the page being displayed
*/
function page_head(string $title): void {
global $is_htmx;
$version = match (true) {
str_ends_with(FRC_VERSION, '.0.0') => substr(FRC_VERSION, 0, strlen(FRC_VERSION) - 4),
str_ends_with(FRC_VERSION, '.0') => substr(FRC_VERSION, 0, strlen(FRC_VERSION) - 2),
default => FRC_VERSION
} ?>
};
//if ($is_htmx) header('HX-Push-Url: true');
?>
<!DOCTYPE html>
<html lang=en>
<head>
<meta name=viewport content="width=device-width, initial-scale=1">
<title><?=$title?> | Feed Reader Central</title>
<link href=/assets/style.css rel=stylesheet>
<title><?=$title?> | Feed Reader Central</title><?php
if (!$is_htmx) { ?>
<meta name=viewport content="width=device-width, initial-scale=1">
<link href=/assets/style.css rel=stylesheet><?php
} ?>
</head>
<body>
<header>
<div><a class=title href="/">Feed Reader Central</a><span class=version>v<?=$version?></span></div>
<div><?php
if (array_key_exists(Key::USER_ID, $_SESSION)) {
echo '<a href=/feed?id=new>Add Feed</a> | <a href=/docs/>Docs</a> | <a href=/user/log-off>Log Off</a>';
if ($_SESSION[Key::USER_EMAIL] != Security::SINGLE_USER_EMAIL) echo " | {$_SESSION[Key::USER_EMAIL]}";
} else {
echo '<a href=/user/log-on>Log On</a> | <a href=/docs/>Docs</a>';
} ?>
</div>
</header>
<main hx-target=this><?php
foreach ($_REQUEST[Key::USER_MSG] ?? [] as $msg) { ?>
<div>
<?=$msg['level'] == 'INFO' ? '' : "<strong>{$msg['level']}</strong><br>"?>
<?=$msg['message']?>
</div><?php
<body><?php
if (!$is_htmx) { ?>
<header hx-target=#main hx-push-url=true>
<div><a class=title href=/ hx-get="/">Feed Reader Central</a><span class=version>v<?=$version?></span></div>
<div><?php
if (array_key_exists(Key::USER_ID, $_SESSION)) { ?>
<a href=/feed?id=new hx-get=/feed?id=new>Add Feed</a> |
<a href=/docs/ hx-get=/docs/>Docs</a> |
<a href=/user/log-off hx-get=/user/log-off>Log Off</a><?php
if ($_SESSION[Key::USER_EMAIL] != Security::SINGLE_USER_EMAIL) { ?>
| <?=$_SESSION[Key::USER_EMAIL]?><?php
}
} else { ?>
<a href=/user/log-on hx-get=/user/log-on>Log On</a> | <a href=/docs/ hx-get=/docs/>Docs</a><?php
} ?>
</div>
</header>
<main id=main hx-target=this hx-push-url=true hx-swap="innerHTML show:window:top"><?php
}
if (sizeof($messages = $_SESSION[Key::USER_MSG] ?? []) > 0) { ?>
<div class=user_messages><?php
array_walk($messages, function ($msg) { ?>
<div class=user_message>
<?=$msg['level'] == 'INFO' ? '' : "<strong>{$msg['level']}</strong><br>"?>
<?=$msg['message']?>
</div><?php
}); ?>
</div><?php
$_SESSION[Key::USER_MSG] = [];
}
}
/**
* Render the end of the page
*/
function page_foot(): void {
echo '</main></body></html>';
global $is_htmx; ?>
</main><?php
if (!$is_htmx) { ?>
<script src=/assets/htmx.min.js></script><?php
} ?>
</body>
</html><?php
session_commit();
}

View File

@ -25,3 +25,17 @@ const DATABASE_NAME = 'frc.db';
* The default, 'F j, Y \a\t g:ia', equates to "August 17, 2023 at 4:45pm"
*/
const DATE_TIME_FORMAT = 'F j, Y \a\t g:ia';
/**
* How should item purging be done? (Purging never applies to bookmarked items.) Options are:
* - Feed::PURGE_NONE - Do not purge items
* - Feed::PURGE_READ - Purge all read items whenever purging is run (will not purge unread items)
* - Feed::PURGE_BY_DAYS - Purge read and unread items older than a number of days (PURGE_NUMBER below)
* - Feed::PURGE_BY_COUNT - Purge read and unread items beyond the number to keep (PURGE_NUMBER below)
*/
const PURGE_TYPE = Feed::PURGE_BY_DAYS;
/**
* For purge-by-days, how many days of items should be kept; for purge-by-count, how many items should be kept
*/
const PURGE_NUMBER = 30;

53
src/util/refresh.php Normal file
View File

@ -0,0 +1,53 @@
<?php
use JetBrains\PhpStorm\NoReturn;
require __DIR__ . '/../cli-start.php';
cli_title('FEED REFRESH');
if ($argc < 2) display_help();
switch ($argv[1]) {
case 'all':
refresh_all();
break;
default:
printfn('Unrecognized option "%s"', $argv[1]);
display_help();
}
/**
* Display the options for this utility and exit
*/
#[NoReturn]
function display_help(): void {
printfn('Options:');
printfn(' - all');
printfn(' Refreshes all feeds');
exit(0);
}
function refresh_all(): void {
$db = Data::getConnection();
try {
$feeds = Feed::retrieveAll($db);
if (array_key_exists('error', $feeds)) {
printfn('SQLite error: %s', $feeds['error']);
return;
}
array_walk($feeds, function ($feed) use ($db) {
$result = Feed::refreshFeed($feed['id'], $feed['url'], $db);
if (array_key_exists('error', $result)) {
printfn('ERR (%s) %s', $feed['email'], $feed['url']);
printfn(' %s', $result['error']);
} else {
printfn('OK (%s) %s', $feed['email'], $feed['url']);
}
});
printfn(PHP_EOL . 'All feeds refreshed');
} finally {
$db->close();
}
}