Add pruning support (#12)

This commit is contained in:
Daniel J. Summers 2024-04-29 23:01:49 -04:00
parent 473dded4f9
commit d8ba178c55
4 changed files with 126 additions and 47 deletions

View File

@ -42,4 +42,12 @@ Data is stored under the `/src/data` directory, and the default database name is
### Date/Time Format
The default format for dates and times looks like "May 28, 2023 at 3:15pm". Changing the string there will alter the display on the main page and when reading an item. Any [PHP date or time format token](https://www.php.net/manual/en/datetime.format.php) is supported.
The default format for dates and times looks like "May 28, 2023 at 3:15pm". Changing the string there will alter the display on the main page and when reading an item. Any [PHP date or time format token](https://www.php.net/manual/en/datetime.format.php) is supported.
### Item Purging
Feed Reader Central tries to keep the database tidy by purging items that have been read and are no longer required. There are three variants:
- `Feed::PURGE_READ` purges non-bookmarked read items for a feed whenever it is refreshed. This is the most aggressive purging strategy, but it is also the only one that will not purge unread items.
- `Feed::PURGE_BY_DAYS` purges non-bookmarked items that are more than `PURGE_NUMBER` days old. This is the default purge type, and `PURGE_NUMBER`'s default value is 30; items will be kept for 30 days, read or unread.
- `Feed::PURGE_BY_COUNT` purges items to preserve at most `PURGE_NUMBER` non-bookmarked items for each feed.

View File

@ -98,4 +98,14 @@ class Data {
if (is_null($dbConn)) $db->close();
}
}
/**
 * Build an error result array from the most recent SQLite error on a connection
 *
 * @param SQLite3 $db The database connection that reported the failure
 * @return string[] ['error' => message], where message is the connection's last error text prefixed with
 *                  "SQLite error: "
 */
public static function error(SQLite3 $db): array {
    $message = $db->lastErrorMsg();
    return ['error' => 'SQLite error: ' . $message];
}
}

View File

@ -30,6 +30,15 @@ class Feed {
private const string USER_AGENT =
'FeedReaderCentral/' . FRC_VERSION . ' +https://bitbadger.solutions/open-source/feed-reader-central';
/** @var int Purge strategy: delete read, non-bookmarked items (this strategy never purges unread items) */
public const int PURGE_READ = 1;
/** @var int Purge strategy: delete non-bookmarked items older than the specified number of days (PURGE_NUMBER) */
public const int PURGE_BY_DAYS = 2;
/** @var int Purge strategy: delete non-bookmarked items beyond the specified number of items to keep (PURGE_NUMBER) */
public const int PURGE_BY_COUNT = 3;
/**
* When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them
*
@ -86,15 +95,16 @@ class Feed {
private static function fromRSS(DOMDocument $xml, string $url): array {
$channel = $xml->getElementsByTagName('channel')->item(0);
if (!($channel instanceof DOMElement)) {
return ['error' => "Channel element not found ($channel->nodeType)"];
$type = $channel?->nodeType ?? -1;
return ['error' => "Channel element not found ($type)"];
}
// The Atom namespace provides a lastBuildDate, which contains the last time an item in the feed was updated; if
// that is not present, use the pubDate element instead
$updatedOn = self::rssValue($channel, 'lastBuildDate');
if ($updatedOn == 'lastBuildDate not found') {
$updatedOn = self::rssValue($channel, 'pubDate');
if ($updatedOn == 'pubDate not found') $updatedOn = null;
if (($updatedOn = self::rssValue($channel, 'lastBuildDate')) == 'lastBuildDate not found') {
if (($updatedOn = self::rssValue($channel, 'pubDate')) == 'pubDate not found') {
$updatedOn = null;
}
}
$feed = new Feed();
@ -148,9 +158,8 @@ class Feed {
* @return array|Feed[] ['ok' => feed]
*/
private static function fromAtom(DOMDocument $xml, string $url): array {
$root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);
$updatedOn = self::atomValue($root, 'updated');
if ($updatedOn == 'pubDate not found') $updatedOn = null;
$root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);
if (($updatedOn = self::atomValue($root, 'updated')) == 'pubDate not found') $updatedOn = null;
$feed = new Feed();
$feed->title = self::atomValue($root, 'title');
@ -252,8 +261,9 @@ class Feed {
* @param int $itemId The ID of the item to be updated
* @param FeedItem $item The item to be updated
* @param SQLite3 $db A database connection to use for the update
* @return bool|SQLite3Result The result if the update is successful, false if it failed
*/
private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): void {
private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): bool|SQLite3Result {
$query = $db->prepare(<<<'SQL'
UPDATE item
SET title = :title,
@ -268,7 +278,7 @@ class Feed {
$query->bindValue(':updated', $item->updatedOn);
$query->bindValue(':content', $item->content);
$query->bindValue(':id', $itemId);
$query->execute();
return $query->execute();
}
/**
@ -277,8 +287,9 @@ class Feed {
* @param int $feedId The ID of the feed to which the item should be added
* @param FeedItem $item The item to be added
* @param SQLite3 $db A database connection to use for the addition
* @return bool|SQLite3Result The result if the update is successful, false if it failed
*/
private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): void {
private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): bool|SQLite3Result {
$query = $db->prepare(<<<'SQL'
INSERT INTO item (
feed_id, item_guid, item_link, title, published_on, updated_on, content
@ -293,7 +304,7 @@ class Feed {
$query->bindValue(':published', $item->publishedOn);
$query->bindValue(':updated', $item->updatedOn);
$query->bindValue(':content', $item->content);
$query->execute();
return $query->execute();
}
/**
@ -304,31 +315,71 @@ class Feed {
* @return array ['ok' => true] if successful, ['error' => message] if not
*/
public static function updateItems(int $feedId, Feed $feed, SQLite3 $db): array {
    // Do not add items that are older than the oldest we currently have; this keeps us from re-adding items that
    // have been purged already. Bookmarked items are ignored when finding that oldest date.
    $oldestQuery = $db->prepare(
        'SELECT MIN(coalesce(updated_on, published_on)) FROM item where feed_id = :feed AND is_bookmarked = 0');
    $oldestQuery->bindValue(':feed', $feedId);
    if (!($oldest = $oldestQuery->execute())) return Data::error($db);
    // When the query yields no date (NULL), fall back to a fixed 1993 date as the cut-off
    $minDate = date_create_immutable($oldest->fetchArray(SQLITE3_NUM)[0] ?? '1993-04-30T00:00:00+00:00');

    foreach ($feed->items as $item) {
        // Skip anything dated before the cut-off
        if (date_create_immutable($item->updatedOn ?? $item->publishedOn) < $minDate) continue;

        $existsQuery = $db->prepare(
            'SELECT id, published_on, updated_on FROM item WHERE feed_id = :feed AND item_guid = :guid');
        $existsQuery->bindValue(':feed', $feedId);
        $existsQuery->bindValue(':guid', $item->guid);
        if (!($exists = $existsQuery->execute())) return Data::error($db);

        if ($existing = $exists->fetchArray(SQLITE3_ASSOC)) {
            // Known item: only write when its published or updated date differs from what is stored
            $changed = $existing['published_on'] != $item->publishedOn
                || ($existing['updated_on'] ?? '') != ($item->updatedOn ?? '');
            if ($changed && !self::updateItem($existing['id'], $item, $db)) return Data::error($db);
        } elseif (!self::addItem($feedId, $item, $db)) {
            // New item that could not be inserted
            return Data::error($db);
        }
    }

    // Keyed result, matching the ['ok' => true] shape returned elsewhere in this class
    return ['ok' => true];
}
/**
 * Purge items for a feed
 *
 * Deletes non-bookmarked items for the feed according to the configured PURGE_TYPE:
 * - PURGE_READ deletes items flagged as read
 * - PURGE_BY_DAYS deletes items whose updated (or, if absent, published) date is before PURGE_NUMBER days ago
 * - PURGE_BY_COUNT deletes items that fall after the first PURGE_NUMBER rows, newest first
 *
 * @param int $feedId The ID of the feed to be purged
 * @param SQLite3 $db The database connection on which items should be purged
 * @return array|string[]|true[] ['ok' => true] if purging was successful, ['error' => message] if not
 */
private static function purgeItems(int $feedId, SQLite3 $db): array {
    // Map the configured strategy to its extra WHERE criteria. An unknown strategy maps to null; this replaces a
    // falsy array_search() test that rejected PURGE_READ because it sits at index 0 of the list of valid types.
    $criteria = match (PURGE_TYPE) {
        self::PURGE_READ     => 'AND is_read = 1',
        self::PURGE_BY_DAYS  => 'AND date(coalesce(updated_on, published_on)) < date(:oldest)',
        // NOTE(review): this sub-select does not filter on is_bookmarked, so bookmarked items occupy slots in the
        // "keep" window -- confirm that is intended
        self::PURGE_BY_COUNT => 'AND id IN (SELECT id FROM item WHERE feed_id = :feed
                                             ORDER BY date(coalesce(updated_on, published_on)) DESC
                                             LIMIT -1 OFFSET :keep)',
        default              => null,
    };
    if ($criteria === null) return ['error' => 'Unrecognized purge type ' . PURGE_TYPE];

    try {
        $purge = $db->prepare("DELETE FROM item WHERE feed_id = :feed AND is_bookmarked = 0 $criteria");
        // prepare() returns false on failure; report it instead of calling methods on false
        if ($purge === false) return Data::error($db);

        $purge->bindValue(':feed', $feedId);
        if (PURGE_TYPE === self::PURGE_BY_DAYS) {
            $purge->bindValue(':oldest', Data::formatDate('-' . PURGE_NUMBER . ' day'));
        } elseif (PURGE_TYPE === self::PURGE_BY_COUNT) {
            $purge->bindValue(':keep', PURGE_NUMBER);
        }
        return $purge->execute() ? ['ok' => true] : Data::error($db);
    } catch (Exception $ex) {
        return ['error' => $ex->getMessage()];
    }
}
/**
@ -362,9 +413,9 @@ class Feed {
$feedUpdate->bindValue(':checked', Data::formatDate('now'));
$feedUpdate->bindValue(':id', $feedId);
if ($urlUpdate != '') $feedUpdate->bindValue(':url', $feed->url);
$feedUpdate->execute();
if (!$feedUpdate->execute()) return Data::error($db);
return ['ok' => true];
return self::purgeItems($feedId, $db);
}
/**
@ -382,26 +433,24 @@ class Feed {
$existsQuery = $db->prepare('SELECT COUNT(*) FROM feed WHERE user_id = :user AND url = :url');
$existsQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
$existsQuery->bindValue(':url', $feed->url);
$existsResult = $existsQuery->execute();
if (!$existsResult) return ['error' => 'SQLite error: ' . $db->lastErrorMsg()];
$exists = $existsResult->fetchArray(SQLITE3_NUM);
if ($exists[0] != 0) return ['error' => "Already subscribed to feed $feed->url"];
if (!($exists = $existsQuery->execute())) return Data::error($db);
if ($exists->fetchArray(SQLITE3_NUM)[0] > 0) return ['error' => "Already subscribed to feed $feed->url"];
$query = $db->prepare(<<<'SQL'
INSERT INTO feed (user_id, url, title, updated_on, checked_on)
VALUES (:user, :url, :title, :updated, :checked)
INSERT INTO feed (
user_id, url, title, updated_on, checked_on
) VALUES (
:user, :url, :title, :updated, :checked
)
SQL);
$query->bindValue(':user', $_SESSION[Key::USER_ID]);
$query->bindValue(':url', $feed->url);
$query->bindValue(':title', $feed->title);
$query->bindValue(':updated', $feed->updatedOn);
$query->bindValue(':checked', Data::formatDate('now'));
$result = $query->execute();
$feedId = $result ? $db->lastInsertRowID() : -1;
if ($feedId < 0) return ['error' => $db->lastErrorMsg()];
if (!$query->execute()) return Data::error($db);
$feedId = $db->lastInsertRowID();
$result = self::updateItems($feedId, $feed, $db);
if (array_key_exists('error', $result)) return $result;
@ -421,7 +470,7 @@ class Feed {
$query->bindValue(':url', $url);
$query->bindValue(':id', $existing['id']);
$query->bindValue(':user', $_SESSION[Key::USER_ID]);
$query->execute();
if (!$query->execute()) return Data::error($db);
return self::refreshFeed($existing['id'], $url, $db);
}
@ -438,8 +487,7 @@ class Feed {
$query = $db->prepare(
"SELECT f.id, f.url, u.email FROM feed f INNER JOIN frc_user u ON u.id = f.user_id$extraSQL");
if ($user > 0) $query->bindValue(':user', $user);
$result = $query->execute();
if (!$result) return ['error', $db->lastErrorMsg()];
if (!($result = $query->execute())) return Data::error($db);
$feeds = [];
while ($feed = $result->fetchArray(SQLITE3_ASSOC)) $feeds[] = $feed;
return $feeds;

View File

@ -25,3 +25,16 @@ const DATABASE_NAME = 'frc.db';
* The default, 'F j, Y \a\t g:ia', equates to "August 17, 2023 at 4:45pm"
*/
const DATE_TIME_FORMAT = 'F j, Y \a\t g:ia';
/**
 * How should item purging be done? (Purging never applies to bookmarked items.) Options are:
 * - Feed::PURGE_READ     - Purge all read items whenever purging is run (will not purge unread items)
 * - Feed::PURGE_BY_DAYS  - Purge read and unread items older than a number of days (PURGE_NUMBER below)
 * - Feed::PURGE_BY_COUNT - Purge read and unread items beyond the number to keep (PURGE_NUMBER below)
 *
 * The default is Feed::PURGE_BY_DAYS.
 */
const PURGE_TYPE = Feed::PURGE_BY_DAYS;
/**
 * For purge-by-days, how many days of items should be kept; for purge-by-count, how many items should be kept
 *
 * The default is 30.
 */
const PURGE_NUMBER = 30;