diff --git a/INSTALLING.md b/INSTALLING.md index bcfcb3a..d2e88e0 100644 --- a/INSTALLING.md +++ b/INSTALLING.md @@ -42,4 +42,12 @@ Data is stored under the `/src/data` directory, and the default database name is ### Date/Time Format -The default format for dates and times look like "May 28, 2023 at 3:15pm". Changing the string there will alter the display on the main page and when reading an item. Any [supported PHP date or time token](https://www.php.net/manual/en/datetime.format.php) is supported. \ No newline at end of file +The default format for dates and times looks like "May 28, 2023 at 3:15pm". Changing the string there will alter the display on the main page and when reading an item. Any [supported PHP date or time token](https://www.php.net/manual/en/datetime.format.php) is supported. + +### Item Purging + +Feed Reader Central tries to keep the database tidy by purging items that have been read or are otherwise no longer required. There are three variants, selected with the `PURGE_TYPE` setting: +- `Feed::PURGE_READ` purges non-bookmarked read items for a feed whenever it is refreshed. This is the most aggressive purging strategy, but it is also the only one that will not purge unread items. +- `Feed::PURGE_BY_DAYS` purges non-bookmarked items that are more than `PURGE_NUMBER` days old. This is the default value, and `PURGE_NUMBER`'s default value is 30; items will be kept for 30 days, read or unread. +- `Feed::PURGE_BY_COUNT` purges items to preserve at most `PURGE_NUMBER` non-bookmarked items for each feed. 
+ \ No newline at end of file diff --git a/src/lib/Data.php b/src/lib/Data.php index 6713767..2922186 100644 --- a/src/lib/Data.php +++ b/src/lib/Data.php @@ -98,4 +98,14 @@ class Data { if (is_null($dbConn)) $db->close(); } } + + /** + * Return the last SQLite error message as a result array + * + * @param SQLite3 $db The database connection on which the error has occurred + * @return string[] ['error' => message] for last SQLite error message + */ + public static function error(SQLite3 $db): array { + return ['error' => 'SQLite error: ' . $db->lastErrorMsg()]; + } } diff --git a/src/lib/Feed.php b/src/lib/Feed.php index c5900c7..498200f 100644 --- a/src/lib/Feed.php +++ b/src/lib/Feed.php @@ -30,6 +30,15 @@ class Feed { private const string USER_AGENT = 'FeedReaderCentral/' . FRC_VERSION . ' +https://bitbadger.solutions/open-source/feed-reader-central'; + /** @var int Purge all read items (will not purge unread items) */ + public const int PURGE_READ = 1; + + /** @var int Purge items older than the specified number of days */ + public const int PURGE_BY_DAYS = 2; + + /** @var int Purge items in number greater than the specified number of items to keep */ + public const int PURGE_BY_COUNT = 3; + /** * When parsing XML into a DOMDocument, errors are presented as warnings; this creates an exception for them * @@ -86,15 +95,16 @@ class Feed { private static function fromRSS(DOMDocument $xml, string $url): array { $channel = $xml->getElementsByTagName('channel')->item(0); if (!($channel instanceof DOMElement)) { - return ['error' => "Channel element not found ($channel->nodeType)"]; + $type = $channel?->nodeType ?? 
-1; + return ['error' => "Channel element not found ($type)"]; } // The Atom namespace provides a lastBuildDate, which contains the last time an item in the feed was updated; if // that is not present, use the pubDate element instead - $updatedOn = self::rssValue($channel, 'lastBuildDate'); - if ($updatedOn == 'lastBuildDate not found') { - $updatedOn = self::rssValue($channel, 'pubDate'); - if ($updatedOn == 'pubDate not found') $updatedOn = null; + if (($updatedOn = self::rssValue($channel, 'lastBuildDate')) == 'lastBuildDate not found') { + if (($updatedOn = self::rssValue($channel, 'pubDate')) == 'pubDate not found') { + $updatedOn = null; + } } $feed = new Feed(); @@ -148,9 +158,8 @@ class Feed { * @return array|Feed[] ['ok' => feed] */ private static function fromAtom(DOMDocument $xml, string $url): array { - $root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0); - $updatedOn = self::atomValue($root, 'updated'); - if ($updatedOn == 'pubDate not found') $updatedOn = null; + $root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0); + if (($updatedOn = self::atomValue($root, 'updated')) == 'pubDate not found') $updatedOn = null; $feed = new Feed(); $feed->title = self::atomValue($root, 'title'); @@ -252,8 +261,9 @@ class Feed { * @param int $itemId The ID of the item to be updated * @param FeedItem $item The item to be updated * @param SQLite3 $db A database connection to use for the update + * @return bool|SQLite3Result The result if the update is successful, false if it failed */ - private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): void { + private static function updateItem(int $itemId, FeedItem $item, SQLite3 $db): bool|SQLite3Result { $query = $db->prepare(<<<'SQL' UPDATE item SET title = :title, @@ -268,7 +278,7 @@ class Feed { $query->bindValue(':updated', $item->updatedOn); $query->bindValue(':content', $item->content); $query->bindValue(':id', $itemId); - $query->execute(); + return 
$query->execute(); } /** @@ -277,8 +287,9 @@ class Feed { * @param int $feedId The ID of the feed to which the item should be added * @param FeedItem $item The item to be added * @param SQLite3 $db A database connection to use for the addition + * @return bool|SQLite3Result The result if the insert is successful, false if it failed */ - private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): void { + private static function addItem(int $feedId, FeedItem $item, SQLite3 $db): bool|SQLite3Result { $query = $db->prepare(<<<'SQL' INSERT INTO item ( feed_id, item_guid, item_link, title, published_on, updated_on, content @@ -293,7 +304,7 @@ class Feed { $query->bindValue(':published', $item->publishedOn); $query->bindValue(':updated', $item->updatedOn); $query->bindValue(':content', $item->content); - $query->execute(); + return $query->execute(); } /** @@ -304,31 +315,71 @@ class Feed { * @return array ['ok' => true] if successful, ['error' => message] if not */ public static function updateItems(int $feedId, Feed $feed, SQLite3 $db): array { - try { - foreach ($feed->items as $item) { - $existsQuery = $db->prepare( - 'SELECT id, published_on, updated_on FROM item WHERE feed_id = :feed AND item_guid = :guid'); - $existsQuery->bindValue(':feed', $feedId); - $existsQuery->bindValue(':guid', $item->guid); - $exists = $existsQuery->execute(); - if ($exists) { - $existing = $exists->fetchArray(SQLITE3_ASSOC); - if ($existing) { - if ( $existing['published_on'] != $item->publishedOn - || ($existing['updated_on'] ?? '') != ($item->updatedOn ?? 
'')) { - self::updateItem($existing['id'], $item, $db); - } - } else { - self::addItem($feedId, $item, $db); + + // Do not add items that are older than the oldest we currently have; this keeps us from re-adding items that + // have been purged already + $oldestQuery = $db->prepare( + 'SELECT MIN(coalesce(updated_on, published_on)) FROM item where feed_id = :feed AND is_bookmarked = 0'); + $oldestQuery->bindValue(':feed', $feedId); + if (!($oldest = $oldestQuery->execute())) return Data::error($db); + $minDate = date_create_immutable($oldest->fetchArray(SQLITE3_NUM)[0] ?? '1993-04-30T00:00:00+00:00'); + + foreach ($feed->items as $item) { + if (date_create_immutable($item->updatedOn ?? $item->publishedOn) < $minDate) continue; + $existsQuery = $db->prepare( + 'SELECT id, published_on, updated_on FROM item WHERE feed_id = :feed AND item_guid = :guid'); + $existsQuery->bindValue(':feed', $feedId); + $existsQuery->bindValue(':guid', $item->guid); + if ($exists = $existsQuery->execute()) { + if ($existing = $exists->fetchArray(SQLITE3_ASSOC)) { + if ( $existing['published_on'] != $item->publishedOn + || ($existing['updated_on'] ?? '') != ($item->updatedOn ?? '')) { + if (!self::updateItem($existing['id'], $item, $db)) return Data::error($db); } } else { - throw new Exception($db->lastErrorMsg()); + if (!self::addItem($feedId, $item, $db)) return Data::error($db); } + } else { + return Data::error($db); } + } + return ['ok' => true]; + } + + /** + * Purge items for a feed + * + * @param int $feedId The ID of the feed to be purged + * @param SQLite3 $db The database connection on which items should be purged + * @return array|string[]|true[] ['ok' => true] if purging was successful, ['error' => message] if not + */ + private static function purgeItems(int $feedId, SQLite3 $db): array { + // Strict membership test; array_search() would return the falsy index 0 for PURGE_READ + if (!in_array(PURGE_TYPE, [self::PURGE_READ, self::PURGE_BY_DAYS, self::PURGE_BY_COUNT], true)) { + return ['error' => 'Unrecognized purge type ' . 
PURGE_TYPE]; + } + + try { + $sql = match (PURGE_TYPE) { + self::PURGE_READ => 'AND is_read = 1', + self::PURGE_BY_DAYS => 'AND date(coalesce(updated_on, published_on)) < date(:oldest)', + self::PURGE_BY_COUNT => 'AND id IN (SELECT id FROM item WHERE feed_id = :feed + ORDER BY date(coalesce(updated_on, published_on)) DESC + LIMIT -1 OFFSET :keep)', + default => 'AND 1 = 0' + }; + + $purge = $db->prepare("DELETE FROM item WHERE feed_id = :feed AND is_bookmarked = 0 $sql"); + $purge->bindValue(':feed', $feedId); + if (PURGE_TYPE == self::PURGE_BY_DAYS) { + $purge->bindValue(':oldest', Data::formatDate('-' . PURGE_NUMBER . ' day')); + } elseif (PURGE_TYPE == self::PURGE_BY_COUNT) { + $purge->bindValue(':keep', PURGE_NUMBER); + } + return $purge->execute() ? ['ok' => true] : Data::error($db); } catch (Exception $ex) { return ['error' => $ex->getMessage()]; } - return ['ok', true]; } /** @@ -362,9 +413,9 @@ class Feed { $feedUpdate->bindValue(':checked', Data::formatDate('now')); $feedUpdate->bindValue(':id', $feedId); if ($urlUpdate != '') $feedUpdate->bindValue(':url', $feed->url); - $feedUpdate->execute(); + if (!$feedUpdate->execute()) return Data::error($db); - return ['ok' => true]; + return self::purgeItems($feedId, $db); } /** @@ -382,26 +433,24 @@ class Feed { $existsQuery = $db->prepare('SELECT COUNT(*) FROM feed WHERE user_id = :user AND url = :url'); $existsQuery->bindValue(':user', $_SESSION[Key::USER_ID]); $existsQuery->bindValue(':url', $feed->url); - $existsResult = $existsQuery->execute(); - if (!$existsResult) return ['error' => 'SQLite error: ' . 
$db->lastErrorMsg()]; - - $exists = $existsResult->fetchArray(SQLITE3_NUM); - if ($exists[0] != 0) return ['error' => "Already subscribed to feed $feed->url"]; + if (!($exists = $existsQuery->execute())) return Data::error($db); + if ($exists->fetchArray(SQLITE3_NUM)[0] > 0) return ['error' => "Already subscribed to feed $feed->url"]; $query = $db->prepare(<<<'SQL' - INSERT INTO feed (user_id, url, title, updated_on, checked_on) - VALUES (:user, :url, :title, :updated, :checked) + INSERT INTO feed ( + user_id, url, title, updated_on, checked_on + ) VALUES ( + :user, :url, :title, :updated, :checked + ) SQL); $query->bindValue(':user', $_SESSION[Key::USER_ID]); $query->bindValue(':url', $feed->url); $query->bindValue(':title', $feed->title); $query->bindValue(':updated', $feed->updatedOn); $query->bindValue(':checked', Data::formatDate('now')); - $result = $query->execute(); - - $feedId = $result ? $db->lastInsertRowID() : -1; - if ($feedId < 0) return ['error' => $db->lastErrorMsg()]; + if (!$query->execute()) return Data::error($db); + $feedId = $db->lastInsertRowID(); $result = self::updateItems($feedId, $feed, $db); if (array_key_exists('error', $result)) return $result; @@ -421,7 +470,7 @@ class Feed { $query->bindValue(':url', $url); $query->bindValue(':id', $existing['id']); $query->bindValue(':user', $_SESSION[Key::USER_ID]); - $query->execute(); + if (!$query->execute()) return Data::error($db); return self::refreshFeed($existing['id'], $url, $db); } @@ -438,8 +487,7 @@ class Feed { $query = $db->prepare( "SELECT f.id, f.url, u.email FROM feed f INNER JOIN frc_user u ON u.id = f.user_id$extraSQL"); if ($user > 0) $query->bindValue(':user', $user); - $result = $query->execute(); - if (!$result) return ['error', $db->lastErrorMsg()]; + if (!($result = $query->execute())) return Data::error($db); $feeds = []; while ($feed = $result->fetchArray(SQLITE3_ASSOC)) $feeds[] = $feed; return $feeds; diff --git a/src/user-config.dist.php b/src/user-config.dist.php 
index 7b3f824..a68d095 100644 --- a/src/user-config.dist.php +++ b/src/user-config.dist.php @@ -25,3 +25,16 @@ const DATABASE_NAME = 'frc.db'; * The default, 'F j, Y \a\t g:ia', equates to "August 17, 2023 at 4:45pm" */ const DATE_TIME_FORMAT = 'F j, Y \a\t g:ia'; + +/** + * How should item purging be done? (Purging never applies to bookmarked items.) Options are: + * - Feed::PURGE_READ - Purge all read items whenever purging is run (will not purge unread items) + * - Feed::PURGE_BY_DAYS - Purge read and unread items older than a number of days (PURGE_NUMBER below) + * - Feed::PURGE_BY_COUNT - Purge read and unread items beyond the number to keep (PURGE_NUMBER below) + */ +const PURGE_TYPE = Feed::PURGE_BY_DAYS; + +/** + * For purge-by-days, how many days of items should be kept; for purge-by-count, how many items should be kept + */ +const PURGE_NUMBER = 30;