Add user maintenance CLI (#9)

- Add CLI infrastructure
- Add user to index page query
- Strip tags from title
- Move item parsing to FeedItem
This commit is contained in:
2024-04-27 13:01:57 -04:00
parent 7b21b86550
commit c1790b58fd
8 changed files with 318 additions and 76 deletions

View File

@@ -22,6 +22,61 @@ class FeedItem {
/** @var string The content for the item */
public string $content = '';
/**
 * Construct a feed item from an Atom feed's `<entry>` tag
 *
 * The item's link is the `href` of the first `<link>` element whose relation is "alternate". Per RFC 4287
 * (section 4.2.7.2), a `<link>` with no `rel` attribute is interpreted as "alternate". `<link>` elements that have no
 * `href` attribute are skipped. If no usable link is found and the entry's ID starts with "http", the ID is used as
 * the link.
 *
 * @param DOMNode $node The XML node from which a feed item should be constructed
 * @return FeedItem A feed item constructed from the given node
 */
public static function fromAtom(DOMNode $node): FeedItem {
    $guid = Feed::atomValue($node, 'id');
    $link = '';
    foreach ($node->getElementsByTagName('link') as $linkElt) {
        // A link without an href cannot supply a URL; skip it rather than dereferencing a missing attribute
        if (!$linkElt->hasAttribute('href')) continue;
        // An absent rel attribute means "alternate" (RFC 4287, section 4.2.7.2)
        $rel = $linkElt->hasAttribute('rel') ? $linkElt->getAttribute('rel') : 'alternate';
        if ($rel == 'alternate') {
            $link = $linkElt->getAttribute('href');
            break;
        }
    }
    // Fall back to the entry ID when it looks like a URL
    if ($link == '' && str_starts_with($guid, 'http')) $link = $guid;

    $item = new FeedItem();
    $item->guid = $guid;
    $item->title = Feed::atomValue($node, 'title');
    $item->link = $link;
    $item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published'));
    $item->updatedOn = Data::formatDate(Feed::atomValue($node, 'updated'));
    $item->content = Feed::atomValue($node, 'content');
    return $item;
}
/**
 * Build a feed item from an `<item>` element of an RSS feed
 *
 * The GUID falls back to the item's link when no `<guid>` element is present. The updated date is read from an
 * `<updated>` element in the `Feed::ATOM_NS` namespace, if there is one. The content prefers an `<encoded>` element in
 * the `Feed::CONTENT_NS` namespace over the item's `<description>`.
 *
 * @param DOMNode $node The XML node from which a feed item should be constructed
 * @return FeedItem A feed item constructed from the given node
 */
public static function fromRSS(DOMNode $node): FeedItem {
    $guidText = Feed::rssValue($node, 'guid');
    $linkText = Feed::rssValue($node, 'link');
    // item(0) yields null when the list is empty, which stands in for the "no such element" case
    $updatedElt = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated')->item(0);
    $encodedElt = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded')->item(0);

    $feedItem = new FeedItem();
    // rssValue() signals a missing element with "[tagName] not found"
    if ($guidText == 'guid not found') {
        $feedItem->guid = $linkText;
    } else {
        $feedItem->guid = $guidText;
    }
    $feedItem->title = Feed::rssValue($node, 'title');
    $feedItem->link = $linkText;
    $feedItem->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate'));
    $feedItem->updatedOn = Data::formatDate($updatedElt === null ? null : $updatedElt->textContent);
    if ($encodedElt === null) {
        $feedItem->content = Feed::rssValue($node, 'description');
    } else {
        $feedItem->content = $encodedElt->textContent;
    }
    return $feedItem;
}
}
/**
@@ -87,11 +142,11 @@ class Feed {
/**
* Get the value of a child element by its tag name for an RSS feed
*
* @param DOMElement $element The parent element
* @param DOMNode $element The parent element
* @param string $tagName The name of the tag whose value should be obtained
* @return string The value of the element (or "[element] not found" if that element does not exist)
*/
private static function rssValue(DOMElement $element, string $tagName): string {
public static function rssValue(DOMNode $element, string $tagName): string {
$tags = $element->getElementsByTagName($tagName);
return $tags->length == 0 ? "$tagName not found" : $tags->item(0)->textContent;
}
@@ -109,34 +164,19 @@ class Feed {
return ['error' => "Channel element not found ($channel->nodeType)"];
}
$feed = new Feed();
$feed->title = self::rssValue($channel, 'title');
$feed->url = $url;
// The Atom namespace provides a lastBuildDate, which contains the last time an item in the feed was updated; if
// that is not present, use the pubDate element instead
$feed->updatedOn = self::rssValue($channel, 'lastBuildDate');
if ($feed->updatedOn == 'lastBuildDate not found') {
$feed->updatedOn = self::rssValue($channel, 'pubDate');
if ($feed->updatedOn == 'pubDate not found') $feed->updatedOn = null;
$updatedOn = self::rssValue($channel, 'lastBuildDate');
if ($updatedOn == 'lastBuildDate not found') {
$updatedOn = self::rssValue($channel, 'pubDate');
if ($updatedOn == 'pubDate not found') $updatedOn = null;
}
$feed->updatedOn = Data::formatDate($feed->updatedOn);
foreach ($channel->getElementsByTagName('item') as $xmlItem) {
$itemGuid = self::rssValue($xmlItem, 'guid');
$updNodes = $xmlItem->getElementsByTagNameNS(Feed::ATOM_NS, 'updated');
$encNodes = $xmlItem->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded');
$item = new FeedItem();
$item->guid = $itemGuid == 'guid not found' ? self::rssValue($xmlItem, 'link') : $itemGuid;
$item->title = self::rssValue($xmlItem, 'title');
$item->link = self::rssValue($xmlItem, 'link');
$item->publishedOn = Data::formatDate(self::rssValue($xmlItem, 'pubDate'));
$item->updatedOn = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null);
$item->content = $encNodes->length > 0
? $encNodes->item(0)->textContent
: self::rssValue($xmlItem, 'description');
$feed->items[] = $item;
}
$feed = new Feed();
$feed->title = self::rssValue($channel, 'title');
$feed->url = $url;
$feed->updatedOn = Data::formatDate($updatedOn);
foreach ($channel->getElementsByTagName('item') as $item) $feed->items[] = FeedItem::fromRSS($item);
return ['ok' => $feed];
}
@@ -147,11 +187,11 @@ class Feed {
* (Atom feeds can have type attributes on nearly any value. For our purposes, types "text" and "html" will work as
* regular string values; for "xhtml", though, we will need to get the `<div>` and extract its contents instead.)
*
* @param DOMElement $element The parent element
* @param DOMNode $element The parent element
* @param string $tagName The name of the tag whose value should be obtained
* @return string The value of the element (or "[element] not found" if that element does not exist)
*/
private static function atomValue(DOMElement $element, string $tagName): string {
public static function atomValue(DOMNode $element, string $tagName): string {
$tags = $element->getElementsByTagName($tagName);
if ($tags->length == 0) return "$tagName not found";
$tag = $tags->item(0);
@@ -172,39 +212,15 @@ class Feed {
* @return array|Feed[] ['ok' => feed]
*/
private static function fromAtom(DOMDocument $xml, string $url): array {
    /** @var DOMElement $root */
    $root = $xml->getElementsByTagNameNS(self::ATOM_NS, 'feed')->item(0);
    // atomValue() reports a missing element as "[tagName] not found"; the tag requested here is "updated", so that is
    // the sentinel to test for (a comparison against "pubDate not found" can never match for this lookup)
    $updatedOn = self::atomValue($root, 'updated');
    if ($updatedOn == 'updated not found') $updatedOn = null;

    $feed = new Feed();
    $feed->title = self::atomValue($root, 'title');
    $feed->url = $url;
    $feed->updatedOn = Data::formatDate($updatedOn);
    // Each <entry> becomes one feed item; the per-entry parsing lives in FeedItem::fromAtom
    foreach ($root->getElementsByTagName('entry') as $entry) $feed->items[] = FeedItem::fromAtom($entry);
    return ['ok' => $feed];
}