Alpha 5 #20
|
@ -1,7 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
/** The current Feed Reader Central version */
|
/** The current Feed Reader Central version */
|
||||||
const FRC_VERSION = '1.0.0-alpha4';
|
const FRC_VERSION = '1.0.0-alpha5';
|
||||||
|
|
||||||
spl_autoload_register(function ($class) {
|
spl_autoload_register(function ($class) {
|
||||||
$file = implode(DIRECTORY_SEPARATOR, [__DIR__, 'lib', "$class.php"]);
|
$file = implode(DIRECTORY_SEPARATOR, [__DIR__, 'lib', "$class.php"]);
|
||||||
|
|
203
src/lib/Feed.php
203
src/lib/Feed.php
|
@ -1,84 +1,5 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
/**
|
|
||||||
* Information for a feed item
|
|
||||||
*/
|
|
||||||
class FeedItem {

    /** @var string The title of the feed item */
    public string $title = '';

    /** @var string The unique ID for the feed item */
    public string $guid = '';

    /** @var string The link to the original content */
    public string $link = '';

    /** @var string When this item was published */
    public string $publishedOn = '';

    /** @var ?string When this item was last updated */
    public ?string $updatedOn = null;

    /** @var string The content for the item */
    public string $content = '';

    /**
     * Construct a feed item from an Atom feed's `<entry>` tag
     *
     * The item's link is the `href` of the first `<link>` whose relation is `alternate`. A `<link>` with no `rel`
     * attribute counts as `alternate` (RFC 4287, section 4.2.7.2), and links with a missing or empty `href` are
     * skipped. If no link is found and the entry's ID starts with `http`, the ID is used as the link.
     *
     * @param DOMNode $node The XML node from which a feed item should be constructed
     * @return FeedItem A feed item constructed from the given node
     */
    public static function fromAtom(DOMNode $node): FeedItem {
        $guid = Feed::atomValue($node, 'id');
        $link = '';
        foreach ($node->getElementsByTagName('link') as $linkElt) {
            // An absent "rel" must be interpreted as "alternate"
            $rel  = $linkElt->hasAttribute('rel') ? $linkElt->getAttribute('rel') : 'alternate';
            $href = $linkElt->getAttribute('href');
            if ($rel === 'alternate' && $href !== '') {
                $link = $href;
                break;
            }
        }
        if ($link === '' && str_starts_with($guid, 'http')) $link = $guid;

        $item = new FeedItem();
        $item->guid        = $guid;
        $item->title       = Feed::atomValue($node, 'title');
        $item->link        = $link;
        $item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published'));
        $item->updatedOn   = Data::formatDate(Feed::atomValue($node, 'updated'));
        $item->content     = Feed::atomValue($node, 'content');

        return $item;
    }

    /**
     * Construct a feed item from an RSS feed's `<item>` tag
     *
     * The update date comes from an Atom-namespaced `<updated>` element when one is present, and the content
     * prefers a `content:encoded` element over the plain `<description>`.
     *
     * @param DOMNode $node The XML node from which a feed item should be constructed
     * @return FeedItem A feed item constructed from the given node
     */
    public static function fromRSS(DOMNode $node): FeedItem {
        $itemGuid = Feed::rssValue($node, 'guid');
        $updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated');
        $encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded');

        $item = new FeedItem();
        // NOTE(review): 'guid not found' is presumably the sentinel Feed::rssValue returns for a missing tag;
        // when the item has no GUID, its link doubles as the unique ID
        $item->guid        = $itemGuid === 'guid not found' ? Feed::rssValue($node, 'link') : $itemGuid;
        $item->title       = Feed::rssValue($node, 'title');
        $item->link        = Feed::rssValue($node, 'link');
        $item->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate'));
        $item->updatedOn   = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null);
        $item->content     = $encNodes->length > 0
            ? $encNodes->item(0)->textContent
            : Feed::rssValue($node, 'description');

        return $item;
    }
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Feed retrieval, parsing, and manipulation
|
* Feed retrieval, parsing, and manipulation
|
||||||
*/
|
*/
|
||||||
|
@ -114,7 +35,7 @@ class Feed {
|
||||||
* @throws DOMException If the error is a warning
|
* @throws DOMException If the error is a warning
|
||||||
*/
|
*/
|
||||||
private static function xmlParseError(int $errno, string $errstr): bool {
|
private static function xmlParseError(int $errno, string $errstr): bool {
|
||||||
if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXml()') > 0) {
|
if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXML()') > 0) {
|
||||||
throw new DOMException($errstr, $errno);
|
throw new DOMException($errstr, $errno);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -181,6 +102,17 @@ class Feed {
|
||||||
return ['ok' => $feed];
|
return ['ok' => $feed];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get an attribute value from a DOM node
|
||||||
|
*
|
||||||
|
* @param DOMNode $node The node with an attribute value to obtain
|
||||||
|
* @param string $name The name of the attribute whose value should be obtained
|
||||||
|
* @return string The attribute value if it exists, an empty string if not
|
||||||
|
*/
|
||||||
|
private static function attrValue(DOMNode $node, string $name): string {
    // A node without any attributes cannot have the requested one
    if (!$node->hasAttributes()) return '';

    // getNamedItem() yields null for an absent attribute; that collapses to an empty string as well
    $attr = $node->attributes->getNamedItem($name);
    return $attr?->value ?? '';
}
|
||||||
/**
|
/**
|
||||||
* Get the value of a child element by its tag name for an Atom feed
|
* Get the value of a child element by its tag name for an Atom feed
|
||||||
*
|
*
|
||||||
|
@ -196,7 +128,7 @@ class Feed {
|
||||||
if ($tags->length == 0) return "$tagName not found";
|
if ($tags->length == 0) return "$tagName not found";
|
||||||
$tag = $tags->item(0);
|
$tag = $tags->item(0);
|
||||||
if (!($tag instanceof DOMElement)) return $tag->textContent;
|
if (!($tag instanceof DOMElement)) return $tag->textContent;
|
||||||
if ($tag->hasAttributes() && $tag->attributes->getNamedItem('type') == 'xhtml') {
|
if (self::attrValue($tag, 'type') == 'xhtml') {
|
||||||
$div = $tag->getElementsByTagNameNS(Feed::XHTML_NS, 'div');
|
$div = $tag->getElementsByTagNameNS(Feed::XHTML_NS, 'div');
|
||||||
if ($div->length == 0) return "-- invalid XHTML content --";
|
if ($div->length == 0) return "-- invalid XHTML content --";
|
||||||
return $div->item(0)->textContent;
|
return $div->item(0)->textContent;
|
||||||
|
@ -225,6 +157,54 @@ class Feed {
|
||||||
return ['ok' => $feed];
|
return ['ok' => $feed];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve a document (http/https)
|
||||||
|
*
|
||||||
|
* @param string $url The URL of the document to retrieve
|
||||||
|
* @return array ['content' => document content, 'error' => error message, 'code' => HTTP response code,
|
||||||
|
* 'url' => effective URL]
|
||||||
|
*/
|
||||||
|
private static function retrieveDocument(string $url): array {
    $docReq = curl_init($url);
    if ($docReq === false) {
        // Report the failure in the same shape as any other retrieval error
        return ['content' => '', 'error' => "Unable to initialize a request for $url", 'code' => 0, 'url' => $url];
    }

    curl_setopt_array($docReq, [
        CURLOPT_FOLLOWLOCATION  => true,
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_CONNECTTIMEOUT  => 5,
        CURLOPT_TIMEOUT         => 15,
        // The URL is user-supplied; only allow the web protocols, both initially and when following redirects
        CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    ]);

    // curl_exec() returns false on failure; keep 'content' a string so callers can interpolate or parse it
    $content = curl_exec($docReq);
    $error   = curl_error($docReq);
    if ($content === false && $error === '') $error = "Unable to retrieve $url";

    $result = [
        'content' => is_string($content) ? $content : '',
        'error'   => $error,
        'code'    => curl_getinfo($docReq, CURLINFO_RESPONSE_CODE),
        'url'     => curl_getinfo($docReq, CURLINFO_EFFECTIVE_URL)
    ];

    curl_close($docReq);
    return $result;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Derive a feed URL from an HTML document
|
||||||
|
*
|
||||||
|
* @param string $content The HTML document content from which to derive a feed URL
|
||||||
|
* @return array|string[] ['ok' => feed URL] if successful, ['error' => message] if not
|
||||||
|
*/
|
||||||
|
private static function deriveFeedFromHTML(string $content): array {
    $notFound = ['error' => 'Cannot find feed at this URL'];

    // Only the <head> is needed. The closing tag is matched case-insensitively, and a document with no
    // closing </head> is parsed whole rather than being cut down to a meaningless 7-byte prefix.
    $headEnd = stripos($content, '</head>');
    $markup  = $headEnd === false ? $content : substr($content, 0, $headEnd + 7);
    if (trim($markup) === '') return $notFound;

    // Real-world HTML routinely trips libxml's parser; collect those complaints instead of emitting warnings
    $priorSetting = libxml_use_internal_errors(true);
    $html         = new DOMDocument();
    $loaded       = $html->loadHTML($markup);
    libxml_clear_errors();
    libxml_use_internal_errors($priorSetting);
    if (!$loaded) return $notFound;

    $headTags = $html->getElementsByTagName('head');
    if ($headTags->length < 1) return $notFound;

    foreach ($headTags->item(0)->getElementsByTagName('link') as $link) {
        // Attribute values are compared case-insensitively; a link with no usable href is not a feed
        if (strtolower(trim(self::attrValue($link, 'rel'))) !== 'alternate') continue;
        $type = strtolower(trim(self::attrValue($link, 'type')));
        $href = trim(self::attrValue($link, 'href'));
        if (($type === 'application/rss+xml' || $type === 'application/atom+xml') && $href !== '') {
            return ['ok' => $href];
        }
    }
    return $notFound;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve the feed
|
* Retrieve the feed
|
||||||
*
|
*
|
||||||
|
@ -232,34 +212,33 @@ class Feed {
|
||||||
* @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
|
* @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not
|
||||||
*/
|
*/
|
||||||
public static function retrieveFeed(string $url): array {
    // Turns an unsuccessful retrieval into an error message; null means the document was retrieved
    $failure = static function (array $doc, string $from): ?string {
        if ($doc['error'] != '') return $doc['error'];
        if ($doc['code'] != 200) {
            return "Prospective feed URL $from returned HTTP Code {$doc['code']}: {$doc['content']}";
        }
        return null;
    };

    $doc   = self::retrieveDocument($url);
    $error = $failure($doc, $url);
    if ($error !== null) return ['error' => $error];

    // Sniff for an HTML page, ignoring a UTF-8 byte-order mark or whitespace ahead of the first tag
    $start = strtolower(substr(ltrim($doc['content'], "\xEF\xBB\xBF \t\r\n"), 0, 9));
    if ($start == '<!doctype' || str_starts_with($start, '<html')) {
        $derivedURL = self::deriveFeedFromHTML($doc['content']);
        if (array_key_exists('error', $derivedURL)) return ['error' => $derivedURL['error']];

        $feedURL = $derivedURL['ok'];
        if (!preg_match('#^https?://#i', $feedURL)) {
            // Relative URL; resolve it against the page actually served (after redirects), falling back to
            // the requested URL if the effective one is unavailable
            $pageURL = is_string($doc['url']) && $doc['url'] !== '' ? $doc['url'] : $url;
            $base    = parse_url($pageURL);
            if (!is_array($base) || !isset($base['scheme'], $base['host'])) {
                return ['error' => "Cannot resolve feed URL $feedURL relative to $pageURL"];
            }
            $port   = array_key_exists('port', $base) ? ":{$base['port']}" : '';
            $origin = "{$base['scheme']}://{$base['host']}$port";
            if (str_starts_with($feedURL, '//')) {
                // Protocol-relative: keep the page's scheme, use the link's own host
                $feedURL = "{$base['scheme']}:$feedURL";
            } elseif (str_starts_with($feedURL, '/')) {
                // Root-relative: same origin, path as given
                $feedURL = "$origin$feedURL";
            } else {
                // Path-relative: same origin, resolved against the directory of the page's path
                $path    = $base['path'] ?? '/';
                $dir     = substr($path, 0, (int) strrpos($path, '/') + 1);
                $feedURL = $origin . ($dir === '' ? '/' : $dir) . $feedURL;
            }
        }

        // The derived feed must be retrieved successfully before it is handed to the parser
        $doc   = self::retrieveDocument($feedURL);
        $error = $failure($doc, $feedURL);
        if ($error !== null) return ['error' => $error];
    }

    $parsed = self::parseFeed($doc['content']);
    if (array_key_exists('error', $parsed)) return ['error' => $parsed['error']];

    // An Atom document has a <feed> element in the Atom namespace; anything else is treated as RSS
    $extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0
        ? self::fromAtom(...) : self::fromRSS(...);
    return $extract($parsed['ok'], $doc['url']);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -398,7 +377,17 @@ class Feed {
|
||||||
$feedExtract = self::retrieveFeed($url);
|
$feedExtract = self::retrieveFeed($url);
|
||||||
if (array_key_exists('error', $feedExtract)) return $feedExtract;
|
if (array_key_exists('error', $feedExtract)) return $feedExtract;
|
||||||
|
|
||||||
$feed = $feedExtract['ok'];
|
$feed = $feedExtract['ok'];
|
||||||
|
|
||||||
|
$existsQuery = $db->prepare('SELECT COUNT(*) FROM feed WHERE user_id = :user AND url = :url');
|
||||||
|
$existsQuery->bindValue(':user', $_SESSION[Key::USER_ID]);
|
||||||
|
$existsQuery->bindValue(':url', $feed->url);
|
||||||
|
$existsResult = $existsQuery->execute();
|
||||||
|
if (!$existsResult) return ['error' => 'SQLite error: ' . $db->lastErrorMsg()];
|
||||||
|
|
||||||
|
$exists = $existsResult->fetchArray(SQLITE3_NUM);
|
||||||
|
if ($exists[0] != 0) return ['error' => "Already subscribed to feed $feed->url"];
|
||||||
|
|
||||||
$query = $db->prepare(<<<'SQL'
|
$query = $db->prepare(<<<'SQL'
|
||||||
INSERT INTO feed (user_id, url, title, updated_on, checked_on)
|
INSERT INTO feed (user_id, url, title, updated_on, checked_on)
|
||||||
VALUES (:user, :url, :title, :updated, :checked)
|
VALUES (:user, :url, :title, :updated, :checked)
|
||||||
|
|
80
src/lib/FeedItem.php
Normal file
80
src/lib/FeedItem.php
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Information for a feed item
|
||||||
|
*/
|
||||||
|
class FeedItem {

    /** @var string The title of the feed item */
    public string $title = '';

    /** @var string The unique ID for the feed item */
    public string $guid = '';

    /** @var string The link to the original content */
    public string $link = '';

    /** @var string When this item was published */
    public string $publishedOn = '';

    /** @var ?string When this item was last updated */
    public ?string $updatedOn = null;

    /** @var string The content for the item */
    public string $content = '';

    /**
     * Construct a feed item from an Atom feed's `<entry>` tag
     *
     * The item's link is the `href` of the first `<link>` whose relation is `alternate`. A `<link>` with no `rel`
     * attribute counts as `alternate` (RFC 4287, section 4.2.7.2), and links with a missing or empty `href` are
     * skipped. If no link is found and the entry's ID starts with `http`, the ID is used as the link.
     *
     * @param DOMNode $node The XML node from which a feed item should be constructed
     * @return FeedItem A feed item constructed from the given node
     */
    public static function fromAtom(DOMNode $node): FeedItem {
        $guid = Feed::atomValue($node, 'id');
        $link = '';
        foreach ($node->getElementsByTagName('link') as $linkElt) {
            // An absent "rel" must be interpreted as "alternate"
            $rel  = $linkElt->hasAttribute('rel') ? $linkElt->getAttribute('rel') : 'alternate';
            $href = $linkElt->getAttribute('href');
            if ($rel === 'alternate' && $href !== '') {
                $link = $href;
                break;
            }
        }
        if ($link === '' && str_starts_with($guid, 'http')) $link = $guid;

        $item = new FeedItem();
        $item->guid        = $guid;
        $item->title       = Feed::atomValue($node, 'title');
        $item->link        = $link;
        $item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published'));
        $item->updatedOn   = Data::formatDate(Feed::atomValue($node, 'updated'));
        $item->content     = Feed::atomValue($node, 'content');

        return $item;
    }

    /**
     * Construct a feed item from an RSS feed's `<item>` tag
     *
     * The update date comes from an Atom-namespaced `<updated>` element when one is present, and the content
     * prefers a `content:encoded` element over the plain `<description>`.
     *
     * @param DOMNode $node The XML node from which a feed item should be constructed
     * @return FeedItem A feed item constructed from the given node
     */
    public static function fromRSS(DOMNode $node): FeedItem {
        $itemGuid = Feed::rssValue($node, 'guid');
        $updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated');
        $encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded');

        $item = new FeedItem();
        // NOTE(review): 'guid not found' is presumably the sentinel Feed::rssValue returns for a missing tag;
        // when the item has no GUID, its link doubles as the unique ID
        $item->guid        = $itemGuid === 'guid not found' ? Feed::rssValue($node, 'link') : $itemGuid;
        $item->title       = Feed::rssValue($node, 'title');
        $item->link        = Feed::rssValue($node, 'link');
        $item->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate'));
        $item->updatedOn   = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null);
        $item->content     = $encNodes->length > 0
            ? $encNodes->item(0)->textContent
            : Feed::rssValue($node, 'description');

        return $item;
    }
}
|
Loading…
Reference in New Issue
Block a user