diff --git a/src/app-config.php b/src/app-config.php index d5e34c9..8e8751d 100644 --- a/src/app-config.php +++ b/src/app-config.php @@ -1,7 +1,7 @@ ` tag - * - * @param DOMNode $node The XML node from which a feed item should be constructed - * @return FeedItem A feed item constructed from the given node - */ - public static function fromAtom(DOMNode $node): FeedItem { - $guid = Feed::atomValue($node, 'id'); - $link = ''; - foreach ($node->getElementsByTagName('link') as $linkElt) { - if ($linkElt->hasAttributes()) { - $relAttr = $linkElt->attributes->getNamedItem('rel'); - if ($relAttr && $relAttr->value == 'alternate') { - $link = $linkElt->attributes->getNamedItem('href')->value; - break; - } - } - } - if ($link == '' && str_starts_with($guid, 'http')) $link = $guid; - - $item = new FeedItem(); - $item->guid = $guid; - $item->title = Feed::atomValue($node, 'title'); - $item->link = $link; - $item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published')); - $item->updatedOn = Data::formatDate(Feed::atomValue($node, 'updated')); - $item->content = Feed::atomValue($node, 'content'); - - return $item; - } - - /** - * Construct a feed item from an RSS feed's `` tag - * - * @param DOMNode $node The XML node from which a feed item should be constructed - * @return FeedItem A feed item constructed from the given node - */ - public static function fromRSS(DOMNode $node): FeedItem { - $itemGuid = Feed::rssValue($node, 'guid'); - $updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated'); - $encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded'); - - $item = new FeedItem(); - $item->guid = $itemGuid == 'guid not found' ? Feed::rssValue($node, 'link') : $itemGuid; - $item->title = Feed::rssValue($node, 'title'); - $item->link = Feed::rssValue($node, 'link'); - $item->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate')); - $item->updatedOn = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null); - $item->content = $encNodes->length > 0 - ? $encNodes->item(0)->textContent - : Feed::rssValue($node, 'description'); - - return $item; - } -} - /** * Feed retrieval, parsing, and manipulation */ @@ -114,7 +35,7 @@ class Feed { * @throws DOMException If the error is a warning */ private static function xmlParseError(int $errno, string $errstr): bool { - if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXml()') > 0) { + if ($errno == E_WARNING && substr_count($errstr, 'DOMDocument::loadXML()') > 0) { throw new DOMException($errstr, $errno); } return false; @@ -181,6 +102,17 @@ class Feed { return ['ok' => $feed]; } + /** + * Get an attribute value from a DOM node + * + * @param DOMNode $node The node with an attribute value to obtain + * @param string $name The name of the attribute whose value should be obtained + * @return string The attribute value if it exists, an empty string if not + */ + private static function attrValue(DOMNode $node, string $name): string { + return ($node->hasAttributes() ? $node->attributes->getNamedItem($name)?->value : null) ?? ''; + + } /** * Get the value of a child element by its tag name for an Atom feed * @@ -196,7 +128,7 @@ class Feed { if ($tags->length == 0) return "$tagName not found"; $tag = $tags->item(0); if (!($tag instanceof DOMElement)) return $tag->textContent; - if ($tag->hasAttributes() && $tag->attributes->getNamedItem('type') == 'xhtml') { + if (self::attrValue($tag, 'type') == 'xhtml') { $div = $tag->getElementsByTagNameNS(Feed::XHTML_NS, 'div'); if ($div->length == 0) return "-- invalid XHTML content --"; return $div->item(0)->textContent; @@ -225,6 +157,54 @@ class Feed { return ['ok' => $feed]; } + /** + * Retrieve a document (http/https) + * + * @param string $url The URL of the document to retrieve + * @return array ['content' => document content, 'error' => error message, 'code' => HTTP response code, + * 'url' => effective URL] + */ + private static function retrieveDocument(string $url): array { + $docReq = curl_init($url); + curl_setopt($docReq, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($docReq, CURLOPT_RETURNTRANSFER, true); + curl_setopt($docReq, CURLOPT_CONNECTTIMEOUT, 5); + curl_setopt($docReq, CURLOPT_TIMEOUT, 15); + + $result = [ + 'content' => curl_exec($docReq), + 'error' => curl_error($docReq), + 'code' => curl_getinfo($docReq, CURLINFO_RESPONSE_CODE), + 'url' => curl_getinfo($docReq, CURLINFO_EFFECTIVE_URL) + ]; + + curl_close($docReq); + return $result; + } + + /** + * Derive a feed URL from an HTML document + * + * @param string $content The HTML document content from which to derive a feed URL + * @return array|string[] ['ok' => feed URL] if successful, ['error' => message] if not + */ + private static function deriveFeedFromHTML(string $content): array { + $html = new DOMDocument(); + $html->loadHTML(substr($content, 0, strpos($content, '') + 7)); + $headTags = $html->getElementsByTagName('head'); + if ($headTags->length < 1) return ['error' => 'Cannot find feed at this URL']; + $head = $headTags->item(0); + foreach ($head->getElementsByTagName('link') as $link) { + if (self::attrValue($link, 'rel') == 'alternate') { + $type = self::attrValue($link, 'type'); + if ($type == 'application/rss+xml' || $type == 'application/atom+xml') { + return ['ok' => self::attrValue($link, 'href')]; + } + } + } + return ['error' => 'Cannot find feed at this URL']; + } + /** * Retrieve the feed * @@ -232,34 +212,33 @@ class Feed { * @return array|Feed[]|string[] ['ok' => feed] if successful, ['error' => message] if not */ public static function retrieveFeed(string $url): array { - $feedReq = curl_init($url); - curl_setopt($feedReq, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($feedReq, CURLOPT_RETURNTRANSFER, true); - curl_setopt($feedReq, CURLOPT_CONNECTTIMEOUT, 5); - curl_setopt($feedReq, CURLOPT_TIMEOUT, 15); + $doc = self::retrieveDocument($url); - $feedContent = curl_exec($feedReq); - - $result = array(); - $error = curl_error($feedReq); - $code = curl_getinfo($feedReq, CURLINFO_RESPONSE_CODE); - if ($error) { - $result['error'] = $error; - } elseif ($code == 200) { - $parsed = self::parseFeed($feedContent); - if (array_key_exists('error', $parsed)) { - $result['error'] = $parsed['error']; - } else { - $extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0 - ? self::fromAtom(...) : self::fromRSS(...); - $result = $extract($parsed['ok'], curl_getinfo($feedReq, CURLINFO_EFFECTIVE_URL)); - } - } else { - $result['error'] = "Prospective feed URL $url returned HTTP Code $code: $feedContent"; + if ($doc['error'] != '') return ['error' => $doc['error']]; + if ($doc['code'] != 200) { + return ['error' => "Prospective feed URL $url returned HTTP Code {$doc['code']}: {$doc['content']}"]; } - curl_close($feedReq); - return $result; + $start = strtolower(strlen($doc['content']) >= 9 ? substr($doc['content'], 0, 9) : $doc['content']); + if ($start == ' $derivedURL['error']]; + $feedURL = $derivedURL['ok']; + if (!str_starts_with($feedURL, 'http')) { + // Relative URL; feed should be retrieved in the context of the original URL + $original = parse_url($url); + $port = array_key_exists('port', $original) ? ":{$original['port']}" : ''; + $feedURL = "{$original['scheme']}://{$original['host']}$port$feedURL"; + } + $doc = self::retrieveDocument($feedURL); + } + + $parsed = self::parseFeed($doc['content']); + if (array_key_exists('error', $parsed)) return ['error' => $parsed['error']]; + + $extract = $parsed['ok']->getElementsByTagNameNS(self::ATOM_NS, 'feed')->length > 0 + ? self::fromAtom(...) : self::fromRSS(...); + return $extract($parsed['ok'], $doc['url']); } /** @@ -398,7 +377,17 @@ class Feed { $feedExtract = self::retrieveFeed($url); if (array_key_exists('error', $feedExtract)) return $feedExtract; - $feed = $feedExtract['ok']; + $feed = $feedExtract['ok']; + + $existsQuery = $db->prepare('SELECT COUNT(*) FROM feed WHERE user_id = :user AND url = :url'); + $existsQuery->bindValue(':user', $_SESSION[Key::USER_ID]); + $existsQuery->bindValue(':url', $feed->url); + $existsResult = $existsQuery->execute(); + if (!$existsResult) return ['error' => 'SQLite error: ' . $db->lastErrorMsg()]; + + $exists = $existsResult->fetchArray(SQLITE3_NUM); + if ($exists[0] != 0) return ['error' => "Already subscribed to feed $feed->url"]; + $query = $db->prepare(<<<'SQL' INSERT INTO feed (user_id, url, title, updated_on, checked_on) VALUES (:user, :url, :title, :updated, :checked) diff --git a/src/lib/FeedItem.php b/src/lib/FeedItem.php new file mode 100644 index 0000000..ab6aa04 --- /dev/null +++ b/src/lib/FeedItem.php @@ -0,0 +1,80 @@ +` tag + * + * @param DOMNode $node The XML node from which a feed item should be constructed + * @return FeedItem A feed item constructed from the given node + */ + public static function fromAtom(DOMNode $node): FeedItem { + $guid = Feed::atomValue($node, 'id'); + $link = ''; + foreach ($node->getElementsByTagName('link') as $linkElt) { + if ($linkElt->hasAttributes()) { + $relAttr = $linkElt->attributes->getNamedItem('rel'); + if ($relAttr && $relAttr->value == 'alternate') { + $link = $linkElt->attributes->getNamedItem('href')->value; + break; + } + } + } + if ($link == '' && str_starts_with($guid, 'http')) $link = $guid; + + $item = new FeedItem(); + $item->guid = $guid; + $item->title = Feed::atomValue($node, 'title'); + $item->link = $link; + $item->publishedOn = Data::formatDate(Feed::atomValue($node, 'published')); + $item->updatedOn = Data::formatDate(Feed::atomValue($node, 'updated')); + $item->content = Feed::atomValue($node, 'content'); + + return $item; + } + + /** + * Construct a feed item from an RSS feed's `` tag + * + * @param DOMNode $node The XML node from which a feed item should be constructed + * @return FeedItem A feed item constructed from the given node + */ + public static function fromRSS(DOMNode $node): FeedItem { + $itemGuid = Feed::rssValue($node, 'guid'); + $updNodes = $node->getElementsByTagNameNS(Feed::ATOM_NS, 'updated'); + $encNodes = $node->getElementsByTagNameNS(Feed::CONTENT_NS, 'encoded'); + + $item = new FeedItem(); + $item->guid = $itemGuid == 'guid not found' ? Feed::rssValue($node, 'link') : $itemGuid; + $item->title = Feed::rssValue($node, 'title'); + $item->link = Feed::rssValue($node, 'link'); + $item->publishedOn = Data::formatDate(Feed::rssValue($node, 'pubDate')); + $item->updatedOn = Data::formatDate($updNodes->length > 0 ? $updNodes->item(0)->textContent : null); + $item->content = $encNodes->length > 0 + ? $encNodes->item(0)->textContent + : Feed::rssValue($node, 'description'); + + return $item; + } +}