From 773d2bc4e54afcb1d2464ec61290e352b27cb881 Mon Sep 17 00:00:00 2001 From: Arthur Schiwon Date: Sat, 11 Jan 2025 00:39:10 +0100 Subject: [PATCH] fix: allow DOMProcessingInstruction in Readability and NodeUtility Signed-off-by: Arthur Schiwon --- src/Nodes/NodeUtility.php | 3 ++- src/Readability.php | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Nodes/NodeUtility.php b/src/Nodes/NodeUtility.php index 2f094f1..859bb43 100644 --- a/src/Nodes/NodeUtility.php +++ b/src/Nodes/NodeUtility.php @@ -5,6 +5,7 @@ use fivefilters\Readability\Nodes\DOM\DOMDocument; use fivefilters\Readability\Nodes\DOM\DOMElement; use fivefilters\Readability\Nodes\DOM\DOMNode; +use fivefilters\Readability\Nodes\DOM\DOMProcessingInstruction; use fivefilters\Readability\Nodes\DOM\DOMText; use fivefilters\Readability\Nodes\DOM\DOMComment; use fivefilters\Readability\Nodes\DOM\DOMNodeList; @@ -120,7 +121,7 @@ public static function removeNode(DOMNode|DOMComment|DOMText|DOMElement $node): * Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally * for parents. */ - public static function getNextNode(DOMNode|DOMComment|DOMText|DOMElement|DOMDocument $originalNode, bool $ignoreSelfAndKids = false): DOMNode|DOMComment|DOMText|DOMElement|DOMDocument|null + public static function getNextNode(DOMNode|DOMComment|DOMText|DOMElement|DOMDocument|DOMProcessingInstruction $originalNode, bool $ignoreSelfAndKids = false): DOMNode|DOMComment|DOMText|DOMElement|DOMDocument|DOMProcessingInstruction|null { /* * Traverse the DOM from node to node, starting at the node passed in. diff --git a/src/Readability.php b/src/Readability.php index c37c9b6..8f03fa1 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -5,6 +5,7 @@ use fivefilters\Readability\Nodes\DOM\DOMDocument; use fivefilters\Readability\Nodes\DOM\DOMElement; use fivefilters\Readability\Nodes\DOM\DOMNode; +use fivefilters\Readability\Nodes\DOM\DOMProcessingInstruction; use fivefilters\Readability\Nodes\DOM\DOMText; use fivefilters\Readability\Nodes\DOM\DOMComment; use fivefilters\Readability\Nodes\NodeUtility; @@ -391,7 +392,7 @@ private function getJSONLD(DOMDocument $dom): array return $metadata; } catch (\Exception $err) { // The try-catch blocks are from the JS version. Not sure if there's anything - // here in the PHP version that would trigger an error or exception, so perhaps we can + // here in the PHP version that would trigger an error or exception, so perhaps we can // remove the try-catch blocks here (or at least translate errors to exceptions for this bit) $this->logger->debug('[JSON-LD] Error parsing: ' . $err->getMessage()); } @@ -418,7 +419,7 @@ private function getMetadata(): void /* @var DOMNode $meta */ $elementName = $meta->getAttribute('name'); $elementProperty = $meta->getAttribute('property'); - $content = $meta->getAttribute('content'); + $content = $meta->getAttribute('content'); $matches = null; $name = null; @@ -960,7 +961,7 @@ private function textSimilarity(string $textA, string $textB): float /** * Checks if the node is a byline. */ - private function checkByline(DOMNode|DOMText|DOMElement $node, string $matchString): bool + private function checkByline(DOMNode|DOMText|DOMElement|DOMProcessingInstruction $node, string $matchString): bool { if (!$this->configuration->getArticleByline()) { return false; @@ -2043,7 +2044,7 @@ public function _cleanHeaders(DOMDocument $article): void * @param DOMNode the node to check. * @return boolean indicating whether this is a title-like header. */ - private function headerDuplicatesTitle(DOMNode|DOMText|DOMElement $node): bool + private function headerDuplicatesTitle(DOMNode|DOMText|DOMElement|DOMProcessingInstruction $node): bool { if ($node->nodeName !== 'h1' && $node->nodeName !== 'h2') { return false;