From fae4e788450a0d6ff9ef619d67b2b77f8c5d1bd2 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sat, 16 Mar 2024 17:57:49 +0100 Subject: [PATCH 01/17] Fix missing return value in grabArticle Not sure if this is expected but at least it works the same as before. --- src/Readability.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Readability.php b/src/Readability.php index 290cc28..b8c8898 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -1044,7 +1044,7 @@ protected function grabArticle(?\DOMElement $page = null) foreach ($ancestors as $level => $ancestor) { if (!$ancestor->nodeName || !$ancestor->parentNode) { - return; + return false; } if (!$ancestor->hasAttribute('readability')) { From 4f5360df90b7c3def1309af453ed379366e04422 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sat, 16 Mar 2024 18:33:56 +0100 Subject: [PATCH 02/17] Use helpers for content score manipulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `DOMAttr::$value` must be a `string`. Let’s add helpers for manipulating the `readability` attribute so that we do not have to keep casting it from and to `string` in order to appease `strict_types`. --- src/Readability.php | 59 ++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/src/Readability.php b/src/Readability.php index b8c8898..9c909cb 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -618,7 +618,7 @@ public function cleanConditionally(\DOMElement $e, string $tag): void for ($i = $curTagsLength - 1; $i >= 0; --$i) { $node = $tagsList->item($i); $weight = $this->getWeight($node); - $contentScore = ($node->hasAttribute('readability')) ? (int) $node->getAttribute('readability') : 0; + $contentScore = self::getContentScore($node); $this->logger->debug('Start conditional cleaning of ' . $node->getNodePath() . ' (class=' . $node->getAttribute('class') . '; id=' . $node->getAttribute('id') . ')' . 
(($node->hasAttribute('readability')) ? (' with score ' . $node->getAttribute('readability')) : '')); // XXX Incomplete implementation @@ -834,29 +834,26 @@ protected function initializeNode(\DOMElement $node): void return; } - $readability = $this->dom->createAttribute('readability'); - // this is our contentScore - $readability->value = 0; - $node->setAttributeNode($readability); + $contentScore = 0; // using strtoupper just in case switch (strtoupper($node->tagName)) { case 'ARTICLE': - $readability->value += 15; + $contentScore += 15; // no break case 'DIV': - $readability->value += 5; + $contentScore += 5; break; case 'PRE': case 'CODE': case 'TD': case 'BLOCKQUOTE': case 'FIGURE': - $readability->value += 3; + $contentScore += 3; break; case 'SECTION': // often misused - // $readability->value += 2; + // $contentScore += 2; break; case 'OL': case 'UL': @@ -864,7 +861,7 @@ protected function initializeNode(\DOMElement $node): void case 'DD': case 'DT': case 'LI': - $readability->value -= 3; + $contentScore -= 3; break; case 'ASIDE': case 'FOOTER': @@ -875,7 +872,7 @@ protected function initializeNode(\DOMElement $node): void case 'TEXTAREA': case 'INPUT': case 'NAV': - $readability->value -= 3; + $contentScore -= 3; break; case 'H1': case 'H2': @@ -885,11 +882,15 @@ protected function initializeNode(\DOMElement $node): void case 'H6': case 'TH': case 'HGROUP': - $readability->value -= 5; + $contentScore -= 5; break; } - $readability->value += $this->getWeight($node); + $contentScore += $this->getWeight($node); + + $readability = $this->dom->createAttribute('readability'); + $readability->value = (string) $contentScore; + $node->setAttributeNode($readability); } /** @@ -1059,7 +1060,8 @@ protected function grabArticle(?\DOMElement $page = null) } else { $scoreDivider = $level * 3; } - $ancestor->getAttributeNode('readability')->value += $contentScore / $scoreDivider; + + self::updateContentScore($ancestor, fn ($prevScore) => $prevScore + $contentScore / 
$scoreDivider); } } @@ -1074,7 +1076,7 @@ protected function grabArticle(?\DOMElement $page = null) $node = $candidates->item($c); // node should be readable but not inside of an article otherwise it's probably non-readable block if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && ($node->parentNode ? 0 !== strcasecmp($node->parentNode->tagName, 'article') : true)) { - $this->logger->debug('Removing unlikely candidate (using note) ' . $node->getNodePath() . ' by "' . $node->tagName . '" with readability ' . ($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0)); + $this->logger->debug('Removing unlikely candidate (using note) ' . $node->getNodePath() . ' by "' . $node->tagName . '" with readability ' . self::getContentScore($node)); $node->parentNode->removeChild($node); } } @@ -1098,14 +1100,13 @@ protected function grabArticle(?\DOMElement $page = null) // Scale the final candidates score based on link density. Good content should have a // relatively small link density (5% or less) and be mostly unaffected by this operation. // If not for this we would have used XPath to find maximum @readability. - $readability = $item->getAttributeNode('readability'); - $readability->value = round($readability->value * (1 - $this->getLinkDensity($item)), 0, \PHP_ROUND_HALF_UP); + self::updateContentScore($item, fn ($prevScore) => round($prevScore * (1 - $this->getLinkDensity($item)), 0, \PHP_ROUND_HALF_UP)); for ($t = 0; $t < 5; ++$t) { $aTopCandidate = $topCandidates[$t]; - if (!$aTopCandidate || $readability->value > (int) $aTopCandidate->getAttribute('readability')) { - $this->logger->debug('Candidate: ' . $item->getNodePath() . ' (' . $item->getAttribute('class') . ':' . $item->getAttribute('id') . ') with score ' . $readability->value); + if (!$aTopCandidate || self::getContentScore($item) > self::getContentScore($aTopCandidate)) { + $this->logger->debug('Candidate: ' . 
$item->getNodePath() . ' (' . $item->getAttribute('class') . ':' . $item->getAttribute('id') . ') with score ' . self::getContentScore($item)); array_splice($topCandidates, $t, 0, [$item]); if (\count($topCandidates) > 5) { array_pop($topCandidates); @@ -1371,6 +1372,26 @@ protected function reinitBody(): void } } + /** + * Updates the content score for the given element using the provided function. + * + * @param callable(float): float $f + */ + private static function updateContentScore(\DOMElement $element, callable $f): void + { + $readabilityAttr = $element->getAttributeNode('readability'); + $prevScore = (float) $readabilityAttr->value; + $readabilityAttr->value = (string) $f($prevScore); + } + + /** + * Gets the content score for given element. + */ + private static function getContentScore(\DOMElement $element): float + { + return $element->hasAttribute('readability') ? (float) $element->getAttribute('readability') : 0; + } + /** * Load HTML in a DOMDocument. * Apply Pre filters From 32267cb7b45ab79c0d4cff48610f21c8e255b0df Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Wed, 9 Oct 2024 08:51:02 +0200 Subject: [PATCH 03/17] Add type annotations to properties To preserve BC, we are not using type hints for now. 
--- src/Readability.php | 113 +++++++++++++++++++++++++++++++++----- tests/ReadabilityTest.php | 2 +- 2 files changed, 100 insertions(+), 15 deletions(-) diff --git a/src/Readability.php b/src/Readability.php index 9c909cb..a0bb649 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -24,25 +24,60 @@ class Readability implements LoggerAwareInterface public const MIN_ARTICLE_LENGTH = 200; public const MIN_NODE_LENGTH = 80; public const MAX_LINK_DENSITY = 0.25; + + /** + * @var bool + */ public $convertLinksToFootnotes = false; + + /** + * @var bool + */ public $revertForcedParagraphElements = false; + + /** + * @var ?\DOMElement + */ public $articleTitle; + + /** + * @var ?\DOMElement + */ public $articleContent; + + /** + * @var ?string + */ public $original_html; + /** - * @var \DOMDocument + * @var ?\DOMDocument */ public $dom; - // optional - URL where HTML was retrieved + + /** + * @var ?string URL where HTML was retrieved + */ public $url = null; - // preserves more content (experimental) + + /** + * @var bool preserves more content (experimental) + */ public $lightClean = true; - // no more used, keept to avoid BC + + /** + * @var bool no more used, keept to avoid BC + */ public $debug = false; + + /** + * @var bool + */ public $tidied = false; /** - * All of the regular expressions in use within readability. + * @var array All of the regular expressions in use within readability. + * * Defined up here so we don't instantiate them repeatedly in loops. */ public $regexps = [ @@ -57,10 +92,18 @@ class Readability implements LoggerAwareInterface 'hasContent' => '/\S$/', 'isNotVisible' => '/display\s*:\s*none/', ]; + + /** + * @var array + */ public $defaultTagsToScore = ['section', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'td', 'pre']; - // The commented out elements qualify as phrasing content but tend to be - // removed by readability when put into paragraphs, so we ignore them here. 
+ + /** + * @var array + */ public $phrasingElements = [ + // The commented out elements qualify as phrasing content but tend to be + // removed by readability when put into paragraphs, so we ignore them here. // "CANVAS", "IFRAME", "SVG", "VIDEO", 'ABBR', 'AUDIO', 'B', 'BDO', 'BR', 'BUTTON', 'CITE', 'CODE', 'DATA', 'DATALIST', 'DFN', 'EM', 'EMBED', 'I', 'IMG', 'INPUT', 'KBD', 'LABEL', @@ -68,6 +111,10 @@ class Readability implements LoggerAwareInterface 'RUBY', 'SAMP', 'SCRIPT', 'SELECT', 'SMALL', 'SPAN', 'STRONG', 'SUB', 'SUP', 'TEXTAREA', 'TIME', 'VAR', 'WBR', ]; + + /** + * @var array + */ public $tidy_config = [ 'tidy-mark' => false, 'vertical-space' => false, @@ -92,20 +139,55 @@ class Readability implements LoggerAwareInterface 'output-encoding' => 'utf8', 'hide-comments' => true, ]; - // article domain regexp for calibration + + /** + * @var ?string article domain regexp for calibration + */ protected $domainRegExp = null; + + /** + * @var ?\DOMElement + */ protected $body = null; - // Cache the body HTML in case we need to re-use it later + + /** + * @var ?string Cache the body HTML in case we need to re-use it later + */ protected $bodyCache = null; - // 1 | 2 | 4; // Start with all processing flags set. 
- protected $flags = 7; - // indicates whether we were able to extract or not + + /** + * @var int-mask-of start with all processing flags set + */ + protected $flags = self::FLAG_STRIP_UNLIKELYS | self::FLAG_WEIGHT_ATTRIBUTES | self::FLAG_CLEAN_CONDITIONALLY; + + /** + * @var bool indicates whether we were able to extract or not + */ protected $success = false; + + /** + * @var LoggerInterface + */ protected $logger; + + /** + * @var string + */ protected $parser; + + /** + * @var string + */ protected $html; + + /** + * @var bool + */ protected $useTidy; - // raw HTML filters + + /** + * @var array raw HTML filters + */ protected $pre_filters = [ // remove spans as we redefine styles and they're probably special-styled '!]*>!is' => '', @@ -118,7 +200,10 @@ class Readability implements LoggerAwareInterface // replace fonts to spans '!<(/?)font[^>]*>!is' => '<\\1span>', ]; - // output HTML filters + + /** + * @var array output HTML filters + */ protected $post_filters = [ // replace excessive br's '/\s*

' Date: Fri, 11 Oct 2024 09:47:35 +0200 Subject: [PATCH 04/17] Throw a BadMethodCallException calling get{Title,Content} uninitialized This is bad state so it should not be a breaking change. --- src/Readability.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Readability.php b/src/Readability.php index a0bb649..9c1b983 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -249,6 +249,10 @@ public function setLogger(LoggerInterface $logger): void */ public function getTitle() { + if (null === $this->articleTitle) { + throw new \BadMethodCallException('You need to successfully run Readability::init() before you can get title'); + } + return $this->articleTitle; } @@ -259,6 +263,10 @@ public function getTitle() */ public function getContent() { + if (null === $this->articleContent) { + throw new \BadMethodCallException('You need to successfully run Readability::init() before you can get content'); + } + return $this->articleContent; } From 5040fc1587cde6c8d5ba9a9ee0e2ee13a3ea1cd3 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 11 Oct 2024 09:57:15 +0200 Subject: [PATCH 05/17] Use method param type hints instead of PHPDoc Since we require PHP 7.4, contravariance in param types is supported, so we do not need to worry about subclasses that widen the param type. It will be only breaking in the unlikely case a subclass uses a type that contradicts the PHPDoc type annotation and does not extend `DOMNode`. Also fix the type annotation since some invocations pass it a `DOMText`, an arbitrary sibling/child `DOMNode` or even `null`. 
--- .php-cs-fixer.php | 1 + src/Readability.php | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php index 5f09a0c..b8b7aac 100644 --- a/.php-cs-fixer.php +++ b/.php-cs-fixer.php @@ -22,6 +22,7 @@ 'ordered_imports' => true, 'php_unit_strict' => false, 'phpdoc_order' => true, + 'phpdoc_to_param_type' => true, // 'psr4' => true, 'strict_comparison' => true, 'strict_param' => true, diff --git a/src/Readability.php b/src/Readability.php index 9c1b983..0bc2ec8 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -546,12 +546,8 @@ public function prepArticle(\DOMNode $articleContent): void /** * Get the inner text of a node. * This also strips out any excess whitespace to be found. - * - * @param \DOMElement $e - * @param bool $normalizeSpaces (default: true) - * @param bool $flattenLines (default: false) */ - public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLines = false): string + public function getInnerText(?\DOMNode $e, bool $normalizeSpaces = true, bool $flattenLines = false): string { if (null === $e || !isset($e->textContent) || '' === $e->textContent) { return ''; From f610333040aaf2ef3ddac2d67f886b52ecb26679 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 11 Oct 2024 23:00:43 +0200 Subject: [PATCH 06/17] Add @var casts for XPath queries Because PHPStan does not currently analyze XPath expressions, we need to use a @var cast: https://phpstan.org/writing-php-code/phpdocs-basics#inline-%40var These are lists of elements since the asterisk wildcard only selects elements: https://www.w3.org/TR/1999/REC-xpath-19991116/#path-abbrev --- src/Readability.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Readability.php b/src/Readability.php index 0bc2ec8..9bf62c0 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -474,6 +474,7 @@ public function prepArticle(\DOMNode $articleContent): void } // Remove service data-candidate attribute. 
+ /** @var \DOMNodeList<\DOMElement> */ $elems = $xpath->query('.//*[@data-candidate]', $articleContent); foreach ($elems as $elem) { $elem->removeAttribute('data-candidate'); @@ -1159,6 +1160,7 @@ protected function grabArticle(?\DOMElement $page = null) * This is faster to do before scoring but safer after. */ if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) { + /** @var \DOMNodeList<\DOMElement> */ $candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement); for ($c = $candidates->length - 1; $c >= 0; --$c) { @@ -1180,6 +1182,7 @@ protected function grabArticle(?\DOMElement $page = null) $topCandidates = array_fill(0, 5, null); if ($xpath) { // Using array of DOMElements after deletion is a path to DOOMElement. + /** @var \DOMNodeList<\DOMElement> */ $candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement); $this->logger->debug('Candidates: ' . $candidates->length); From 1226daa8f8584dd6623f936b413599faaf1ae750 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sat, 16 Mar 2024 21:51:43 +0100 Subject: [PATCH 07/17] Use JSLikeHTMLElement in type hints We use `DOMDocument::registerNodeClass()` to make DOM methods return `JSLikeHTMLElement` instead of `DOMElement`. Unfortunately, it is not possible for PHPStan to detect that so we need to cast it ourselves: https://github.com/phpstan/phpstan/discussions/10748 We may want to deprecate it in the future just to get rid of this mess. Also add PHPStan stubs for DOM classes so that we do not need to cast everything. It is fine to do that globally as we only ever use DOM with `JSLikeHTMLElement` registered. This patch also allows us to get rid of the assertions in tests. 
--- phpstan.dist.neon | 3 +++ src/Readability.php | 18 ++++++++--------- stubs/dom.stub | 36 ++++++++++++++++++++++++++++++++++ tests/ReadabilityTest.php | 41 --------------------------------------- 4 files changed, 48 insertions(+), 50 deletions(-) create mode 100644 stubs/dom.stub diff --git a/phpstan.dist.neon b/phpstan.dist.neon index 7e5d5d8..95b916d 100644 --- a/phpstan.dist.neon +++ b/phpstan.dist.neon @@ -8,6 +8,9 @@ parameters: bootstrapFiles: - vendor/bin/.phpunit/phpunit/vendor/autoload.php + stubFiles: + - stubs/dom.stub + includes: - vendor/phpstan/phpstan-phpunit/extension.neon - vendor/phpstan/phpstan-phpunit/rules.neon diff --git a/src/Readability.php b/src/Readability.php index 9bf62c0..9f6f87f 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -36,12 +36,12 @@ class Readability implements LoggerAwareInterface public $revertForcedParagraphElements = false; /** - * @var ?\DOMElement + * @var ?JSLikeHTMLElement */ public $articleTitle; /** - * @var ?\DOMElement + * @var ?JSLikeHTMLElement */ public $articleContent; @@ -245,7 +245,7 @@ public function setLogger(LoggerInterface $logger): void /** * Get article title element. * - * @return \DOMElement + * @return JSLikeHTMLElement */ public function getTitle() { @@ -259,7 +259,7 @@ public function getTitle() /** * Get article content element. * - * @return \DOMElement + * @return JSLikeHTMLElement */ public function getContent() { @@ -447,7 +447,7 @@ public function addFootnotes(\DOMElement $articleContent): void */ public function prepArticle(\DOMNode $articleContent): void { - if (!$articleContent instanceof \DOMElement) { + if (!$articleContent instanceof JSLikeHTMLElement) { return; } @@ -474,7 +474,7 @@ public function prepArticle(\DOMNode $articleContent): void } // Remove service data-candidate attribute. 
- /** @var \DOMNodeList<\DOMElement> */ + /** @var \DOMNodeList */ $elems = $xpath->query('.//*[@data-candidate]', $articleContent); foreach ($elems as $elem) { $elem->removeAttribute('data-candidate'); @@ -645,7 +645,7 @@ public function getWeight(\DOMElement $e): int /** * Remove extraneous break tags from a node. */ - public function killBreaks(\DOMElement $node): void + public function killBreaks(JSLikeHTMLElement $node): void { $html = $node->getInnerHTML(); $html = preg_replace($this->regexps['killBreaks'], '
', $html); @@ -1160,7 +1160,7 @@ protected function grabArticle(?\DOMElement $page = null) * This is faster to do before scoring but safer after. */ if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) { - /** @var \DOMNodeList<\DOMElement> */ + /** @var \DOMNodeList */ $candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement); for ($c = $candidates->length - 1; $c >= 0; --$c) { @@ -1182,7 +1182,7 @@ protected function grabArticle(?\DOMElement $page = null) $topCandidates = array_fill(0, 5, null); if ($xpath) { // Using array of DOMElements after deletion is a path to DOOMElement. - /** @var \DOMNodeList<\DOMElement> */ + /** @var \DOMNodeList */ $candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement); $this->logger->debug('Candidates: ' . $candidates->length); diff --git a/stubs/dom.stub b/stubs/dom.stub new file mode 100644 index 0000000..ac85e9d --- /dev/null +++ b/stubs/dom.stub @@ -0,0 +1,36 @@ + + */ + public function getElementsByTagName($name) {} +} + +class DOMNode +{ + +} + +class DOMElement extends DOMNode +{ + /** + * @param string $name + * @return DOMNodeList + */ + public function getElementsByTagName($name) {} +} diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php index b09a97a..74b4d31 100644 --- a/tests/ReadabilityTest.php +++ b/tests/ReadabilityTest.php @@ -5,7 +5,6 @@ use Monolog\Handler\TestHandler; use Monolog\Logger; use Psr\Log\LoggerInterface; -use Readability\JSLikeHTMLElement; use Readability\Readability; class ReadabilityTest extends \PHPUnit\Framework\TestCase @@ -80,8 +79,6 @@ public function testInitNoContent(): void $res = $readability->init(); $this->assertFalse($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertEmpty($readability->getTitle()->getInnerHtml()); 
$this->assertStringContainsString('Sorry, Readability was unable to parse this page for content.', $readability->getContent()->getInnerHtml()); } @@ -92,8 +89,6 @@ public function testInitP(): void $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('

getContent()->getInnerHtml()); @@ -105,8 +100,6 @@ public function testInitDivP(): void $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -119,8 +112,6 @@ public function testInitDiv(): void $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -134,8 +125,6 @@ public function testWithFootnotes(): void $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -151,8 +140,6 @@ public function testStandardClean(): void $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -167,8 +154,6 @@ public function testWithIframe(): void $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -182,8 +167,6 @@ public function testWithArticle(): void $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); @@ -197,8 +180,6 @@ public function testWithAside(): void $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('