Skip to content

Commit 3d746ed

Browse files
committed
Use JSLikeHTMLElement in type hints
We use `DOMDocument::registerNodeClass()` to make DOM methods return `JSLikeHTMLElement` instead of `DOMElement`. Unfortunately, it is not possible for PHPStan to detect that, so we need to cast it ourselves (see phpstan/phpstan#10748). We may want to deprecate it in the future just to get rid of this mess. Also add PHPStan stubs for DOM classes so that we do not need to cast everything. It is fine to do that globally, as we only ever use DOM with `JSLikeHTMLElement` registered. This patch also allows us to get rid of the assertions in tests.
1 parent 47ab252 commit 3d746ed

File tree

4 files changed

+48
-50
lines changed

4 files changed

+48
-50
lines changed

phpstan.dist.neon

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ parameters:
88
bootstrapFiles:
99
- vendor/bin/.phpunit/phpunit/vendor/autoload.php
1010

11+
stubFiles:
12+
- stubs/dom.stub
13+
1114
includes:
1215
- vendor/phpstan/phpstan-phpunit/extension.neon
1316
- vendor/phpstan/phpstan-phpunit/rules.neon

src/Readability.php

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ class Readability implements LoggerAwareInterface
3636
public $revertForcedParagraphElements = false;
3737

3838
/**
39-
* @var ?\DOMElement
39+
* @var ?JSLikeHTMLElement
4040
*/
4141
public $articleTitle;
4242

4343
/**
44-
* @var ?\DOMElement
44+
* @var ?JSLikeHTMLElement
4545
*/
4646
public $articleContent;
4747

@@ -245,7 +245,7 @@ public function setLogger(LoggerInterface $logger): void
245245
/**
246246
* Get article title element.
247247
*
248-
* @return \DOMElement
248+
* @return JSLikeHTMLElement
249249
*/
250250
public function getTitle()
251251
{
@@ -259,7 +259,7 @@ public function getTitle()
259259
/**
260260
* Get article content element.
261261
*
262-
* @return \DOMElement
262+
* @return JSLikeHTMLElement
263263
*/
264264
public function getContent()
265265
{
@@ -447,7 +447,7 @@ public function addFootnotes(\DOMElement $articleContent): void
447447
*/
448448
public function prepArticle(\DOMNode $articleContent): void
449449
{
450-
if (!$articleContent instanceof \DOMElement) {
450+
if (!$articleContent instanceof JSLikeHTMLElement) {
451451
return;
452452
}
453453

@@ -474,7 +474,7 @@ public function prepArticle(\DOMNode $articleContent): void
474474
}
475475

476476
// Remove service data-candidate attribute.
477-
/** @var \DOMNodeList<\DOMElement> */
477+
/** @var \DOMNodeList<JSLikeHTMLElement> */
478478
$elems = $xpath->query('.//*[@data-candidate]', $articleContent);
479479
foreach ($elems as $elem) {
480480
$elem->removeAttribute('data-candidate');
@@ -645,7 +645,7 @@ public function getWeight(\DOMElement $e): int
645645
/**
646646
* Remove extraneous break tags from a node.
647647
*/
648-
public function killBreaks(\DOMElement $node): void
648+
public function killBreaks(JSLikeHTMLElement $node): void
649649
{
650650
$html = $node->getInnerHTML();
651651
$html = preg_replace($this->regexps['killBreaks'], '<br />', $html);
@@ -1160,7 +1160,7 @@ protected function grabArticle(?\DOMElement $page = null)
11601160
* This is faster to do before scoring but safer after.
11611161
*/
11621162
if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) {
1163-
/** @var \DOMNodeList<\DOMElement> */
1163+
/** @var \DOMNodeList<JSLikeHTMLElement> */
11641164
$candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement);
11651165

11661166
for ($c = $candidates->length - 1; $c >= 0; --$c) {
@@ -1182,7 +1182,7 @@ protected function grabArticle(?\DOMElement $page = null)
11821182
$topCandidates = array_fill(0, 5, null);
11831183
if ($xpath) {
11841184
// Using array of DOMElements after deletion is a path to DOOMElement.
1185-
/** @var \DOMNodeList<\DOMElement> */
1185+
/** @var \DOMNodeList<JSLikeHTMLElement> */
11861186
$candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement);
11871187
$this->logger->debug('Candidates: ' . $candidates->length);
11881188

stubs/dom.stub

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<?php
2+
3+
// SPDX-FileCopyrightText: 2022 Ondřej Mirtes
4+
// SPDX-License-Identifier: MIT
5+
// Based on https://github.com/phpstan/phpstan-src/blob/b2a9ba4b82d19b01f37eb983746f1840f1213851/stubs/dom.stub
6+
7+
use Readability\JSLikeHTMLElement;
8+
9+
class DOMDocument
10+
{
11+
/** @var JSLikeHTMLElement|null */
12+
public $documentElement;
13+
14+
/** @var null */
15+
public $ownerDocument;
16+
17+
/**
18+
* @param string $name
19+
* @return DOMNodeList<JSLikeHTMLElement>
20+
*/
21+
public function getElementsByTagName($name) {}
22+
}
23+
24+
class DOMNode
25+
{
26+
27+
}
28+
29+
class DOMElement extends DOMNode
30+
{
31+
/**
32+
* @param string $name
33+
* @return DOMNodeList<JSLikeHTMLElement>
34+
*/
35+
public function getElementsByTagName($name) {}
36+
}

tests/ReadabilityTest.php

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
use Monolog\Handler\TestHandler;
66
use Monolog\Logger;
77
use Psr\Log\LoggerInterface;
8-
use Readability\JSLikeHTMLElement;
98
use Readability\Readability;
109

1110
class ReadabilityTest extends \PHPUnit\Framework\TestCase
@@ -80,8 +79,6 @@ public function testInitNoContent(): void
8079
$res = $readability->init();
8180

8281
$this->assertFalse($res);
83-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
84-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
8582
$this->assertEmpty($readability->getTitle()->getInnerHtml());
8683
$this->assertStringContainsString('Sorry, Readability was unable to parse this page for content.', $readability->getContent()->getInnerHtml());
8784
}
@@ -92,8 +89,6 @@ public function testInitP(): void
9289
$res = $readability->init();
9390

9491
$this->assertTrue($res);
95-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
96-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
9792
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
9893
$this->assertEmpty($readability->getTitle()->getInnerHtml());
9994
$this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml());
@@ -105,8 +100,6 @@ public function testInitDivP(): void
105100
$res = $readability->init();
106101

107102
$this->assertTrue($res);
108-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
109-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
110103
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
111104
$this->assertEmpty($readability->getTitle()->getInnerHtml());
112105
$this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml());
@@ -119,8 +112,6 @@ public function testInitDiv(): void
119112
$res = $readability->init();
120113

121114
$this->assertTrue($res);
122-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
123-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
124115
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
125116
$this->assertEmpty($readability->getTitle()->getInnerHtml());
126117
$this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml());
@@ -134,8 +125,6 @@ public function testWithFootnotes(): void
134125
$res = $readability->init();
135126

136127
$this->assertTrue($res);
137-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
138-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
139128
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
140129
$this->assertEmpty($readability->getTitle()->getInnerHtml());
141130
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@@ -151,8 +140,6 @@ public function testStandardClean(): void
151140
$res = $readability->init();
152141

153142
$this->assertTrue($res);
154-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
155-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
156143
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
157144
$this->assertEmpty($readability->getTitle()->getInnerHtml());
158145
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@@ -167,8 +154,6 @@ public function testWithIframe(): void
167154
$res = $readability->init();
168155

169156
$this->assertTrue($res);
170-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
171-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
172157
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
173158
$this->assertEmpty($readability->getTitle()->getInnerHtml());
174159
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@@ -182,8 +167,6 @@ public function testWithArticle(): void
182167
$res = $readability->init();
183168

184169
$this->assertTrue($res);
185-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
186-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
187170
$this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml());
188171
$this->assertEmpty($readability->getTitle()->getInnerHtml());
189172
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@@ -197,8 +180,6 @@ public function testWithAside(): void
197180
$res = $readability->init();
198181

199182
$this->assertTrue($res);
200-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
201-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
202183
$this->assertEmpty($readability->getTitle()->getInnerHtml());
203184
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
204185
$this->assertStringNotContainsString('<aside>', $readability->getContent()->getInnerHtml());
@@ -212,8 +193,6 @@ public function testWithClasses(): void
212193
$res = $readability->init();
213194

214195
$this->assertTrue($res);
215-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
216-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
217196
$this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml());
218197
$this->assertEmpty($readability->getTitle()->getInnerHtml());
219198
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@@ -228,8 +207,6 @@ public function testWithClassesWithoutLightClean(): void
228207
$res = $readability->init();
229208

230209
$this->assertTrue($res);
231-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
232-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
233210
$this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml());
234211
$this->assertEmpty($readability->getTitle()->getInnerHtml());
235212
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@@ -243,8 +220,6 @@ public function testWithTd(): void
243220
$res = $readability->init();
244221

245222
$this->assertTrue($res);
246-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
247-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
248223
$this->assertEmpty($readability->getTitle()->getInnerHtml());
249224
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
250225
}
@@ -256,8 +231,6 @@ public function testWithSameClasses(): void
256231
$res = $readability->init();
257232

258233
$this->assertTrue($res);
259-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
260-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
261234
$this->assertEmpty($readability->getTitle()->getInnerHtml());
262235
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
263236
$this->assertStringContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@@ -270,8 +243,6 @@ public function testWithScript(): void
270243
$res = $readability->init();
271244

272245
$this->assertTrue($res);
273-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
274-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
275246
$this->assertEmpty($readability->getTitle()->getInnerHtml());
276247
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
277248
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@@ -284,8 +255,6 @@ public function testTitle(): void
284255
$res = $readability->init();
285256

286257
$this->assertTrue($res);
287-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
288-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
289258
$this->assertSame('this is my title', $readability->getTitle()->getInnerHtml());
290259
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
291260
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@@ -298,8 +267,6 @@ public function testTitleWithDash(): void
298267
$res = $readability->init();
299268

300269
$this->assertTrue($res);
301-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
302-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
303270
$this->assertSame('title2 - title3', $readability->getTitle()->getInnerHtml());
304271
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
305272
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@@ -312,8 +279,6 @@ public function testTitleWithDoubleDot(): void
312279
$res = $readability->init();
313280

314281
$this->assertTrue($res);
315-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
316-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
317282
$this->assertSame('title2 : title3', $readability->getTitle()->getInnerHtml());
318283
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
319284
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@@ -326,8 +291,6 @@ public function testTitleTooShortUseH1(): void
326291
$res = $readability->init();
327292

328293
$this->assertTrue($res);
329-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
330-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
331294
$this->assertSame('this is my h1 title !', $readability->getTitle()->getInnerHtml());
332295
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
333296
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@@ -374,8 +337,6 @@ public function testAutoClosingIframeNotThrowingException(): void
374337
$res = $readability->init();
375338

376339
$this->assertTrue($res);
377-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
378-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
379340
$this->assertStringContainsString('<iframe src="https://www.youtube.com/embed/PUep6xNeKjA" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"> </iframe>', $readability->getContent()->getInnerHtml());
380341
$this->assertStringContainsString('3D Touch', $readability->getTitle()->getInnerHtml());
381342
} finally {
@@ -442,8 +403,6 @@ public function testAppendIdAlreadyHere(): void
442403
$res = $readability->init();
443404

444405
$this->assertTrue($res);
445-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
446-
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
447406
}
448407

449408
public function testPostFilters(): void

0 commit comments

Comments
 (0)