From bd7137caf00d60b3209b95ffc3af599b8e2e04fa Mon Sep 17 00:00:00 2001 From: jrfnl Date: Thu, 24 Oct 2024 01:23:14 +0200 Subject: [PATCH 1/3] Tokenizer/PHP: add tests for tokenization of `yield` and `yield from` .. to document and safeguard the existing behaviour. Includes skipping one particular assertion on PHP 5.4. This assertion would fail on PHP 5.4 with PHPUnit 4 as PHPUnit polyfills the `T_YIELD` token too, but with a different value, which causes the token 'type' to be set to `UNKNOWN`. This issue _only_ occurs when running the tests, not when running PHPCS outside of a test situation, so this is not a real problem when running PHPCS on PHP 5.4. For reference: the PHPUnit polyfilled token is declared in the PHP_CodeCoverage_Report_HTML_Renderer_File class in vendor/phpunit/php-code-coverage/src/CodeCoverage/Report/HTML/Renderer/File.php --- tests/Core/Tokenizers/PHP/YieldTest.inc | 49 +++++ tests/Core/Tokenizers/PHP/YieldTest.php | 240 ++++++++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 tests/Core/Tokenizers/PHP/YieldTest.inc create mode 100644 tests/Core/Tokenizers/PHP/YieldTest.php diff --git a/tests/Core/Tokenizers/PHP/YieldTest.inc b/tests/Core/Tokenizers/PHP/YieldTest.inc new file mode 100644 index 0000000000..7677624d50 --- /dev/null +++ b/tests/Core/Tokenizers/PHP/YieldTest.inc @@ -0,0 +1,49 @@ +yield; + + /* testYieldUsedAsPropertyName2 */ + echo $obj?->yield(); + + /* testYieldUsedForClassConstantAccess1 */ + echo MyClass::YIELD; + /* testFromUsedForClassConstantAccess1 */ + echo MyClass::FROM; + } +} + +function myGen() { + /* testYieldLiveCoding */ + yield diff --git a/tests/Core/Tokenizers/PHP/YieldTest.php b/tests/Core/Tokenizers/PHP/YieldTest.php new file mode 100644 index 0000000000..87aaf5784f --- /dev/null +++ b/tests/Core/Tokenizers/PHP/YieldTest.php @@ -0,0 +1,240 @@ + + * @copyright 2021 Squiz Pty Ltd (ABN 77 084 670 600) + * @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence + */ + +namespace PHP_CodeSniffer\Tests\Core\Tokenizers\PHP; + +use PHP_CodeSniffer\Tests\Core\Tokenizers\AbstractTokenizerTestCase; +use PHP_CodeSniffer\Util\Tokens; + +/** + * Tests the tokenization of the `yield` and `yield from` keywords. + * + * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize + */ +final class YieldTest extends AbstractTokenizerTestCase +{ + + + /** + * Test that the yield keyword is tokenized as such. + * + * @param string $testMarker The comment which prefaces the target token in the test file. + * + * @dataProvider dataYieldKeyword + * + * @return void + */ + public function testYieldKeyword($testMarker) + { + $tokens = $this->phpcsFile->getTokens(); + $target = $this->getTargetToken($testMarker, [T_YIELD, T_YIELD_FROM, T_STRING]); + $tokenArray = $tokens[$target]; + + $this->assertSame(T_YIELD, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_YIELD (code)'); + + // This assertion would fail on PHP 5.4 with PHPUnit 4 as PHPUnit polyfills the `T_YIELD` token too, but + // with a different value, which causes the token 'type' to be set to `UNKNOWN`. + // This issue _only_ occurs when running the tests, not when running PHPCS outside of a test situation. + // The PHPUnit polyfilled token is declared in the PHP_CodeCoverage_Report_HTML_Renderer_File class + // in vendor/phpunit/php-code-coverage/src/CodeCoverage/Report/HTML/Renderer/File.php. + if (PHP_VERSION_ID >= 50500) { + $this->assertSame('T_YIELD', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_YIELD (type)'); + } + + }//end testYieldKeyword() + + + /** + * Data provider. + * + * @see testYieldKeyword() + * + * @return array> + */ + public static function dataYieldKeyword() + { + return [ + 'yield' => ['/* testYield */'], + 'yield followed by comment' => ['/* testYieldFollowedByComment */'], + 'yield at end of file, live coding' => ['/* testYieldLiveCoding */'], + ]; + + }//end dataYieldKeyword() + + + /** + * Test that the yield from keyword is tokenized as a single token when it in on a single line + * and only has whitespace between the words. + * + * @param string $testMarker The comment which prefaces the target token in the test file. + * @param string $content Optional. The test token content to search for. + * Defaults to null. + * + * @dataProvider dataYieldFromKeywordSingleToken + * + * @return void + */ + public function testYieldFromKeywordSingleToken($testMarker, $content=null) + { + $tokens = $this->phpcsFile->getTokens(); + $target = $this->getTargetToken($testMarker, [T_YIELD, T_YIELD_FROM, T_STRING], $content); + $tokenArray = $tokens[$target]; + + $this->assertSame(T_YIELD_FROM, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_YIELD_FROM (code)'); + $this->assertSame('T_YIELD_FROM', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_YIELD_FROM (type)'); + + }//end testYieldFromKeywordSingleToken() + + + /** + * Data provider. + * + * @see testYieldFromKeywordSingleToken() + * + * @return array> + */ + public static function dataYieldFromKeywordSingleToken() + { + return [ + 'yield from' => [ + 'testMarker' => '/* testYieldFrom */', + ], + 'yield from with extra space between' => [ + 'testMarker' => '/* testYieldFromWithExtraSpacesBetween */', + ], + 'yield from with tab between' => [ + 'testMarker' => '/* testYieldFromWithTabBetween */', + ], + ]; + + }//end dataYieldFromKeywordSingleToken() + + + /** + * Test that the yield from keyword is tokenized as a single token when it in on a single line + * and only has whitespace between the words. + * + * @param string $testMarker The comment which prefaces the target token in the test file. + * @param array> $expectedTokens The tokenization expected. + * + * @dataProvider dataYieldFromKeywordMultiToken + * + * @return void + */ + public function testYieldFromKeywordMultiToken($testMarker, $expectedTokens) + { + $tokens = $this->phpcsFile->getTokens(); + $target = $this->getTargetToken($testMarker, [T_YIELD, T_YIELD_FROM, T_STRING]); + + foreach ($expectedTokens as $nr => $tokenInfo) { + $this->assertSame( + constant($tokenInfo['type']), + $tokens[$target]['code'], + 'Token tokenized as '.Tokens::tokenName($tokens[$target]['code']).', not '.$tokenInfo['type'].' (code)' + ); + $this->assertSame( + $tokenInfo['type'], + $tokens[$target]['type'], + 'Token tokenized as '.$tokens[$target]['type'].', not '.$tokenInfo['type'].' (type)' + ); + $this->assertSame( + $tokenInfo['content'], + $tokens[$target]['content'], + 'Content of token '.($nr + 1).' ('.$tokens[$target]['type'].') does not match expectations' + ); + + ++$target; + } + + }//end testYieldFromKeywordMultiToken() + + + /** + * Data provider. + * + * @see testYieldFromKeywordMultiToken() + * + * @return array>>> + */ + public static function dataYieldFromKeywordMultiToken() + { + return [ + 'yield from with new line' => [ + 'testMarker' => '/* testYieldFromSplitByNewLines */', + 'expectedTokens' => [ + [ + 'type' => 'T_YIELD_FROM', + 'content' => 'yield +', + ], + [ + 'type' => 'T_YIELD_FROM', + 'content' => ' +', + ], + [ + 'type' => 'T_YIELD_FROM', + 'content' => ' FROM', + ], + [ + 'type' => 'T_WHITESPACE', + 'content' => ' +', + ], + ], + ], + ]; + + }//end dataYieldFromKeywordMultiToken() + + + /** + * Test that 'yield' or 'from' when not used as the reserved keyword are tokenized as `T_STRING`. + * + * @param string $testMarker The comment which prefaces the target token in the test file. + * + * @dataProvider dataYieldNonKeyword + * + * @return void + */ + public function testYieldNonKeyword($testMarker) + { + $tokens = $this->phpcsFile->getTokens(); + $target = $this->getTargetToken($testMarker, [T_YIELD, T_YIELD_FROM, T_STRING]); + $tokenArray = $tokens[$target]; + + $this->assertSame(T_STRING, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (code)'); + $this->assertSame('T_STRING', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (type)'); + + }//end testYieldNonKeyword() + + + /** + * Data provider. + * + * @see testYieldNonKeyword() + * + * @return array> + */ + public static function dataYieldNonKeyword() + { + return [ + 'yield used as class name' => ['/* testYieldUsedAsClassName */'], + 'yield used as class constant name' => ['/* testYieldUsedAsClassConstantName */'], + 'yield used as method name' => ['/* testYieldUsedAsMethodName */'], + 'yield used as property access 1' => ['/* testYieldUsedAsPropertyName1 */'], + 'yield used as property access 2' => ['/* testYieldUsedAsPropertyName2 */'], + 'yield used as class constant access' => ['/* testYieldUsedForClassConstantAccess1 */'], + 'from used as class constant access' => ['/* testFromUsedForClassConstantAccess1 */'], + ]; + + }//end dataYieldNonKeyword() + + +}//end class From 750744218b07c3488cc65d07fd602390557a182d Mon Sep 17 00:00:00 2001 From: jrfnl Date: Thu, 24 Oct 2024 01:00:41 +0200 Subject: [PATCH 2/3] Tokenizer/PHP: simplify the "yield"/"yield from" polyfill code By moving the check a `T_STRING` "yield" keyword up and always adjusting the token in the original token stream, we can remove a lot of duplicate code. --- src/Tokenizers/PHP.php | 66 +++++++++++++----------------------------- 1 file changed, 20 insertions(+), 46 deletions(-) diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 3ebc347b73..2fc2ac8afe 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -1492,6 +1492,26 @@ protected function tokenize($string) continue; }//end if + /* + Before PHP 5.5, the yield keyword was tokenized as + T_STRING. So look for and change this token in + earlier versions. + */ + + if (PHP_VERSION_ID < 50500 + && $tokenIsArray === true + && $token[0] === T_STRING + && strtolower($token[1]) === 'yield' + && isset($this->tstringContexts[$finalTokens[$lastNotEmptyToken]['code']]) === false + ) { + // Could still be "yield from" and potentially multi-line, so adjust the token stack. + $token[0] = T_YIELD; + + if (PHP_CODESNIFFER_VERBOSITY > 1) { + echo "\t\t* token $stackPtr changed from T_STRING to T_YIELD".PHP_EOL; + } + } + /* Before PHP 7.0, the "yield from" was tokenized as T_YIELD, T_WHITESPACE and T_STRING. So look for @@ -1499,7 +1519,6 @@ protected function tokenize($string) */ if (PHP_VERSION_ID < 70000 - && PHP_VERSION_ID >= 50500 && $tokenIsArray === true && $token[0] === T_YIELD && isset($tokens[($stackPtr + 1)]) === true @@ -1524,51 +1543,6 @@ protected function tokenize($string) $tokens[($stackPtr + 2)] = null; } - /* - Before PHP 5.5, the yield keyword was tokenized as - T_STRING. So look for and change this token in - earlier versions. - Checks also if it is just "yield" or "yield from". - */ - - if (PHP_VERSION_ID < 50500 - && $tokenIsArray === true - && $token[0] === T_STRING - && strtolower($token[1]) === 'yield' - && isset($this->tstringContexts[$finalTokens[$lastNotEmptyToken]['code']]) === false - ) { - if (isset($tokens[($stackPtr + 1)]) === true - && isset($tokens[($stackPtr + 2)]) === true - && $tokens[($stackPtr + 1)][0] === T_WHITESPACE - && $tokens[($stackPtr + 2)][0] === T_STRING - && strtolower($tokens[($stackPtr + 2)][1]) === 'from' - ) { - // Could be multi-line, so just just the token stack. - $token[0] = T_YIELD_FROM; - $token[1] .= $tokens[($stackPtr + 1)][1].$tokens[($stackPtr + 2)][1]; - - if (PHP_CODESNIFFER_VERBOSITY > 1) { - for ($i = ($stackPtr + 1); $i <= ($stackPtr + 2); $i++) { - $type = Tokens::tokenName($tokens[$i][0]); - $content = Common::prepareForOutput($tokens[$i][1]); - echo "\t\t* token $i merged into T_YIELD_FROM; was: $type => $content".PHP_EOL; - } - } - - $tokens[($stackPtr + 1)] = null; - $tokens[($stackPtr + 2)] = null; - } else { - $newToken = []; - $newToken['code'] = T_YIELD; - $newToken['type'] = 'T_YIELD'; - $newToken['content'] = $token[1]; - $finalTokens[$newStackPtr] = $newToken; - - $newStackPtr++; - continue; - }//end if - }//end if - /* Before PHP 5.6, the ... operator was tokenized as three T_STRING_CONCAT tokens in a row. So look for and combine From df8bfe90ac9f3d36bdad6bc52a9d356d4647c5b3 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Thu, 24 Oct 2024 02:20:42 +0200 Subject: [PATCH 3/3] Tokenizer/PHP: bug fix - "yield" vs function return by ref (PHP 5.4) On PHP 5.4, where PHP doesn't natively have the `T_YIELD` token yet, a `yield` function name for a function declared to return by reference would be tokenized as `T_YIELD`, instead of `T_STRING` due to the `T_YIELD` polyfill happening _after_ the context sensitive keyword check has already run. By moving the check for a `T_STRING` "yield" keyword up to above the check for context sensitive keywords, this bug is fixed. Includes additional test. --- src/Tokenizers/PHP.php | 41 +++++++++++++------------ tests/Core/Tokenizers/PHP/YieldTest.inc | 3 ++ tests/Core/Tokenizers/PHP/YieldTest.php | 1 + 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 2fc2ac8afe..42969782b7 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -605,6 +605,27 @@ protected function tokenize($string) echo PHP_EOL; } + /* + Before PHP 5.5, the yield keyword was tokenized as + T_STRING. So look for and change this token in + earlier versions. + */ + + if (PHP_VERSION_ID < 50500 + && $tokenIsArray === true + && $token[0] === T_STRING + && strtolower($token[1]) === 'yield' + && isset($this->tstringContexts[$finalTokens[$lastNotEmptyToken]['code']]) === false + ) { + // Could still be a context sensitive keyword or "yield from" and potentially multi-line, + // so adjust the token stack in place. + $token[0] = T_YIELD; + + if (PHP_CODESNIFFER_VERBOSITY > 1) { + echo "\t\t* token $stackPtr changed from T_STRING to T_YIELD".PHP_EOL; + } + } + /* Tokenize context sensitive keyword as string when it should be string. */ @@ -1492,26 +1513,6 @@ protected function tokenize($string) continue; }//end if - /* - Before PHP 5.5, the yield keyword was tokenized as - T_STRING. So look for and change this token in - earlier versions. - */ - - if (PHP_VERSION_ID < 50500 - && $tokenIsArray === true - && $token[0] === T_STRING - && strtolower($token[1]) === 'yield' - && isset($this->tstringContexts[$finalTokens[$lastNotEmptyToken]['code']]) === false - ) { - // Could still be "yield from" and potentially multi-line, so adjust the token stack. - $token[0] = T_YIELD; - - if (PHP_CODESNIFFER_VERBOSITY > 1) { - echo "\t\t* token $stackPtr changed from T_STRING to T_YIELD".PHP_EOL; - } - } - /* Before PHP 7.0, the "yield from" was tokenized as T_YIELD, T_WHITESPACE and T_STRING. So look for diff --git a/tests/Core/Tokenizers/PHP/YieldTest.inc b/tests/Core/Tokenizers/PHP/YieldTest.inc index 7677624d50..fdac3c71eb 100644 --- a/tests/Core/Tokenizers/PHP/YieldTest.inc +++ b/tests/Core/Tokenizers/PHP/YieldTest.inc @@ -42,6 +42,9 @@ class Yield { /* testFromUsedForClassConstantAccess1 */ echo MyClass::FROM; } + + /* testYieldUsedAsMethodNameReturnByRef */ + public function &yield() {} } function myGen() { diff --git a/tests/Core/Tokenizers/PHP/YieldTest.php b/tests/Core/Tokenizers/PHP/YieldTest.php index 87aaf5784f..39490b47d4 100644 --- a/tests/Core/Tokenizers/PHP/YieldTest.php +++ b/tests/Core/Tokenizers/PHP/YieldTest.php @@ -232,6 +232,7 @@ public static function dataYieldNonKeyword() 'yield used as property access 2' => ['/* testYieldUsedAsPropertyName2 */'], 'yield used as class constant access' => ['/* testYieldUsedForClassConstantAccess1 */'], 'from used as class constant access' => ['/* testFromUsedForClassConstantAccess1 */'], + 'yield used as method name with ref' => ['/* testYieldUsedAsMethodNameReturnByRef */'], ]; }//end dataYieldNonKeyword()