From fdedc770732de21cb2d998a80ff622bf993422b6 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Fri, 31 Jan 2025 19:46:06 +0100 Subject: [PATCH] PEAR/Squiz/InlineComment: document handling of emoji hash comments Based on some rumblings on the interwebs, I've done some investigating. It was basically claimed that the emoji keycap sign for the hash character could be used for hash comments. Based on my investigation, this turns out to be **true**. Technical explanation: as the emoji keycap sign is a combined unicode character of which the first codepoint is the `#` character, the PHP tokenizer will see whatever comes after as a comment and tokenize it as such. In a follow-up message, it was claimed that the emoji keycap sign for the hash character could also be used for attributes. Based on my investigation, this (unsurprisingly) turns out to be **false**. Technical explanation: for attributes, the `#` sign and the `[` bracket need to be next to each other without any characters between them for the syntax to be recognized as the start of an attribute. This was implemented like so to keep the BC-break with hash comments as small as possible when attributes were introduced in PHP 8.0. As the emoji keycap sign is a combined character (U+0023 U+FE0F U+20E3), there are multiple other codepoints between the `#` and the `[`, which means that this will not tokenize as `T_ATTRIBUTE` in PHP and therefore shouldn't in PHPCS either. For now, I'm just adding a test to both the PEAR and Squiz `InlineComment` sniffs to document how emoji-hash comments are handled by PHPCS. I am aware that the fixer output is not "clean", i.e. it leaves the second and third code point of the emoji in place. I did consider adding special handling in the fixers, but decided against this for the following reasons: 1. This is likely to be a rare edge case. 2. I suspect the most reliable way to handle this would require the `intl` extension to use `grapheme` functions. 
I'm not inclined to make the `intl` extension a requirement for PHPCS at this time. 3. An alternative way to handle this could be via unicode escape codes, but those are a PHP 7.0+ feature and cannot be used in PHPCS (yet). For now, I believe documenting the handling will need to suffice. --- Inspired by the following tweet: https://x.com/christophrumpel/status/1862568698730401986 and its response: https://x.com/joshmanders/status/1862884555910160451 and their mention in the [January 2025 PHP Annotated](https://blog.jetbrains.com/phpstorm/2025/01/php-annotated-january-2025/). --- src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc | 2 ++ .../PEAR/Tests/Commenting/InlineCommentUnitTest.inc.fixed | 2 ++ src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.php | 1 + src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc | 3 +++ .../Squiz/Tests/Commenting/InlineCommentUnitTest.inc.fixed | 3 +++ src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.php | 1 + 6 files changed, 12 insertions(+) diff --git a/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc b/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc index 187228c2a4..092fbfdc3d 100644 --- a/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc +++ b/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc @@ -27,3 +27,5 @@ function test() ### use the code from the regex ### over hre ### ok? + +#️⃣ Apparently the emoji keycap number sign (hash) also works and turns this into a comment. diff --git a/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc.fixed b/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc.fixed index 7bb6c3fb03..4024b4d263 100644 --- a/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc.fixed +++ b/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.inc.fixed @@ -27,3 +27,5 @@ function test() // use the code from the regex // over hre // ok? 
+ +// ️⃣ Apparently the emoji keycap number sign (hash) also works and turns this into a comment. diff --git a/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.php b/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.php index 457b5d896d..7d107dd341 100644 --- a/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.php +++ b/src/Standards/PEAR/Tests/Commenting/InlineCommentUnitTest.php @@ -37,6 +37,7 @@ public function getErrorList() 27 => 1, 28 => 1, 29 => 1, + 31 => 1, ]; }//end getErrorList() diff --git a/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc b/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc index 024876842e..220b916e40 100644 --- a/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc +++ b/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc @@ -186,6 +186,9 @@ readonly class MyClass readonly $property = 10; } +#️⃣ Apparently the emoji keycap number sign (hash) also works and turns this into a comment. +echo 'hello!'; + /* * N.B.: The below test line must be the last test in the file. * Testing that a new line after an inline comment when it's the last non-whitespace diff --git a/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc.fixed b/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc.fixed index 949a9ff949..2cc700c6bf 100644 --- a/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc.fixed +++ b/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.inc.fixed @@ -179,6 +179,9 @@ readonly class MyClass readonly $property = 10; } +// ️⃣ Apparently the emoji keycap number sign (hash) also works and turns this into a comment. +echo 'hello!'; + /* * N.B.: The below test line must be the last test in the file. 
* Testing that a new line after an inline comment when it's the last non-whitespace diff --git a/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.php b/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.php index c559c5ae6b..a2bbc4eb61 100644 --- a/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.php +++ b/src/Standards/Squiz/Tests/Commenting/InlineCommentUnitTest.php @@ -52,6 +52,7 @@ public function getErrorList($testFile='') 126 => 2, 130 => 2, 149 => 1, + 189 => 1, ]; return $errors;