@@ -14,6 +14,7 @@ class HtmlDiff
1414 private $ specialCaseOpeningTags = array ();
1515 private $ specialCaseClosingTags = array ();
1616 private $ specialCaseTags = array ('strong ' , 'b ' , 'i ' , 'big ' , 'small ' , 'u ' , 'sub ' , 'sup ' , 'strike ' , 's ' , 'p ' );
/**
 * Characters that may sit inside a word without ending it: isPartOfWord()
 * strips them before its alphanumeric check, and convertHtmlToListOfWords()
 * appends such a character to the current word when the character that
 * follows it is part of a word.
 *
 * @var array
 */
17+ private $ specialCaseChars = array ('. ' , ', ' , '( ' , ') ' , '\'' );
1718 private $ groupDiffs = true ;
1819
1920 public function __construct ($ oldText , $ newText , $ encoding = 'UTF-8 ' , $ specialCaseTags = array (), $ groupDiffs = true )
@@ -26,6 +27,31 @@ public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCas
2627
2728 $ this ->setSpecialCaseTags ($ specialCaseTags );
2829 }
30+
/**
 * Replaces the whole list of special-case characters.
 *
 * The given array is stored as-is (keys included); no validation or
 * de-duplication is performed.
 *
 * @param array $chars New list of special-case characters.
 */
public function setSpecialCaseChars(array $chars)
{
    $this->specialCaseChars = $chars;
}
35+
/**
 * Returns the currently configured special-case characters.
 *
 * @return array The stored list, exactly as it was set or modified.
 */
public function getSpecialCaseChars()
{
    return $this->specialCaseChars;
}
40+
/**
 * Appends a character to the special-case list unless it is already there.
 *
 * @param string $char Character to register.
 */
public function addSpecialCaseChar($char)
{
    // Strict mode: a loose in_array() treats numerically equal strings
    // (e.g. '1e1' and '10') as the same entry and would silently skip the add.
    if (in_array($char, $this->specialCaseChars, true)) {
        return;
    }

    $this->specialCaseChars[] = $char;
}
47+
/**
 * Removes the first occurrence of a character from the special-case list.
 *
 * Does nothing when the character is not present. The remaining entries keep
 * their original keys, so the list may have a gap after removal.
 *
 * @param string $char Character to unregister.
 */
public function removeSpecialCaseChar($char)
{
    // Strict mode: a loose array_search() could match a different entry that
    // merely compares equal (e.g. '1e1' vs '10') and remove the wrong one.
    $key = array_search($char, $this->specialCaseChars, true);

    // Compare against false explicitly: key 0 is a valid hit.
    if ($key !== false) {
        unset($this->specialCaseChars[$key]);
    }
}
2955
3056 public function setSpecialCaseTags (array $ tags = array ())
3157 {
@@ -173,13 +199,18 @@ private function splitInputsToWords()
173199 $ this ->oldWords = $ this ->convertHtmlToListOfWords ( $ this ->explode ( $ this ->oldText ) );
174200 $ this ->newWords = $ this ->convertHtmlToListOfWords ( $ this ->explode ( $ this ->newText ) );
175201 }
202+
/**
 * Tells whether a piece of text counts as (part of) a word.
 *
 * Every configured special-case character is removed first; the remainder
 * must then pass ctype_alnum(). Text made up solely of special-case
 * characters leaves an empty remainder, for which ctype_alnum() returns false.
 *
 * @param string $text Text (a single character or a partial word) to test.
 *
 * @return bool Result of ctype_alnum() on the stripped text.
 */
private function isPartOfWord($text)
{
    $withoutSpecialChars = str_replace($this->specialCaseChars, '', $text);

    return ctype_alnum($withoutSpecialChars);
}
176207
177208 private function convertHtmlToListOfWords ($ characterString )
178209 {
179210 $ mode = 'character ' ;
180211 $ current_word = '' ;
181212 $ words = array ();
182- foreach ($ characterString as $ character ) {
213+ foreach ($ characterString as $ i => $ character ) {
183214 switch ($ mode ) {
184215 case 'character ' :
185216 if ( $ this ->isStartOfTag ( $ character ) ) {
@@ -195,7 +226,10 @@ private function convertHtmlToListOfWords($characterString)
195226 $ current_word = $ character ;
196227 $ mode = 'whitespace ' ;
197228 } else {
198- if ( ctype_alnum ( $ character ) && ( strlen ($ current_word ) == 0 || ctype_alnum ( $ current_word ) ) ) {
229+ if (
230+ (ctype_alnum ($ character ) && (strlen ($ current_word ) == 0 || $ this ->isPartOfWord ($ current_word ))) ||
231+ (in_array ($ character , $ this ->specialCaseChars ) && isset ($ characterString [$ i +1 ]) && $ this ->isPartOfWord ($ characterString [$ i +1 ]))
232+ ) {
199233 $ current_word .= $ character ;
200234 } else {
201235 $ words [] = $ current_word ;
0 commit comments