@@ -88,6 +88,15 @@ diff_match_patch.Diff.prototype.toString = function() {
8888 return this [ 0 ] + ',' + this [ 1 ] ;
8989} ;
9090
/**
 * Determine whether a string starts with a UTF-16 high (leading) surrogate,
 * i.e. a code unit in the range 0xD800-0xDBFF.
 * @param {string|undefined} c Character to test. Only its first code unit is
 *     examined. May be undefined, which is what indexing a string past its
 *     end (e.g. text[text.length]) produces.
 * @return {boolean} True if the first code unit is a high surrogate; false
 *     otherwise, including for undefined, non-string or empty input.
 */
diff_match_patch.prototype.isHighSurrogate = function(c) {
  // Callers pass text[index]; an out-of-range index yields undefined, and
  // undefined.charCodeAt would throw. A missing character is not a surrogate.
  if (typeof c !== 'string' || c.length === 0) {
    return false;
  }
  var v = c.charCodeAt(0);
  return v >= 0xD800 && v <= 0xDBFF;
};
95+
/**
 * Determine whether a string starts with a UTF-16 low (trailing) surrogate,
 * i.e. a code unit in the range 0xDC00-0xDFFF.
 * @param {string|undefined} c Character to test. Only its first code unit is
 *     examined. May be undefined, which is what indexing a string past its
 *     end (e.g. text[text.length]) produces.
 * @return {boolean} True if the first code unit is a low surrogate; false
 *     otherwise, including for undefined, non-string or empty input.
 */
diff_match_patch.prototype.isLowSurrogate = function(c) {
  // Callers pass text[index]; an out-of-range index yields undefined, and
  // undefined.charCodeAt would throw. A missing character is not a surrogate.
  if (typeof c !== 'string' || c.length === 0) {
    return false;
  }
  var v = c.charCodeAt(0);
  return v >= 0xDC00 && v <= 0xDFFF;
};
91100
92101/**
93102 * Find the differences between two texts. Simplifies the problem by stripping
@@ -134,12 +143,18 @@ diff_match_patch.prototype.diff_main = function(text1, text2, opt_checklines,
134143
135144 // Trim off common prefix (speedup).
136145 var commonlength = this . diff_commonPrefix ( text1 , text2 ) ;
146+ if ( commonlength > 0 && this . isHighSurrogate ( text1 [ commonlength - 1 ] ) ) {
147+ commonlength -- ;
148+ }
137149 var commonprefix = text1 . substring ( 0 , commonlength ) ;
138150 text1 = text1 . substring ( commonlength ) ;
139151 text2 = text2 . substring ( commonlength ) ;
140152
141153 // Trim off common suffix (speedup).
142154 commonlength = this . diff_commonSuffix ( text1 , text2 ) ;
155+ if ( commonlength > 0 && this . isLowSurrogate ( text1 [ text1 . length - commonlength ] ) ) {
156+ commonlength -- ;
157+ }
143158 var commonsuffix = text1 . substring ( text1 . length - commonlength ) ;
144159 text1 = text1 . substring ( 0 , text1 . length - commonlength ) ;
145160 text2 = text2 . substring ( 0 , text2 . length - commonlength ) ;
@@ -187,13 +202,23 @@ diff_match_patch.prototype.diff_compute_ = function(text1, text2, checklines,
187202
188203 var longtext = text1 . length > text2 . length ? text1 : text2 ;
189204 var shorttext = text1 . length > text2 . length ? text2 : text1 ;
205+ var shortlength = shorttext . length ;
190206 var i = longtext . indexOf ( shorttext ) ;
191207 if ( i != - 1 ) {
208+ // skip leading unpaired surrogate
209+ if ( this . isLowSurrogate ( longtext [ i ] ) ) {
210+ shortlength -- ;
211+ i ++ ;
212+ }
213+ // skip trailing unpaired surrogate
214+ if ( this . isHighSurrogate ( longtext [ i + shortlength ] ) ) {
215+ shortlength -- ;
216+ }
192217 // Shorter text is inside the longer text (speedup).
193218 diffs = [ new diff_match_patch . Diff ( DIFF_INSERT , longtext . substring ( 0 , i ) ) ,
194219 new diff_match_patch . Diff ( DIFF_EQUAL , shorttext ) ,
195220 new diff_match_patch . Diff ( DIFF_INSERT ,
196- longtext . substring ( i + shorttext . length ) ) ] ;
221+ longtext . substring ( i + shortlength ) ) ] ;
197222 // Swap insertions for deletions if diff is reversed.
198223 if ( text1 . length > text2 . length ) {
199224 diffs [ 0 ] [ 0 ] = diffs [ 2 ] [ 0 ] = DIFF_DELETE ;
@@ -439,6 +464,15 @@ diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) {
439464 */
440465diff_match_patch . prototype . diff_bisectSplit_ = function ( text1 , text2 , x , y ,
441466 deadline ) {
467+ // backup if we split a surrogate
468+ if (
469+ x > 0 && x < text1 . length && this . isLowSurrogate ( text1 [ x ] ) &&
470+ y > 0 && y < text2 . length && this . isLowSurrogate ( text2 [ y ] )
471+ ) {
472+ x -- ;
473+ y -- ;
474+ }
475+
442476 var text1a = text1 . substring ( 0 , x ) ;
443477 var text2a = text2 . substring ( 0 , y ) ;
444478 var text1b = text1 . substring ( x ) ;
@@ -569,6 +603,12 @@ diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) {
569603 }
570604 pointermid = Math . floor ( ( pointermax - pointermin ) / 2 + pointermin ) ;
571605 }
606+
607+ // shorten the prefix if it splits a surrogate
608+ if ( pointermid > 0 && this . isHighSurrogate ( text1 [ pointermid - 1 ] ) ) {
609+ pointermid -- ;
610+ }
611+
572612 return pointermid ;
573613} ;
574614
@@ -601,6 +641,12 @@ diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) {
601641 }
602642 pointermid = Math . floor ( ( pointermax - pointermin ) / 2 + pointermin ) ;
603643 }
644+
645+ // shorten the suffix if it splits a surrogate
646+ if ( pointermid < length - 1 && this . isLowSurrogate ( text1 [ pointermid ] ) ) {
647+ pointermid ++ ;
648+ }
649+
604650 return pointermid ;
605651} ;
606652
@@ -749,6 +795,24 @@ diff_match_patch.prototype.diff_halfMatch_ = function(text1, text2) {
749795 text1_b = hm [ 3 ] ;
750796 }
751797 var mid_common = hm [ 4 ] ;
798+
799+ // move forward to prevent splitting a surrogate pair
800+ if ( mid_common . length > 0 && this . isLowSurrogate ( mid_common [ 0 ] ) ) {
801+ text1_a = text1_a + mid_common [ 0 ] ;
802+ text2_a = text2_a + mid_common [ 0 ] ;
803+ mid_common = mid_common . substring ( 1 ) ;
804+ }
805+
806+ // back up to prevent splitting a surrogate pair
807+ if (
808+ text1_b . length > 0 && this . isLowSurrogate ( text1_b [ 0 ] ) &&
809+ text2_b . length > 0 && this . isLowSurrogate ( text2_b [ 0 ] )
810+ ) {
811+ text1_b = mid_common [ mid_common . length - 1 ] + text1_b ;
812+ text2_b = mid_common [ mid_common . length - 1 ] + text2_b ;
813+ mid_common = mid_common . substring ( 0 , - 1 ) ;
814+ }
815+
752816 return [ text1_a , text1_b , text2_a , text2_b , mid_common ] ;
753817} ;
754818
0 commit comments