@@ -1432,17 +1432,20 @@ public String diff_toDelta(LinkedList<Diff> diffs) {
14321432 char lastEnd = 0 ;
14331433 boolean isFirst = true ;
14341434 for (Diff aDiff : diffs ) {
1435+ if (aDiff .text .isEmpty ()) {
1436+ continue ;
1437+ }
14351438 char thisTop = aDiff .text .charAt (0 );
14361439 char thisEnd = aDiff .text .charAt (aDiff .text .length () - 1 );
14371440 if (Character .isHighSurrogate (thisEnd )) {
1441+ lastEnd = thisEnd ;
14381442 aDiff .text = aDiff .text .substring (0 , aDiff .text .length () - 1 );
14391443 }
1440- if (! isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1444+ if (!isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
14411445 aDiff .text = lastEnd + aDiff .text ;
14421446 }
14431447 isFirst = false ;
1444- lastEnd = thisEnd ;
1445- if ( aDiff .text .isEmpty () ) {
1448+ if (aDiff .text .isEmpty ()) {
14461449 continue ;
14471450 }
14481451 switch (aDiff .operation ) {
@@ -1472,6 +1475,92 @@ public String diff_toDelta(LinkedList<Diff> diffs) {
14721475 return delta ;
14731476 }
14741477
1478+ private int digit16 (char c ) throws IllegalArgumentException {
1479+ switch (c ) {
1480+ case '0' : return 0 ;
1481+ case '1' : return 1 ;
1482+ case '2' : return 2 ;
1483+ case '3' : return 3 ;
1484+ case '4' : return 4 ;
1485+ case '5' : return 5 ;
1486+ case '6' : return 6 ;
1487+ case '7' : return 7 ;
1488+ case '8' : return 8 ;
1489+ case '9' : return 9 ;
1490+ case 'A' : case 'a' : return 10 ;
1491+ case 'B' : case 'b' : return 11 ;
1492+ case 'C' : case 'c' : return 12 ;
1493+ case 'D' : case 'd' : return 13 ;
1494+ case 'E' : case 'e' : return 14 ;
1495+ case 'F' : case 'f' : return 15 ;
1496+ default : throw new IllegalArgumentException ();
1497+ }
1498+ }
1499+
1500+ private String decodeURI (String text ) throws IllegalArgumentException {
1501+ int i = 0 ;
1502+ StringBuilder decoded = new StringBuilder (text .length ());
1503+ while (i < text .length ()) {
1504+ if (text .charAt (i ) != '%' ) {
1505+ decoded .append (text .charAt (i ++));
1506+ continue ;
1507+ }
1508+ // start a percent-sequence
1509+ int byte1 = (digit16 (text .charAt (i + 1 )) << 4 ) + digit16 (text .charAt (i + 2 ));
1510+ if ((byte1 & 0x80 ) == 0 ) {
1511+ decoded .append (Character .toChars (byte1 ));
1512+ i += 3 ;
1513+ continue ;
1514+ }
1515+ if (text .charAt (i + 3 ) != '%' ) {
1516+ throw new IllegalArgumentException ();
1517+ }
1518+ int byte2 = (digit16 (text .charAt (i + 4 )) << 4 ) + digit16 (text .charAt (i + 5 ));
1519+ if ((byte2 & 0xC0 ) != 0x80 ) {
1520+ throw new IllegalArgumentException ();
1521+ }
1522+ byte2 = byte2 & 0x3F ;
1523+ if ((byte1 & 0xE0 ) == 0xC0 ) {
1524+ decoded .append (Character .toChars (((byte1 & 0x1F ) << 6 ) | byte2 ));
1525+ i += 6 ;
1526+ continue ;
1527+ }
1528+ if (text .charAt (i + 6 ) != '%' ) {
1529+ throw new IllegalArgumentException ();
1530+ }
1531+ int byte3 = (digit16 (text .charAt (i + 7 )) << 4 ) + digit16 (text .charAt (i + 8 ));
1532+ if ((byte3 & 0xC0 ) != 0x80 ) {
1533+ throw new IllegalArgumentException ();
1534+ }
1535+ byte3 = byte3 & 0x3F ;
1536+ if ((byte1 & 0xF0 ) == 0xE0 ) {
1537+ // unpaired surrogate are fine here
1538+ decoded .append (Character .toChars (((byte1 & 0x0F ) << 12 ) | (byte2 << 6 ) | byte3 ));
1539+ i += 9 ;
1540+ continue ;
1541+ }
1542+ if (text .charAt (i + 9 ) != '%' ) {
1543+ throw new IllegalArgumentException ();
1544+ }
1545+ int byte4 = (digit16 (text .charAt (i + 10 )) << 4 ) + digit16 (text .charAt (i + 11 ));
1546+ if ((byte4 & 0xC0 ) != 0x80 ) {
1547+ throw new IllegalArgumentException ();
1548+ }
1549+ byte4 = byte4 & 0x3F ;
1550+ if ((byte1 & 0xF8 ) == 0xF0 ) {
1551+ int codePoint = ((byte1 & 0x07 ) << 0x12 ) | (byte2 << 0x0C ) | (byte3 << 0x06 ) | byte4 ;
1552+ if (codePoint >= 0x010000 && codePoint <= 0x10FFFF ) {
1553+ decoded .append (Character .toChars ((codePoint & 0xFFFF ) >>> 10 & 0x3FF | 0xD800 ));
1554+ decoded .append (Character .toChars (0xDC00 | (codePoint & 0xFFFF ) & 0x3FF ));
1555+ i += 12 ;
1556+ continue ;
1557+ }
1558+ }
1559+ throw new IllegalArgumentException ();
1560+ }
1561+ return decoded .toString ();
1562+ }
1563+
14751564 /**
14761565 * Given the original text1, and an encoded string which describes the
14771566 * operations required to transform text1 into text2, compute the full diff.
@@ -1485,7 +1574,8 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
14851574 LinkedList <Diff > diffs = new LinkedList <Diff >();
14861575 int pointer = 0 ; // Cursor in text1
14871576 String [] tokens = delta .split ("\t " );
1488- for (String token : tokens ) {
1577+ for (int x = 0 ; x < tokens .length ; x ++) {
1578+ String token = tokens [x ];
14891579 if (token .length () == 0 ) {
14901580 // Blank tokens are ok (from a trailing \t).
14911581 continue ;
@@ -1498,10 +1588,7 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
14981588 // decode would change all "+" to " "
14991589 param = param .replace ("+" , "%2B" );
15001590 try {
1501- param = URLDecoder .decode (param , "UTF-8" );
1502- } catch (UnsupportedEncodingException e ) {
1503- // Not likely on modern system.
1504- throw new Error ("This system does not support UTF-8." , e );
1591+ param = this .decodeURI (param );
15051592 } catch (IllegalArgumentException e ) {
15061593 // Malformed URI sequence.
15071594 throw new IllegalArgumentException (
@@ -1524,6 +1611,27 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
15241611 "Negative number in diff_fromDelta: " + param );
15251612 }
15261613 String text ;
1614+ // some objective-c versions of the library produced patches with
1615+ // (null) in the place where surrogates were split across diff
1616+ // boundaries. if we leave those in we'll be stuck with a
1617+ // high-surrogate (null) low-surrogate pattern that will break
1618+ // deeper in the library or consuming application. we'll "fix"
1619+ // these by dropping the (null) and re-joining the surrogate halves
1620+ if (x + 2 < tokens .length &&
1621+ Character .isHighSurrogate (text1 .charAt (pointer + n - 1 )) &&
1622+ tokens [x + 1 ].substring (1 ).equals ("(null)" ) &&
1623+ Character .isLowSurrogate (text1 .charAt (pointer + n ))) {
1624+ n -= 1 ;
1625+ tokens [x + 1 ] = "+" ;
1626+ int m ;
1627+ try {
1628+ m = Integer .parseInt (tokens [x + 2 ].substring (1 ));
1629+ } catch (NumberFormatException e ) {
1630+ throw new IllegalArgumentException (
1631+ "Invalid number in diff_fromDelta: " + tokens [x + 2 ].substring (1 ), e );
1632+ }
1633+ tokens [x + 2 ] = tokens [x + 2 ].charAt (0 ) + String .valueOf (m + 1 );
1634+ }
15271635 try {
15281636 text = text1 .substring (pointer , pointer += n );
15291637 } catch (StringIndexOutOfBoundsException e ) {
@@ -2284,10 +2392,7 @@ public List<Patch> patch_fromText(String textline)
22842392 line = text .getFirst ().substring (1 );
22852393 line = line .replace ("+" , "%2B" ); // decode would change all "+" to " "
22862394 try {
2287- line = URLDecoder .decode (line , "UTF-8" );
2288- } catch (UnsupportedEncodingException e ) {
2289- // Not likely on modern system.
2290- throw new Error ("This system does not support UTF-8." , e );
2395+ line = this .decodeURI (line );
22912396 } catch (IllegalArgumentException e ) {
22922397 // Malformed URI sequence.
22932398 throw new IllegalArgumentException (
0 commit comments