@@ -33,14 +33,14 @@ func advanceLexer(lexer: Lexer) throws -> Token {
3333 lexer. lastToken = lexer. token
3434 var token = lexer. lastToken
3535
36- if token. kind != . eof {
36+ if token. kind != . eof {
3737 repeat {
3838 token. next = try readToken ( lexer: lexer, prev: token)
3939 token = token. next!
4040 } while token. kind == . comment
4141
4242 lexer. token = token
43- }
43+ }
4444
4545 return token
4646}
@@ -105,6 +105,17 @@ func getTokenDesc(_ token: Token) -> String {
105105}
106106
107107extension String {
/// Returns the number of UTF-8 code units between the start of the string and `index`.
func offset(of index: Index) -> Int {
    let origin = utf8.startIndex
    return utf8.distance(from: origin, to: index)
}
111+
/// Returns the UTF-8 code unit stored at `index`, or `nil` when `index` is at or past the end
/// of the UTF-8 view.
func charCode(at index: Index) -> UInt8? {
    let bytes = utf8
    return index < bytes.endIndex ? bytes[index] : nil
}
118+
108119 func charCode( at position: Int ) -> UInt8 ? {
109120 guard position < utf8. count else {
110121 return nil
@@ -121,7 +132,7 @@ extension String {
121132}
122133
/// Converts a single byte into the `Character` whose Unicode scalar value equals that byte.
func character(_ code: UInt8) -> Character {
    let scalar = UnicodeScalar(code)
    return Character(scalar)
}
126137
127138/**
@@ -220,16 +231,16 @@ func readToken(lexer: Lexer, prev: Token) throws -> Token {
220231 )
221232 // .
222233 case 46 :
223- if body. charCode ( at: position + 1 ) == 46 && body. charCode ( at: position + 2 ) == 46 {
224- return Token (
225- kind: . spread,
226- start: position,
227- end: position + 3 ,
228- line: line,
229- column: col,
230- prev: prev
231- )
232- }
234+ if body. charCode ( at: position + 1 ) == 46 && body. charCode ( at: position + 2 ) == 46 {
235+ return Token (
236+ kind: . spread,
237+ start: position,
238+ end: position + 3 ,
239+ line: line,
240+ column: col,
241+ prev: prev
242+ )
243+ }
233244 // :
234245 case 58 :
235246 return Token (
@@ -535,14 +546,13 @@ func readDigits(source: Source, start: Int, firstCode: UInt8) throws -> Int {
535546 */
536547func readString( source: Source , start: Int , line: Int , col: Int , prev: Token ) throws -> Token {
537548 let body = source. body
538- let bodyLength = body. utf8. count
539- var position = start + 1
540- var chunkStart = position
549+ var positionIndex = body. utf8. index ( body. utf8. startIndex, offsetBy: start + 1 )
550+ var chunkStartIndex = positionIndex
541551 var currentCode : UInt8 ? = 0
542552 var value = " "
543553
544- while position < bodyLength {
545- currentCode = body. charCode ( at: position )
554+ while positionIndex < body . utf8 . endIndex {
555+ currentCode = body. charCode ( at: positionIndex )
546556
547557 // not LineTerminator not Quote (")
548558 guard let code = currentCode, code != 0x000A && code != 0x000D && code != 34 else {
@@ -553,16 +563,17 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
553563 if code < 0x0020 && code != 0x0009 {
554564 throw syntaxError (
555565 source: source,
556- position: position ,
566+ position: body . offset ( of : positionIndex ) ,
557567 description: " Invalid character within String: \( character ( code) ) . "
558568 )
559569 }
560570
561- position += 1
571+ let startIterationIndex = positionIndex
572+ positionIndex = body. utf8. index ( after: positionIndex)
562573
563574 if code == 92 { // \
564- value += body. slice ( start : chunkStart , end : position - 1 )
565- currentCode = body. charCode ( at: position )
575+ value += String ( body. utf8 [ chunkStartIndex ..< startIterationIndex ] ) !
576+ currentCode = body. charCode ( at: positionIndex )
566577
567578 if let code = currentCode {
568579 switch code {
@@ -575,53 +586,59 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
575586 case 114 : value += " \r "
576587 case 116 : value += " \t "
577588 case 117 : // u
589+ let aIndex = body. utf8. index ( after: positionIndex)
590+ let bIndex = body. utf8. index ( after: aIndex)
591+ let cIndex = body. utf8. index ( after: bIndex)
592+ let dIndex = body. utf8. index ( after: cIndex)
593+
578594 let charCode = uniCharCode (
579- a: body. charCode ( at : position + 1 ) ! ,
580- b: body. charCode ( at : position + 2 ) ! ,
581- c: body. charCode ( at : position + 3 ) ! ,
582- d: body. charCode ( at : position + 4 ) !
595+ a: body. utf8 [ aIndex ] ,
596+ b: body. utf8 [ bIndex ] ,
597+ c: body. utf8 [ cIndex ] ,
598+ d: body. utf8 [ dIndex ]
583599 )
584600
585601 if charCode < 0 {
586602 throw syntaxError (
587603 source: source,
588- position: position ,
604+ position: body . offset ( of : positionIndex ) ,
589605 description:
590606 " Invalid character escape sequence: " +
591- " \\ u \( body. slice ( start : position + 1 , end : position + 5 ) ) . "
607+ " \\ u \( body. utf8 [ aIndex ... dIndex ] ) . "
592608 )
593609 }
594610
595611 value += String ( Character ( UnicodeScalar ( UInt32 ( charCode) ) !) )
596- position += 4
612+
613+ positionIndex = dIndex
597614 default :
598615 throw syntaxError (
599616 source: source,
600- position: position ,
617+ position: body . offset ( of : positionIndex ) ,
601618 description: " Invalid character escape sequence: \\ \( character ( code) ) . "
602619 )
603620 }
604621 }
605622
606- position += 1
607- chunkStart = position
623+ positionIndex = body . utf8 . index ( after : positionIndex )
624+ chunkStartIndex = positionIndex
608625 }
609626 }
610627
611628 if currentCode != 34 { // quote (")
612629 throw syntaxError (
613630 source: source,
614- position: position ,
631+ position: body . offset ( of : positionIndex ) ,
615632 description: " Unterminated string. "
616633 )
617634 }
618635
619- value += body. slice ( start : chunkStart , end : position )
636+ value += String ( body. utf8 [ chunkStartIndex ..< positionIndex ] ) !
620637
621638 return Token (
622639 kind: . string,
623640 start: start,
624- end: position + 1 ,
641+ end: body . offset ( of : positionIndex ) + 1 ,
625642 line: line,
626643 column: col,
627644 value: value,
@@ -640,7 +657,7 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
640657 * which means the result of ORing the char2hex() will also be negative.
641658 */
func uniCharCode(a: UInt8, b: UInt8, c: UInt8, d: UInt8) -> Int {
    // Each argument supplies one hex digit, most significant first; every digit is
    // shifted into its own 4-bit slot before the slots are OR-ed together.
    let first = char2hex(a) << 12
    let second = char2hex(b) << 8
    let third = char2hex(c) << 4
    let fourth = char2hex(d)
    return first | second | third | fourth
}
645662
646663/**
@@ -654,9 +671,9 @@ func uniCharCode(a: UInt8, b: UInt8, c: UInt8, d: UInt8) -> Int {
func char2hex(_ a: UInt8) -> Int {
    switch a {
    case 48...57: // 0-9
        return Int(a) - 48
    case 65...70: // A-F
        return Int(a) - 55
    case 97...102: // a-f
        return Int(a) - 87
    default:
        // Anything that is not a hexadecimal digit is reported as -1.
        return -1
    }
}
661678
662679/**
@@ -670,12 +687,12 @@ func readName(source: Source, position: Int, line: Int, col: Int, prev: Token) -
670687 var end = position + 1
671688
672689 while end != bodyLength,
673- let code = body. charCode ( at: end) ,
674- ( code == 95 || // _
675- code >= 48 && code <= 57 || // 0-9
676- code >= 65 && code <= 90 || // A-Z
677- code >= 97 && code <= 122 ) { // a-z
678- end += 1
690+ let code = body. charCode ( at: end) ,
691+ ( code == 95 || // _
692+ code >= 48 && code <= 57 || // 0-9
693+ code >= 65 && code <= 90 || // A-Z
694+ code >= 97 && code <= 122 ) { // a-z
695+ end += 1
679696 }
680697
681698 return Token (
0 commit comments