
Commit 7388582

Add internally stored Regex to Token to preserve the originally used Regex.
Make Parsed.remainder public.
Optimize TokenizerMatchesSequence to reduce the number of objects.
Push version to 0.3.2.

1 parent 3b99d45

6 files changed (+37, −23 lines)


build.gradle

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 group 'com.github.h0tk3y.betterParse'
-version '0.3.1'
+version '0.3.2'
 
 buildscript {
     ext.kotlin_version = '1.1.51'

src/main/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt

Lines changed: 6 additions & 6 deletions

@@ -33,8 +33,8 @@ abstract class Grammar<out T> : Parser<T> {
     open val declaredParsers get() = (_parsers + _tokens + rootParser).toSet()
 
     fun token(@Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(null, pattern, ignore)
-    fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toString(), ignore)
-    fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern.toString(), ignore)
+    fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toRegex(), ignore)
+    fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern, ignore)
 
     /** A [Lexer] that is built with the [Token]s defined within this [Grammar], in their order of declaration */
     open val tokenizer: Tokenizer by lazy { DefaultTokenizer(tokens) }
@@ -59,12 +59,12 @@ abstract class Grammar<out T> : Parser<T> {
 }
 
 fun token(name: String, @Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(name, pattern, ignore)
-fun token(name: String, pattern: Pattern, ignore: Boolean = false) = Token(name, pattern.toString(), ignore)
-fun token(name: String, pattern: Regex, ignore: Boolean = false) = Token(name, pattern.toString(), ignore)
+fun token(name: String, pattern: Pattern, ignore: Boolean = false) = Token(name, pattern.toRegex(), ignore)
+fun token(name: String, pattern: Regex, ignore: Boolean = false) = Token(name, pattern, ignore)
 
 fun token(@Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(null, pattern, ignore)
-fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toString(), ignore)
-fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern.toString(), ignore)
+fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toRegex(), ignore)
+fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern, ignore)
 
 /** A convenience function to use for referencing a parser that is not initialized up to this moment. */
 fun <T> parser(block: () -> Parser<T>): Parser<T> = ParserReference(block)
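
The practical effect of the new overloads is that a Regex passed to token() is forwarded as-is instead of being flattened through toString(), so compiled options such as RegexOption.IGNORE_CASE survive into the Token. A minimal sketch using the overloads above (the import path is assumed from the file layout; the surrounding code is hypothetical):

import com.github.h0tk3y.betterParse.grammar.token
import java.util.regex.Pattern

// Regex overload: the Regex object, including its options, is kept on the Token.
val selectKeyword = token("select", Regex("select", RegexOption.IGNORE_CASE))

// Pattern overload: now converted with toRegex() instead of toString().
val identifier = token("identifier", Pattern.compile("[a-zA-Z_][a-zA-Z_0-9]*"))

// String overload: unchanged, the raw pattern string is stored and compiled later.
val whitespace = token("ws", "\\s+", ignore = true)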

src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt

Lines changed: 19 additions & 6 deletions

@@ -10,15 +10,28 @@ import org.intellij.lang.annotations.RegExp
  * Parses to [TokenMatch].
  * The [name] only provides additional information.
  */
-class Token(
-    name: String?,
-    @RegExp @Language("RegExp") val pattern: String,
-    val ignored: Boolean = false
-) : Parser<TokenMatch> {
+class Token : Parser<TokenMatch> {
+    val pattern: String
+    val regex: Regex?
+    val ignored: Boolean
 
-    var name: String? = name
+    var name: String? = null
         internal set
 
+    constructor(name: String?, @RegExp @Language("RegExp") patternString: String, ignored: Boolean = false) {
+        this.name = name
+        this.ignored = ignored
+        pattern = patternString
+        regex = null
+    }
+
+    constructor(name: String?, regex: Regex, ignored: Boolean = false) {
+        this.name = name
+        this.ignored = ignored
+        pattern = regex.pattern
+        this.regex = regex
+    }
+
     override fun toString() =
         (if (name != null) "$name ($pattern)" else pattern) +
             if (ignored) " [ignorable]" else ""
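
As a quick illustration of the two constructors (a hypothetical snippet, not part of the commit): a string-built Token keeps only the pattern text, while a Regex-built Token also keeps the original Regex object, so nothing about it is lost:

import com.github.h0tk3y.betterParse.lexer.Token

fun main() {
    // String constructor: pattern text kept, regex stays null until a tokenizer compiles it.
    val id = Token("identifier", "[a-zA-Z_][a-zA-Z_0-9]*")
    println(id.regex)              // null

    // Regex constructor: the original Regex (with its options) is stored alongside its pattern.
    val kw = Token("select", Regex("select", RegexOption.IGNORE_CASE))
    println(kw.pattern)            // select
    println(kw.regex?.options)     // [IGNORE_CASE]
    println(kw)                    // select (select), from the existing toString()
}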

src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Tokenizer.kt

Lines changed: 7 additions & 6 deletions

@@ -1,15 +1,16 @@
 package com.github.h0tk3y.betterParse.lexer
 
 import com.github.h0tk3y.betterParse.utils.CachedSequence
-import com.github.h0tk3y.betterParse.utils.cached
 import java.io.InputStream
 import java.util.*
 import kotlin.coroutines.experimental.buildSequence
 
 internal class TokenizerMatchesSequence(
-    val tokens: CachedSequence<TokenMatch>,
-    val tokenizer: Tokenizer
-) : Sequence<TokenMatch> by tokens
+    iterator: Iterator<TokenMatch>,
+    val tokenizer: Tokenizer,
+    cache: ArrayList<TokenMatch> = arrayListOf(),
+    startAt: Int = 0
+) : CachedSequence<TokenMatch>(iterator, cache, startAt)
 
 interface Tokenizer {
     val tokens: List<Token>
@@ -34,7 +35,7 @@ class DefaultTokenizer(override val tokens: List<Token>) : Tokenizer {
         require(tokens.isNotEmpty()) { "The tokens list should not be empty" }
     }
 
-    val patterns = tokens.map { it to it.pattern.toPattern() }
+    val patterns = tokens.map { it to (it.regex?.toPattern() ?: it.pattern.toPattern()) }
 
     /** Tokenizes the [input] from a [String] into a [TokenizerMatchesSequence]. */
     override fun tokenize(input: String) = tokenize(Scanner(input))
@@ -81,5 +82,5 @@ class DefaultTokenizer(override val tokens: List<Token>) : Tokenizer {
 
             yield(result)
         }
-    }.constrainOnce().cached().let { TokenizerMatchesSequence(it as CachedSequence, this) }
+    }.constrainOnce().iterator().let { TokenizerMatchesSequence(it, this) }
 }
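
DefaultTokenizer now compiles patterns from the stored Regex when one is present (it.regex?.toPattern()), falling back to the pattern string otherwise, and tokenize() builds the TokenizerMatchesSequence straight from the iterator instead of wrapping an extra cached sequence. A hypothetical usage sketch (the printed form of TokenMatch is not specified by this diff):

import com.github.h0tk3y.betterParse.lexer.DefaultTokenizer
import com.github.h0tk3y.betterParse.lexer.Token

fun main() {
    val select = Token("select", Regex("select", RegexOption.IGNORE_CASE))
    val ws = Token("ws", "\\s+", ignored = true)

    // "SELECT" now matches: the IGNORE_CASE option reaches the compiled Pattern
    // through it.regex?.toPattern() instead of being dropped by pattern.toString().
    DefaultTokenizer(listOf(select, ws)).tokenize("SELECT select").forEach { println(it) }
}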

src/main/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@ sealed class ParseResult<out T>
 
 /** Represents a successful parsing result of a [Parser] that produced [value] and left a
  * possibly empty input sequence [remainder] unprocessed.*/
-data class Parsed<out T>(val value: T, internal val remainder: Sequence<TokenMatch>) : ParseResult<T>() {
+data class Parsed<out T>(val value: T, val remainder: Sequence<TokenMatch>) : ParseResult<T>() {
     override fun toString(): String = "Parsed($value)"
 }
 
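Since remainder is now public, callers can see what input a successful parse left unconsumed, for example to report progress or to feed the rest into another parser. A sketch under the assumption that Parser exposes tryParse(tokens) and that TokenMatch carries the matched text; only Parsed.remainder itself comes from this diff:

import com.github.h0tk3y.betterParse.lexer.DefaultTokenizer
import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.parser.Parsed

fun main() {
    val number = Token("number", "\\d+")
    val ws = Token("ws", "\\s+", ignored = true)
    val tokens = DefaultTokenizer(listOf(number, ws)).tokenize("42 7")

    val result = number.tryParse(tokens)    // assumed Parser entry point
    if (result is Parsed) {
        println(result.value.text)          // "42", assuming TokenMatch exposes the matched text
        println(result.remainder.count())   // tokens left over, now visible outside the library
    }
}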

src/main/kotlin/com/github/h0tk3y/betterParse/utils/CachedSequence.kt

Lines changed: 3 additions & 3 deletions

@@ -1,10 +1,10 @@
 package com.github.h0tk3y.betterParse.utils
 
-import com.github.h0tk3y.betterParse.lexer.TokenizerMatchesSequence
 import com.github.h0tk3y.betterParse.lexer.TokenMatch
+import com.github.h0tk3y.betterParse.lexer.TokenizerMatchesSequence
 import java.util.*
 
-internal class CachedSequence<T> constructor(
+internal open class CachedSequence<T> constructor(
     val source: Iterator<T>,
     val cache: ArrayList<T>,
     val startAt: Int
@@ -28,7 +28,7 @@ internal class CachedSequence<T> constructor(
 }
 
 internal fun Sequence<TokenMatch>.skipOne(): Sequence<TokenMatch> = when (this) {
-    is TokenizerMatchesSequence -> TokenizerMatchesSequence(tokens.skipOne() as CachedSequence, tokenizer)
+    is TokenizerMatchesSequence -> TokenizerMatchesSequence(source, tokenizer, cache, startAt + 1)
    is CachedSequence -> CachedSequence(source, cache, startAt + 1)
    else -> drop(1)
 }
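
The point of making CachedSequence open and having TokenizerMatchesSequence extend it is that skipOne() can now advance over a shared cache by bumping startAt, instead of allocating a wrapper around a freshly built CachedSequence for every skipped token. A simplified, standalone illustration of the caching idea (not the library's actual class):

// All "skipped" views share one source iterator and one cache; only startAt differs,
// so a skip operation allocates a single small object.
class CachedView<T>(
    private val source: Iterator<T>,
    private val cache: ArrayList<T>,
    private val startAt: Int
) : Sequence<T> {
    override fun iterator(): Iterator<T> = object : Iterator<T> {
        var pos = startAt
        override fun hasNext(): Boolean {
            // Pull from the source only when the cache does not yet cover this position.
            while (cache.size <= pos && source.hasNext()) cache.add(source.next())
            return pos < cache.size
        }
        override fun next(): T {
            if (!hasNext()) throw NoSuchElementException()
            return cache[pos++]
        }
    }
    // Skipping shares the same iterator and cache; only the start offset changes.
    fun skipOne() = CachedView(source, cache, startAt + 1)
}

fun main() {
    val view = CachedView(listOf(1, 2, 3).iterator(), arrayListOf(), 0)
    println(view.skipOne().toList())   // [2, 3], read through the shared cache
}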
