Skip to content

Grammar railroad diagram #72

@mingodad

Description

@mingodad

Here is a function that accepts a Grammar instance and outputs an EBNF grammar understood by https://github.com/GuntherRademacher/rr, which can generate a nice navigable railroad diagram to document/develop/debug the grammar.

See also a variant of it that I made for this project https://gitlab.com/mingodad/rell/-/commit/7f3fcc256fd5e2ec707d00c87b63bccad266aba4 .

Below is the function gram2ebnf, embedded in this simple project: https://github.com/overminder/kitchen-sink/tree/7fc378da20474832e308780c67d99741be7f26d9/plt-and-algo/partial-eval/peaapg/src/main/kotlin/com/gh/om/peaapg/ch3/fc

--- /home/mingo/dev/kotlin/ch3/parse0.kt
+++ /home/mingo/dev/kotlin/ch3/parse.kt
@@ -1,23 +1,25 @@
 package com.gh.om.peaapg.ch3.fc
 
-import com.github.h0tk3y.betterParse.combinators.leftAssociative
-import com.github.h0tk3y.betterParse.combinators.map
-import com.github.h0tk3y.betterParse.combinators.oneOrMore
-import com.github.h0tk3y.betterParse.combinators.or
-import com.github.h0tk3y.betterParse.combinators.rightAssociative
-import com.github.h0tk3y.betterParse.combinators.separatedTerms
-import com.github.h0tk3y.betterParse.combinators.times
-import com.github.h0tk3y.betterParse.combinators.unaryMinus
-import com.github.h0tk3y.betterParse.combinators.use
-import com.github.h0tk3y.betterParse.combinators.zeroOrMore
-import com.github.h0tk3y.betterParse.grammar.Grammar
-import com.github.h0tk3y.betterParse.grammar.parseToEnd
-import com.github.h0tk3y.betterParse.grammar.parser
-import com.github.h0tk3y.betterParse.lexer.literalToken
-import com.github.h0tk3y.betterParse.lexer.regexToken
-import com.github.h0tk3y.betterParse.parser.Parser
+import com.github.h0tk3y.betterParse.combinators.* //leftAssociative
+//import com.github.h0tk3y.betterParse.combinators.map
+//import com.github.h0tk3y.betterParse.combinators.oneOrMore
+//import com.github.h0tk3y.betterParse.combinators.or
+//import com.github.h0tk3y.betterParse.combinators.rightAssociative
+//import com.github.h0tk3y.betterParse.combinators.separatedTerms
+//import com.github.h0tk3y.betterParse.combinators.times
+//import com.github.h0tk3y.betterParse.combinators.unaryMinus
+//import com.github.h0tk3y.betterParse.combinators.use
+//import com.github.h0tk3y.betterParse.combinators.zeroOrMore
+import com.github.h0tk3y.betterParse.grammar.* //Grammar
+//import com.github.h0tk3y.betterParse.grammar.parseToEnd
+//import com.github.h0tk3y.betterParse.grammar.parser
+import com.github.h0tk3y.betterParse.lexer.* //literalToken
+//import com.github.h0tk3y.betterParse.lexer.regexToken
+import com.github.h0tk3y.betterParse.parser.*
 
 import kotlin.reflect.full.*
+import kotlin.reflect.KProperty
+import kotlin.reflect.jvm.isAccessible
 
 class ProgramGrammar : Grammar<Program>() {
     // Tokens are parsed in the declaration order. So if int is after ident (which subsumes int), int will never
@@ -106,8 +108,180 @@
         get() = program
 }
 
+fun gram2ebnf(g : ProgramGrammar) {
+    var gramProps = ArrayList<Any>()
+    var gramPropsNames = ArrayList<String>()
+    fun findName(o : Parser<*>): String? {
+        for (idx in gramProps.indices) {
+            if(gramProps[idx] == o) {
+                return gramPropsNames[idx]
+            }
+        }
+        return null
+    }
+
+    //first collect names/properties from the grammar
+    for (prop in ProgramGrammar::class.memberProperties) {
+        val prop_value = prop.get(g);
+	    when (prop_value) {
+            //order is important
+            is Token -> {
+                gramProps.add(prop_value)
+                gramPropsNames.add(prop.name)
+			    //println("terminal : ${prop.name} : ${prop_value} : ${prop_value.javaClass.kotlin}")
+            }
+            is Parser<*> -> {
+                gramProps.add(prop_value)
+                gramPropsNames.add(prop.name)
+			    //println("nonterminal : ${prop.name} : ${prop_value}")
+            }
+            //else -> println("?? : ${prop.name} : ${prop_value}")
+        }
+    }
+
+    //now dump the EBNF
+    fun obj2rhs(obj : Parser<*>, isFirst : Boolean = false) {
+        when (obj) {
+            is MapCombinator<*,*> -> {
+                val oname = findName(obj.innerParser)
+                if(oname != null) print(" ${oname}")
+                else obj2rhs(obj.innerParser)
+            }
+            is OrCombinator<*> -> {
+                var sep = ""
+                if(!isFirst) print(" (")
+                for(p in obj.parsers) {
+                    val oname = findName(p)
+                    print(sep)
+                    if(oname != null) print(" ${oname}")
+                    else obj2rhs(p)
+                    if (sep == "") sep = " |"
+                }
+                if(!isFirst) print(" )")
+            }
+            is AndCombinator<*> -> {
+                val obj_consummers = obj.consumers
+                for (index in 0 until obj_consummers.size) {
+                    val p = obj_consummers[index]
+                    when (p) {
+                        is Parser<*> -> {
+                            val oname = findName(p)
+                            if(oname != null) print(" ${oname}")
+                            else obj2rhs(p)
+                        }
+                        is SkipParser -> {
+                            obj2rhs(p.innerParser)
+                        }
+                        else -> throw IllegalArgumentException()
+                    }
+                }
+            }
+            is OptionalCombinator<*> -> {
+                val oname = findName(obj.parser)
+                if(oname != null) print(" ${oname}?")
+                else {
+                    print(" (")
+                    obj2rhs(obj.parser)
+                    print(" )?")
+                }
+            }
+            is RepeatCombinator<*> -> {
+                val oname = findName(obj.parser)
+                if(oname != null) print(" ${oname}")
+                else {
+                    obj2rhs(obj.parser)
+                }
+                if (obj.atLeast == 0) print("*")
+                else print("+")
+            }
+            //is SkipParser<*> -> {obj2rhs(obj.innerParser)}
+            is SeparatedCombinator<*,*> -> {
+                val tpname = findName(obj.termParser)
+                if (tpname != null) print(" ${tpname}");
+                else obj2rhs(obj.termParser)
+                print(" (")
+                val sepname = findName(obj.separatorParser)
+                if(sepname != null) print(" ${sepname}");
+                else obj2rhs(obj.separatorParser)
+                if (tpname != null) print(" ${tpname}");
+                else obj2rhs(obj.termParser)
+                if (obj.acceptZero) print(" )*")
+                else print(" )+")
+            }
+            is ParserReference<*> -> {
+                val oname = findName(obj.parser)
+                if(oname != null) print(" ${oname}")
+                else obj2rhs(obj.parser)
+            }
+            //is Separated<*,*> -> {}
+            else -> {
+                val oname = findName(obj)
+                if(oname != null) print(" ${oname}")
+                else {
+                    val obj_class = obj.javaClass.kotlin
+                    //println("--Token? ${obj_class}")
+                    when (obj) {
+                        is com.github.h0tk3y.betterParse.lexer.LiteralToken -> {
+                                //println("${name} ::= '${obj.text}'")
+                                print(" '${obj.text}'")
+                            }
+                            is com.github.h0tk3y.betterParse.lexer.CharToken -> {
+                                //println("${name} ::= '${obj.text}'")
+                                print(" '${obj.text}'")
+                            }
+                        is com.github.h0tk3y.betterParse.lexer.RegexToken -> {
+                            //println("${name} ::= '??'")
+                            print(" '??'")
+                        }
+                        else -> {print(" (? ${obj} ?)")}
+                    }
+                }
+            }
+        }
+    }
+    for (idx in gramPropsNames.indices) {
+        val obj = gramProps[idx]
+        val name = gramPropsNames[idx]
+        //println(obj)
+        print("${name} ::=")
+        when (obj) {
+            is MapCombinator<*,*> -> {
+                obj2rhs(obj.innerParser, true)
+            }
+            is OrCombinator<*> -> {
+                obj2rhs(obj, true)
+            }
+            else -> {
+                val obj_class = obj.javaClass.kotlin
+                //println("--Token? ${obj_class}")
+                when (obj) {
+                    is com.github.h0tk3y.betterParse.lexer.LiteralToken -> {
+                            //println("${name} ::= '${obj.text}'")
+                            print(" '${obj.text}'")
+                        }
+                        is com.github.h0tk3y.betterParse.lexer.CharToken -> {
+                            //println("${name} ::= '${obj.text}'")
+                            print(" '${obj.text}'")
+                        }
+                    is com.github.h0tk3y.betterParse.lexer.RegexToken -> {
+                        //println("${name} ::= '??'")
+			val loadedClass = RegexToken::class
+			val patternField = loadedClass.java.getDeclaredField("pattern")
+			patternField?.isAccessible = true
+			val actualValue = patternField?.get(obj) as? String
+			print(" '${actualValue}'")
+                    }
+                    else -> {print(" (? ${obj} ?)")}
+                }
+            }
+        }
+        println("")
+    }
+}
+
 fun main() {
     val g = ProgramGrammar()
+    gram2ebnf(g)
     val program = g.parseToEnd("""
         read a, b;
         foo:

Output:

assign ::= identString tokEq eqvChain tokSemi
bb ::= label tokColon assign* jump tokSemi
consChain ::= term ( tokColon2 term )+
eqvChain ::= plusChain ( tokEq2 plusChain )+
expr ::= plusChain ( tokEq2 plusChain )+
exprList ::= eqvChain ( tokComma eqvChain )*
goto ::= tokGoto label
identList ::= identString ( tokComma identString )*
identString ::= tokId
ifGoto ::= tokIf eqvChain tokGoto label tokElse label
jump ::= goto | ifGoto | ret
label ::= tokId
mulChain ::= consChain ( tokTimes consChain )+
plusChain ::= mulChain ( ( tokPlus | tokMinus ) mulChain )+
program ::= tokRead identList tokSemi bb+
ret ::= tokReturn eqvChain
rootParser ::= tokRead identList tokSemi bb+
term ::= identString | tokInt | tokLpar eqvChain tokRpar | tokLsquare exprList tokRsquare
tokColon ::= ':'
tokColon2 ::= '::'
tokComma ::= ','
tokElse ::= 'else'
tokEq ::= '='
tokEq2 ::= '=='
tokGoto ::= 'goto'
tokHead ::= 'head'
tokId ::= '[a-zA-Z_][a-zA-Z0-9_]*'
tokIf ::= 'if'
tokInt ::= '\d+'
tokLpar ::= '('
tokLsquare ::= '['
tokMinus ::= '-'
tokPlus ::= '+'
tokRead ::= 'read'
tokReturn ::= 'return'
tokRpar ::= ')'
tokRsquare ::= ']'
tokSemi ::= ';'
tokTail ::= 'tail'
tokTimes ::= '*'
tokWs ::= '\s+'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions