From ead5dd828b3a45a9de17d4a0c7a41b3c42c4b3c3 Mon Sep 17 00:00:00 2001 From: Adam Megacz Date: Sat, 1 May 2010 18:20:47 -0700 Subject: [PATCH] add test case illustrating indentation-based parsing --- .gitignore | 3 +++ tests/regression.tc | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5a6277b --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +bin +edu.berkeley.sbp.jar +grammar.ser diff --git a/tests/regression.tc b/tests/regression.tc index ce66c93..881a282 100644 --- a/tests/regression.tc +++ b/tests/regression.tc @@ -469,3 +469,50 @@ testcase "long input (reported by David Crawshaw)" { X:: = "a" ("b"* &~ ()) } +testcase "a case PEGs cannot handle" { + input "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + s:: = X + X:: = "x" X "x" | "x" +} + +testcase "indentation-driven binary tree parsing" { + input " +a +.g +..b +..b +.q +"; + output "block:{interior:{{a} block:{interior:{{g} block:{leaf:{b}} block:{leaf:{b}}}} block:{leaf:{q}}}}"; + + s = block "\n" + + // In the example below, the newline character \n is considered + // to be the "first" character of each line (rather than the "last" + // character of each line). This convention is a bit odd, but makes + // the example easier to understand. + + // this example uses periods for indentation to make the + // examples easier to read; in real life you would use + // whitespace + indent = "."** + + // outdent matches any sequence of lines in which the first + // line has indentation strictly greater than some subsequent line + outdent! = "." outdent "." + | "." ~[.] ~[]* "\n" + + // a block is a properly-indented (that is, non-outdent-matching) + // sequence of lines. It DOES NOT include the trailing newline. + block = block:: " "* "\n" indent! body + &~ " "* "\n" outdent ~[.] ~[]* + + // a body is what's left of a block after you remove the indentation + // from the first line + body = leaf + | interior + + leaf = "leaf":: [a-z0-9]++ + interior = "interior":: ("":: [a-z0-9]++) " "* block block + +} -- 1.7.10.4