add test case illustrating indentation-based parsing
author Adam Megacz <megacz@cs.berkeley.edu>
Sun, 2 May 2010 01:20:47 +0000 (18:20 -0700)
committer Adam Megacz <megacz@cs.berkeley.edu>
Sun, 2 May 2010 01:20:47 +0000 (18:20 -0700)
.gitignore [new file with mode: 0644]
tests/regression.tc

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..5a6277b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+bin
+edu.berkeley.sbp.jar
+grammar.ser
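diff --git a/tests/regression.tc b/tests/regression.tc
index ce66c93..881a282 100644 (file)
--- a/tests/regression.tc
+++ b/tests/regression.tc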
@@ -469,3 +469,50 @@ testcase "long input (reported by David Crawshaw)" {
     X:: = "a" ("b"* &~ ())
 }
 
+testcase "a case PEGs cannot handle" {
+    input "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+    s:: = X
+    X:: = "x" X "x" | "x"
+}
+
+testcase "indentation-driven binary tree parsing" {
+  input "
+a
+.g
+..b
+..b
+.q
+";
+  output "block:{interior:{{a} block:{interior:{{g} block:{leaf:{b}} block:{leaf:{b}}}} block:{leaf:{q}}}}";
+
+  s          = block "\n"
+
+  // In the example below, the newline character \n is considered
+  // to be the "first" character of each line (rather than the "last"
+  // character of each line).  This convention is a bit odd, but makes
+  // the example easier to understand.
+
+  // this example uses periods for indentation to make the
+  // examples easier to read; in real life you would use
+  // whitespace
+  indent     = "."**
+
+  // outdent matches any sequence of lines in which the first
+  // line has indentation strictly greater than some subsequent line
+  outdent!   = "." outdent "."
+             | "." ~[.] ~[]* "\n"
+
+  // a block is a properly-indented (that is, non-outdent-matching)
+  // sequence of lines.  It DOES NOT include the trailing newline.
+  block      =  block:: " "* "\n" indent! body
+            &~          " "* "\n" outdent ~[.] ~[]*
+
+  // a body is what's left of a block after you remove the indentation
+  // from the first line
+  body       = leaf
+             | interior
+
+  leaf       = "leaf"::     [a-z0-9]++
+  interior   = "interior":: ("":: [a-z0-9]++) " "* block block
+
+}