add test case illustrating indentation-based parsing
[sbp.git] / tests / regression.tc
index ce66c93..881a282 100644 (file)
@@ -469,3 +469,50 @@ testcase "long input (reported by David Crawshaw)" {
     X:: = "a" ("b"* &~ ())
 }
 
+testcase "a case PEGs cannot handle" {
+    input "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+    s:: = X
+    X:: = "x" X "x" | "x"
+}
+
+testcase "indentation-driven binary tree parsing" {
+  input "
+a
+.g
+..b
+..b
+.q
+";
+  output "block:{interior:{{a} block:{interior:{{g} block:{leaf:{b}} block:{leaf:{b}}}} block:{leaf:{q}}}}";
+
+  s          = block "\n"
+
+  // In the example below, the newline character \n is treated as the
+  // "first" character of each line (rather than the "last").  The
+  // convention is a bit odd, but makes the example easier to follow;
+  // it is why s ends with a "\n" and why the input begins with one.
+
+  // periods stand in for indentation so that the examples are easier
+  // to read; in real life you would use whitespace.  The matched periods
+  // do not appear in the expected output (presumably the "!" in indent!).
+  indent     = "."**
+
+  // outdent matches a run of lines whose first line is indented more deeply
+  // than some later line (as written: a later line with exactly one fewer ".")
+  outdent!   = "." outdent "."
+             | "." ~[.] ~[]* "\n"
+
+  // a block is a properly-indented (that is, non-outdent-matching) sequence
+  // of lines: "\n", indent, body.  It DOES NOT include the trailing newline.
+  block      =  block:: " "* "\n" indent! body
+            &~          " "* "\n" outdent ~[.] ~[]*
+
+  // a body is what's left of a block after you remove the indentation from the
+  // first line: a bare label (leaf), or a label and exactly two blocks (interior)
+  body       = leaf
+             | interior
+
+  leaf       = "leaf"::     [a-z0-9]++
+  interior   = "interior":: ("":: [a-z0-9]++) " "* block block
+
+}