add test case illustrating indentation-based parsing

author Adam Megacz <megacz@cs.berkeley.edu>

Sun, 2 May 2010 01:20:47 +0000 (18:20 -0700)

committer Adam Megacz <megacz@cs.berkeley.edu>

Sun, 2 May 2010 01:20:47 +0000 (18:20 -0700)
author Adam Megacz <megacz@cs.berkeley.edu>
Sun, 2 May 2010 01:20:47 +0000 (18:20 -0700)
committer Adam Megacz <megacz@cs.berkeley.edu>
Sun, 2 May 2010 01:20:47 +0000 (18:20 -0700)
diff --git a/.gitignore b/.gitignore

new file mode 100644 (file)

index 0000000..5a6277b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+bin
+edu.berkeley.sbp.jar
+grammar.ser
diff --git a/tests/regression.tc b/tests/regression.tc

index ce66c93..881a282 100644 (file)
--- a/tests/regression.tc
+++ b/tests/regression.tc
@@ -469,3 +469,50 @@ testcase "long input (reported by David Crawshaw)" {
      X:: = "a" ("b"* &~ ())
  }
  
+testcase "a case PEGs cannot handle" {
+    input "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+    s:: = X
+    X:: = "x" X "x" | "x"
+}
+
+testcase "indentation-driven binary tree parsing" {
+  input "
+a
+.g
+..b
+..b
+.q
+";
+  output "block:{interior:{{a} block:{interior:{{g} block:{leaf:{b}} block:{leaf:{b}}}} block:{leaf:{q}}}}";
+
+  s          = block "\n"
+
+  // In the example below, the newline character \n is considered
+  // to be the "first" character of each line (rather than the "last"
+  // character of each line).  This convention is a bit odd, but makes
+  // the example easier to understand.
+
+  // this example uses periods for indentation to make the
+  // examples easier to read; in real life you would use
+  // whitespace
+  indent     = "."**
+
+  // outdent matches any sequence of lines in which the first line
+  // has indentation strictly greater than that of some subsequent line
+  outdent!   = "." outdent "."
+             | "." ~[.] ~[]* "\n"
+
+  // a block is a properly-indented (that is, non-outdent-matching)
+  // sequence of lines.  It DOES NOT include the trailing newline.
+  block      =  block:: " "* "\n" indent! body
+            &~          " "* "\n" outdent ~[.] ~[]*
+
+  // a body is what's left of a block after you remove the indentation
+  // from the first line
+  body       = leaf
+             | interior
+
+  leaf       = "leaf"::     [a-z0-9]++
+  interior   = "interior":: ("":: [a-z0-9]++) " "* block block
+
+}
author	Adam Megacz <megacz@cs.berkeley.edu>
	Sun, 2 May 2010 01:20:47 +0000 (18:20 -0700)
committer	Adam Megacz <megacz@cs.berkeley.edu>
	Sun, 2 May 2010 01:20:47 +0000 (18:20 -0700)
.gitignore	[new file with mode: 0644]	patch \| blob
tests/regression.tc		patch \| blob \| history