idl = [a-d]
}
-testcase "a+b*c" {
+testcase "priority" {
input "a+b*c";
output "plus:{stringify:{a} times:{stringify:{b} stringify:{c}}}";
w = " "
idl = [a-d]
}
+testcase "associativity" {
+ input "a*b*c";
+ output "times:{stringify:{a} times:{stringify:{b} stringify:{c}}}";
+ // Checks how a chain of "*" operators is grouped: per the expected
+ // output above, a*b*c must come out right-nested, i.e. a*(b*c).
+ // NOTE(review): presumably the parenthesized right operand in the
+ // explicit "times" alternative is what selects this grouping -- confirm.
+ w = " "
+ l = id
+ s = assign:: l "=" r
+ | r
+ r = l
+ | assign:: l "=" r
+ | plus:: r "+" r
+ | times:: r "*" (r)
+ | "(" r ")"
+ | times:: r r
+ id = stringify:: idl++
+ idl = [a-d]
+}
+
testcase "unnamed" {
input "aa bb";
output "{q:{a a} q:{b b}}";
Z = Z:: "a" "b"* "a"
B = "b"
}
+
+testcase "operator: ... " {
+ input "aaabbbaaa abababababa";
+ output "s:{C:{a a a b b b a a a} B:{a b a b a b a b a b a}}";
+ // Two words over [ab], separated by a single space. Per the expected
+ // output, the word containing "bbb" is tagged C and the word without
+ // it is tagged B: B is [ab]* minus anything of the form (... "bbb" ...).
+ // NOTE(review): presumably "..." matches any string and "B > C" prefers
+ // B whenever both alternatives match -- confirm operator semantics.
+ s:: = A " " A
+ A = B > C
+ B:: = [ab]* &~ (... "bbb" ...)
+ C:: = [ab]*
+}
+
+testcase "operator: ~~" {
+ input "aaabbbaaa abababababa";
+ output "s:{C:{a a a b b b a a a} B:{a b a b a b a b a b a}}";
+ // Same input and expected output as the "operator: ... " case; the only
+ // difference is that B is written with ~~ instead of "[ab]* &~ (...)".
+ // NOTE(review): presumably ~~(x) matches every string that x does not
+ // match -- confirm operator semantics.
+ s:: = A " " A
+ A = B > C
+ B:: = ~~(... "bbb" ...)
+ C:: = [ab]*
+}
+
+testcase "lifts" {
+ input "a+(b*c)";
+ output "+:{a *:{id:{b} c}}";
+
+ // In the expected output the operator literals "+" and "*" serve as the
+ // node tags, and the operands marked with a backtick (a, c) appear
+ // without their id: wrapper while the unmarked operand keeps it (id:{b}).
+ // NOTE(review): presumably ^ promotes the literal to the node tag and
+ // ` splices an operand's children into the parent -- confirm.
+ s = r
+ r = id
+ | r ^"*" `r
+ | `r ^"+" r
+ | "(" r ")"
+ id:: = [a-z]++
+}
+
+testcase "epsilon as a positive conjunct" {
+ input "abababab";
+ // No expected output is listed for this case.
+ // X is "a" followed by something that must match both "b"+ and ().
+ // NOTE(review): presumably () is the empty string, so the conjunction
+ // can never be satisfied and the parse is expected to fail -- confirm
+ // how the harness treats a testcase with no output line.
+ s:: = X*
+ X:: = "a" ("b"+ & ())
+}
+
+testcase "ensure sharing of so-called reduction nodes" {
+ input "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a ";
+ ignore output;
+ // The result is not compared against anything ("ignore output").
+ // Both alternatives of S consume the same literal "a " and differ only
+ // in their tag (A or B), so each "a " in the input has two derivations.
+ // NOTE(review): presumably the point is that the parser must share
+ // nodes across these derivations to stay tractable -- confirm.
+ s:: = (S!)+
+ S:: = A:: "a "
+ | B:: "a "
+}
+
+testcase "epsilon as a negative conjunct" {
+ input "aaaaa";
+ // No expected output is listed for this case.
+ // X is "a" followed by something that matches "b"* but does not match ().
+ // NOTE(review): presumably () is the empty string, which would make the
+ // tail require at least one "b"; the input has none, so the parse is
+ // presumably expected to fail -- confirm.
+ s:: = X*
+ X:: = "a" ("b"* &~ ())
+}
+
+testcase "long input (reported by David Crawshaw)" {
+ input "0123456789";
+ // No expected output is listed for this case.
+ // NOTE(review): the rules below are identical to those of the
+ // "epsilon as a negative conjunct" case and mention only "a" and "b",
+ // while the input consists of the ten digits 0-9. Confirm that this
+ // grammar and this (fairly short) input are what the test intends.
+ s:: = X*
+ X:: = "a" ("b"* &~ ())
+}
+
+testcase "a case PEGs cannot handle" {
+ input "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+ // No expected output is listed for this case.
+ // X is either a single "x" or an X wrapped in one further "x" on each
+ // side, so it matches a run of x's by recursing in from both ends
+ // towards a middle "x".
+ // NOTE(review): presumably a PEG fails here because it commits to the
+ // first alternative greedily and cannot locate the middle -- confirm.
+ s:: = X
+ X:: = "x" X "x" | "x"
+}
+
+testcase "indentation-driven binary tree parsing" {
+ input "
+a
+.g
+..b
+..b
+.q
+";
+ output "block:{interior:{{a} block:{interior:{{g} block:{leaf:{b}} block:{leaf:{b}}}} block:{leaf:{q}}}}";
+
+ // The whole input is a single block followed by one final newline.
+ s = block "\n"
+
+ // In the grammar below, the newline character \n is considered
+ // to be the "first" character of each line (rather than the "last"
+ // character of each line). This convention is a bit odd, but makes
+ // the example easier to understand.
+
+ // This example uses periods for indentation to make the
+ // input easier to read; in real life you would use
+ // whitespace.
+ indent = "."**
+
+ // outdent matches any sequence of lines in which the first
+ // line has indentation strictly greater than some subsequent line.
+ outdent! = "." outdent "."
+ | "." ~[.] ~[]* "\n"
+
+ // A block is a properly-indented (that is, non-outdent-matching)
+ // sequence of lines. It DOES NOT include the trailing newline.
+ block = block:: " "* "\n" indent! body
+ &~ " "* "\n" outdent ~[.] ~[]*
+
+ // A body is what's left of a block after you remove the indentation
+ // from the first line.
+ body = leaf
+ | interior
+
+ // A leaf is just a name. An interior node is a name (tagged with the
+ // empty string, so it shows up as an untagged {...} node in the expected
+ // output), optional spaces, and then exactly two child blocks -- hence
+ // "binary tree".
+ leaf = "leaf":: [a-z0-9]++
+ interior = "interior":: ("":: [a-z0-9]++) " "* block block
+
+}