1structure pegML =
2struct
3
4(* A concrete-continuation-passing implementation of PEG-parsing.
5
6   "Concrete" because the continuations are not closures but rather
7   data structures encoding all the possible continuation forms. Being
8   concrete in this way makes it easier to encode the parser as a HOL
9   function, though I expect this will still be non-trivial because
10   the termination argument is difficult. In fact, my expectation is
11   that I will try to encode this with HOL's WHILE. (The use of WHILE
12   is what requires the implementation to be tail-recursive. The fact
13   that there is an analogue of exception handling requires the use of
14   two continuations.)
15*)
16
17fun I x = x
18
19datatype ('tok,'value) pegsym =
20  empty of 'value
21| any of 'tok -> 'value
22| tok of 'tok * 'value
23| nt of string * ('value -> 'value)
24| seq of ('tok,'value) pegsym * ('tok,'value)pegsym * ('value * 'value -> 'value)
25| choice of ('tok,'value) pegsym * ('tok,'value)pegsym * ('value -> 'value)
26| rpt of ('tok,'value) pegsym * ('value list -> 'value)
27| not of ('tok,'value) pegsym * 'value
28
29datatype ('tok,'value) kont =
30         sym of ('tok,'value) pegsym * ('tok,'value) kont * ('tok,'value)kont
31       | applyf of ('value option list -> 'value option list) * ('tok,'value)kont
32       | returnTo of 'tok list * 'value option list * ('tok,'value)kont
33       | poplist of ('value list -> 'value) * ('tok,'value)kont
34       | listsym of ('tok,'value) pegsym * ('value list -> 'value) * ('tok,'value) kont
35       | done
36       | failed
37
38fun binary2list f k =
39    applyf ((fn vl => case vl of
40                         SOME h1::SOME h2::t => SOME (f(h2, h1)) :: t
41                       | _ => raise Fail "binary f - with short list"),
42            k)
43
44fun unary2list f k =
45    applyf ((fn vl => case vl of
46                          SOME h::t => SOME (f h) :: t
47                        | _ => raise Fail "unary f with bad list"),
48            k)
49
50fun poplistval (f:'v list -> 'v) (l:'v option list) = let
51  fun recurse acc l =
52      case l of
53          [] => raise Fail "poplistval failure"
54        | SOME h::t => recurse (h::acc) t
55        | NONE::t => (acc,t)
56  val (values,rest) = recurse [] l
57in
58  SOME (f values) :: rest
59end
60
61fun eval G (e:(''i,'v)pegsym) input (results:'v option list) k fk =
62    case e of
63        empty v => apply G k input (SOME v::results)
64      | any tf => (case input of
65                       [] => apply G fk input results
66                     | h::t => apply G k t (SOME (tf h)::results))
67      | tok (t,v) => (case input of
68                          [] => apply G fk input results
69                        | x::xs => if x = t then apply G k xs (SOME v::results)
70                                   else apply G fk input results)
71      | seq(e1,e2,f) =>
72        eval G e1 input results
73             (sym(e2,binary2list f k,returnTo(input,results,fk)))
74             fk
75      | choice(e1,e2,f) => eval G e1
76                                input results
77                                (unary2list f k)
78                                (returnTo(input,results,
79                                          sym(e2,unary2list f k,fk)))
80      | not(e,v) =>
81          eval G e input results
82               (returnTo(input,results,fk))
83               (returnTo(input,SOME v::results,k))
84      | rpt(e,f) =>
85          eval G e input (NONE::results) (listsym(e,f,k)) (poplist(f,k))
86      | nt(n,f) => eval G (G n) input results k fk
87and apply G (k:(''i,'v)kont) input (results:'v option list) =
88    case k of
89        done => SOME (valOf (hd results), input)
90      | failed => NONE
91      | sym(e,k,f) => eval G e input results k f
92      | applyf(f, k) => apply G k input (f results)
93      | returnTo(i,r,k) => apply G k i r
94      | listsym(e,f,k) =>
95          eval G e input results (listsym(e,f,k)) (poplist(f, k))
96      | poplist(f,k) => apply G k input (poplistval f results : 'v option list)
97
98datatype PT = XN of int | Plus of PT * PT | Times of PT * PT
99              | PTL of PT list
100
101fun leftassoc f leftlist last =
102    case leftlist of
103        [] => last
104      | h::t => f (List.foldl (fn (a,b) => f(b,a)) h t, last)
105
106fun testG s =
107    case s of
108        "E" => seq(rpt (seq(nt("F", I), tok("+", XN 0), #1), PTL),
109                   nt("F", I),
110                   (fn (PTL tlist,t) => leftassoc Plus tlist t))
111      | "F" => seq(rpt (seq(nt("T", I), tok("*", XN 0), #1), PTL),
112                   nt("T", I),
113                   (fn (PTL tlist,t) => leftassoc Times tlist t))
114      | "T" => choice(seq(seq(tok("(", XN 0), nt("E", I), #2), tok(")", XN 0),
115                          #1),
116                      nt("N", I),
117                      I)
118      | "N" => choice(tok("1", XN 1), choice(tok("2", XN 2), tok ("3", XN 3),
119                                             I),
120                      I)
121
122val mk = map str o explode
123
124val results = [
125  eval testG (nt("E",I)) (mk "1+2*3") [] done failed,
126  eval testG (nt("E",I)) (mk "(1+2)*3") [] done failed,
127  eval testG (nt("E",I)) (mk "(1+2)*(3*1)") [] done failed,
128  eval testG (nt("E",I)) (mk "2*3*1") [] done failed,
129  eval testG (nt("E",I)) (mk "2+3+1") [] done failed,
130  eval testG (nt("E",I)) (mk "2+3+1+2+1") [] done failed,
131  eval testG (nt("E",I)) (mk "(1+2)*(3+1)*(1+2+1)") [] done failed,
132  eval testG (nt("E",I)) (mk "(1+2)*(3+1)*(1+2+1") [] done
133       (sym(empty (XN 0), done, done))]
134end (* struct *)
135