1207753Smm(* ========================================================================= *)
2207753Smm(* PARSING                                                                   *)
3207753Smm(* Copyright (c) 2001 Joe Hurd, distributed under the BSD License            *)
4207753Smm(* ========================================================================= *)
5207753Smm
6207753Smmstructure Parse :> Parse =
7207753Smmstruct
8207753Smm
9207753Smmopen Useful;
10207753Smm
11207753Smminfixr 9 >>++
12207753Smminfixr 8 ++
13207753Smminfixr 7 >>
14207753Smminfixr 6 ||
15207753Smm
16207753Smm(* ------------------------------------------------------------------------- *)
17207753Smm(* A "cannot parse" exception.                                               *)
18207753Smm(* ------------------------------------------------------------------------- *)
19207753Smm
20207753Smmexception NoParse;
21207753Smm
22207753Smm(* ------------------------------------------------------------------------- *)
23207753Smm(* Recursive descent parsing combinators.                                    *)
24207753Smm(* ------------------------------------------------------------------------- *)
25207753Smm
26207753Smmval error : 'a -> 'b * 'a = fn _ => raise NoParse;
27207753Smm
28207753Smmfun op ++ (parser1,parser2) input =
29207753Smm    let
30207753Smm      val (result1,input) = parser1 input
31207753Smm      val (result2,input) = parser2 input
32207753Smm    in
33207753Smm      ((result1,result2),input)
34207753Smm    end;
35207753Smm
36207753Smmfun op >> (parser : 'a -> 'b * 'a, treatment) input =
37207753Smm    let
38207753Smm      val (result,input) = parser input
39207753Smm    in
40207753Smm      (treatment result, input)
41207753Smm    end;
42207753Smm
43207753Smmfun op >>++ (parser,treatment) input =
44207753Smm    let
45207753Smm      val (result,input) = parser input
46207753Smm    in
47207753Smm      treatment result input
48207753Smm    end;
49207753Smm
50207753Smmfun op || (parser1,parser2) input =
51207753Smm    parser1 input handle NoParse => parser2 input;
52207753Smm
53207753Smmfun first [] _ = raise NoParse
54207753Smm  | first (parser :: parsers) input = (parser || first parsers) input;
55207753Smm
56207753Smmfun mmany parser state input =
57207753Smm    let
58207753Smm      val (state,input) = parser state input
59207753Smm    in
60207753Smm      mmany parser state input
61207753Smm    end
62207753Smm    handle NoParse => (state,input);
63207753Smm
64207753Smmfun many parser =
65207753Smm    let
66207753Smm      fun sparser l = parser >> (fn x => x :: l)
67207753Smm    in
68207753Smm      mmany sparser [] >> List.rev
69207753Smm    end;
70207753Smm
71207753Smmfun atLeastOne p = (p ++ many p) >> op::;
72207753Smm
73207753Smmfun nothing input = ((),input);
74207753Smm
75207753Smmfun optional p = (p >> SOME) || (nothing >> K NONE);
76207753Smm
77207753Smm(* ------------------------------------------------------------------------- *)
78207753Smm(* Stream-based parsers.                                                     *)
79207753Smm(* ------------------------------------------------------------------------- *)
80207753Smm
81207753Smmtype ('a,'b) parser = 'a Stream.stream -> 'b * 'a Stream.stream
82207753Smm
83207753Smmfun maybe p Stream.Nil = raise NoParse
84207753Smm  | maybe p (Stream.Cons (h,t)) =
85207753Smm    case p h of SOME r => (r, t ()) | NONE => raise NoParse;
86207753Smm
87207753Smmfun finished Stream.Nil = ((), Stream.Nil)
88207753Smm  | finished (Stream.Cons _) = raise NoParse;
89207753Smm
90207753Smmfun some p = maybe (fn x => if p x then SOME x else NONE);
91207753Smm
92207753Smmfun any input = some (K true) input;
93207753Smm
94207753Smm(* ------------------------------------------------------------------------- *)
95207753Smm(* Parsing whole streams.                                                    *)
96207753Smm(* ------------------------------------------------------------------------- *)
97207753Smm
98207753Smmfun fromStream parser input =
99207753Smm    let
100207753Smm      val (res,_) = (parser ++ finished >> fst) input
101207753Smm    in
102207753Smm      res
103207753Smm    end;
104207753Smm
105207753Smmfun fromList parser l = fromStream parser (Stream.fromList l);
106207753Smm
107207753Smmfun everything parser =
108207753Smm    let
109207753Smm      fun parserOption input =
110207753Smm          SOME (parser input)
111207753Smm          handle e as NoParse => if Stream.null input then NONE else raise e
112207753Smm
113207753Smm      fun parserList input =
114207753Smm          case parserOption input of
115207753Smm            NONE => Stream.Nil
116207753Smm          | SOME (result,input) =>
117207753Smm            Stream.append (Stream.fromList result) (fn () => parserList input)
118207753Smm    in
119207753Smm      parserList
120207753Smm    end;
121207753Smm
122207753Smm(* ------------------------------------------------------------------------- *)
123207753Smm(* Parsing lines of text.                                                    *)
124207753Smm(* ------------------------------------------------------------------------- *)
125207753Smm
126207753Smmfun initialize {lines} =
127207753Smm    let
128207753Smm      val lastLine = ref (~1,"","","")
129207753Smm
130207753Smm      val chars =
131207753Smm          let
132207753Smm            fun saveLast line =
133207753Smm                let
134207753Smm                  val ref (n,_,l2,l3) = lastLine
135207753Smm                  val () = lastLine := (n + 1, l2, l3, line)
136207753Smm                in
137207753Smm                  String.explode line
138207753Smm                end
139207753Smm          in
140207753Smm            Stream.memoize (Stream.map saveLast lines)
141207753Smm          end
142207753Smm
143207753Smm      fun parseErrorLocation () =
144207753Smm          let
145207753Smm            val ref (n,l1,l2,l3) = lastLine
146207753Smm          in
147207753Smm            (if n <= 0 then "at start"
148207753Smm             else "around line " ^ Int.toString n) ^
149207753Smm            chomp (":\n" ^ l1 ^ l2 ^ l3)
150207753Smm          end
151207753Smm    in
152207753Smm      {chars = chars,
153207753Smm       parseErrorLocation = parseErrorLocation}
154207753Smm    end;
155207753Smm
156207753Smmfun exactChar (c : char) = some (equal c) >> K ();
157207753Smm
158207753Smmfun exactCharList cs =
159207753Smm    case cs of
160207753Smm      [] => nothing
161207753Smm    | c :: cs => (exactChar c ++ exactCharList cs) >> snd;
162207753Smm
163207753Smmfun exactString s = exactCharList (String.explode s);
164207753Smm
165207753Smmfun escapeString {escape} =
166207753Smm    let
167207753Smm      fun isEscape c = mem c escape
168207753Smm
169207753Smm      fun isNormal c =
170207753Smm          case c of
171207753Smm            #"\\" => false
172207753Smm          | #"\n" => false
173207753Smm          | #"\t" => false
174207753Smm          | _ => not (isEscape c)
175207753Smm
176207753Smm      val escapeParser =
177207753Smm          (exactChar #"\\" >> K #"\\") ||
178207753Smm          (exactChar #"n" >> K #"\n") ||
179207753Smm          (exactChar #"t" >> K #"\t") ||
180207753Smm          some isEscape
181207753Smm
182207753Smm      val charParser =
183207753Smm          ((exactChar #"\\" ++ escapeParser) >> snd) ||
184207753Smm          some isNormal
185207753Smm    in
186207753Smm      many charParser >> String.implode
187207753Smm    end;
188207753Smm
189207753Smmlocal
190207753Smm  val isSpace = Char.isSpace;
191207753Smm
192207753Smm  val space = some isSpace;
193207753Smmin
194207753Smm  val manySpace = many space >> K ();
195207753Smm
196207753Smm  val atLeastOneSpace = atLeastOne space >> K ();
197207753Smmend;
198207753Smm
199207753Smmfun fromString parser s = fromList parser (String.explode s);
200207753Smm
201207753Smm(* ------------------------------------------------------------------------- *)
202207753Smm(* Infix operators.                                                          *)
203207753Smm(* ------------------------------------------------------------------------- *)
204207753Smm
205207753Smmfun parseLayeredInfixes {tokens,assoc} mk tokParser subParser =
206207753Smm    let
207207753Smm      fun layerTokParser inp =
208207753Smm          let
209207753Smm            val tok_rest as (tok,_) = tokParser inp
210207753Smm          in
211207753Smm            if StringSet.member tok tokens then tok_rest
212207753Smm            else raise NoParse
213207753Smm          end
214207753Smm
215207753Smm      fun layerMk (x,txs) =
216207753Smm          case assoc of
217207753Smm            Print.LeftAssoc =>
218207753Smm            let
219207753Smm              fun inc ((t,y),z) = mk (t,z,y)
220207753Smm            in
221207753Smm              List.foldl inc x txs
222207753Smm            end
223207753Smm          | Print.NonAssoc =>
224207753Smm            (case txs of
225207753Smm               [] => x
226207753Smm             | [(t,y)] => mk (t,x,y)
227207753Smm             | _ => raise NoParse)
228207753Smm          | Print.RightAssoc =>
229207753Smm            (case List.rev txs of
230207753Smm               [] => x
231207753Smm             | tx :: txs =>
232207753Smm               let
233207753Smm                 fun inc ((t,y),(u,z)) = (t, mk (u,y,z))
234207753Smm
235207753Smm                 val (t,y) = List.foldl inc tx txs
236207753Smm               in
237207753Smm                 mk (t,x,y)
238207753Smm               end)
239207753Smm
240207753Smm      val layerParser = subParser ++ many (layerTokParser ++ subParser)
241207753Smm    in
242207753Smm      layerParser >> layerMk
243207753Smm    end;
244207753Smm
245207753Smmfun parseInfixes ops =
246207753Smm    let
247207753Smm      val layeredOps = Print.layerInfixes ops
248207753Smm
249207753Smm      val iparsers = List.map parseLayeredInfixes layeredOps
250207753Smm    in
251207753Smm      fn mk => fn tokParser => fn subParser =>
252207753Smm         List.foldr (fn (p,sp) => p mk tokParser sp) subParser iparsers
253207753Smm    end;
254207753Smm
255207753Smm(* ------------------------------------------------------------------------- *)
256207753Smm(* Quotations.                                                               *)
257207753Smm(* ------------------------------------------------------------------------- *)
258207753Smm
259207753Smmtype 'a quotation = 'a frag list;
260207753Smm
261207753Smmfun parseQuotation printer parser quote =
262207753Smm  let
263207753Smm    fun expand (QUOTE q, s) = s ^ q
264207753Smm      | expand (ANTIQUOTE a, s) = s ^ printer a
265207753Smm
266207753Smm    val string = List.foldl expand "" quote
267207753Smm  in
268207753Smm    parser string
269207753Smm  end;
270207753Smm
271207753Smmend
272207753Smm