1207753Smm(* ========================================================================= *) 2207753Smm(* PARSING *) 3207753Smm(* Copyright (c) 2001 Joe Hurd, distributed under the BSD License *) 4207753Smm(* ========================================================================= *) 5207753Smm 6207753Smmstructure Parse :> Parse = 7207753Smmstruct 8207753Smm 9207753Smmopen Useful; 10207753Smm 11207753Smminfixr 9 >>++ 12207753Smminfixr 8 ++ 13207753Smminfixr 7 >> 14207753Smminfixr 6 || 15207753Smm 16207753Smm(* ------------------------------------------------------------------------- *) 17207753Smm(* A "cannot parse" exception. *) 18207753Smm(* ------------------------------------------------------------------------- *) 19207753Smm 20207753Smmexception NoParse; 21207753Smm 22207753Smm(* ------------------------------------------------------------------------- *) 23207753Smm(* Recursive descent parsing combinators. *) 24207753Smm(* ------------------------------------------------------------------------- *) 25207753Smm 26207753Smmval error : 'a -> 'b * 'a = fn _ => raise NoParse; 27207753Smm 28207753Smmfun op ++ (parser1,parser2) input = 29207753Smm let 30207753Smm val (result1,input) = parser1 input 31207753Smm val (result2,input) = parser2 input 32207753Smm in 33207753Smm ((result1,result2),input) 34207753Smm end; 35207753Smm 36207753Smmfun op >> (parser : 'a -> 'b * 'a, treatment) input = 37207753Smm let 38207753Smm val (result,input) = parser input 39207753Smm in 40207753Smm (treatment result, input) 41207753Smm end; 42207753Smm 43207753Smmfun op >>++ (parser,treatment) input = 44207753Smm let 45207753Smm val (result,input) = parser input 46207753Smm in 47207753Smm treatment result input 48207753Smm end; 49207753Smm 50207753Smmfun op || (parser1,parser2) input = 51207753Smm parser1 input handle NoParse => parser2 input; 52207753Smm 53207753Smmfun first [] _ = raise NoParse 54207753Smm | first (parser :: parsers) input = (parser || first parsers) input; 55207753Smm 56207753Smmfun mmany parser state input = 57207753Smm let 58207753Smm val (state,input) = parser state input 59207753Smm in 60207753Smm mmany parser state input 61207753Smm end 62207753Smm handle NoParse => (state,input); 63207753Smm 64207753Smmfun many parser = 65207753Smm let 66207753Smm fun sparser l = parser >> (fn x => x :: l) 67207753Smm in 68207753Smm mmany sparser [] >> List.rev 69207753Smm end; 70207753Smm 71207753Smmfun atLeastOne p = (p ++ many p) >> op::; 72207753Smm 73207753Smmfun nothing input = ((),input); 74207753Smm 75207753Smmfun optional p = (p >> SOME) || (nothing >> K NONE); 76207753Smm 77207753Smm(* ------------------------------------------------------------------------- *) 78207753Smm(* Stream-based parsers. *) 79207753Smm(* ------------------------------------------------------------------------- *) 80207753Smm 81207753Smmtype ('a,'b) parser = 'a Stream.stream -> 'b * 'a Stream.stream 82207753Smm 83207753Smmfun maybe p Stream.Nil = raise NoParse 84207753Smm | maybe p (Stream.Cons (h,t)) = 85207753Smm case p h of SOME r => (r, t ()) | NONE => raise NoParse; 86207753Smm 87207753Smmfun finished Stream.Nil = ((), Stream.Nil) 88207753Smm | finished (Stream.Cons _) = raise NoParse; 89207753Smm 90207753Smmfun some p = maybe (fn x => if p x then SOME x else NONE); 91207753Smm 92207753Smmfun any input = some (K true) input; 93207753Smm 94207753Smm(* ------------------------------------------------------------------------- *) 95207753Smm(* Parsing whole streams. *) 96207753Smm(* ------------------------------------------------------------------------- *) 97207753Smm 98207753Smmfun fromStream parser input = 99207753Smm let 100207753Smm val (res,_) = (parser ++ finished >> fst) input 101207753Smm in 102207753Smm res 103207753Smm end; 104207753Smm 105207753Smmfun fromList parser l = fromStream parser (Stream.fromList l); 106207753Smm 107207753Smmfun everything parser = 108207753Smm let 109207753Smm fun parserOption input = 110207753Smm SOME (parser input) 111207753Smm handle e as NoParse => if Stream.null input then NONE else raise e 112207753Smm 113207753Smm fun parserList input = 114207753Smm case parserOption input of 115207753Smm NONE => Stream.Nil 116207753Smm | SOME (result,input) => 117207753Smm Stream.append (Stream.fromList result) (fn () => parserList input) 118207753Smm in 119207753Smm parserList 120207753Smm end; 121207753Smm 122207753Smm(* ------------------------------------------------------------------------- *) 123207753Smm(* Parsing lines of text. *) 124207753Smm(* ------------------------------------------------------------------------- *) 125207753Smm 126207753Smmfun initialize {lines} = 127207753Smm let 128207753Smm val lastLine = ref (~1,"","","") 129207753Smm 130207753Smm val chars = 131207753Smm let 132207753Smm fun saveLast line = 133207753Smm let 134207753Smm val ref (n,_,l2,l3) = lastLine 135207753Smm val () = lastLine := (n + 1, l2, l3, line) 136207753Smm in 137207753Smm String.explode line 138207753Smm end 139207753Smm in 140207753Smm Stream.memoize (Stream.map saveLast lines) 141207753Smm end 142207753Smm 143207753Smm fun parseErrorLocation () = 144207753Smm let 145207753Smm val ref (n,l1,l2,l3) = lastLine 146207753Smm in 147207753Smm (if n <= 0 then "at start" 148207753Smm else "around line " ^ Int.toString n) ^ 149207753Smm chomp (":\n" ^ l1 ^ l2 ^ l3) 150207753Smm end 151207753Smm in 152207753Smm {chars = chars, 153207753Smm parseErrorLocation = parseErrorLocation} 154207753Smm end; 155207753Smm 156207753Smmfun exactChar (c : char) = some (equal c) >> K (); 157207753Smm 158207753Smmfun exactCharList cs = 159207753Smm case cs of 160207753Smm [] => nothing 161207753Smm | c :: cs => (exactChar c ++ exactCharList cs) >> snd; 162207753Smm 163207753Smmfun exactString s = exactCharList (String.explode s); 164207753Smm 165207753Smmfun escapeString {escape} = 166207753Smm let 167207753Smm fun isEscape c = mem c escape 168207753Smm 169207753Smm fun isNormal c = 170207753Smm case c of 171207753Smm #"\\" => false 172207753Smm | #"\n" => false 173207753Smm | #"\t" => false 174207753Smm | _ => not (isEscape c) 175207753Smm 176207753Smm val escapeParser = 177207753Smm (exactChar #"\\" >> K #"\\") || 178207753Smm (exactChar #"n" >> K #"\n") || 179207753Smm (exactChar #"t" >> K #"\t") || 180207753Smm some isEscape 181207753Smm 182207753Smm val charParser = 183207753Smm ((exactChar #"\\" ++ escapeParser) >> snd) || 184207753Smm some isNormal 185207753Smm in 186207753Smm many charParser >> String.implode 187207753Smm end; 188207753Smm 189207753Smmlocal 190207753Smm val isSpace = Char.isSpace; 191207753Smm 192207753Smm val space = some isSpace; 193207753Smmin 194207753Smm val manySpace = many space >> K (); 195207753Smm 196207753Smm val atLeastOneSpace = atLeastOne space >> K (); 197207753Smmend; 198207753Smm 199207753Smmfun fromString parser s = fromList parser (String.explode s); 200207753Smm 201207753Smm(* ------------------------------------------------------------------------- *) 202207753Smm(* Infix operators. *) 203207753Smm(* ------------------------------------------------------------------------- *) 204207753Smm 205207753Smmfun parseLayeredInfixes {tokens,assoc} mk tokParser subParser = 206207753Smm let 207207753Smm fun layerTokParser inp = 208207753Smm let 209207753Smm val tok_rest as (tok,_) = tokParser inp 210207753Smm in 211207753Smm if StringSet.member tok tokens then tok_rest 212207753Smm else raise NoParse 213207753Smm end 214207753Smm 215207753Smm fun layerMk (x,txs) = 216207753Smm case assoc of 217207753Smm Print.LeftAssoc => 218207753Smm let 219207753Smm fun inc ((t,y),z) = mk (t,z,y) 220207753Smm in 221207753Smm List.foldl inc x txs 222207753Smm end 223207753Smm | Print.NonAssoc => 224207753Smm (case txs of 225207753Smm [] => x 226207753Smm | [(t,y)] => mk (t,x,y) 227207753Smm | _ => raise NoParse) 228207753Smm | Print.RightAssoc => 229207753Smm (case List.rev txs of 230207753Smm [] => x 231207753Smm | tx :: txs => 232207753Smm let 233207753Smm fun inc ((t,y),(u,z)) = (t, mk (u,y,z)) 234207753Smm 235207753Smm val (t,y) = List.foldl inc tx txs 236207753Smm in 237207753Smm mk (t,x,y) 238207753Smm end) 239207753Smm 240207753Smm val layerParser = subParser ++ many (layerTokParser ++ subParser) 241207753Smm in 242207753Smm layerParser >> layerMk 243207753Smm end; 244207753Smm 245207753Smmfun parseInfixes ops = 246207753Smm let 247207753Smm val layeredOps = Print.layerInfixes ops 248207753Smm 249207753Smm val iparsers = List.map parseLayeredInfixes layeredOps 250207753Smm in 251207753Smm fn mk => fn tokParser => fn subParser => 252207753Smm List.foldr (fn (p,sp) => p mk tokParser sp) subParser iparsers 253207753Smm end; 254207753Smm 255207753Smm(* ------------------------------------------------------------------------- *) 256207753Smm(* Quotations. *) 257207753Smm(* ------------------------------------------------------------------------- *) 258207753Smm 259207753Smmtype 'a quotation = 'a frag list; 260207753Smm 261207753Smmfun parseQuotation printer parser quote = 262207753Smm let 263207753Smm fun expand (QUOTE q, s) = s ^ q 264207753Smm | expand (ANTIQUOTE a, s) = s ^ printer a 265207753Smm 266207753Smm val string = List.foldl expand "" quote 267207753Smm in 268207753Smm parser string 269207753Smm end; 270207753Smm 271207753Smmend 272207753Smm