1structure parse_glob :> parse_glob = 2struct 3 4open regexpMatch 5 6type sbuf = string * int 7 8datatype t = RE of regexpMatch.regexp | CHAR of char 9 10fun current(s,i) = SOME(String.sub(s,i)) handle Subscript => NONE 11fun advance (s,i) = (s, i + 1) 12fun new s = (s,0) 13 14fun negate_set cset = let 15 fun foldthis (remove, acc) = Binaryset.delete(acc, remove) 16in 17 Binaryset.foldl foldthis univ_cset cset 18end 19 20fun dot_rassociate re = let 21 val dra = dot_rassociate 22 fun merge (Dot(re1, re2)) re3 = merge re1 (merge re2 re3) 23 | merge re1 re3 = Dot(re1, re3) 24in 25 case re of 26 Not re0 => Not (dra re0) 27 | Sum(re1, re2) => Sum(dra re1, dra re2) 28 | And (re1, re2) => And(dra re1, dra re2) 29 | Dot(re1, re2) => merge re1 (dra re2) 30 | Star re0 => Star (dra re0) 31 | _ => re 32end 33 34infix >- 35fun (st >- f) s = 36 case st s of 37 NONE => NONE 38 | SOME(r, s') => f r s' 39 40infix ++ 41fun (st1 ++ st2) s = 42 case st1 s of 43 NONE => st2 s 44 | x => x 45 46fun lift st f s = 47 case st s of 48 NONE => NONE 49 | SOME(r, s') => SOME(f r, s') 50 51fun return s sb = SOME(s,sb) 52fun sing c = Symbs (Binaryset.singleton Char.compare c) 53 54fun toRegexp1 (CHAR c) = sing c 55 | toRegexp1 (RE re) = re 56 57fun toRegexp [] = Epsilon 58 | toRegexp [e] = toRegexp1 e 59 | toRegexp (e :: rest) = Dot(toRegexp1 e, toRegexp rest) 60 61fun ADVANCE sb = SOME((), advance sb) 62 63fun CURRENT sb = 64 Option.map (fn c => (c, sb)) (current sb) 65 66fun oncurrent f = CURRENT >- f 67 68fun grab_word sl (s, i) = let 69 val rest = String.extract(s, i, NONE) 70in 71 case List.find (fn p => String.isPrefix p rest) sl of 72 NONE => NONE 73 | SOME p => SOME(p, (s, i + size p)) 74end 75 76infix >> 77fun (st1 >> st2) = st1 >- (fn _ => st2) 78fun fail s = NONE 79 80fun consume v sb = SOME(v, advance sb) 81 82(* ignores LC_COLLATE, thereby pretending it's "C" *) 83fun range_finisher start = let 84 fun doit c = 85 case c of 86 #"]" => return (Binaryset.addList(empty_cset, [start, #"-"])) 87 | _ => let 88 val c_i = Char.ord c and start_i = Char.ord start 89 in 90 if c_i >= start_i then 91 consume 92 (Binaryset.addList(empty_cset, 93 List.tabulate(c_i - start_i + 1, 94 fn i => Char.chr (i + start_i)))) 95 else 96 consume empty_cset 97 end 98in 99 oncurrent doit 100end 101 102val classnames = 103 map (fn s => ":" ^ s ^ ":]") 104 ["alnum", "alpha", "ascii", "blank", "cntrl", 105 "digit", "graph", "lower", "print", "punct", 106 "space", "upper", "xdigit", "word"] 107 108val maybe_named_classes = let 109 fun check s = 110 case s of 111 ":alnum:]" => return POSIX.alnum_set 112 | ":alpha:]" => return POSIX.alpha_set 113 | ":ascii:]" => return POSIX.ascii_set 114 | ":blank:]" => return POSIX.blank_set 115 | ":cntrl:]" => return POSIX.cntrl_set 116 | ":digit:]" => return POSIX.digit_set 117 | ":graph:]" => return POSIX.graph_set 118 | ":lower:]" => return POSIX.lower_set 119 | ":print:]" => return POSIX.print_set 120 | ":punct:]" => return POSIX.punct_set 121 | ":space:]" => return POSIX.space_set 122 | ":up1per:]" => return POSIX.upper_set 123 | ":xdigit:]" => return POSIX.xdigit_set 124 | ":word:]" => return POSIX.word_set 125 | _ => return empty_cset 126in 127 grab_word classnames >- check 128end 129 130fun MIF st1 stf2 st3 sb = 131 case st1 sb of 132 NONE => st3 sb 133 | SOME (r, sb') => stf2 r sb' 134 135fun parse_cset_nextchar prev = let 136 fun meld s1 = lift parse_cset_comp (fn s2 => Binaryset.union(s1, s2)) 137 fun default c = 138 ADVANCE >> lift (parse_cset_nextchar c) (fn s => Binaryset.add(s, prev)) 139 fun doit c = 140 case (prev, c) of 141 (_, #"-") => ADVANCE >> (range_finisher prev >- meld) 142 | (_, #"]") => consume (Binaryset.singleton Char.compare prev) 143 | (#"[", #":") => MIF maybe_named_classes meld (default c) 144 | _ => default c 145in 146 oncurrent doit 147end 148and parse_cset_comp sb = let 149 fun doit c = 150 case c of 151 #"]" => consume empty_cset 152 | _ => ADVANCE >> parse_cset_nextchar c 153in 154 oncurrent doit sb 155end 156 157val parse_cset_comps1 = oncurrent (fn c => ADVANCE >> parse_cset_nextchar c) 158 159val parse_cset = let 160 fun doit c = 161 case c of 162 #"^" => ADVANCE >> lift parse_cset_comps1 (Symbs o negate_set) 163 | #"!" => ADVANCE >> lift parse_cset_comps1 (Symbs o negate_set) 164 | _ => lift parse_cset_comps1 Symbs 165in 166 oncurrent doit 167end 168 169val parse_component = let 170 fun doit c = 171 case c of 172 #"*" => consume (RE (Not (Symbs empty_cset))) 173 | #"?" => consume (RE (Symbs univ_cset)) 174 | #"[" => (ADVANCE >> lift parse_cset RE) ++ consume (CHAR #"[") 175 | #"\\" => ADVANCE >> 176 (oncurrent (consume o CHAR) ++ return (CHAR #"\\")) 177 | _ => consume (CHAR c) 178in 179 oncurrent doit 180end 181 182fun parse_components sb = let 183 fun meld r = lift parse_components (fn rs => r::rs) 184in 185 (parse_component >- meld) ++ return [] 186end sb 187 188fun parse_glob_components s = 189 case parse_components (new s) of 190 SOME (l, _) => l 191 | NONE => raise Fail "parse_glob_components failed" 192 193fun parse_glob s = toRegexp (parse_glob_components s) 194 195end 196