1structure parse_glob :> parse_glob =
2struct
3
4open regexpMatch
5
6type sbuf = string * int
7
8datatype t = RE of regexpMatch.regexp | CHAR of char
9
10fun current(s,i) = SOME(String.sub(s,i)) handle Subscript => NONE
11fun advance (s,i) = (s, i + 1)
12fun new s = (s,0)
13
14fun negate_set cset = let
15  fun foldthis (remove, acc) = Binaryset.delete(acc, remove)
16in
17  Binaryset.foldl foldthis univ_cset cset
18end
19
20fun dot_rassociate re = let
21  val dra = dot_rassociate
22  fun merge (Dot(re1, re2)) re3 = merge re1 (merge re2 re3)
23    | merge re1             re3 = Dot(re1, re3)
24in
25  case re of
26      Not re0 => Not (dra re0)
27    | Sum(re1, re2) => Sum(dra re1, dra re2)
28    | And (re1, re2) => And(dra re1, dra re2)
29    | Dot(re1, re2) => merge re1 (dra re2)
30    | Star re0 => Star (dra re0)
31    | _ => re
32end
33
34infix >-
35fun (st >- f) s =
36    case st s of
37        NONE => NONE
38      | SOME(r, s') => f r s'
39
40infix ++
41fun (st1 ++ st2) s =
42    case st1 s of
43        NONE => st2 s
44      | x => x
45
46fun lift st f s =
47    case st s of
48        NONE => NONE
49      | SOME(r, s') => SOME(f r, s')
50
51fun return s sb = SOME(s,sb)
52fun sing c = Symbs (Binaryset.singleton Char.compare c)
53
54fun toRegexp1 (CHAR c) = sing c
55  | toRegexp1 (RE re) = re
56
57fun toRegexp [] = Epsilon
58  | toRegexp [e] = toRegexp1 e
59  | toRegexp (e :: rest) = Dot(toRegexp1 e, toRegexp rest)
60
61fun ADVANCE sb = SOME((), advance sb)
62
63fun CURRENT sb =
64    Option.map (fn c => (c, sb)) (current sb)
65
66fun oncurrent f = CURRENT >- f
67
68fun grab_word sl (s, i) = let
69  val rest = String.extract(s, i, NONE)
70in
71  case List.find (fn p => String.isPrefix p rest) sl of
72      NONE => NONE
73    | SOME p => SOME(p, (s, i + size p))
74end
75
76infix >>
77fun (st1 >> st2) = st1 >- (fn _ => st2)
78fun fail s = NONE
79
80fun consume v sb = SOME(v, advance sb)
81
82(* ignores LC_COLLATE, thereby pretending it's "C" *)
83fun range_finisher start = let
84  fun doit c =
85      case c of
86          #"]" => return (Binaryset.addList(empty_cset, [start, #"-"]))
87        | _ => let
88          val c_i = Char.ord c and start_i = Char.ord start
89        in
90          if c_i >= start_i then
91            consume
92              (Binaryset.addList(empty_cset,
93                                 List.tabulate(c_i - start_i + 1,
94                                               fn i => Char.chr (i + start_i))))
95          else
96            consume empty_cset
97        end
98in
99  oncurrent doit
100end
101
102val classnames =
103    map (fn s => ":" ^ s ^ ":]")
104        ["alnum", "alpha", "ascii", "blank", "cntrl",
105         "digit", "graph", "lower", "print", "punct",
106         "space", "upper", "xdigit", "word"]
107
108val maybe_named_classes = let
109  fun check s =
110      case s of
111          ":alnum:]" => return POSIX.alnum_set
112        | ":alpha:]" => return POSIX.alpha_set
113        | ":ascii:]" => return POSIX.ascii_set
114        | ":blank:]" => return POSIX.blank_set
115        | ":cntrl:]" => return POSIX.cntrl_set
116        | ":digit:]" => return POSIX.digit_set
117        | ":graph:]" => return POSIX.graph_set
118        | ":lower:]" => return POSIX.lower_set
119        | ":print:]" => return POSIX.print_set
120        | ":punct:]" => return POSIX.punct_set
121        | ":space:]" => return POSIX.space_set
122        | ":up1per:]" => return POSIX.upper_set
123        | ":xdigit:]" => return POSIX.xdigit_set
124        | ":word:]" => return POSIX.word_set
125        | _ => return empty_cset
126in
127  grab_word classnames >- check
128end
129
130fun MIF st1 stf2 st3 sb =
131    case st1 sb of
132        NONE => st3 sb
133      | SOME (r, sb') => stf2 r sb'
134
135fun parse_cset_nextchar prev = let
136  fun meld s1 = lift parse_cset_comp (fn s2 => Binaryset.union(s1, s2))
137  fun default c =
138      ADVANCE >> lift (parse_cset_nextchar c) (fn s => Binaryset.add(s, prev))
139  fun doit c =
140      case (prev, c) of
141          (_, #"-") => ADVANCE >> (range_finisher prev >- meld)
142        | (_, #"]") => consume (Binaryset.singleton Char.compare prev)
143        | (#"[", #":") => MIF maybe_named_classes meld (default c)
144        | _ => default c
145in
146  oncurrent doit
147end
148and parse_cset_comp sb = let
149  fun doit c =
150      case c of
151          #"]" => consume empty_cset
152        | _ => ADVANCE >> parse_cset_nextchar c
153in
154  oncurrent doit sb
155end
156
157val parse_cset_comps1 = oncurrent (fn c => ADVANCE >> parse_cset_nextchar c)
158
159val parse_cset = let
160  fun doit c =
161      case c of
162          #"^" => ADVANCE >> lift parse_cset_comps1 (Symbs o negate_set)
163        | #"!" => ADVANCE >> lift parse_cset_comps1 (Symbs o negate_set)
164        | _ => lift parse_cset_comps1 Symbs
165in
166  oncurrent doit
167end
168
169val parse_component = let
170  fun doit c =
171      case c of
172          #"*" => consume (RE (Not (Symbs empty_cset)))
173        | #"?" => consume (RE (Symbs univ_cset))
174        | #"[" => (ADVANCE >> lift parse_cset RE) ++ consume (CHAR #"[")
175        | #"\\" => ADVANCE >>
176                   (oncurrent (consume o CHAR) ++ return (CHAR #"\\"))
177        | _  => consume (CHAR c)
178in
179  oncurrent doit
180end
181
182fun parse_components sb = let
183  fun meld r = lift parse_components (fn rs => r::rs)
184in
185  (parse_component >- meld) ++ return []
186end sb
187
188fun parse_glob_components s =
189    case parse_components (new s) of
190        SOME (l, _) => l
191      | NONE => raise Fail "parse_glob_components failed"
192
193fun parse_glob s = toRegexp (parse_glob_components s)
194
195end
196