1(*
2 * @TAG(OTHER_PRINCETON_OSS)
3 *)
4(* Binaryset -- modified for Moscow ML
5 * from SML/NJ library v. 0.2
6 *
7 * COPYRIGHT (c) 1993 by AT&T Bell Laboratories.
8 * See file mosml/copyrght/copyrght.att for details.
9 *
10 * This code was adapted from Stephen Adams' binary tree implementation
11 * of applicative integer sets.
12 *
13 *    Copyright 1992 Stephen Adams.
14 *
15 *    This software may be used freely provided that:
16 *      1. This copyright notice is attached to any copy, derived work,
17 *         or work including all or part of this software.
18 *      2. Any derived work must contain a prominent notice stating that
19 *         it has been altered from the original.
20 *
21 *   Name(s): Stephen Adams.
22 *   Department, Institution: Electronics & Computer Science,
23 *      University of Southampton
24 *   Address:  Electronics & Computer Science
25 *             University of Southampton
26 *         Southampton  SO9 5NH
27 *         Great Britian
28 *   E-mail:   sra@ecs.soton.ac.uk
29 *
30 *   Comments:
31 *
32 *     1.  The implementation is based on Binary search trees of Bounded
33 *         Balance, similar to Nievergelt & Reingold, SIAM J. Computing
34 *         2(1), March 1973.  The main advantage of these trees is that
35 *         they keep the size of the tree in the node, giving a constant
36 *         time size operation.
37 *
38 *     2.  The bounded balance criterion is simpler than N&R's alpha.
39 *         Simply, one subtree must not have more than `weight' times as
40 *         many elements as the opposite subtree.  Rebalancing is
41 *         guaranteed to reinstate the criterion for weight>2.23, but
42 *         the occasional incorrect behaviour for weight=2 is not
43 *         detrimental to performance.
44 *
45 *     3.  There are two implementations of union.  The default,
46 *         hedge_union, is much more complex and usually 20% faster.  I
47 *         am not sure that the performance increase warrants the
48 *         complexity (and time it took to write), but I am leaving it
49 *         in for the competition.  It is derived from the original
50 *         union by replacing the split_lt(gt) operations with a lazy
51 *         version. The `obvious' version is called old_union.
52 *
53 *     4.  Most time is spent in T', the rebalancing constructor.  If my
54 *         understanding of the output of *<file> in the sml batch
55 *         compiler is correct then the code produced by NJSML 0.75
56 *         (sparc) for the final case is very disappointing.  Most
57 *         invocations fall through to this case and most of these cases
58 *         fall to the else part, i.e. the plain contructor,
59 *         T(v,ln+rn+1,l,r).  The poor code allocates a 16 word vector
60 *         and saves lots of registers into it.  In the common case it
61 *         then retrieves a few of the registers and allocates the 5
62 *         word T node.  The values that it retrieves were live in
63 *         registers before the massive save.
64 *
65 *   Modified to functor to support general ordered values
66 *)
67
68signature BINARYSET =
69sig
70type 'item set
71
72exception NotFound
73
74val empty        : ('item * 'item -> order) -> 'item set
75val singleton    : ('item * 'item -> order) -> 'item -> 'item set
76val add          : 'item set * 'item -> 'item set
77val addList      : 'item set * 'item list -> 'item set
78val retrieve     : 'item set * 'item -> 'item
79val peek         : 'item set * 'item -> 'item option
80val isEmpty      : 'item set -> bool
81val equal        : 'item set * 'item set -> bool
82val isSubset     : 'item set * 'item set -> bool
83val member       : 'item set * 'item -> bool
84val delete       : 'item set * 'item -> 'item set
85val numItems     : 'item set ->  int
86val union        : 'item set * 'item set -> 'item set
87val intersection : 'item set * 'item set -> 'item set
88val difference   : 'item set * 'item set -> 'item set
89val listItems    : 'item set -> 'item list
90val app          : ('item -> unit) -> 'item set -> unit
91val revapp       : ('item -> unit) -> 'item set -> unit
92val foldr        : ('item * 'b -> 'b) -> 'b -> 'item set -> 'b
93val foldl        : ('item * 'b -> 'b) -> 'b -> 'item set -> 'b
94val find         : ('item -> bool) -> 'item set -> 'item option
95end
96
97(*
98   ['item set] is the type of sets of ordered elements of type 'item.
99   The ordering relation on the elements is used in the representation
100   of the set.  The result of combining two sets with different
101   underlying ordering relations is undefined.  The implementation
102   uses ordered balanced binary trees.
103
104   [empty ordr] creates a new empty set with the given ordering
105   relation.
106
107   [singleton ordr i] creates the singleton set containing i, with the
108   given ordering relation.
109
110   [add(s, i)] adds item i to set s.
111
112   [addList(s, xs)] adds all items from the list xs to the set s.
113
114   [retrieve(s, i)] returns i if it is in s; raises NotFound otherwise.
115
116   [peek(s, i)] returns SOME i if i is in s; returns NONE otherwise.
117
118   [isEmpty s] returns true if and only if the set is empty.
119
120   [equal(s1, s2)] returns true if and only if the two sets have the
121   same elements.
122
123   [isSubset(s1, s2)] returns true if and only if s1 is a subset of s2.
124
125   [member(s, i)] returns true if and only if i is in s.
126
127   [delete(s, i)] removes item i from s.  Raises NotFound if i is not in s.
128
129   [numItems s] returns the number of items in set s.
130
131   [union(s1, s2)] returns the union of s1 and s2.
132
133   [intersection(s1, s2)] returns the intersectionof s1 and s2.
134
135   [difference(s1, s2)] returns the difference between s1 and s2 (that
136   is, the set of elements in s1 but not in s2).
137
138   [listItems s] returns a list of the items in set s, in increasing
139   order.
140
141   [app f s] applies function f to the elements of s, in increasing
142   order.
143
144   [revapp f s] applies function f to the elements of s, in decreasing
145   order.
146
147   [foldl f e s] applies the folding function f to the entries of the
148   set in increasing order.
149
150   [foldr f e s] applies the folding function f to the entries of the
151   set in decreasing order.
152
153   [find p s] returns SOME i, where i is an item in s which satisfies
154   p, if one exists; otherwise returns NONE.
155*)
156
157
158structure Binaryset :> BINARYSET =
159struct
160
161datatype 'item set = SET of ('item * 'item -> order) * 'item tree
162and 'item tree =
163    E
164  | T of {elt   : 'item,
165          cnt   : int,
166          left  : 'item tree,
167          right : 'item tree}
168
169fun treeSize E = 0
170  | treeSize (T{cnt,...}) = cnt
171
172fun numItems (SET(_, t)) = treeSize t
173
174fun isEmpty (SET(_, E)) = true
175  | isEmpty _           = false
176
177fun mkT(v,n,l,r) = T{elt=v,cnt=n,left=l,right=r}
178
179(* N(v,l,r) = T(v,1+treeSize(l)+treeSize(r),l,r) *)
180fun N(v,E,E) = mkT(v,1,E,E)
181  | N(v,E,r as T{cnt=n,...}) = mkT(v,n+1,E,r)
182  | N(v,l as T{cnt=n,...}, E) = mkT(v,n+1,l,E)
183  | N(v,l as T{cnt=n,...}, r as T{cnt=m,...}) = mkT(v,n+m+1,l,r)
184
185fun single_L (a,x,T{elt=b,left=y,right=z,...}) = N(b,N(a,x,y),z)
186  | single_L _ = raise Match
187fun single_R (b,T{elt=a,left=x,right=y,...},z) = N(a,x,N(b,y,z))
188  | single_R _ = raise Match
189fun double_L (a,w,T{elt=c,left=T{elt=b,left=x,right=y,...},right=z,...}) =
190      N(b,N(a,w,x),N(c,y,z))
191  | double_L _ = raise Match
192fun double_R (c,T{elt=a,left=w,right=T{elt=b,left=x,right=y,...},...},z) =
193      N(b,N(a,w,x),N(c,y,z))
194  | double_R _ = raise Match
195
196(*
197**  val weight = 3
198**  fun wt i = weight * i
199*)
200fun wt (i : int) = i + i + i
201
202fun T' (v,E,E) = mkT(v,1,E,E)
203  | T' (v,E,r as T{left=E,right=E,...}) = mkT(v,2,E,r)
204  | T' (v,l as T{left=E,right=E,...},E) = mkT(v,2,l,E)
205
206  | T' (p as (_,E,T{left=T _,right=E,...})) = double_L p
207  | T' (p as (_,T{left=E,right=T _,...},E)) = double_R p
208
209    (* these cases almost never happen with small weight*)
210  | T' (p as (_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) =
211        if ln<rn then single_L p else double_L p
212  | T' (p as (_,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...},E)) =
213        if ln>rn then single_R p else double_R p
214
215  | T' (p as (_,E,T{left=E,...})) = single_L p
216  | T' (p as (_,T{right=E,...},E)) = single_R p
217
218  | T' (p as (v,l as T{elt=lv,cnt=ln,left=ll,right=lr},
219          r as T{elt=rv,cnt=rn,left=rl,right=rr})) =
220      if rn >= wt ln (*right is too big*)
221        then
222          let val rln = treeSize rl
223              val rrn = treeSize rr
224          in
225            if rln < rrn then single_L p else double_L p
226          end
227      else if ln >= wt rn (*left is too big*)
228        then
229          let val lln = treeSize ll
230              val lrn = treeSize lr
231          in
232            if lrn < lln then single_R p else double_R p
233          end
234      else mkT(v,ln+rn+1,l,r)
235
236fun addt cmpKey t x =
237    let fun h E = mkT(x,1,E,E)
238          | h (T{elt=v,left=l,right=r,cnt}) =
239            case cmpKey(x,v) of
240                LESS    => T'(v, h l, r)
241              | GREATER => T'(v, l, h r)
242              | EQUAL   => mkT(x,cnt,l,r)
243    in h t end
244
245fun concat3 cmpKey E v r = addt cmpKey r v
246  | concat3 cmpKey l v E = addt cmpKey l v
247  | concat3 cmpKey (l as T{elt=v1,cnt=n1,left=l1,right=r1})
248                   v
249                   (r as T{elt=v2,cnt=n2,left=l2,right=r2}) =
250    if wt n1 < n2 then T'(v2, concat3 cmpKey l v l2, r2)
251    else if wt n2 < n1 then T'(v1, l1, concat3 cmpKey r1 v r)
252    else N(v,l,r)
253
254fun split_lt cmpKey E x = E
255  | split_lt cmpKey (T{elt=v,left=l,right=r,...}) x =
256      case cmpKey(v,x) of
257        GREATER => split_lt cmpKey l x
258      | LESS    => concat3 cmpKey l v (split_lt cmpKey r x)
259      | _ => l
260
261fun split_gt cmpKey E x = E
262  | split_gt cmpKey (T{elt=v,left=l,right=r,...}) x =
263      case cmpKey(v,x) of
264        LESS    => split_gt cmpKey r x
265      | GREATER => concat3 cmpKey (split_gt cmpKey l x) v r
266      | _       => r
267
268fun min (T{elt=v,left=E,...}) = v
269  | min (T{left=l,...}) = min l
270  | min _ = raise Match
271
272fun delmin (T{left=E,right=r,...}) = r
273  | delmin (T{elt=v,left=l,right=r,...}) = T'(v,delmin l,r)
274  | delmin _ = raise Match
275
276fun delete' (E,r) = r
277  | delete' (l,E) = l
278  | delete' (l,r) = T'(min r,l,delmin r)
279
280fun concat E s = s
281  | concat s E = s
282  | concat (t1 as T{elt=v1,cnt=n1,left=l1,right=r1})
283           (t2 as T{elt=v2,cnt=n2,left=l2,right=r2}) =
284           if wt n1 < n2 then T'(v2, concat t1 l2, r2)
285           else if wt n2 < n1 then T'(v1, l1, concat r1 t2)
286             else T'(min t2,t1, delmin t2)
287
288fun hedge_union cmpKey s E = s
289  | hedge_union cmpKey E s = s
290  | hedge_union cmpKey (T{elt=v,left=l1,right=r1,...})
291                       (s2 as T{elt=v2,left=l2,right=r2,...}) =
292    let fun trim lo hi E = E
293          | trim lo hi (s as T{elt=v,left=l,right=r,...}) =
294            if cmpKey(v,lo) = GREATER
295                then if cmpKey(v,hi) = LESS then s else trim lo hi l
296            else trim lo hi r
297
298        fun uni_bd s E _ _ = s
299          | uni_bd E (T{elt=v,left=l,right=r,...}) lo hi =
300            concat3 cmpKey (split_gt cmpKey l lo) v (split_lt cmpKey r hi)
301          | uni_bd (T{elt=v,left=l1,right=r1,...})
302                   (s2 as T{elt=v2,left=l2,right=r2,...}) lo hi =
303            concat3 cmpKey (uni_bd l1 (trim lo v s2) lo v)
304                           v (uni_bd r1 (trim v hi s2) v hi)
305          (* inv:  lo < v < hi *)
306
307    (* all the other versions of uni and trim are
308     * specializations of the above two functions with
309     *     lo=-infinity and/or hi=+infinity
310     *)
311
312        fun trim_lo _ E = E
313          | trim_lo lo (s as T{elt=v,right=r,...}) =
314            case cmpKey(v,lo) of
315                GREATER => s
316              | _       => trim_lo lo r
317
318        fun trim_hi _ E = E
319          | trim_hi hi (s as T{elt=v,left=l,...}) =
320            case cmpKey(v,hi) of
321                LESS => s
322              | _    => trim_hi hi l
323
324        fun uni_hi s E _ = s
325          | uni_hi E (T{elt=v,left=l,right=r,...}) hi =
326            concat3 cmpKey l v (split_lt cmpKey r hi)
327          | uni_hi (T{elt=v,left=l1,right=r1,...})
328                   (s2 as T{elt=v2,left=l2,right=r2,...}) hi =
329            concat3 cmpKey (uni_hi l1 (trim_hi v s2) v)
330                           v (uni_bd r1 (trim v hi s2) v hi)
331
332        fun uni_lo s E _ = s
333          | uni_lo E (T{elt=v,left=l,right=r,...}) lo =
334            concat3 cmpKey (split_gt cmpKey l lo) v r
335          | uni_lo (T{elt=v,left=l1,right=r1,...})
336                   (s2 as T{elt=v2,left=l2,right=r2,...}) lo =
337            concat3 cmpKey (uni_bd l1 (trim lo v s2) lo v)
338                           v (uni_lo r1 (trim_lo v s2) v)
339    in
340        concat3 cmpKey (uni_hi l1 (trim_hi v s2) v)
341                     v (uni_lo r1 (trim_lo v s2) v)
342    end
343
344  (* The old_union version is about 20% slower than
345   *  hedge_union in most cases
346   *)
347fun old_union _ E s2 = s2
348  | old_union _ s1 E = s1
349  | old_union cmpKey (T{elt=v,left=l,right=r,...}) s2 =
350      let val l2 = split_lt cmpKey s2 v
351          val r2 = split_gt cmpKey s2 v
352      in
353          concat3 cmpKey (old_union cmpKey l l2) v (old_union cmpKey r r2)
354      end
355
356exception NotFound
357
358fun empty cmpKey = SET(cmpKey, E)
359
360fun singleton cmpKey x = SET(cmpKey, T{elt=x,cnt=1,left=E,right=E})
361
362fun addList (SET(cmpKey, t), l) =
363    SET(cmpKey, List.foldl (fn (i,s) => addt cmpKey s i) t l)
364
365fun add (SET(cmpKey, t), x) = SET(cmpKey, addt cmpKey t x)
366
367fun peekt cmpKey t x =
368    let fun pk E = NONE
369          | pk (T{elt=v,left=l,right=r,...}) =
370            case cmpKey(x,v) of
371                LESS    => pk l
372              | GREATER => pk r
373              | _       => SOME v
374    in pk t end;
375
376fun membert cmpKey t x =
377    case peekt cmpKey t x of NONE => false | _ => true
378
379fun peek (SET(cmpKey, t), x) = peekt cmpKey t x;
380fun member arg = case peek arg of NONE => false | _ => true
381
382local
383    (* true if every item in t is in t' *)
384  fun treeIn cmpKey (t,t') =
385      let fun isIn E = true
386            | isIn (T{elt,left=E,right=E,...}) =
387              membert cmpKey t' elt
388            | isIn (T{elt,left,right=E,...}) =
389              membert cmpKey t' elt andalso isIn left
390            | isIn (T{elt,left=E,right,...}) =
391              membert cmpKey t' elt  andalso isIn right
392            | isIn (T{elt,left,right,...}) =
393              membert cmpKey t' elt andalso isIn left andalso isIn right
394      in isIn t end
395in
396fun isSubset (SET(_, E),_) = true
397  | isSubset (_,SET(_, E)) = false
398  | isSubset (SET(cmpKey, t as T{cnt=n,...}),
399              SET(_,      t' as T{cnt=n',...})) =
400    (n<=n') andalso treeIn cmpKey (t,t')
401
402fun equal (SET(_,E), SET(_, E)) = true
403  | equal (SET(cmpKey, t as T{cnt=n,...}),
404           SET(_,      t' as T{cnt=n',...})) =
405    (n=n') andalso treeIn cmpKey (t,t')
406  | equal _ = false
407end
408
409fun retrieve arg =
410    case peek arg of NONE => raise NotFound | SOME v => v
411
412fun delete (SET(cmpKey, t), x) =
413    let fun delt E = raise NotFound
414          | delt (t as T{elt=v,left=l,right=r,...}) =
415            case cmpKey(x,v) of
416                LESS    => T'(v, delt l, r)
417              | GREATER => T'(v, l, delt r)
418              | _       => delete'(l,r)
419    in SET(cmpKey, delt t) end;
420
421fun union (SET(cmpKey, t1), SET(_, t2)) =
422    SET(cmpKey, hedge_union cmpKey t1 t2)
423
424fun intersection (SET(cmpKey, t1), SET(_, t2)) =
425    let fun intert E _ = E
426          | intert _ E = E
427          | intert t (T{elt=v,left=l,right=r,...}) =
428            let val l2 = split_lt cmpKey t v
429                val r2 = split_gt cmpKey t v
430            in
431                case peekt cmpKey t v of
432                    NONE => concat (intert l2 l) (intert r2 r)
433                  | _    => concat3 cmpKey (intert l2 l) v (intert r2 r)
434            end
435    in SET(cmpKey, intert t1 t2) end
436
437fun difference (SET(cmpKey, t1), SET(_, t2)) =
438    let fun difft E s = E
439          | difft s E  = s
440          | difft s (T{elt=v,left=l,right=r,...}) =
441            let val l2 = split_lt cmpKey s v
442                val r2 = split_gt cmpKey s v
443            in
444                concat (difft l2 l) (difft r2 r)
445            end
446    in SET(cmpKey, difft t1 t2) end
447
448fun foldr f b (SET(_, t)) =
449    let fun foldf E b = b
450          | foldf (T{elt,left,right,...}) b =
451            foldf left (f(elt, foldf right b))
452    in foldf t b end
453
454fun foldl f b (SET(_, t)) =
455    let fun foldf E b = b
456          | foldf (T{elt,left,right,...}) b =
457            foldf right (f(elt, foldf left b))
458    in foldf t b end
459
460fun listItems set = foldr (op::) [] set
461
462fun revapp f (SET(_, t)) =
463    let fun apply E = ()
464          | apply (T{elt,left,right,...}) =
465            (apply right; ignore (f elt); apply left)
466    in apply t end
467
468fun app f (SET(_, t)) =
469    let fun apply E = ()
470          | apply (T{elt,left,right,...}) =
471            (apply left; ignore (f elt); apply right)
472    in apply t end
473
474fun find p (SET(_, t)) =
475    let fun findt E = NONE
476          | findt (T{elt,left,right,...}) =
477            if p elt then SOME elt
478            else case findt left of
479                NONE => findt right
480              | a    => a
481    in findt t end
482
483end;
484