1(* 2 * @TAG(OTHER_PRINCETON_OSS) 3 *) 4(* Binaryset -- modified for Moscow ML 5 * from SML/NJ library v. 0.2 6 * 7 * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. 8 * See file mosml/copyrght/copyrght.att for details. 9 * 10 * This code was adapted from Stephen Adams' binary tree implementation 11 * of applicative integer sets. 12 * 13 * Copyright 1992 Stephen Adams. 14 * 15 * This software may be used freely provided that: 16 * 1. This copyright notice is attached to any copy, derived work, 17 * or work including all or part of this software. 18 * 2. Any derived work must contain a prominent notice stating that 19 * it has been altered from the original. 20 * 21 * Name(s): Stephen Adams. 22 * Department, Institution: Electronics & Computer Science, 23 * University of Southampton 24 * Address: Electronics & Computer Science 25 * University of Southampton 26 * Southampton SO9 5NH 27 * Great Britian 28 * E-mail: sra@ecs.soton.ac.uk 29 * 30 * Comments: 31 * 32 * 1. The implementation is based on Binary search trees of Bounded 33 * Balance, similar to Nievergelt & Reingold, SIAM J. Computing 34 * 2(1), March 1973. The main advantage of these trees is that 35 * they keep the size of the tree in the node, giving a constant 36 * time size operation. 37 * 38 * 2. The bounded balance criterion is simpler than N&R's alpha. 39 * Simply, one subtree must not have more than `weight' times as 40 * many elements as the opposite subtree. Rebalancing is 41 * guaranteed to reinstate the criterion for weight>2.23, but 42 * the occasional incorrect behaviour for weight=2 is not 43 * detrimental to performance. 44 * 45 * 3. There are two implementations of union. The default, 46 * hedge_union, is much more complex and usually 20% faster. I 47 * am not sure that the performance increase warrants the 48 * complexity (and time it took to write), but I am leaving it 49 * in for the competition. It is derived from the original 50 * union by replacing the split_lt(gt) operations with a lazy 51 * version. The `obvious' version is called old_union. 52 * 53 * 4. Most time is spent in T', the rebalancing constructor. If my 54 * understanding of the output of *<file> in the sml batch 55 * compiler is correct then the code produced by NJSML 0.75 56 * (sparc) for the final case is very disappointing. Most 57 * invocations fall through to this case and most of these cases 58 * fall to the else part, i.e. the plain contructor, 59 * T(v,ln+rn+1,l,r). The poor code allocates a 16 word vector 60 * and saves lots of registers into it. In the common case it 61 * then retrieves a few of the registers and allocates the 5 62 * word T node. The values that it retrieves were live in 63 * registers before the massive save. 64 * 65 * Modified to functor to support general ordered values 66 *) 67 68signature BINARYSET = 69sig 70type 'item set 71 72exception NotFound 73 74val empty : ('item * 'item -> order) -> 'item set 75val singleton : ('item * 'item -> order) -> 'item -> 'item set 76val add : 'item set * 'item -> 'item set 77val addList : 'item set * 'item list -> 'item set 78val retrieve : 'item set * 'item -> 'item 79val peek : 'item set * 'item -> 'item option 80val isEmpty : 'item set -> bool 81val equal : 'item set * 'item set -> bool 82val isSubset : 'item set * 'item set -> bool 83val member : 'item set * 'item -> bool 84val delete : 'item set * 'item -> 'item set 85val numItems : 'item set -> int 86val union : 'item set * 'item set -> 'item set 87val intersection : 'item set * 'item set -> 'item set 88val difference : 'item set * 'item set -> 'item set 89val listItems : 'item set -> 'item list 90val app : ('item -> unit) -> 'item set -> unit 91val revapp : ('item -> unit) -> 'item set -> unit 92val foldr : ('item * 'b -> 'b) -> 'b -> 'item set -> 'b 93val foldl : ('item * 'b -> 'b) -> 'b -> 'item set -> 'b 94val find : ('item -> bool) -> 'item set -> 'item option 95end 96 97(* 98 ['item set] is the type of sets of ordered elements of type 'item. 99 The ordering relation on the elements is used in the representation 100 of the set. The result of combining two sets with different 101 underlying ordering relations is undefined. The implementation 102 uses ordered balanced binary trees. 103 104 [empty ordr] creates a new empty set with the given ordering 105 relation. 106 107 [singleton ordr i] creates the singleton set containing i, with the 108 given ordering relation. 109 110 [add(s, i)] adds item i to set s. 111 112 [addList(s, xs)] adds all items from the list xs to the set s. 113 114 [retrieve(s, i)] returns i if it is in s; raises NotFound otherwise. 115 116 [peek(s, i)] returns SOME i if i is in s; returns NONE otherwise. 117 118 [isEmpty s] returns true if and only if the set is empty. 119 120 [equal(s1, s2)] returns true if and only if the two sets have the 121 same elements. 122 123 [isSubset(s1, s2)] returns true if and only if s1 is a subset of s2. 124 125 [member(s, i)] returns true if and only if i is in s. 126 127 [delete(s, i)] removes item i from s. Raises NotFound if i is not in s. 128 129 [numItems s] returns the number of items in set s. 130 131 [union(s1, s2)] returns the union of s1 and s2. 132 133 [intersection(s1, s2)] returns the intersectionof s1 and s2. 134 135 [difference(s1, s2)] returns the difference between s1 and s2 (that 136 is, the set of elements in s1 but not in s2). 137 138 [listItems s] returns a list of the items in set s, in increasing 139 order. 140 141 [app f s] applies function f to the elements of s, in increasing 142 order. 143 144 [revapp f s] applies function f to the elements of s, in decreasing 145 order. 146 147 [foldl f e s] applies the folding function f to the entries of the 148 set in increasing order. 149 150 [foldr f e s] applies the folding function f to the entries of the 151 set in decreasing order. 152 153 [find p s] returns SOME i, where i is an item in s which satisfies 154 p, if one exists; otherwise returns NONE. 155*) 156 157 158structure Binaryset :> BINARYSET = 159struct 160 161datatype 'item set = SET of ('item * 'item -> order) * 'item tree 162and 'item tree = 163 E 164 | T of {elt : 'item, 165 cnt : int, 166 left : 'item tree, 167 right : 'item tree} 168 169fun treeSize E = 0 170 | treeSize (T{cnt,...}) = cnt 171 172fun numItems (SET(_, t)) = treeSize t 173 174fun isEmpty (SET(_, E)) = true 175 | isEmpty _ = false 176 177fun mkT(v,n,l,r) = T{elt=v,cnt=n,left=l,right=r} 178 179(* N(v,l,r) = T(v,1+treeSize(l)+treeSize(r),l,r) *) 180fun N(v,E,E) = mkT(v,1,E,E) 181 | N(v,E,r as T{cnt=n,...}) = mkT(v,n+1,E,r) 182 | N(v,l as T{cnt=n,...}, E) = mkT(v,n+1,l,E) 183 | N(v,l as T{cnt=n,...}, r as T{cnt=m,...}) = mkT(v,n+m+1,l,r) 184 185fun single_L (a,x,T{elt=b,left=y,right=z,...}) = N(b,N(a,x,y),z) 186 | single_L _ = raise Match 187fun single_R (b,T{elt=a,left=x,right=y,...},z) = N(a,x,N(b,y,z)) 188 | single_R _ = raise Match 189fun double_L (a,w,T{elt=c,left=T{elt=b,left=x,right=y,...},right=z,...}) = 190 N(b,N(a,w,x),N(c,y,z)) 191 | double_L _ = raise Match 192fun double_R (c,T{elt=a,left=w,right=T{elt=b,left=x,right=y,...},...},z) = 193 N(b,N(a,w,x),N(c,y,z)) 194 | double_R _ = raise Match 195 196(* 197** val weight = 3 198** fun wt i = weight * i 199*) 200fun wt (i : int) = i + i + i 201 202fun T' (v,E,E) = mkT(v,1,E,E) 203 | T' (v,E,r as T{left=E,right=E,...}) = mkT(v,2,E,r) 204 | T' (v,l as T{left=E,right=E,...},E) = mkT(v,2,l,E) 205 206 | T' (p as (_,E,T{left=T _,right=E,...})) = double_L p 207 | T' (p as (_,T{left=E,right=T _,...},E)) = double_R p 208 209 (* these cases almost never happen with small weight*) 210 | T' (p as (_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) = 211 if ln<rn then single_L p else double_L p 212 | T' (p as (_,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...},E)) = 213 if ln>rn then single_R p else double_R p 214 215 | T' (p as (_,E,T{left=E,...})) = single_L p 216 | T' (p as (_,T{right=E,...},E)) = single_R p 217 218 | T' (p as (v,l as T{elt=lv,cnt=ln,left=ll,right=lr}, 219 r as T{elt=rv,cnt=rn,left=rl,right=rr})) = 220 if rn >= wt ln (*right is too big*) 221 then 222 let val rln = treeSize rl 223 val rrn = treeSize rr 224 in 225 if rln < rrn then single_L p else double_L p 226 end 227 else if ln >= wt rn (*left is too big*) 228 then 229 let val lln = treeSize ll 230 val lrn = treeSize lr 231 in 232 if lrn < lln then single_R p else double_R p 233 end 234 else mkT(v,ln+rn+1,l,r) 235 236fun addt cmpKey t x = 237 let fun h E = mkT(x,1,E,E) 238 | h (T{elt=v,left=l,right=r,cnt}) = 239 case cmpKey(x,v) of 240 LESS => T'(v, h l, r) 241 | GREATER => T'(v, l, h r) 242 | EQUAL => mkT(x,cnt,l,r) 243 in h t end 244 245fun concat3 cmpKey E v r = addt cmpKey r v 246 | concat3 cmpKey l v E = addt cmpKey l v 247 | concat3 cmpKey (l as T{elt=v1,cnt=n1,left=l1,right=r1}) 248 v 249 (r as T{elt=v2,cnt=n2,left=l2,right=r2}) = 250 if wt n1 < n2 then T'(v2, concat3 cmpKey l v l2, r2) 251 else if wt n2 < n1 then T'(v1, l1, concat3 cmpKey r1 v r) 252 else N(v,l,r) 253 254fun split_lt cmpKey E x = E 255 | split_lt cmpKey (T{elt=v,left=l,right=r,...}) x = 256 case cmpKey(v,x) of 257 GREATER => split_lt cmpKey l x 258 | LESS => concat3 cmpKey l v (split_lt cmpKey r x) 259 | _ => l 260 261fun split_gt cmpKey E x = E 262 | split_gt cmpKey (T{elt=v,left=l,right=r,...}) x = 263 case cmpKey(v,x) of 264 LESS => split_gt cmpKey r x 265 | GREATER => concat3 cmpKey (split_gt cmpKey l x) v r 266 | _ => r 267 268fun min (T{elt=v,left=E,...}) = v 269 | min (T{left=l,...}) = min l 270 | min _ = raise Match 271 272fun delmin (T{left=E,right=r,...}) = r 273 | delmin (T{elt=v,left=l,right=r,...}) = T'(v,delmin l,r) 274 | delmin _ = raise Match 275 276fun delete' (E,r) = r 277 | delete' (l,E) = l 278 | delete' (l,r) = T'(min r,l,delmin r) 279 280fun concat E s = s 281 | concat s E = s 282 | concat (t1 as T{elt=v1,cnt=n1,left=l1,right=r1}) 283 (t2 as T{elt=v2,cnt=n2,left=l2,right=r2}) = 284 if wt n1 < n2 then T'(v2, concat t1 l2, r2) 285 else if wt n2 < n1 then T'(v1, l1, concat r1 t2) 286 else T'(min t2,t1, delmin t2) 287 288fun hedge_union cmpKey s E = s 289 | hedge_union cmpKey E s = s 290 | hedge_union cmpKey (T{elt=v,left=l1,right=r1,...}) 291 (s2 as T{elt=v2,left=l2,right=r2,...}) = 292 let fun trim lo hi E = E 293 | trim lo hi (s as T{elt=v,left=l,right=r,...}) = 294 if cmpKey(v,lo) = GREATER 295 then if cmpKey(v,hi) = LESS then s else trim lo hi l 296 else trim lo hi r 297 298 fun uni_bd s E _ _ = s 299 | uni_bd E (T{elt=v,left=l,right=r,...}) lo hi = 300 concat3 cmpKey (split_gt cmpKey l lo) v (split_lt cmpKey r hi) 301 | uni_bd (T{elt=v,left=l1,right=r1,...}) 302 (s2 as T{elt=v2,left=l2,right=r2,...}) lo hi = 303 concat3 cmpKey (uni_bd l1 (trim lo v s2) lo v) 304 v (uni_bd r1 (trim v hi s2) v hi) 305 (* inv: lo < v < hi *) 306 307 (* all the other versions of uni and trim are 308 * specializations of the above two functions with 309 * lo=-infinity and/or hi=+infinity 310 *) 311 312 fun trim_lo _ E = E 313 | trim_lo lo (s as T{elt=v,right=r,...}) = 314 case cmpKey(v,lo) of 315 GREATER => s 316 | _ => trim_lo lo r 317 318 fun trim_hi _ E = E 319 | trim_hi hi (s as T{elt=v,left=l,...}) = 320 case cmpKey(v,hi) of 321 LESS => s 322 | _ => trim_hi hi l 323 324 fun uni_hi s E _ = s 325 | uni_hi E (T{elt=v,left=l,right=r,...}) hi = 326 concat3 cmpKey l v (split_lt cmpKey r hi) 327 | uni_hi (T{elt=v,left=l1,right=r1,...}) 328 (s2 as T{elt=v2,left=l2,right=r2,...}) hi = 329 concat3 cmpKey (uni_hi l1 (trim_hi v s2) v) 330 v (uni_bd r1 (trim v hi s2) v hi) 331 332 fun uni_lo s E _ = s 333 | uni_lo E (T{elt=v,left=l,right=r,...}) lo = 334 concat3 cmpKey (split_gt cmpKey l lo) v r 335 | uni_lo (T{elt=v,left=l1,right=r1,...}) 336 (s2 as T{elt=v2,left=l2,right=r2,...}) lo = 337 concat3 cmpKey (uni_bd l1 (trim lo v s2) lo v) 338 v (uni_lo r1 (trim_lo v s2) v) 339 in 340 concat3 cmpKey (uni_hi l1 (trim_hi v s2) v) 341 v (uni_lo r1 (trim_lo v s2) v) 342 end 343 344 (* The old_union version is about 20% slower than 345 * hedge_union in most cases 346 *) 347fun old_union _ E s2 = s2 348 | old_union _ s1 E = s1 349 | old_union cmpKey (T{elt=v,left=l,right=r,...}) s2 = 350 let val l2 = split_lt cmpKey s2 v 351 val r2 = split_gt cmpKey s2 v 352 in 353 concat3 cmpKey (old_union cmpKey l l2) v (old_union cmpKey r r2) 354 end 355 356exception NotFound 357 358fun empty cmpKey = SET(cmpKey, E) 359 360fun singleton cmpKey x = SET(cmpKey, T{elt=x,cnt=1,left=E,right=E}) 361 362fun addList (SET(cmpKey, t), l) = 363 SET(cmpKey, List.foldl (fn (i,s) => addt cmpKey s i) t l) 364 365fun add (SET(cmpKey, t), x) = SET(cmpKey, addt cmpKey t x) 366 367fun peekt cmpKey t x = 368 let fun pk E = NONE 369 | pk (T{elt=v,left=l,right=r,...}) = 370 case cmpKey(x,v) of 371 LESS => pk l 372 | GREATER => pk r 373 | _ => SOME v 374 in pk t end; 375 376fun membert cmpKey t x = 377 case peekt cmpKey t x of NONE => false | _ => true 378 379fun peek (SET(cmpKey, t), x) = peekt cmpKey t x; 380fun member arg = case peek arg of NONE => false | _ => true 381 382local 383 (* true if every item in t is in t' *) 384 fun treeIn cmpKey (t,t') = 385 let fun isIn E = true 386 | isIn (T{elt,left=E,right=E,...}) = 387 membert cmpKey t' elt 388 | isIn (T{elt,left,right=E,...}) = 389 membert cmpKey t' elt andalso isIn left 390 | isIn (T{elt,left=E,right,...}) = 391 membert cmpKey t' elt andalso isIn right 392 | isIn (T{elt,left,right,...}) = 393 membert cmpKey t' elt andalso isIn left andalso isIn right 394 in isIn t end 395in 396fun isSubset (SET(_, E),_) = true 397 | isSubset (_,SET(_, E)) = false 398 | isSubset (SET(cmpKey, t as T{cnt=n,...}), 399 SET(_, t' as T{cnt=n',...})) = 400 (n<=n') andalso treeIn cmpKey (t,t') 401 402fun equal (SET(_,E), SET(_, E)) = true 403 | equal (SET(cmpKey, t as T{cnt=n,...}), 404 SET(_, t' as T{cnt=n',...})) = 405 (n=n') andalso treeIn cmpKey (t,t') 406 | equal _ = false 407end 408 409fun retrieve arg = 410 case peek arg of NONE => raise NotFound | SOME v => v 411 412fun delete (SET(cmpKey, t), x) = 413 let fun delt E = raise NotFound 414 | delt (t as T{elt=v,left=l,right=r,...}) = 415 case cmpKey(x,v) of 416 LESS => T'(v, delt l, r) 417 | GREATER => T'(v, l, delt r) 418 | _ => delete'(l,r) 419 in SET(cmpKey, delt t) end; 420 421fun union (SET(cmpKey, t1), SET(_, t2)) = 422 SET(cmpKey, hedge_union cmpKey t1 t2) 423 424fun intersection (SET(cmpKey, t1), SET(_, t2)) = 425 let fun intert E _ = E 426 | intert _ E = E 427 | intert t (T{elt=v,left=l,right=r,...}) = 428 let val l2 = split_lt cmpKey t v 429 val r2 = split_gt cmpKey t v 430 in 431 case peekt cmpKey t v of 432 NONE => concat (intert l2 l) (intert r2 r) 433 | _ => concat3 cmpKey (intert l2 l) v (intert r2 r) 434 end 435 in SET(cmpKey, intert t1 t2) end 436 437fun difference (SET(cmpKey, t1), SET(_, t2)) = 438 let fun difft E s = E 439 | difft s E = s 440 | difft s (T{elt=v,left=l,right=r,...}) = 441 let val l2 = split_lt cmpKey s v 442 val r2 = split_gt cmpKey s v 443 in 444 concat (difft l2 l) (difft r2 r) 445 end 446 in SET(cmpKey, difft t1 t2) end 447 448fun foldr f b (SET(_, t)) = 449 let fun foldf E b = b 450 | foldf (T{elt,left,right,...}) b = 451 foldf left (f(elt, foldf right b)) 452 in foldf t b end 453 454fun foldl f b (SET(_, t)) = 455 let fun foldf E b = b 456 | foldf (T{elt,left,right,...}) b = 457 foldf right (f(elt, foldf left b)) 458 in foldf t b end 459 460fun listItems set = foldr (op::) [] set 461 462fun revapp f (SET(_, t)) = 463 let fun apply E = () 464 | apply (T{elt,left,right,...}) = 465 (apply right; ignore (f elt); apply left) 466 in apply t end 467 468fun app f (SET(_, t)) = 469 let fun apply E = () 470 | apply (T{elt,left,right,...}) = 471 (apply left; ignore (f elt); apply right) 472 in apply t end 473 474fun find p (SET(_, t)) = 475 let fun findt E = NONE 476 | findt (T{elt,left,right,...}) = 477 if p elt then SOME elt 478 else case findt left of 479 NONE => findt right 480 | a => a 481 in findt t end 482 483end; 484