1structure UnicodeChars :> UnicodeChars = 2struct 3 4val U = UTF8.chr 5 6(* Greek letters *) 7val alpha = U 0x03B1 8val beta = U 0x03B2 9val gamma = U 0x03B3 10val delta = U 0x03B4 11val zeta = U 0x03B6 12val eta = U 0x03B7 13val theta = U 0x03B8 14val kappa = U 0x03BA 15val lambda = U 0x03BB 16val mu = U 0x03BC 17val nu = U 0x03BD 18val xi = U 0x03BE 19val pi = U 0x03C0 20val rho = U 0x03C1 21val sigma = U 0x03C3 22val tau = U 0x03C4 23val phi = U 0x03C6 24val chi = U 0x03C7 25val psi = U 0x03C8 26val omega = U 0x03C9 27 28val Gamma = U 0x0393 29val Delta = U 0x0394 30val Theta = U 0x0398 31val Lambda = U 0x039B 32val Xi = U 0x039E 33val Pi = U 0x03A0 34val Sigma = U 0x03A3 35val Phi = U 0x03A6 36val Psi = U 0x03A8 37val Omega = U 0x03A9 38 39(* Boolean gadgets *) 40val forall = U 0x2200 41val exists = U 0x2203 42val conj = U 0x2227 43val disj = U 0x2228 44val imp = U 0x21D2 45val neg = U 0x00AC 46val iff = U 0x21D4 47val not_iff = U 0x21CE 48 49(* not constants, but might be useful *) 50val neq = U 0x2260 51val turnstile = U 0x22A2 52 53(* arrows *) 54val leftarrow = U 0x2190 55val rightarrow = U 0x2192 56val longleftarrow = U 0x27F5 57val longrightarrow = U 0x27F6 58 59val Leftarrow = U 0x21D0 60val Rightarrow = U 0x21D2 61val longdoubleleftarrow = U 0x27F8 62val longdoublerightarrow = U 0x27F9 63 64(* latter probably needs a proportional font to print well - would be 65 good for implication if available - actually seems OK also on 66 Leopard's Courier font, which is supposedly fixed-width *) 67 68val mapsto = U 0x21A6 69val mapsfrom = U 0x21A4 70val longmapsto = U 0x27FC 71val longmapsfrom = U 0x27FB 72val hookleftarrow = U 0x21A9 73val hookrightarrow = U 0x21AA 74 75(* brackets, braces and parentheses *) 76val double_bracel = U 0x2983 77val double_bracer = U 0x2984 78val langle = U 0x27E8 79val rangle = U 0x27E9 80val double_langle = U 0x27EA 81val double_rangle = U 0x27EB 82val lensel = U 0x2987 83val lenser = U 0x2988 84 85 86(*stars*) 87val blackstar = U 0x2605 88val whitestar = U 0x2606 89val bigasterisk = U 0x229B 90val asterisk = U 0x2217 91val circlestar = U 0x235F 92val stardiaeresis = U 0x2363 93 94(* quotation marks *) 95val ldquo = U 0x201C 96val lsquo = U 0x2018 97val rdquo = U 0x201D 98val rsquo = U 0x2019 99 100(* superscripts *) 101val sup_0 = U 0x2070 102val sup_1 = U 0x00B9 103val sup_2 = U 0x00B2 104val sup_3 = U 0x00B3 105val sup_4 = U 0x2074 106val sup_5 = U 0x2075 107val sup_6 = U 0x2076 108val sup_7 = U 0x2077 109val sup_8 = U 0x2078 110val sup_9 = U 0x2079 111val sup_plus = U 0x207A 112val sup_minus = U 0x207B 113val sup_eq = U 0x207C 114val sup_lparen = U 0x207D 115val sup_rparen = U 0x207E 116val sup_h = U 0x02B0 117val sup_i = U 0x2071 118val sup_j = U 0x02B2 119val sup_l = U 0x02E1 120val sup_n = U 0x207F 121val sup_r = U 0x02B3 122val sup_s = U 0x02E2 123val sup_w = U 0x02B7 124val sup_x = U 0x02E3 125val sup_y = U 0x02B8 126val sup_gamma = U 0x02E0 127 128(* subscripts *) 129val sub_plus = U 0x208A 130val sub_r = U 0x1D63 131 132(* arithmetic *) 133val leq = U 0x2264 134val geq = U 0x2265 135val nats = U 0x2115 136val ints = U 0x2124 137val reals = U 0x211D 138val rats = U 0x211A 139val minus = U 0x2212 140 141(* sets *) 142val emptyset = U 0x2205 143val subset = U 0x2286 144val inter = U 0x2229 145val union = U 0x222A 146val setelementof = U 0x2208 147val not_elementof = U 0x2209 148val universal_set = U 0x1D54C (* outside BMP! *) 149 150(* words *) 151val lo = "<" ^ sub_plus 152val hi = ">" ^ sub_plus 153val ls = leq ^ sub_plus 154val hs = geq ^ sub_plus 155val or = U 0x2016 156val xor = U 0x2295 157val lsl = U 0x226A 158val asr = U 0x226B 159val lsr = U 0x22D9 160val rol = U 0x21C6 161val ror = U 0x21C4 162 163val doubleplus = U 0x29FA 164 165fun isAlpha_i i = 166 if i < 128 then Char.isAlpha (Char.chr i) 167 else 168 0x370 <= i andalso i <= 0x3ff andalso i <> 0x37E (* Greek *) orelse 169 i = 0xAA (* ordinal a *) orelse 170 i = 0xB5 (* Latin-1 mu *) orelse 171 i = 0xBA (* ordinal o *) orelse 172 0xC0 <= i andalso i <= 0xFF andalso i <> 0xD7 andalso i <> 0xF7 173 (* Latin-1 *) orelse 174 0x1D400 <= i andalso i <= 0x1D7CB 175 (* beyond BMP "Math Alphanumeric Symbols", excluding digits starting at 176 U+1D7CE *) 177fun isDigit_i i = 178 if i < 128 then Char.isDigit (Char.chr i) 179 else 180 0x2080 <= i andalso i <= 0x2089 (* subscripts *) 181fun isAlphaNum_i i = isAlpha_i i orelse isDigit_i i 182fun isSymbolic_i i = 183 if i < 128 then Char.isPunct (Char.chr i) 184 else not (isAlphaNum_i i) 185val c' = Char.ord #"'" 186val c_ = Char.ord #"_" 187fun isMLIdent_i i = isAlphaNum_i i orelse i = c' orelse i = c_ 188 189fun flip2itest P s = 190 case UTF8.getChar s of 191 NONE => false 192 | SOME ((_, i), _) => P i 193val isAlpha = flip2itest isAlpha_i 194val isDigit = flip2itest isDigit_i 195val isSymbolic = flip2itest isSymbolic_i 196fun isAlphaNum s = isAlpha s orelse isDigit s 197fun isMLIdent s = isAlphaNum s orelse s = "'" orelse s = "_" 198 199 200(* see Unicode section of 201 https://en.wikipedia.org/wiki/Whitespace_character 202*) 203val individual_pts = [0x20, 0x85, 0xA0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 204 0x3000] 205fun mem i [] = false 206 | mem i (h::t) = i = h orelse mem i t 207fun isSpace_i i = 208 0x9 <= i andalso i <= 0xD orelse 209 0x2000 <= i andalso i <= 0x200A orelse 210 mem i individual_pts 211val isSpace = flip2itest isSpace_i 212 213fun foldthis (s, (sm,im,i)) = 214 let 215 val ((_, k), _) = valOf (UTF8.getChar s) 216 in 217 (Binarymap.insert(sm,s,i), Binarymap.insert(im,k,i), i + 1) 218 end 219val (supdigits_smap, supdigits_imap, _) = 220 List.foldl foldthis (Binarymap.mkDict String.compare, 221 Binarymap.mkDict Int.compare, 222 0) 223 [sup_0, sup_1, sup_2, sup_3, sup_4, sup_5, sup_6, sup_7, 224 sup_8, sup_9] 225fun supDigitVal s = Binarymap.peek(supdigits_smap, s) 226fun supDigitVal_i i = Binarymap.peek(supdigits_imap, i) 227 228fun isSupDigit_i i = isSome (Binarymap.peek(supdigits_imap, i)) 229fun isSupDigit s = isSome (supDigitVal s) 230 231 232end (* struct *) 233