1structure UnicodeChars :> UnicodeChars =
2struct
3
4val U = UTF8.chr
5
6(* Greek letters *)
7val alpha =  U 0x03B1
8val beta =   U 0x03B2
9val gamma =  U 0x03B3
10val delta =  U 0x03B4
11val zeta =   U 0x03B6
12val eta =    U 0x03B7
13val theta =  U 0x03B8
14val kappa =  U 0x03BA
15val lambda = U 0x03BB
16val mu =     U 0x03BC
17val nu =     U 0x03BD
18val xi =     U 0x03BE
19val pi =     U 0x03C0
20val rho =    U 0x03C1
21val sigma =  U 0x03C3
22val tau =    U 0x03C4
23val phi =    U 0x03C6
24val chi =    U 0x03C7
25val psi =    U 0x03C8
26val omega =  U 0x03C9
27
28val Gamma =  U 0x0393
29val Delta =  U 0x0394
30val Theta =  U 0x0398
31val Lambda = U 0x039B
32val Xi =     U 0x039E
33val Pi =     U 0x03A0
34val Sigma =  U 0x03A3
35val Phi =    U 0x03A6
36val Psi =    U 0x03A8
37val Omega =  U 0x03A9
38
39(* Boolean gadgets *)
40val forall =  U 0x2200
41val exists =  U 0x2203
42val conj =    U 0x2227
43val disj =    U 0x2228
44val imp =     U 0x21D2
45val neg =     U 0x00AC
46val iff =     U 0x21D4
47val not_iff = U 0x21CE
48
49(* not constants, but might be useful *)
50val neq =       U 0x2260
51val turnstile = U 0x22A2
52
53(* arrows *)
54val leftarrow            = U 0x2190
55val rightarrow           = U 0x2192
56val longleftarrow        = U 0x27F5
57val longrightarrow       = U 0x27F6
58
59val Leftarrow            = U 0x21D0
60val Rightarrow           = U 0x21D2
61val longdoubleleftarrow  = U 0x27F8
62val longdoublerightarrow = U 0x27F9
63
64(* latter probably needs a proportional font to print well - would be
65   good for implication if available - actually seems OK also on
66   Leopard's Courier font, which is supposedly fixed-width *)
67
68val mapsto               = U 0x21A6
69val mapsfrom             = U 0x21A4
70val longmapsto           = U 0x27FC
71val longmapsfrom         = U 0x27FB
72val hookleftarrow        = U 0x21A9
73val hookrightarrow       = U 0x21AA
74
75(* brackets, braces and parentheses *)
76val double_bracel = U 0x2983
77val double_bracer = U 0x2984
78val langle = U 0x27E8
79val rangle = U 0x27E9
80val double_langle = U 0x27EA
81val double_rangle = U 0x27EB
82val lensel = U 0x2987
83val lenser = U 0x2988
84
85
86(*stars*)
87val blackstar            = U 0x2605
88val whitestar            = U 0x2606
89val bigasterisk          = U 0x229B
90val asterisk             = U 0x2217
91val circlestar           = U 0x235F
92val stardiaeresis        = U 0x2363
93
94(* quotation marks *)
95val ldquo = U 0x201C
96val lsquo = U 0x2018
97val rdquo = U 0x201D
98val rsquo = U 0x2019
99
100(* superscripts *)
101val sup_0 =      U 0x2070
102val sup_1 =      U 0x00B9
103val sup_2 =      U 0x00B2
104val sup_3 =      U 0x00B3
105val sup_4 =      U 0x2074
106val sup_5 =      U 0x2075
107val sup_6 =      U 0x2076
108val sup_7 =      U 0x2077
109val sup_8 =      U 0x2078
110val sup_9 =      U 0x2079
111val sup_plus =   U 0x207A
112val sup_minus =  U 0x207B
113val sup_eq =     U 0x207C
114val sup_lparen = U 0x207D
115val sup_rparen = U 0x207E
116val sup_h =      U 0x02B0
117val sup_i =      U 0x2071
118val sup_j =      U 0x02B2
119val sup_l =      U 0x02E1
120val sup_n =      U 0x207F
121val sup_r =      U 0x02B3
122val sup_s =      U 0x02E2
123val sup_w =      U 0x02B7
124val sup_x =      U 0x02E3
125val sup_y =      U 0x02B8
126val sup_gamma =  U 0x02E0
127
128(* subscripts *)
129val sub_plus =   U 0x208A
130val sub_r =      U 0x1D63
131
132(* arithmetic *)
133val leq =   U 0x2264
134val geq =   U 0x2265
135val nats =  U 0x2115
136val ints =  U 0x2124
137val reals = U 0x211D
138val rats =  U 0x211A
139val minus = U 0x2212
140
141(* sets *)
142val emptyset =      U 0x2205
143val subset =        U 0x2286
144val inter =         U 0x2229
145val union =         U 0x222A
146val setelementof =  U 0x2208
147val not_elementof = U 0x2209
148val universal_set = U 0x1D54C  (* outside BMP! *)
149
150(* words *)
151val lo = "<" ^ sub_plus
152val hi = ">" ^ sub_plus
153val ls = leq ^ sub_plus
154val hs = geq ^ sub_plus
155val or =  U 0x2016
156val xor = U 0x2295
157val lsl = U 0x226A
158val asr = U 0x226B
159val lsr = U 0x22D9
160val rol = U 0x21C6
161val ror = U 0x21C4
162
163val doubleplus = U 0x29FA
164
165fun isAlpha_i i =
166  if i < 128 then Char.isAlpha (Char.chr i)
167  else
168    0x370 <= i andalso i <= 0x3ff andalso i <> 0x37E (* Greek *) orelse
169    i = 0xAA (* ordinal a *) orelse
170    i = 0xB5 (* Latin-1 mu *) orelse
171    i = 0xBA (* ordinal o *) orelse
172    0xC0 <= i andalso i <= 0xFF andalso i <> 0xD7 andalso i <> 0xF7
173     (* Latin-1 *) orelse
174    0x1D400 <= i andalso i <= 0x1D7CB
175     (* beyond BMP "Math Alphanumeric Symbols", excluding digits starting at
176        U+1D7CE *)
177fun isDigit_i i =
178  if i < 128 then Char.isDigit (Char.chr i)
179  else
180    0x2080 <= i andalso i <= 0x2089 (* subscripts *)
181fun isAlphaNum_i i = isAlpha_i i orelse isDigit_i i
182fun isSymbolic_i i =
183  if i < 128 then Char.isPunct (Char.chr i)
184  else not (isAlphaNum_i i)
185val c' = Char.ord #"'"
186val c_ = Char.ord #"_"
187fun isMLIdent_i i = isAlphaNum_i i orelse i = c' orelse i = c_
188
189fun flip2itest P s =
190    case UTF8.getChar s of
191        NONE => false
192      | SOME ((_, i), _) => P i
193val isAlpha = flip2itest isAlpha_i
194val isDigit = flip2itest isDigit_i
195val isSymbolic = flip2itest isSymbolic_i
196fun isAlphaNum s = isAlpha s orelse isDigit s
197fun isMLIdent s = isAlphaNum s orelse s = "'" orelse s = "_"
198
199
200(* see Unicode section of
201      https://en.wikipedia.org/wiki/Whitespace_character
202*)
203val individual_pts = [0x20, 0x85, 0xA0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F,
204                      0x3000]
205fun mem i [] = false
206  | mem i (h::t) = i = h orelse mem i t
207fun isSpace_i i =
208    0x9 <= i andalso i <= 0xD orelse
209    0x2000 <= i andalso i <= 0x200A orelse
210    mem i individual_pts
211val isSpace = flip2itest isSpace_i
212
213fun foldthis (s, (sm,im,i)) =
214    let
215      val ((_, k), _) = valOf (UTF8.getChar s)
216    in
217      (Binarymap.insert(sm,s,i), Binarymap.insert(im,k,i), i + 1)
218    end
219val (supdigits_smap, supdigits_imap, _) =
220    List.foldl foldthis (Binarymap.mkDict String.compare,
221                         Binarymap.mkDict Int.compare,
222                         0)
223               [sup_0, sup_1, sup_2, sup_3, sup_4, sup_5, sup_6, sup_7,
224                sup_8, sup_9]
225fun supDigitVal s = Binarymap.peek(supdigits_smap, s)
226fun supDigitVal_i i = Binarymap.peek(supdigits_imap, i)
227
228fun isSupDigit_i i = isSome (Binarymap.peek(supdigits_imap, i))
229fun isSupDigit s = isSome (supDigitVal s)
230
231
232end (* struct *)
233