1(* ===================================================================== *)
2(* FILE          : Lexis.sml                                             *)
3(* DESCRIPTION   : predicates defining various lexical classes for HOL:  *)
4(*                                                                       *)
5(*                     1. type variables                                 *)
6(*                     2. type constants                                 *)
7(*                     3. term constants                                 *)
8(*                                                                       *)
9(* AUTHOR        : Konrad Slind, University of Calgary                   *)
10(* DATE          : August 26, 1991                                       *)
11(* REVISED       : October,   1994, to improve efficiency.               *)
12(* MODIFIED      : September 22, 1997, Ken Larsen                        *)
13(* ===================================================================== *)
14
15structure Lexis :> Lexis =
16struct
17
18val ERR = Feedback.mk_HOL_ERR "Lexis";
19
20(*---------------------------------------------------------------------------
21 * Here we use extra space to get better runtimes. Strings are not exploded;
22 * finding out whether a string belongs in a particular syntax class is done
23 * by checking that the ordinal of each character in the string is the
24 * index (in a particular bytearray) of a box containing a 1.
25 *
26 * We work only with ascii characters, so we allocate bytearrays of size 128.
27 * A bytearray is compact - each element of the array occupies only 1 byte.
28 * Since we are using only 0 and 1, we could probably use "bit"arrays, but
29 * sheer laziness prevents us.
30 *
31 * Portability hack by Ken Larsen, bzero is the representation of a byte/bit
32 * zero, bone is the representation of one.
33 *
34 *---------------------------------------------------------------------------*)
35
36
37val bzero = 0wx0 : Word8.word
38val bone  = 0wx1 : Word8.word
39
40val ordof = Portable.ordof;
41
42val hol_symbols   = Word8Array.array(128,bzero);
43val sml_symbols   = Word8Array.array(128,bzero);
44val alphabet      = Word8Array.array(128,bzero);
45val numbers       = Word8Array.array(128,bzero);
46val tyvar_ids     = Word8Array.array(128,bzero);
47val alphanumerics = Word8Array.array(128,bzero);
48val parens        = Word8Array.array(128,bzero);
49
50fun setup table P =
51   Lib.for_se 0 127
52     (fn i => if P(Char.chr i) then Word8Array.update(table,i,bone) else ());
53
54(*---------------------------------------------------------------------------
55 * Utility for examining the contents of character tables
56 *
57 * fun accum table =
58 *    implode
59 *      (Lib.for 0 127
60 *      (fn i => if (Word8Array.sub(table,i) = bone) then chr i
61                 else ""));
62 *---------------------------------------------------------------------------*)
63
64(*---------------------------------------------------------------------------
65 * Various familiar predicates, used only to build the tables, so we can
66 * afford to write them naively.
67 *---------------------------------------------------------------------------*)
68
69val is_alphabetic = Char.isAlpha
70val is_numeric    = Char.isDigit
71
72fun is_alphanumeric #"_" = true
73  | is_alphanumeric #"'" = true
74  | is_alphanumeric ch = Char.isAlphaNum ch
75
76fun is_paren #"(" = true
77  | is_paren #")" = true
78  | is_paren _ = false;
79
80
81(*---------------------------------------------------------------------------
82 * Used for type variables, in which a prime is required in the first
83 * position in the string, but allowed nowhere else.
84 *---------------------------------------------------------------------------*)
85
86fun is_alphanumeric_no_prime ch =
87   is_alphabetic ch orelse is_numeric ch orelse ch = #"_";
88
89fun in_string str =
90   let val strlist = String.explode str
91       val memb = Lib.C Lib.mem strlist
92   in memb
93   end;
94
95val hol_symbolics = "#?+*/\\=<>&%@!,:;_|~-";
96val sml_symbolics = "!%&$+/:<=>?@~|#*\\-~^";
97val is_hol_symbol = in_string hol_symbolics
98val is_sml_symbol = in_string sml_symbolics;
99
100(* Build the character tables *)
101
102val _ = setup hol_symbols is_hol_symbol;
103val _ = setup sml_symbols is_sml_symbol;
104val _ = setup alphabet is_alphabetic;
105val _ = setup numbers is_numeric;
106val _ = setup alphanumerics is_alphanumeric;
107val _ = setup tyvar_ids is_alphanumeric_no_prime;
108val _ = setup parens is_paren;
109
110
111fun in_class(table,i) = (Word8Array.sub(table,i) = bone)
112                         handle _ => false;
113
114(*---------------------------------------------------------------------------
115 * Now for the efficient string predicates. Generally, the empty string
116 * is not allowed.
117 *---------------------------------------------------------------------------*)
118
119fun ok_identifier str =
120   let fun loop i = (Word8Array.sub(alphanumerics,ordof(str,i)) = bone)
121                     andalso loop(i+1)
122   in
123   ((Word8Array.sub(alphabet,ordof(str,0)) = bone) handle _ => false)
124   andalso
125   (loop 1 handle _ => true)
126   end;
127
128
129val allowed_type_constant = ok_identifier;
130
131local val prime = ordof("'",0)
132in
133fun allowed_user_type_var str =
134   let fun loop i = (Word8Array.sub(tyvar_ids,ordof(str,i)) = bone)
135                     andalso loop(i+1)
136   in
137   ((ordof(str,0) = prime) handle _ => false)
138   andalso
139   ((Word8Array.sub(alphabet,ordof(str,1)) = bone) handle _ => false)
140   andalso
141   (loop 2 handle _ => true)
142   end
143end;
144
145
146fun ok_symbolic str =
147   let fun loop i = (Word8Array.sub(hol_symbols,ordof(str,i)) = bone)
148                     andalso loop(i+1)
149   in
150   ((Word8Array.sub(hol_symbols,ordof(str,0)) = bone)
151    handle _ => false)
152   andalso
153   (loop 1 handle _ => true)
154   end;
155
156val sml_keywords =
157    Binaryset.addList (Binaryset.empty String.compare,
158                       ["op", "if", "then", "else", "val", "fun",
159                        "structure", "signature", "struct", "sig", "open",
160                        "infix", "infixl", "infixr", "andalso", "orelse",
161                        "and", "datatype", "type", "where", ":", ":>",
162                        "let", "in", "end", "while", "do"])
163
164fun ok_sml_identifier str = let
165  val sub = Word8Array.sub
166  fun alphaloop i =
167      (sub(alphanumerics,ordof(str,i)) = bone) andalso alphaloop(i+1)
168  fun symloop i =
169      (sub(sml_symbols,ordof(str,i)) = bone) andalso symloop(i+1)
170in
171  if Binaryset.member(sml_keywords, str) then false
172  else if ((sub(alphabet,ordof(str,0)) = bone) handle _ => false) then
173    (alphaloop 1 handle _ => true)
174  else if ((sub(sml_symbols,ordof(str,0)) = bone) handle _ => false) then
175    (symloop 1 handle _ => true)
176  else false
177end
178
179
180(*---------------------------------------------------------------------------
181 * Predicate to tell if a prospective constant to-be-defined has an
182 * acceptable name. Note that this function does not recognize members of
183 * constant families (just those that serve to define such families).
184 *---------------------------------------------------------------------------*)
185
186fun allowed_term_constant "let"        = false
187  | allowed_term_constant "in"         = false
188  | allowed_term_constant "and"        = false
189  | allowed_term_constant "of"         = false
190  | allowed_term_constant "\\"         = false
191  | allowed_term_constant ";"          = false
192  | allowed_term_constant "=>"         = false
193  | allowed_term_constant "|"          = false
194  | allowed_term_constant ":"          = false
195  | allowed_term_constant ":="         = false
196  | allowed_term_constant "with"       = false
197  | allowed_term_constant "updated_by" = false
198  | allowed_term_constant "0"          = true  (* only this numeral is OK *)
199  | allowed_term_constant str =
200    if Word8Array.sub(alphabet,ordof(str,0)) = bone then ok_identifier str else
201    if Word8Array.sub(hol_symbols,ordof(str,0)) = bone then ok_symbolic str
202    else false;
203
204
205(*---------------------------------------------------------------------------
206 * Strings representing negative integers return false, since we are only
207 * (currently) interested in :num.
208 *---------------------------------------------------------------------------*)
209
210fun is_num_literal str =
211   let fun loop i =
212         (Word8Array.sub(numbers,ordof(str,i)) = bone) andalso loop(i+1)
213   in
214   ((Word8Array.sub(numbers,ordof(str,0)) = bone) handle _ => false)
215   andalso
216   (loop 1 handle _ => true)
217   end;
218
219local val dquote = #"\""
220in
221fun is_string_literal s = String.size s > 1
222    andalso (String.sub(s,0) = dquote)
223    andalso (String.sub(s,String.size s - 1) = dquote)
224fun is_char_literal s = String.size s = 4 andalso
225                        String.sub(s,0) = #"#" andalso
226                        String.sub(s,1) = dquote andalso
227                        String.sub(s,size s - 1) = dquote
228end;
229
230
231
232
233(*---------------------------------------------------------------------------*
234 * Renaming support. We allow renaming by priming, and by attaching          *
235 * numeric subscripts.                                                       *
236 *---------------------------------------------------------------------------*)
237
238local fun num2name s i = s^Lib.int_to_string i
239      fun subscripts x s =
240        let val project = num2name (s^x)
241            val cnt = Portable.make_counter{init=1,inc=1}
242            fun incr() = project (cnt())
243        in
244          incr
245        end
246      fun primes s =
247        let val {upd,...} = Portable.syncref s
248            fun next () = upd (fn s => let val s' = Lib.prime s in (s',s') end)
249        in
250          next
251        end
252in
253(* don't eta-contract *)
254fun nameStrm NONE s = primes s
255  | nameStrm (SOME x) s = subscripts x s
256end;
257
258
259(*---------------------------------------------------------------------------
260   String variants, on type variables and term variables. Each,
261   given a string produce another one that is different, but which is in
262   some sense the "next" string in a sequence.
263
264    `tyvar_vary' can be used to generate  'a, 'b, 'c ... 'z, 'a0 ...
265
266    `tmvar_vary' can be used to generate f, f0, f1, f2, f3 ...
267
268     A call to
269
270       gen_variant f avoids s
271
272     uses a varying function such as the two given here to produce a variant
273     of s that doesn't appear in the list avoids.
274 ---------------------------------------------------------------------------*)
275
276fun tyvar_vary s =
277 let open Substring
278     val ss = full s
279     val (nonletters, letters) = splitr Char.isAlpha ss
280     val szletters = size letters
281 in
282  if szletters > 0
283  then case sub(letters, szletters - 1)
284        of #"z" => concat [nonletters,
285                           slice(letters, 0, SOME (szletters - 1)),
286                           full "a0"]
287         | #"Z" => concat [nonletters,
288                           slice(letters, 0, SOME (szletters - 1)),
289                           full "a0"]
290         | c => concat [nonletters,
291                        slice(letters, 0, SOME (szletters - 1)),
292                        full (str (chr (ord c + 1)))]
293  else let val (nondigits, digits) = splitr Char.isDigit ss
294           val szdigits = size digits
295       in
296        if szdigits > 0
297        then let val n = valOf (Int.fromString (string digits))
298             in concat [nondigits, full (Int.toString (n + 1))]
299             end
300        else concat [nondigits, full "0"]
301       end
302 end
303
304fun tmvar_vary s =
305 let open Substring
306     val ss = full s
307     val (nondigits, digits) = splitr Char.isDigit ss
308 in
309   if size digits > 0
310   then let val n = valOf (Int.fromString (string digits))
311        in concat [nondigits, full (Int.toString (n + 1))]
312        end
313  else concat [nondigits, full "0"]
314end
315
316fun gen_variant vfn avoids s =
317  if Lib.mem s avoids then gen_variant vfn avoids (vfn s) else s
318
319(* pc = "prime count", i.e. count of number of prime characters *)
320(* a variable whose name is actually what gets printed to abbreviate a
321   big prime count, e.g., v'4', or v'���' are abbreviations for v'''', have  (UOK)
322   to be treated as "bad" *)
323fun badpc_encode s =
324    let val sz = String.size s
325        fun recurse seendigit i =
326            if i <= 0 then false
327            else
328              let val c = String.sub(s,i)
329              in
330                if Char.isDigit c then recurse true (i - 1)
331                else c = #"'" andalso seendigit
332              end
333    in
334      sz >= 4 andalso String.sub(s, sz - 1) = #"'" andalso
335      recurse false (sz - 2)
336    end
337
338fun is_identish_var s =
339    (let val v0 = #1 (valOf (UTF8.firstChar s))
340     in
341       UnicodeChars.isAlpha v0 orelse v0 = "_"
342     end) andalso
343    UTF8.all UnicodeChars.isMLIdent s andalso
344    not (badpc_encode s)
345
346fun is_symbolish_var s =
347    UTF8.all UnicodeChars.isSymbolic s andalso
348    not (String.isSubstring "(*" s) andalso
349    not (String.isSubstring "*)" s)
350
351fun is_clean_varname s =
352    s <> "" andalso (is_identish_var s orelse is_symbolish_var s)
353    handle UTF8.BadUTF8 _ => false
354
355
356end; (* Lexis *)
357