1(* ===================================================================== *) 2(* FILE : Lexis.sml *) 3(* DESCRIPTION : predicates defining various lexical classes for HOL: *) 4(* *) 5(* 1. type variables *) 6(* 2. type constants *) 7(* 3. term constants *) 8(* *) 9(* AUTHOR : Konrad Slind, University of Calgary *) 10(* DATE : August 26, 1991 *) 11(* REVISED : October, 1994, to improve efficiency. *) 12(* MODIFIED : September 22, 1997, Ken Larsen *) 13(* ===================================================================== *) 14 15structure Lexis :> Lexis = 16struct 17 18val ERR = Feedback.mk_HOL_ERR "Lexis"; 19 20(*--------------------------------------------------------------------------- 21 * Here we use extra space to get better runtimes. Strings are not exploded; 22 * finding out whether a string belongs in a particular syntax class is done 23 * by checking that the ordinal of each character in the string is the 24 * index (in a particular bytearray) of a box containing a 1. 25 * 26 * We work only with ascii characters, so we allocate bytearrays of size 128. 27 * A bytearray is compact - each element of the array occupies only 1 byte. 28 * Since we are using only 0 and 1, we could probably use "bit"arrays, but 29 * sheer laziness prevents us. 30 * 31 * Portability hack by Ken Larsen, bzero is the representation of a byte/bit 32 * zero, bone is the representation of one. 33 * 34 *---------------------------------------------------------------------------*) 35 36 37val bzero = 0wx0 : Word8.word 38val bone = 0wx1 : Word8.word 39 40val ordof = Portable.ordof; 41 42val hol_symbols = Word8Array.array(128,bzero); 43val sml_symbols = Word8Array.array(128,bzero); 44val alphabet = Word8Array.array(128,bzero); 45val numbers = Word8Array.array(128,bzero); 46val tyvar_ids = Word8Array.array(128,bzero); 47val alphanumerics = Word8Array.array(128,bzero); 48val parens = Word8Array.array(128,bzero); 49 50fun setup table P = 51 Lib.for_se 0 127 52 (fn i => if P(Char.chr i) then Word8Array.update(table,i,bone) else ()); 53 54(*--------------------------------------------------------------------------- 55 * Utility for examining the contents of character tables 56 * 57 * fun accum table = 58 * implode 59 * (Lib.for 0 127 60 * (fn i => if (Word8Array.sub(table,i) = bone) then chr i 61 else "")); 62 *---------------------------------------------------------------------------*) 63 64(*--------------------------------------------------------------------------- 65 * Various familiar predicates, used only to build the tables, so we can 66 * afford to write them naively. 67 *---------------------------------------------------------------------------*) 68 69val is_alphabetic = Char.isAlpha 70val is_numeric = Char.isDigit 71 72fun is_alphanumeric #"_" = true 73 | is_alphanumeric #"'" = true 74 | is_alphanumeric ch = Char.isAlphaNum ch 75 76fun is_paren #"(" = true 77 | is_paren #")" = true 78 | is_paren _ = false; 79 80 81(*--------------------------------------------------------------------------- 82 * Used for type variables, in which a prime is required in the first 83 * position in the string, but allowed nowhere else. 84 *---------------------------------------------------------------------------*) 85 86fun is_alphanumeric_no_prime ch = 87 is_alphabetic ch orelse is_numeric ch orelse ch = #"_"; 88 89fun in_string str = 90 let val strlist = String.explode str 91 val memb = Lib.C Lib.mem strlist 92 in memb 93 end; 94 95val hol_symbolics = "#?+*/\\=<>&%@!,:;_|~-"; 96val sml_symbolics = "!%&$+/:<=>?@~|#*\\-~^"; 97val is_hol_symbol = in_string hol_symbolics 98val is_sml_symbol = in_string sml_symbolics; 99 100(* Build the character tables *) 101 102val _ = setup hol_symbols is_hol_symbol; 103val _ = setup sml_symbols is_sml_symbol; 104val _ = setup alphabet is_alphabetic; 105val _ = setup numbers is_numeric; 106val _ = setup alphanumerics is_alphanumeric; 107val _ = setup tyvar_ids is_alphanumeric_no_prime; 108val _ = setup parens is_paren; 109 110 111fun in_class(table,i) = (Word8Array.sub(table,i) = bone) 112 handle _ => false; 113 114(*--------------------------------------------------------------------------- 115 * Now for the efficient string predicates. Generally, the empty string 116 * is not allowed. 117 *---------------------------------------------------------------------------*) 118 119fun ok_identifier str = 120 let fun loop i = (Word8Array.sub(alphanumerics,ordof(str,i)) = bone) 121 andalso loop(i+1) 122 in 123 ((Word8Array.sub(alphabet,ordof(str,0)) = bone) handle _ => false) 124 andalso 125 (loop 1 handle _ => true) 126 end; 127 128 129val allowed_type_constant = ok_identifier; 130 131local val prime = ordof("'",0) 132in 133fun allowed_user_type_var str = 134 let fun loop i = (Word8Array.sub(tyvar_ids,ordof(str,i)) = bone) 135 andalso loop(i+1) 136 in 137 ((ordof(str,0) = prime) handle _ => false) 138 andalso 139 ((Word8Array.sub(alphabet,ordof(str,1)) = bone) handle _ => false) 140 andalso 141 (loop 2 handle _ => true) 142 end 143end; 144 145 146fun ok_symbolic str = 147 let fun loop i = (Word8Array.sub(hol_symbols,ordof(str,i)) = bone) 148 andalso loop(i+1) 149 in 150 ((Word8Array.sub(hol_symbols,ordof(str,0)) = bone) 151 handle _ => false) 152 andalso 153 (loop 1 handle _ => true) 154 end; 155 156val sml_keywords = 157 Binaryset.addList (Binaryset.empty String.compare, 158 ["op", "if", "then", "else", "val", "fun", 159 "structure", "signature", "struct", "sig", "open", 160 "infix", "infixl", "infixr", "andalso", "orelse", 161 "and", "datatype", "type", "where", ":", ":>", 162 "let", "in", "end", "while", "do"]) 163 164fun ok_sml_identifier str = let 165 val sub = Word8Array.sub 166 fun alphaloop i = 167 (sub(alphanumerics,ordof(str,i)) = bone) andalso alphaloop(i+1) 168 fun symloop i = 169 (sub(sml_symbols,ordof(str,i)) = bone) andalso symloop(i+1) 170in 171 if Binaryset.member(sml_keywords, str) then false 172 else if ((sub(alphabet,ordof(str,0)) = bone) handle _ => false) then 173 (alphaloop 1 handle _ => true) 174 else if ((sub(sml_symbols,ordof(str,0)) = bone) handle _ => false) then 175 (symloop 1 handle _ => true) 176 else false 177end 178 179 180(*--------------------------------------------------------------------------- 181 * Predicate to tell if a prospective constant to-be-defined has an 182 * acceptable name. Note that this function does not recognize members of 183 * constant families (just those that serve to define such families). 184 *---------------------------------------------------------------------------*) 185 186fun allowed_term_constant "let" = false 187 | allowed_term_constant "in" = false 188 | allowed_term_constant "and" = false 189 | allowed_term_constant "of" = false 190 | allowed_term_constant "\\" = false 191 | allowed_term_constant ";" = false 192 | allowed_term_constant "=>" = false 193 | allowed_term_constant "|" = false 194 | allowed_term_constant ":" = false 195 | allowed_term_constant ":=" = false 196 | allowed_term_constant "with" = false 197 | allowed_term_constant "updated_by" = false 198 | allowed_term_constant "0" = true (* only this numeral is OK *) 199 | allowed_term_constant str = 200 if Word8Array.sub(alphabet,ordof(str,0)) = bone then ok_identifier str else 201 if Word8Array.sub(hol_symbols,ordof(str,0)) = bone then ok_symbolic str 202 else false; 203 204 205(*--------------------------------------------------------------------------- 206 * Strings representing negative integers return false, since we are only 207 * (currently) interested in :num. 208 *---------------------------------------------------------------------------*) 209 210fun is_num_literal str = 211 let fun loop i = 212 (Word8Array.sub(numbers,ordof(str,i)) = bone) andalso loop(i+1) 213 in 214 ((Word8Array.sub(numbers,ordof(str,0)) = bone) handle _ => false) 215 andalso 216 (loop 1 handle _ => true) 217 end; 218 219local val dquote = #"\"" 220in 221fun is_string_literal s = String.size s > 1 222 andalso (String.sub(s,0) = dquote) 223 andalso (String.sub(s,String.size s - 1) = dquote) 224fun is_char_literal s = String.size s = 4 andalso 225 String.sub(s,0) = #"#" andalso 226 String.sub(s,1) = dquote andalso 227 String.sub(s,size s - 1) = dquote 228end; 229 230 231 232 233(*---------------------------------------------------------------------------* 234 * Renaming support. We allow renaming by priming, and by attaching * 235 * numeric subscripts. * 236 *---------------------------------------------------------------------------*) 237 238local fun num2name s i = s^Lib.int_to_string i 239 fun subscripts x s = 240 let val project = num2name (s^x) 241 val cnt = Portable.make_counter{init=1,inc=1} 242 fun incr() = project (cnt()) 243 in 244 incr 245 end 246 fun primes s = 247 let val {upd,...} = Portable.syncref s 248 fun next () = upd (fn s => let val s' = Lib.prime s in (s',s') end) 249 in 250 next 251 end 252in 253(* don't eta-contract *) 254fun nameStrm NONE s = primes s 255 | nameStrm (SOME x) s = subscripts x s 256end; 257 258 259(*--------------------------------------------------------------------------- 260 String variants, on type variables and term variables. Each, 261 given a string produce another one that is different, but which is in 262 some sense the "next" string in a sequence. 263 264 `tyvar_vary' can be used to generate 'a, 'b, 'c ... 'z, 'a0 ... 265 266 `tmvar_vary' can be used to generate f, f0, f1, f2, f3 ... 267 268 A call to 269 270 gen_variant f avoids s 271 272 uses a varying function such as the two given here to produce a variant 273 of s that doesn't appear in the list avoids. 274 ---------------------------------------------------------------------------*) 275 276fun tyvar_vary s = 277 let open Substring 278 val ss = full s 279 val (nonletters, letters) = splitr Char.isAlpha ss 280 val szletters = size letters 281 in 282 if szletters > 0 283 then case sub(letters, szletters - 1) 284 of #"z" => concat [nonletters, 285 slice(letters, 0, SOME (szletters - 1)), 286 full "a0"] 287 | #"Z" => concat [nonletters, 288 slice(letters, 0, SOME (szletters - 1)), 289 full "a0"] 290 | c => concat [nonletters, 291 slice(letters, 0, SOME (szletters - 1)), 292 full (str (chr (ord c + 1)))] 293 else let val (nondigits, digits) = splitr Char.isDigit ss 294 val szdigits = size digits 295 in 296 if szdigits > 0 297 then let val n = valOf (Int.fromString (string digits)) 298 in concat [nondigits, full (Int.toString (n + 1))] 299 end 300 else concat [nondigits, full "0"] 301 end 302 end 303 304fun tmvar_vary s = 305 let open Substring 306 val ss = full s 307 val (nondigits, digits) = splitr Char.isDigit ss 308 in 309 if size digits > 0 310 then let val n = valOf (Int.fromString (string digits)) 311 in concat [nondigits, full (Int.toString (n + 1))] 312 end 313 else concat [nondigits, full "0"] 314end 315 316fun gen_variant vfn avoids s = 317 if Lib.mem s avoids then gen_variant vfn avoids (vfn s) else s 318 319(* pc = "prime count", i.e. count of number of prime characters *) 320(* a variable whose name is actually what gets printed to abbreviate a 321 big prime count, e.g., v'4', or v'���' are abbreviations for v'''', have (UOK) 322 to be treated as "bad" *) 323fun badpc_encode s = 324 let val sz = String.size s 325 fun recurse seendigit i = 326 if i <= 0 then false 327 else 328 let val c = String.sub(s,i) 329 in 330 if Char.isDigit c then recurse true (i - 1) 331 else c = #"'" andalso seendigit 332 end 333 in 334 sz >= 4 andalso String.sub(s, sz - 1) = #"'" andalso 335 recurse false (sz - 2) 336 end 337 338fun is_identish_var s = 339 (let val v0 = #1 (valOf (UTF8.firstChar s)) 340 in 341 UnicodeChars.isAlpha v0 orelse v0 = "_" 342 end) andalso 343 UTF8.all UnicodeChars.isMLIdent s andalso 344 not (badpc_encode s) 345 346fun is_symbolish_var s = 347 UTF8.all UnicodeChars.isSymbolic s andalso 348 not (String.isSubstring "(*" s) andalso 349 not (String.isSubstring "*)" s) 350 351fun is_clean_varname s = 352 s <> "" andalso (is_identish_var s orelse is_symbolish_var s) 353 handle UTF8.BadUTF8 _ => false 354 355 356end; (* Lexis *) 357