144963Sjbstructure Holdep_tokens :> Holdep_tokens = 244963Sjbstruct 344963Sjb 444963Sjbinfix |> 544963Sjbfun x |> f = f x 644963Sjb 744963Sjbexception LEX_ERROR of string 844963Sjbtype result = (string,int) Binarymap.dict 944963Sjb 1044963Sjbdatatype char_reader = CR of {reader : unit -> string, 1144963Sjb current : char option, 1244963Sjb maxpos : int, 1344963Sjb buffer : string, 1444963Sjb closer : unit -> unit, 1544963Sjb pos : int} 1644963Sjb(* invariants: 1744963Sjb * buffer = "" ��� current = NONE ��� maxpos = 0 ��� pos = 0 1844963Sjb * 0 ��� pos ��� maxpos 1944963Sjb*) 2044963Sjb 2144963Sjbfun current (CR {current = c, ...}) = c 2244963Sjbfun make inp close = let 2349439Sdeischen val newbuf = inp() 2444963Sjbin 2544963Sjb if newbuf = "" then CR {pos = 0, maxpos = 0, buffer = newbuf, 2644963Sjb reader = inp, current = NONE, 2744963Sjb closer = close} 2844963Sjb else CR {pos = 0, maxpos = size newbuf - 1, buffer = newbuf, 2944963Sjb reader = inp, current = SOME(String.sub(newbuf, 0)), 3044963Sjb closer = close} 3144963Sjbend 3250476Speterfun fromFile f = let 3344963Sjb val is = TextIO.openIn f 34174112Sdeischenin 35174112Sdeischen make (fn () => TextIO.input is) (fn () => TextIO.closeIn is) 3698976Sdeischenend 3798976Sdeischenfun fromStream is = make (fn () => TextIO.input is) (fn () => ()) 3898976Sdeischenfun fromReader uc = make (fn () => case uc() of NONE => "" | SOME c => str c) 3944963Sjb (fn () => ()) 4044963Sjbfun closeCR (CR {closer,...}) = closer() 4144963Sjb 42174112Sdeischenfun advance (c as CR {pos, buffer, maxpos, reader, current, closer}) = 43103388Smini if pos < maxpos then 4444963Sjb CR { pos = pos + 1, buffer = buffer, reader = reader, 45174112Sdeischen current = SOME(String.sub(buffer, pos + 1)), 46174112Sdeischen maxpos = maxpos, closer = closer } 4775369Sdeischen else if buffer = "" then c 4871581Sdeischen else make reader closer 4944963Sjb 5098976Sdeischendatatype SCR = SCR of {linenum : int, 5144963Sjb filename : string, 5298976Sdeischen colnum : int, 53113658Sdeischen ids : (string,int) Binarymap.dict, 54117300Sdavidxu cr : char_reader} 5544963Sjb 5644963Sjb 5744963Sjbfun SCRfromNamedCR (name, cr) = 5844963Sjb SCR { linenum = 1, colnum = 0, filename = name, 5944963Sjb ids = Binarymap.mkDict String.compare, cr = cr } 6044963Sjb 6144963Sjbfun makeSCR fname = SCRfromNamedCR (fname, fromFile fname) 6244963Sjbfun SCRfromStream (name, is) = SCRfromNamedCR (name, fromStream is) 63119063Sdavidxufun SCRfromReader (name, uc) = SCRfromNamedCR (name, fromReader uc) 64117706Sdavidxu 65116977Sdavidxufun currentChar (SCR{cr,...}) = current cr 66116977Sdavidxufun closeSCR (SCR{cr,...}) = closeCR cr 67116977Sdavidxufun getIDs (SCR{ids,...}) = ids 68117300Sdavidxufun inc (SCR {linenum, filename, colnum, ids, cr}) = 69116977Sdavidxu SCR{linenum = linenum, filename = filename, colnum = colnum + 1, 70113658Sdeischen ids = ids, cr = advance cr} 71117300Sdavidxufun newline (SCR{linenum, filename, colnum, ids, cr}) = 72113658Sdeischen SCR{linenum = linenum + 1, filename = filename, colnum = 0, 73113658Sdeischen ids = ids, cr = advance cr} 74117300Sdavidxufun completeID s (SCR{linenum, filename, colnum, ids, cr}) = 7544963Sjb SCR{linenum = linenum, filename = filename, colnum = colnum, 7644963Sjb ids = case Binarymap.peek(ids,s) of 7744963Sjb NONE => Binarymap.insert(ids, s, linenum) 7844963Sjb | SOME _ => ids, 79 cr = cr} 80 81fun mem x [] = false 82 | mem x (y::ys) = x = y orelse mem x ys 83val SMLsyms = String.explode "!%&$+/:<=>?@~|#*\\-~^" 84val symbools = List.tabulate(256, fn i => mem (Char.chr i) SMLsyms) 85val symb_vec = Vector.fromList symbools 86fun isSMLSym c = Vector.sub(symb_vec, Char.ord c) 87 88fun isSMLAlphaCont c = 89 Char.isAlphaNum c orelse c = #"_" orelse c = #"'" 90 91fun Error(SCR {filename,colnum,linenum,...}, msg) = 92 raise LEX_ERROR (filename^" "^Int.toString linenum ^ "." ^ 93 Int.toString colnum ^ " " ^ msg) 94 95 96 97fun clean_open scr = 98 case currentChar scr of 99 NONE => scr 100 | SOME #"d" => openTermPFX "datatype" 1 (inc scr) 101 | SOME #"e" => opene (inc scr) (* exception, end *) 102 | SOME #"f" => openf (inc scr) (* fun, functor *) 103 | SOME #"i" => openi (inc scr) (* in, infixl, infix, infixr *) 104 | SOME #"l" => openTermPFX "local" 1 (inc scr) 105 | SOME #"n" => openTermPFX "nonfix" 1 (inc scr) 106 | SOME #"o" => modTermPFX true "open" 1 (clean_open, clean_open) (inc scr) 107 | SOME #"p" => openTermPFX "prim_val" 1 (inc scr) 108 | SOME #"s" => opens (inc scr) (* structure, signature *) 109 | SOME #"t" => openTermPFX "type" 1 (inc scr) 110 | SOME #"v" => openTermPFX "val" 1 (inc scr) 111 | SOME #";" => clean_initial (inc scr) 112 | SOME #"(" => openLPAREN (inc scr) 113 | SOME #"#" => openHASH (inc scr) 114 | SOME #"\n" => clean_open (newline scr) 115 | SOME c => if Char.isSpace c then clean_open (inc scr) 116 else if Char.isAlpha c then 117 modAlphaID true clean_open ("", [c]) (scr |> inc) 118 else if isSMLSym c then 119 modSymID true clean_open [c] (scr |> inc) 120 else Error(scr, "Bad character >"^str c^"< after open") 121and openTermPFX kstr c scr = 122 modTermPFX true kstr c (clean_initial, clean_open) scr 123and openOnKWord kstr scr = OnKWord true kstr (clean_initial, clean_open) scr 124and opene scr = 125 case currentChar scr of 126 NONE => scr |> completeID "e" 127 | SOME #"n" => openTermPFX "end" 2 (inc scr) 128 | SOME #"x" => openTermPFX "exception" 2 (inc scr) 129 | SOME c => extend_openAlpha "e" c scr 130and openf scr = 131 case currentChar scr of 132 NONE => scr |> completeID "f" 133 | SOME #"u" => openfu (inc scr) 134 | SOME c => extend_openAlpha "f" c scr 135and openfu scr = 136 case currentChar scr of 137 NONE => scr |> completeID "fu" 138 | SOME #"n" => openfun (inc scr) 139 | SOME c => extend_openAlpha "fu" c scr 140and openfun scr = 141 case currentChar scr of 142 NONE => scr 143 | SOME #"c" => openTermPFX "functor" 4 (inc scr) 144 | SOME c => openOnKWord "fun" scr 145and openi scr = 146 case currentChar scr of 147 NONE => scr |> completeID "i" 148 | SOME #"n" => openin (inc scr) 149 | SOME c => extend_openAlpha "i" c scr 150and openin scr = 151 case currentChar scr of 152 NONE => Error(scr, "Don't expect to see 'in'-EOF") 153 | SOME #"f" => scr |> inc |> openinf 154 | SOME c => openOnKWord "in" scr 155and openinf scr = 156 case currentChar scr of 157 NONE => scr |> completeID "inf" 158 | SOME #"i" => scr |> inc |> openinfi 159 | SOME c => extend_openAlpha "inf" c scr 160and openinfi scr = 161 case currentChar scr of 162 NONE => scr |> completeID "infi" 163 | SOME #"x" => scr |> inc |> openinfix 164 | SOME c => extend_openAlpha "infi" c scr 165and openinfix scr = 166 case currentChar scr of 167 NONE => Error(scr, "Don't expect to see 'infix'-EOF") 168 | SOME #"l" => openOnKWord "infixl" (inc scr) 169 | SOME #"r" => openOnKWord "infixr" (inc scr) 170 | SOME c => openOnKWord "infix" scr 171and opens scr = 172 case currentChar scr of 173 NONE => scr |> completeID "s" 174 | SOME #"i" => openTermPFX "signature" 2 (inc scr) 175 | SOME #"t" => openTermPFX "structure" 2 (inc scr) 176 | SOME c => extend_openAlpha "s" c scr 177and extend_openAlpha pfx c scr = 178 if Char.isSpace c then clean_open (scr |> inc |> completeID pfx) 179 else if isSMLAlphaCont c then 180 modAlphaID true clean_open (pfx, [c]) (scr |> inc) 181 else if isSMLSym c then 182 modSymID true clean_open [c] (scr |> inc |> completeID pfx) 183 else Error(scr, "Bad character >"^str c^"< after 'open'") 184and openLPAREN scr = 185 case currentChar scr of 186 NONE => Error(scr, "Don't expect to see '('-EOF") 187 | SOME #"*" => COMMENT clean_open (inc scr) 188 | SOME c => Error(scr, "Don't expect to see '("^str c^"' after 'open'") 189and openHASH scr = 190 case currentChar scr of 191 NONE => Error(scr, "Don't expect to see 'open'-'#'") 192 | SOME c => if isSMLSym c then 193 modSymID true clean_open [c,#"#"] (scr |> inc) 194 else Error(scr, "Don't expect to see 'open'-'#'") 195and openQID0 scr = (* seen the dot *) 196 case currentChar scr of 197 NONE => Error(scr, "'.'-EOF unexpected") 198 | SOME c => if Char.isAlpha c then openQIDalpha (inc scr) 199 else if isSMLSym c then openQIDsym (inc scr) 200 else Error(scr, "'."^str c^" unexpected") 201and openQIDalpha scr = 202 case currentChar scr of 203 NONE => scr 204 | SOME #"." => openQID0 (inc scr) 205 | SOME c => if isSMLAlphaCont c then openQIDalpha (inc scr) 206 else clean_open scr 207and openQIDsym scr = 208 case currentChar scr of 209 NONE => scr 210 | SOME #"." => openQID0 (inc scr) 211 | SOME c => if isSMLSym c then openQIDsym (inc scr) 212 else clean_open scr 213and modTermPFX dotok kword numseen (seenk, notk) scr = let 214 fun get() = String.extract(kword, 0, SOME numseen) 215in 216 case currentChar scr of 217 NONE => notk (scr |> completeID (get())) 218 | SOME c => if c = String.sub(kword, numseen) then 219 if numseen + 1 = size kword then 220 OnKWord dotok kword (seenk,notk) (inc scr) 221 else 222 modTermPFX dotok kword (numseen + 1) (seenk,notk) (inc scr) 223 else if isSMLAlphaCont c then 224 modAlphaID dotok notk (get(), [c]) (inc scr) 225 else notk (scr |> completeID (get())) 226end 227and OnKWord dotok kword (seenk,notseenk) scr = 228 case currentChar scr of 229 NONE => seenk scr 230 | SOME c => if isSMLAlphaCont c then 231 modAlphaID dotok notseenk (kword, [c]) (inc scr) 232 else seenk scr 233and modAlphaID dotok k (base,cs) scr = 234 case currentChar scr of 235 NONE => scr |> completeID (base ^ implode (List.rev cs)) 236 | SOME #"." => 237 if dotok then 238 openQID0 (scr |> inc |> completeID (base ^ implode (List.rev cs))) 239 else Error(scr, "Didn't expect to see qualified ident here") 240 | SOME c => 241 if isSMLAlphaCont c then modAlphaID dotok k (base,c::cs) (inc scr) 242 else k (scr |> completeID (base ^ implode (List.rev cs))) 243and modSymID dotok k cs scr = 244 case currentChar scr of 245 NONE => scr |> completeID (implode (List.rev cs)) 246 | SOME #"." => 247 if dotok then 248 openQID0 (scr |> inc |> completeID (implode (List.rev cs))) 249 else Error(scr, "Didn't expect to see qualified ident here") 250 | SOME c => if isSMLSym c then modSymID dotok k (c::cs) (inc scr) 251 else k (scr |> completeID (implode (List.rev cs))) 252and includeLPAR scr = 253 case currentChar scr of 254 NONE => Error(scr, "Don't expect 'include'-'('-EOF") 255 | SOME #"*" => COMMENT clean_include (inc scr) 256 | SOME c => Error(scr, "Don't expect 'include'-'('-'"^str c^"'") 257and clean_include scr = 258 case currentChar scr of 259 NONE => scr 260 | SOME #"d" => 261 modTermPFX false "datatype" 1 (clean_initial, clean_include) (inc scr) 262 | SOME #"e" => includee (inc scr) 263 | SOME #"s" => 264 modTermPFX false "structure" 1 (clean_initial, clean_include) (inc scr) 265 | SOME #"t" => 266 modTermPFX false "type" 1 (clean_initial, clean_include) (inc scr) 267 | SOME #"v" => 268 modTermPFX false "val" 1 (clean_initial, clean_include) (inc scr) 269 | SOME #"w" => 270 modTermPFX false "where" 1 (clean_initial, clean_include) (inc scr) 271 | SOME #"\n" => clean_include (newline scr) 272 | SOME #"(" => includeLPAR (inc scr) 273 | SOME #";" => clean_initial (inc scr) 274 | SOME c => if Char.isSpace c then clean_include (inc scr) 275 else if Char.isAlpha c then 276 modAlphaID false clean_include ("", [c]) (inc scr) 277 else if isSMLSym c then 278 modSymID false clean_include [c] (inc scr) 279 else Error(scr, "Bad character >"^str c^"< after 'include'") 280and includee scr = 281 case currentChar scr of 282 NONE => scr |> completeID "e" 283 | SOME #"\n" => clean_include (scr |> newline |> completeID "e") 284 | SOME #"n" => 285 modTermPFX false "end" 2 (clean_initial, clean_include) (inc scr) 286 | SOME #"x" => 287 modTermPFX false "exception" 2 (clean_initial, clean_include) (inc scr) 288 | SOME c => if Char.isSpace c then clean_include (scr |> inc |> completeID "e") 289 else if isSMLSym c then 290 modSymID false clean_include [c] (scr |> inc |> completeID "e") 291 else if isSMLAlphaCont c then 292 modAlphaID false clean_include ("e", [c]) (inc scr) 293 else Error(scr, "Bad character >"^str c^"< after 'include'") 294and clean_initial scr = 295 case currentChar scr of 296 NONE => scr 297 | SOME #"i" => initialAlphaKWordPFX "include" 1 clean_include (inc scr) 298 | SOME #"o" => initialAlphaKWordPFX "open" 1 clean_open (inc scr) 299 | SOME #"\n" => clean_initial (newline scr) 300 | SOME #"(" => initialLPAREN (inc scr) 301 | SOME #"\"" => STRING (inc scr) 302 | SOME c => 303 if Char.isSpace c then clean_initial (inc scr) 304 else if Char.isAlpha c then initialAlphaID("", [c]) (inc scr) 305 else if isSMLSym c then initialSymID("", [c]) (inc scr) 306 else clean_initial (inc scr) 307and initialAlphaID (pfx, cs) scr = 308 case currentChar scr of 309 NONE => scr 310 | SOME #"." => 311 initialQID0 (scr |> inc 312 |> completeID (pfx ^ String.implode (List.rev cs))) 313 | SOME #"\n" => clean_initial (newline scr) 314 | SOME c => if isSMLAlphaCont c then initialAlphaID (pfx, c::cs) (inc scr) 315 else clean_initial scr 316and initialSymID (pfx, cs) scr = 317 case currentChar scr of 318 NONE => scr 319 | SOME #"." => initialQID0 (scr |> inc |> completeID (pfx ^ String.implode (List.rev cs))) 320 | SOME #"\n" => clean_initial (newline scr) 321 | SOME c => if isSMLSym c then initialSymID (pfx, c::cs) (inc scr) 322 else clean_initial scr 323and initialLPAREN scr = 324 case currentChar scr of 325 NONE => Error(scr, "'('-EOF unexpected") 326 | SOME #"*" => COMMENT clean_initial (inc scr) 327 | SOME #"(" => initialLPAREN (inc scr) 328 | _ => clean_initial scr 329and COMMENT k scr = 330 case currentChar scr of 331 NONE => Error(scr, "Unterminated comment") 332 | SOME #"(" => COMMENTlpar k (inc scr) 333 | SOME #"*" => COMMENTast k (inc scr) 334 | SOME #"\n" => COMMENT k (newline scr) 335 | _ => COMMENT k (inc scr) 336and COMMENTlpar k scr = 337 case currentChar scr of 338 NONE => Error(scr, "Unterminated comment") 339 | SOME #"(" => COMMENTlpar k (inc scr) 340 | SOME #"*" => COMMENT (COMMENT k) (inc scr) 341 | SOME #"\n" => COMMENT k (newline scr) 342 | _ => COMMENT k (inc scr) 343and COMMENTast k scr = 344 case currentChar scr of 345 NONE => Error(scr, "Unterminated comment") 346 | SOME #"*" => COMMENTast k (inc scr) 347 | SOME #")" => k (inc scr) 348 | SOME #"\n" => COMMENT k (newline scr) 349 | _ => COMMENT k (inc scr) 350and initialQID0 scr = (* have just seen '.' *) 351 case currentChar scr of 352 NONE => Error(scr, "'.'-EOF unexpected") 353 | SOME c => if Char.isSpace c then Error(scr, "'.'-whitespace unexpected") 354 else if Char.isAlpha c then 355 initialAlphaQID (inc scr) 356 else if isSMLSym c then 357 initialSymQID (inc scr) 358 else Error(scr, "Bad character >"^str c^"< after qualifying '.'") 359and initialAlphaQID scr = (* have seen some characters of ID *) 360 case currentChar scr of 361 NONE => scr 362 | SOME #"." => initialQID0 (inc scr) 363 | SOME c => if isSMLAlphaCont c then initialAlphaQID (inc scr) 364 else clean_initial scr 365and initialSymQID scr = (* have seen some characters of ID *) 366 case currentChar scr of 367 NONE => scr 368 | SOME #"." => initialQID0 (inc scr) 369 | SOME c => if isSMLSym c then initialSymQID (inc scr) 370 else clean_initial scr 371and STRING scr = (* have seen the quote *) 372 case currentChar scr of 373 NONE => Error(scr, "Unterminated string literal") 374 | SOME #"\"" => clean_initial (inc scr) 375 | SOME #"\\" => STRINGslash (inc scr) 376 | SOME #"\n" => Error(scr, "Unescaped newline in string literal") 377 | _ => STRING (inc scr) 378and STRINGcaret scr = 379 case currentChar scr of 380 NONE => Error(scr, "Unterminated string literal") 381 | SOME c => let val i = Char.ord c 382 in 383 if i < 64 orelse i >= 96 then 384 Error(scr, "Illegal caret-escape in string literal") 385 else STRING (inc scr) 386 end 387and STRINGslash scr = 388 case currentChar scr of 389 NONE => Error(scr, "Unterminated string literal") 390 | SOME #"\n" => STRINGelidews (newline scr) 391 | SOME #"\\" => STRING (inc scr) 392 | SOME #"\"" => STRING (inc scr) 393 | SOME #"^" => STRINGcaret (inc scr) 394 | SOME #"a" => STRING (inc scr) 395 | SOME #"b" => STRING (inc scr) 396 | SOME #"f" => STRING (inc scr) 397 | SOME #"n" => STRING (inc scr) 398 | SOME #"r" => STRING (inc scr) 399 | SOME #"t" => STRING (inc scr) 400 | SOME #"v" => STRING (inc scr) 401 | SOME c => if Char.isDigit c then 402 STRINGslashdigit 1 (inc scr) 403 else if Char.isSpace c then 404 STRINGelidews (inc scr) 405 else Error(scr, "Illegal backslash escape >" ^ str c ^ 406 "< in string literal") 407and STRINGelidews scr = 408 case currentChar scr of 409 NONE => Error(scr, "Unterminated string literal") 410 | SOME #"\\" => STRING (inc scr) 411 | SOME #"\n" => STRINGelidews (newline scr) 412 | SOME c => if Char.isSpace c then STRINGelidews (inc scr) 413 else Error(scr, "Illegal char >" ^ str c ^ 414 "< in string \\...\\ elision") 415and STRINGslashdigit cnt scr = 416 case currentChar scr of 417 NONE => Error(scr, "Unterminated string literal") 418 | SOME c => if Char.isDigit c then 419 if cnt = 2 then STRING (inc scr) 420 else STRINGslashdigit (cnt + 1) (inc scr) 421 else Error(scr, "Illegal backslash escape in string literal") 422and initialAlphaKWordPFX kword numseen k scr = 423 case currentChar scr of 424 NONE => scr 425 | SOME c => 426 if c = String.sub(kword, numseen) then 427 if numseen + 1 = size kword then 428 initialAlphaKWord kword k (inc scr) 429 else 430 initialAlphaKWordPFX kword (numseen + 1) k (inc scr) 431 else if isSMLAlphaCont c then 432 initialAlphaID (String.extract(kword, 0, SOME numseen), [c]) 433 (inc scr) 434 else clean_initial scr 435and initialAlphaKWord kword k scr = 436 case currentChar scr of 437 NONE => k scr 438 | SOME c => if isSMLAlphaCont c then 439 initialAlphaID (kword, [c]) (inc scr) 440 else k scr 441 442fun scrdeps scr = 443 getIDs (clean_initial scr) before 444 closeSCR scr 445 446fun file_deps fname = scrdeps (makeSCR fname) 447fun stream_deps p = scrdeps (SCRfromStream p) 448fun reader_deps p = scrdeps (SCRfromReader p) 449 450end (* struct *) 451