1type pos = (int * int); 2type arg = int; 3open Tokens; 4type lexresult = (svalue,pos) token 5exception Impossible of string; 6fun fatalError s = raise(Impossible s); 7 8fun mkTok f text pos line = 9 (f text, (pos - String.size text, line), (pos, line)); 10 11fun mkMtTok text pos line = 12 ((pos - String.size text, line), (pos, line)); 13 14 15 16(* To translate escape sequences *) 17 18(* No problem that this isn't correct for Macintosh *) 19val char_for_backslash = fn 20(* *) #"n" => #"\010" 21(* *) | #"r" => #"\013" 22(* *) | #"a" => #"\007" 23(* *) | #"b" => #"\008" 24(* *) | #"t" => #"\009" 25(* *) | #"v" => #"\011" 26(* *) | #"f" => #"\012" 27(* *) | c => c 28; 29 30(* The table of keywords *) 31 32val keyword_table = 33List.foldl (fn ((str, tok), t) => Binarymap.insert (t, str, tok)) 34(Binarymap.mkDict String.compare) 35[ 36 ("abstype", ABSTYPE), 37 ("and", AND), 38 ("andalso", ANDALSO), 39 ("as", AS), 40 ("case", CASE), 41 ("datatype", DATATYPE), 42 ("do", DO), 43 ("else", ELSE), 44 ("eqtype", EQTYPE), 45 ("end", END), 46 ("exception", EXCEPTION), 47 ("fn", FN), 48 ("fun", FUN), 49 ("handle", HANDLE), 50 ("if", IF), 51 ("in", IN), 52 ("include", INCLUDE), 53 ("infix", INFIX), 54 ("infixr", INFIXR), 55 ("let", LET), 56 ("local", LOCAL), 57 ("nonfix", NONFIX), 58 ("of", OF), 59 ("op", OP), 60 ("open", OPEN), 61 ("orelse", ORELSE), 62 ("prim_eqtype", PRIM_EQTYPE), 63 ("prim_EQtype", PRIM_REFTYPE), 64 ("prim_type", PRIM_TYPE), 65 ("prim_val", PRIM_VAL), 66 ("raise", RAISE), 67 ("rec", REC), 68 ("sig", SIG), 69 ("signature", SIGNATURE), 70 ("struct", STRUCT), 71 ("structure", STRUCTURE), 72 ("then", THEN), 73 ("type", TYPE), 74 ("val", VAL), 75 ("while", WHILE), 76 ("where", WHERE), 77 ("with", WITH), 78 ("withtype", WITHTYPE), 79 ("#", HASH), 80 ("->", ARROW), 81 ("|", BAR), 82 (":", COLON), 83 ("=>", DARROW), 84 ("=", EQUALS), 85 ("*", STAR) 86]; 87(* 88local (* Make sure that strings are shared (interned); this saves space 89 when writing to disk: *) 90 val intern_table = (Hasht.new 123 : (string, string) Hasht.t); 91in 92 fun share s = 93 case Hasht.peek intern_table s of 94 NONE => (Hasht.insert intern_table s s; s) 95 | SOME s' => s' 96end 97*) 98fun share s = s; 99 100fun mkKeyword text pos line = 101 (Binarymap.find (keyword_table, text)) (mkMtTok text pos line) 102 handle Binarymap.NotFound => ID (mkTok share text pos line); 103 104val savedLexemeStart = ref 0; 105 106fun splitQualId s = 107 let open CharVector 108 val len' = size s - 1 109 fun parse n = 110 if n = 0 then 111 ("", s) 112 else if sub(s, n) = #"." then 113 ( String.extract(s, 0, SOME n), 114 String.extract(s, n + 1, SOME(len' - n)) ) 115 else 116 parse (n-1) 117 in parse len' end; 118 119fun mkQualId text pos line = 120 let val (qual, id) = splitQualId text in 121 if id = "*" then 122 QUAL_STAR (mkTok (fn x => { qual=qual, id=id }) text pos line) 123 else 124 QUAL_ID (mkTok (fn x => { qual=qual, id=id }) text pos line) 125 end 126; 127 128exception LexicalError of string * string * int (* (message, loc1, loc2) *) 129fun lexError msg text pos line = 130 raise (LexicalError (msg, text, line)) 131 132fun eof commentDepth = 133 if not (commentDepth = 0) then 134 lexError "Unclosed comment" "" ~1 (!savedLexemeStart) 135 else 136 EOF ((~1, ~1), (~1, ~1)); 137 138fun constTooLarge msg yytext yypos yylineno = 139( 140 lexError (msg ^ " constant is too large") yytext yypos yylineno 141); 142 143fun string_of_string s = 144 case String.fromString (String.substring (s, 1, String.size s - 2)) of 145 NONE => raise Fail "" 146 | SOME s => s; 147 148fun char_of_string s = 149 case Char.fromString (String.substring (s, 2, String.size s - 3)) of 150 NONE => raise Fail "" 151 | SOME s => s; 152 153fun int_of_string s = 154 case StringCvt.scanString (Int.scan StringCvt.DEC) s of 155 NONE => raise Fail "" 156 | SOME i => i; 157 158fun hex_of_string s = 159 case StringCvt.scanString (Int.scan StringCvt.HEX) s of 160 NONE => raise Fail "" 161 | SOME i => i; 162 163fun word_of_string s = 164 case StringCvt.scanString (Word.scan StringCvt.DEC) s of 165 NONE => raise Fail "" 166 | SOME i => i; 167 168fun word_of_hexstring s = 169 case StringCvt.scanString (Word.scan StringCvt.HEX) s of 170 NONE => raise Fail "" 171 | SOME i => i; 172 173fun real_of_string s = 174 case Real.fromString s of 175 NONE => raise Fail "" 176 | SOME r => r; 177 178%% 179%header (functor SMLLexFun(structure Tokens : SML_TOKENS)); 180%s Comment; 181%full 182%arg (commentDepth); 183%count 184id=[A-Za-z][A-Za-z0-9_']* | [-!%&$#+/:<=>?@~^|*\\]+; 185stringchar=(\\["abtnvfr\\])|(\\[ \t\n\r]+\\)|(\\\^[@-_])|(\\[0-9][0-9][0-9])|[^\\\n\r\127\255\001-\026]; 186 187%% 188<INITIAL>[\ \t\n]+ => 189 ( continue () ); 190<INITIAL>"(*" => 191 ( (savedLexemeStart:=(!yylineno); YYBEGIN Comment; lex 1 ()) ); 192<INITIAL>"*)" => 193 ( lexError "unmatched comment bracket" yytext yypos (!yylineno) ); 194<INITIAL>'[A-Za-z0-9_']+ => 195 ( TYVAR (mkTok (fn x => x) yytext yypos (!yylineno)) ); 196<INITIAL>0 => 197 ( ZDIGIT (mkTok (fn x => 0) yytext yypos (!yylineno)) ); 198<INITIAL>[1-9] => 199 ( NZDIGIT (mkTok int_of_string yytext yypos (!yylineno)) ); 200<INITIAL>0[0-9]+ => 201 ( ZPOSINT2 (mkTok int_of_string yytext yypos (!yylineno)) 202 handle Fail _ => constTooLarge "integer" yytext yypos (!yylineno) ); 203<INITIAL>[1-9][0-9]+ => 204 ( NZPOSINT2 (mkTok int_of_string yytext yypos (!yylineno)) 205 handle Fail _ => constTooLarge "integer" yytext yypos (!yylineno) ); 206<INITIAL>~[0-9]+ => 207 ( NEGINT (mkTok int_of_string yytext yypos (!yylineno)) 208 handle Fail _ => constTooLarge "integer" yytext yypos (!yylineno) ); 209<INITIAL>~?0x[0-9a-fA-F]+ => 210 ( NEGINT (mkTok hex_of_string yytext yypos (!yylineno)) 211 handle Fail _ => constTooLarge "integer" yytext yypos (!yylineno) ); 212<INITIAL>0w[0-9]+ => 213 ( WORD (mkTok word_of_string yytext yypos (!yylineno)) 214 handle Fail _ => constTooLarge "word" yytext yypos (!yylineno) ); 215<INITIAL>0wx[0-9a-fA-F]+ => 216 ( WORD (mkTok word_of_hexstring yytext yypos (!yylineno)) 217 handle Fail _ => constTooLarge "word" yytext yypos (!yylineno) ); 218<INITIAL>~?[0-9]+(\.[0-9]+)?([eE]~?[0-9]+)? => 219 ( REAL (mkTok real_of_string yytext yypos (!yylineno)) 220 handle Fail _ => constTooLarge "real" yytext yypos (!yylineno) ); 221<INITIAL>"{stringchar}*" => 222 ( STRING (mkTok string_of_string yytext yypos (!yylineno)) ); 223<INITIAL>#"{stringchar}" => 224 ( CHAR (mkTok char_of_string yytext yypos (!yylineno)) ); 225<INITIAL>_ => 226 ( UNDERBAR (mkMtTok yytext yypos (!yylineno)) ); 227<INITIAL>, => 228 ( COMMA (mkMtTok yytext yypos (!yylineno)) ); 229<INITIAL>\.\.\. => 230 ( DOTDOTDOT (mkMtTok yytext yypos (!yylineno)) ); 231<INITIAL>\{ => 232 ( LBRACE (mkMtTok yytext yypos (!yylineno)) ); 233<INITIAL>} => 234 ( RBRACE (mkMtTok yytext yypos (!yylineno)) ); 235<INITIAL>\[ => 236 ( LBRACKET (mkMtTok yytext yypos (!yylineno)) ); 237<INITIAL>#\[ => 238 ( HASHLBRACKET (mkMtTok yytext yypos (!yylineno)) ); 239<INITIAL>] => 240 ( RBRACKET (mkMtTok yytext yypos (!yylineno)) ); 241<INITIAL>\( => 242 ( LPAREN (mkMtTok yytext yypos (!yylineno)) ); 243<INITIAL>\) => 244 ( RPAREN (mkMtTok yytext yypos (!yylineno)) ); 245<INITIAL>\; => 246 ( SEMICOLON (mkMtTok yytext yypos (!yylineno)) ); 247<INITIAL>{id} => 248 ( mkKeyword yytext yypos (!yylineno) ); 249<INITIAL>({id}\.)+{id} => 250 ( mkQualId yytext yypos (!yylineno) ); 251<INITIAL>. => 252 ( lexError "ill-formed token" yytext yypos (!yylineno) ); 253 254<Comment>"(*" => 255 ( (lex (commentDepth + 1) ()) ); 256<Comment>"*)" => 257 ( (if commentDepth = 1 then YYBEGIN INITIAL else ()); lex (commentDepth - 1) ()); 258<Comment>[^*()]+ => 259 ( continue () ); 260<Comment>. => 261 ( continue () ); 262 263