1(* Title: HOL/TPTP/TPTP_Parser/tptp.lex 2 Author: Nik Sultana, Cambridge University Computer Laboratory 3 4 Notes: 5 * Omit %full in definitions to restrict alphabet to ascii. 6 * Could include %posarg to ensure that we'd start counting character positions 7 from 0, but it would punish performance. 8 * %s AF F COMMENT; -- could improve by making stateful. 9 10 Acknowledgements: 11 * Geoff Sutcliffe for help with TPTP. 12 * Timothy Bourke for his tips on getting ML-Yacc working with Poly/ML. 13 * An early version of this was ported from the specification shipped with 14 Leo-II, written by Frank Theiss. 15 * Some boilerplate bits were taken from the ml-yacc/ml-lex manual by Roger Price. 16 * Jasmin Blanchette and Makarius Wenzel for help with Isabelle integration. 17*) 18 19structure T = Tokens 20type pos = int (* Position in file *) 21type lineNo = int 22type svalue = T.svalue 23type ('a,'b) token = ('a,'b) T.token 24type lexresult = (svalue,pos) token 25type lexarg = string 26type arg = lexarg 27val col = ref 0; 28val linep = ref 1; (* Line pointer *) 29val eolpos = ref 0; 30 31val badCh : string * string * int * int -> unit = fn 32 (file_name, bad, line, col) => 33 TextIO.output(TextIO.stdOut, file_name ^ "[" 34 ^ Int.toString line ^ "." ^ Int.toString col 35 ^ "] Invalid character \"" ^ bad ^ "\"\n"); 36 37val eof = fn file_name => 38 let 39 val result = T.EOF (!linep,!col); 40 val _ = linep := 0; 41 in result end 42(*here could check whether file ended prematurely: 43 see if have open brackets, or if we're in some state other than INITIAL*) 44 45val count_commentlines : string -> unit = fn str => 46 let 47 val str' = String.explode str 48 val newlines = List.filter (fn x => x = #"\n") str' 49 in linep := (!linep) + (List.length newlines) end 50 51%% 52%header (functor TPTPLexFun(structure Tokens: TPTP_TOKENS)); 53%arg (file_name:string); 54 55percentage_sign = "%"; 56double_quote = ["]; 57do_char = ([^"\\]|[\\]["\\]); 58single_quote = [']; 59sq_char = ([^'\\]|[\\]['\\]); 60sign = [+-]; 61dot = [.]; 62exponent = [Ee]; 63slash = [/]; 64zero_numeric = [0]; 65non_zero_numeric = [1-9]; 66numeric = [0-9]; 67lower_alpha = [a-z]; 68upper_alpha = [A-Z]; 69alpha_numeric = ({lower_alpha}|{upper_alpha}|{numeric}|_); 70dollar = \$; 71printable_char = .; 72viewable_char = [.\n]; 73 74dot_decimal = {dot}{numeric}+; 75 76ddollar = \$\$; 77unsigned_integer = {numeric}+; 78divide = [/]; 79 80signed_integer = {sign}{unsigned_integer}; 81exp_suffix = {exponent}({signed_integer}|{unsigned_integer}); 82real = ({signed_integer}|{unsigned_integer}){dot_decimal}{exp_suffix}?; 83rational = ({signed_integer}|{unsigned_integer}){divide}{unsigned_integer}; 84 85lower_word = {lower_alpha}{alpha_numeric}*; 86upper_word = {upper_alpha}{alpha_numeric}*; 87dollar_dollar_word = {ddollar}{lower_word}; 88dollar_word = {dollar}{lower_word}; 89dollar_underscore = {dollar}_; 90 91distinct_object = {double_quote}{do_char}+{double_quote}; 92 93ws = ([\ ]|[\t]); 94single_quoted = {single_quote}{sq_char}+{single_quote}; 95 96system_comment_one = [%][\ ]*{ddollar}[_]*; 97system_comment_multi = [/][\*][\ ]*(ddollar)([^\*]*[\*][\*]*[^/\*])*[^\*]*[\*][\*]*[/]; 98system_comment = (system_comment_one)|(system_comment_multi); 99 100comment_line = {percentage_sign}[^\n]*; 101comment_block = [/][\*]([^\*]*[\*]+[^/\*])*[^\*]*[\*]+[/]; 102comment = ({comment_line}|{comment_block})+; 103 104eol = ("\013\010"|"\010"|"\013"); 105 106%% 107 108{ws}* => (col:=(!col)+size yytext; continue () ); 109 110{eol} => (linep:=(!linep)+1; 111 eolpos:=yypos+size yytext; continue ()); 112 113"&" => (col:=yypos-(!eolpos); T.AMPERSAND(!linep,!col)); 114 115"@+" => (col:=yypos-(!eolpos); T.INDEF_CHOICE(!linep,!col)); 116"@-" => (col:=yypos-(!eolpos); T.DEFIN_CHOICE(!linep,!col)); 117 118"!!" => (col:=yypos-(!eolpos); T.OPERATOR_FORALL(!linep,!col)); 119"??" => (col:=yypos-(!eolpos); T.OPERATOR_EXISTS(!linep,!col)); 120 121"@" => (col:=yypos-(!eolpos); T.AT_SIGN(!linep,!col)); 122"^" => (col:=yypos-(!eolpos); T.CARET(!linep,!col)); 123 124":" => (col:=yypos-(!eolpos); T.COLON(!linep,!col)); 125"," => (col:=yypos-(!eolpos); T.COMMA(!linep,!col)); 126"=" => (col:=yypos-(!eolpos); T.EQUALS(!linep,!col)); 127"!" => (col:=yypos-(!eolpos); T.EXCLAMATION(!linep,!col)); 128":=" => (col:=yypos-(!eolpos); T.LET(!linep,!col)); 129">" => (col:=yypos-(!eolpos); T.ARROW(!linep,!col)); 130 131"<=" => (col:=yypos-(!eolpos); T.FI(!linep,!col)); 132"<=>" => (col:=yypos-(!eolpos); T.IFF(!linep,!col)); 133"=>" => (col:=yypos-(!eolpos); T.IMPLIES(!linep,!col)); 134"[" => (col:=yypos-(!eolpos); T.LBRKT(!linep,!col)); 135"(" => (col:=yypos-(!eolpos); T.LPAREN(!linep,!col)); 136"->" => (col:=yypos-(!eolpos); T.MAP_TO(!linep,!col)); 137"--" => (col:=yypos-(!eolpos); T.MMINUS(!linep,!col)); 138"~&" => (col:=yypos-(!eolpos); T.NAND(!linep,!col)); 139"!=" => (col:=yypos-(!eolpos); T.NEQUALS(!linep,!col)); 140"<~>" => (col:=yypos-(!eolpos); T.XOR(!linep,!col)); 141"~|" => (col:=yypos-(!eolpos); T.NOR(!linep,!col)); 142"." => (col:=yypos-(!eolpos); T.PERIOD(!linep,!col)); 143"++" => (col:=yypos-(!eolpos); T.PPLUS(!linep,!col)); 144"?" => (col:=yypos-(!eolpos); T.QUESTION(!linep,!col)); 145"]" => (col:=yypos-(!eolpos); T.RBRKT(!linep,!col)); 146")" => (col:=yypos-(!eolpos); T.RPAREN(!linep,!col)); 147"~" => (col:=yypos-(!eolpos); T.TILDE(!linep,!col)); 148"|" => (col:=yypos-(!eolpos); T.VLINE(!linep,!col)); 149 150{distinct_object} => (col:=yypos-(!eolpos); T.DISTINCT_OBJECT(yytext,!linep,!col)); 151{rational} => (col:=yypos-(!eolpos); T.RATIONAL(yytext,!linep,!col)); 152{real} => (col:=yypos-(!eolpos); T.REAL(yytext,!linep,!col)); 153{signed_integer} => (col:=yypos-(!eolpos); T.SIGNED_INTEGER(yytext,!linep,!col)); 154{unsigned_integer} => (col:=yypos-(!eolpos); T.UNSIGNED_INTEGER(yytext,!linep,!col)); 155{dot_decimal} => (col:=yypos-(!eolpos); T.DOT_DECIMAL(yytext,!linep,!col)); 156{single_quoted} => (col:=yypos-(!eolpos); T.SINGLE_QUOTED(yytext,!linep,!col)); 157{upper_word} => (col:=yypos-(!eolpos); T.UPPER_WORD(yytext,!linep,!col)); 158{comment} => (col:=yypos-(!eolpos); count_commentlines yytext;T.COMMENT(yytext,!linep,!col)); 159 160"thf" => (col:=yypos-(!eolpos); T.THF(!linep,!col)); 161"fof" => (col:=yypos-(!eolpos); T.FOF(!linep,!col)); 162"cnf" => (col:=yypos-(!eolpos); T.CNF(!linep,!col)); 163"tff" => (col:=yypos-(!eolpos); T.TFF(!linep,!col)); 164"include" => (col:=yypos-(!eolpos); T.INCLUDE(!linep,!col)); 165 166"$thf" => (col:=yypos-(!eolpos); T.DTHF(!linep,!col)); 167"$fof" => (col:=yypos-(!eolpos); T.DFOF(!linep,!col)); 168"$cnf" => (col:=yypos-(!eolpos); T.DCNF(!linep,!col)); 169"$fot" => (col:=yypos-(!eolpos); T.DFOT(!linep,!col)); 170"$tff" => (col:=yypos-(!eolpos); T.DTFF(!linep,!col)); 171 172"$ite_f" => (col:=yypos-(!eolpos); T.ITE_F(!linep,!col)); 173"$ite_t" => (col:=yypos-(!eolpos); T.ITE_T(!linep,!col)); 174"$let_tf" => (col:=yypos-(!eolpos); T.LET_TF(!linep,!col)); 175"$let_ff" => (col:=yypos-(!eolpos); T.LET_FF(!linep,!col)); 176"$let_ft" => (col:=yypos-(!eolpos); T.LET_FT(!linep,!col)); 177"$let_tt" => (col:=yypos-(!eolpos); T.LET_TT(!linep,!col)); 178 179{lower_word} => (col:=yypos-(!eolpos); T.LOWER_WORD(yytext,!linep,!col)); 180{dollar_word} => (col:=yypos-(!eolpos); T.DOLLAR_WORD(yytext,!linep,!col)); 181{dollar_underscore} => (col:=yypos-(!eolpos); T.DOLLAR_WORD(yytext,!linep,!col)); 182{dollar_dollar_word} => (col:=yypos-(!eolpos); T.DOLLAR_DOLLAR_WORD(yytext,!linep,!col)); 183 184"+" => (col:=yypos-(!eolpos); T.PLUS(!linep,!col)); 185"*" => (col:=yypos-(!eolpos); T.TIMES(!linep,!col)); 186"-->" => (col:=yypos-(!eolpos); T.GENTZEN_ARROW(!linep,!col)); 187"<<" => (col:=yypos-(!eolpos); T.SUBTYPE(!linep,!col)); 188"!>" => (col:=yypos-(!eolpos); T.DEP_PROD(!linep,!col)); 189"?*" => (col:=yypos-(!eolpos); T.DEP_SUM(!linep,!col)); 190 191":-" => (col:=yypos-(!eolpos); T.LET_TERM(!linep,!col)); 192