1 2open HolKernel boolLib bossLib Parse; 3open wordsTheory stringTheory stringLib listTheory stringSimps listLib simpLib; 4open decoderTheory bit_listTheory opmonTheory; 5 6open x86_astTheory; 7 8val _ = new_theory "x86_decoder"; 9val _ = ParseExtras.temp_loose_equality() 10 11 12(* ---------------------------------------------------------------------------------- *> 13 14 A decoder for x86 instructions is defined and at the end pre-evaliuated for 15 fast execution with EVAL. 16 17<* ---------------------------------------------------------------------------------- *) 18 19(* reading hex strings to bit lists *) 20 21val STR_SPACE_AUX_def = Define ` 22 (STR_SPACE_AUX n "" = "") /\ 23 (STR_SPACE_AUX n (STRING c s) = 24 if n = 0 then STRING #" " (STRING c (STR_SPACE_AUX 1 s)) 25 else STRING c (STR_SPACE_AUX (n-1) s))`; 26 27val bytebits_def = Define ` 28 bytebits = hex2bits o STR_SPACE_AUX 2`; 29 30(* interprete the IA-32 manuals' syntax *) 31 32val check_opcode_def = Define ` 33 check_opcode s = 34 let y = (n2bits 3 o char2num o HD o TL o EXPLODE) s in 35 assert (\g. g "Reg/Opcode" = y)`; 36 37val read_SIB_def = Define ` 38 read_SIB = 39 assign_drop "Base" 3 >> assign_drop "Index" 3 >> assign_drop "Scale" 2 >> 40 option_try [ 41 assert (\g. (g "Mod" = [F;F]) /\ (g "Base" = [T;F;T])) >> assign_drop "disp32" 32; 42 assert (\g. (g "Mod" = [F;F]) /\ ~(g "Base" = [T;F;T])); 43 assert (\g. (g "Mod" = [T;F])) >> assign_drop "disp8" 8; 44 assert (\g. (g "Mod" = [F;T])) >> assign_drop "disp32" 32]`; 45 46val read_ModRM_def = Define ` 47 read_ModRM = 48 assign_drop "R/M" 3 >> assign_drop "Reg/Opcode" 3 >> assign_drop "Mod" 2 >> 49 option_try [ 50 assert (\g. (g "Mod" = [T;T])); 51 assert (\g. (g "Mod" = [F;F]) /\ ~(g "R/M" = [F;F;T]) /\ ~(g "R/M" = [T;F;T])); 52 assert (\g. (g "Mod" = [F;F]) /\ (g "R/M" = [T;F;T])) >> assign_drop "disp32" 32; 53 assert (\g. (g "Mod" = [T;F]) /\ ~(g "R/M" = [F;F;T])) >> assign_drop "disp8" 8; 54 assert (\g. (g "Mod" = [F;T]) /\ ~(g "R/M" = [F;F;T])) >> assign_drop "disp32" 32; 55 assert (\g. ~(g "Mod" = [T;T]) /\ (g "R/M" = [F;F;T])) >> read_SIB]`; 56 57val is_hex_add_def = Define ` 58 is_hex_add x = ((IMPLODE o DROP 2 o EXPLODE) x = "+rd")`; 59 60val process_hex_add_def = Define ` 61 process_hex_add name = 62 let n = (hex2num o IMPLODE o TAKE 2 o EXPLODE) name in 63 option_try [drop_eq (n2bits 8 (n+0)) >> assign "reg" (n2bits 3 0); 64 drop_eq (n2bits 8 (n+1)) >> assign "reg" (n2bits 3 1); 65 drop_eq (n2bits 8 (n+2)) >> assign "reg" (n2bits 3 2); 66 drop_eq (n2bits 8 (n+3)) >> assign "reg" (n2bits 3 3); 67 drop_eq (n2bits 8 (n+4)) >> assign "reg" (n2bits 3 4); 68 drop_eq (n2bits 8 (n+5)) >> assign "reg" (n2bits 3 5); 69 drop_eq (n2bits 8 (n+6)) >> assign "reg" (n2bits 3 6); 70 drop_eq (n2bits 8 (n+7)) >> assign "reg" (n2bits 3 7)]`; 71 72val x86_match_step_def = Define ` 73 x86_match_step name = 74 if is_hex name /\ (STRLEN name = 2) then (* opcode e.g. FE, 83 and 04 *) 75 drop_eq (n2bits 8 (hex2num name)) 76 else if is_hex_add name then (* opcode + rd, e.g. F8+rd *) 77 process_hex_add name 78 else if name = "1" then (* constant 1 *) 79 assign_drop name 0 80 else if MEM name ["ib";"cb";"rel8";"imm8"] then (* 8-bit immediate or address *) 81 assign_drop name 8 82 else if MEM name ["iw";"cw";"imm16"] then (* 16-bit immediate or address *) 83 assign_drop name 16 84 else if MEM name ["id";"cd";"rel32";"imm32"] then (* 32-bit immediate or address *) 85 assign_drop name 32 86 else if name = "/r" then (* normal reg + reg/mem *) 87 read_ModRM 88 else if MEM name ["/0";"/1";"/2";"/3";"/4";"/5";"/6";"/7"] then (* opcode extension in ModRM *) 89 read_ModRM >> check_opcode name 90 else 91 option_fail`; 92 93(* syntax classes *) 94 95val x86_binop_def = Define `x86_binop = 96 [("ADC",Xadc); ("ADD",Xadd); ("AND",Xand); ("CMP",Xcmp); 97 ("OR",Xor); ("SAR",Xsar); ("SHR",Xshr); ("SHL",Xshl); 98 ("SBB",Xsbb); ("SUB",Xsub); ("TEST",Xtest); ("XOR",Xxor)]`; 99 100val x86_monop_def = Define `x86_monop = 101 [("DEC",Xdec); ("INC",Xinc); ("NOT",Xnot); ("NEG",Xneg)]`; 102 103(* x86 - decoder *) 104 105val b2reg_def = Define ` 106 b2reg g name = (EL (bits2num (g name)) [EAX;ECX;EDX;EBX;ESP;EBP;ESI;EDI]):Xreg`; 107 108val decode_Xr32_def = Define ` 109 decode_Xr32 g name = 110 if name = "EAX" then EAX else 111 if g "reg" = [] then b2reg g "Reg/Opcode" else b2reg g "reg"`; 112 113val decode_SIB_def = Define ` 114 decode_SIB g = 115 let scaled_index = (if g "Index" = [F;F;T] then NONE else SOME (b2w g "Scale",b2reg g "Index")) in 116 if g "Base" = [T;F;T] then (* the special case indicated by "[*]" *) 117 if g "Mod" = [F;F] then Xm scaled_index NONE (b2w g "disp32") else 118 if g "Mod" = [T;F] then Xm scaled_index (SOME EBP) (b2w g "disp8") else 119 (* g "Mod" = [F;T] *) Xm scaled_index (SOME EBP) (b2w g "disp32") 120 else (* normal cases, just need to read off the correct displacement *) 121 let disp = (if (g "Mod" = [T;F]) then sw2sw ((b2w g "disp8"):word8) else b2w g "disp32") in 122 let disp = (if (g "Mod" = [F;F]) then 0w else disp) in 123 Xm scaled_index (SOME (b2reg g "Base")) disp`; 124 125val decode_Xrm32_def = Define ` (* sw2sw = sign-extension *) 126 decode_Xrm32 g name = 127 if name = "EAX" then Xr EAX else 128 if (g "Mod" = [F;F]) /\ (g "R/M" = [T;F;T]) then Xm NONE NONE (b2w g "disp32") else 129 if ~(g "Mod" = [T;T]) /\ (g "R/M" = [F;F;T]) then decode_SIB g else 130 if (g "Mod" = [F;F]) then Xm NONE (SOME (b2reg g "R/M")) 0w else 131 if (g "Mod" = [T;F]) then Xm NONE (SOME (b2reg g "R/M")) (sw2sw:word8->word32 (b2w g "disp8")) else 132 if (g "Mod" = [F;T]) then Xm NONE (SOME (b2reg g "R/M")) (b2w g "disp32") else 133 if (g "Mod" = [T;T]) then Xr (b2reg g "R/M") else Xr (b2reg g "reg") `; 134 135val decode_Xconst_def = Define ` 136 decode_Xconst name g = 137 if name = "imm8" then sw2sw:word8 ->word32 (b2w g "ib") else 138 if name = "rel8" then sw2sw:word8 ->word32 (b2w g "cb") else 139 if name = "imm16" then sw2sw:word16->word32 (b2w g "iw") else 140 if name = "imm32" then b2w g "id" else 141 if name = "rel32" then b2w g "cd" else 142 if name = "1" then 1w else 0w`; 143 144val decode_Xdest_src_def = Define ` 145 decode_Xdest_src g dest src = 146 if MEM src ["r32";"r8"] then 147 Xrm_r (decode_Xrm32 g dest) (decode_Xr32 g src) 148 else if MEM src ["r/m32";"r/m8"] then 149 Xr_rm (decode_Xr32 g dest) (decode_Xrm32 g src) 150 else if src = "m" then 151 Xr_rm (decode_Xr32 g dest) (decode_Xrm32 g src) 152 else 153 Xrm_i (decode_Xrm32 g dest) (decode_Xconst src g)`; 154 155val decode_Xconst_or_zero_def = Define ` 156 decode_Xconst_or_zero ts g = 157 if LENGTH ts < 2 then 0w else decode_Xconst (EL 1 ts) g`; 158 159val decode_Ximm_rm_def = Define ` 160 decode_Ximm_rm ts g = 161 if MEM (EL 1 ts) ["r/m32";"r32";"r/m8";"r8"] 162 then Xi_rm (decode_Xrm32 g (EL 1 ts)) 163 else Xi (decode_Xconst (EL 1 ts) g)`; 164 165val x86_select_op_def = Define ` 166 x86_select_op name list = SND (HD (FILTER (\x. FST x = name) list))`; 167 168val x86_syntax_binop = `` 169 Xbinop (x86_select_op (HD ts) x86_binop) (decode_Xdest_src g (EL 1 ts) (EL 2 ts))``; 170 171val x86_syntax_monop = `` 172 Xmonop (x86_select_op (HD ts) x86_monop) (decode_Xrm32 g (EL 1 ts))``; 173 174val X_SOME_def = Define `X_SOME f = SOME o (\(g,w). (f g,w))`; 175 176val x86_syntax_def = Define ` 177 x86_syntax ts = 178 if LENGTH ts = 0 then option_fail else 179 if (HD ts = "CMP") /\ MEM "r/m8" ts then X_SOME (\g. Xcmp_byte (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 180 if (HD ts = "MOV") /\ MEM "r/m8" ts then X_SOME (\g. Xmov_byte (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 181 if (HD ts = "DEC") /\ MEM "r/m8" ts then X_SOME (\g. Xdec_byte (decode_Xrm32 g (EL 1 ts))) else 182 if HD ts = "MOVZX" then X_SOME (\g. Xmovzx (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 183 if MEM (HD ts) (MAP FST x86_binop) then X_SOME (\g. ^x86_syntax_binop) else 184 if MEM (HD ts) (MAP FST x86_monop) then X_SOME (\g. ^x86_syntax_monop) else 185 if HD ts = "POP" then X_SOME (\g. Xpop (decode_Xrm32 g (EL 1 ts))) else 186 if HD ts = "PUSH" then X_SOME (\g. Xpush (decode_Ximm_rm ts g)) else 187 if HD ts = "PUSHAD" then X_SOME (\g. Xpushad) else 188 if HD ts = "POPAD" then X_SOME (\g. Xpopad) else 189 if HD ts = "CMPXCHG" then X_SOME (\g. Xcmpxchg (decode_Xrm32 g (EL 1 ts)) (decode_Xr32 g (EL 2 ts))) else 190 if HD ts = "XCHG" then X_SOME (\g. Xxchg (decode_Xrm32 g (EL 1 ts)) (decode_Xr32 g (EL 2 ts))) else 191 if HD ts = "XADD" then X_SOME (\g. Xxadd (decode_Xrm32 g (EL 1 ts)) (decode_Xr32 g (EL 2 ts))) else 192 if (HD ts = "JMP") /\ (TL ts = ["r/m32"]) then X_SOME (\g. Xjmp (decode_Xrm32 g (EL 1 ts))) else 193 if HD ts = "JMP" then X_SOME (\g. Xjcc X_ALWAYS (decode_Xconst_or_zero ts g)) else 194 if HD ts = "JE" then X_SOME (\g. Xjcc X_E (decode_Xconst_or_zero ts g)) else 195 if HD ts = "JNE" then X_SOME (\g. Xjcc X_NE (decode_Xconst_or_zero ts g)) else 196 if HD ts = "JS" then X_SOME (\g. Xjcc X_S (decode_Xconst_or_zero ts g)) else 197 if HD ts = "JNS" then X_SOME (\g. Xjcc X_NS (decode_Xconst_or_zero ts g)) else 198 if HD ts = "JA" then X_SOME (\g. Xjcc X_A (decode_Xconst_or_zero ts g)) else 199 if HD ts = "JNA" then X_SOME (\g. Xjcc X_NA (decode_Xconst_or_zero ts g)) else 200 if HD ts = "JB" then X_SOME (\g. Xjcc X_B (decode_Xconst_or_zero ts g)) else 201 if HD ts = "JNB" then X_SOME (\g. Xjcc X_NB (decode_Xconst_or_zero ts g)) else 202 if HD ts = "LEA" then X_SOME (\g. Xlea (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 203 if HD ts = "LOOP" then X_SOME (\g. Xloop X_ALWAYS (decode_Xconst_or_zero ts g)) else 204 if HD ts = "LOOPE" then X_SOME (\g. Xloop X_E (decode_Xconst_or_zero ts g)) else 205 if HD ts = "LOOPNE" then X_SOME (\g. Xloop X_NE (decode_Xconst_or_zero ts g)) else 206 if HD ts = "MOV" then X_SOME (\g. Xmov X_ALWAYS (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 207 if HD ts = "CMOVE" then X_SOME (\g. Xmov X_E (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 208 if HD ts = "CMOVNE" then X_SOME (\g. Xmov X_NE (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 209 if HD ts = "CMOVS" then X_SOME (\g. Xmov X_S (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 210 if HD ts = "CMOVNS" then X_SOME (\g. Xmov X_NS (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 211 if HD ts = "CMOVA" then X_SOME (\g. Xmov X_A (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 212 if HD ts = "CMOVNA" then X_SOME (\g. Xmov X_NA (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 213 if HD ts = "CMOVB" then X_SOME (\g. Xmov X_B (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 214 if HD ts = "CMOVNB" then X_SOME (\g. Xmov X_NB (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else 215 if HD ts = "MUL" then X_SOME (\g. Xmul (decode_Xrm32 g (EL 1 ts))) else 216 if HD ts = "DIV" then X_SOME (\g. Xdiv (decode_Xrm32 g (EL 1 ts))) else 217 if HD ts = "CALL" then X_SOME (\g. Xcall (decode_Ximm_rm ts g)) else 218 if HD ts = "RET" then X_SOME (\g. Xret (decode_Xconst_or_zero ts g)) else option_fail`; 219 220 221(* a list of x86 instructions, ordered by the combination of addressing modes they support *) 222 223val x86_syntax_list = `` [ 224 225 " 15 id | ADC EAX, imm32 "; 226 " 83 /2 ib | ADC r/m32, imm8 "; 227 " 81 /2 id | ADC r/m32, imm32 "; 228 " 11 /r | ADC r/m32, r32 "; 229 " 13 /r | ADC r32, r/m32 "; 230 " 05 id | ADD EAX, imm32 "; 231 " 83 /0 ib | ADD r/m32, imm8 "; 232 " 81 /0 id | ADD r/m32, imm32 "; 233 " 01 /r | ADD r/m32, r32 "; 234 " 03 /r | ADD r32, r/m32 "; 235 " 25 id | AND EAX, imm32 "; 236 " 81 /4 id | AND r/m32, imm32 "; 237 " 83 /4 ib | AND r/m32, imm8 "; 238 " 21 /r | AND r/m32, r32 "; 239 " 23 /r | AND r32, r/m32 "; 240 " 3D id | CMP EAX, imm32 "; 241 " 81 /7 id | CMP r/m32, imm32 "; 242 " 83 /7 ib | CMP r/m32, imm8 "; 243 " 39 /r | CMP r/m32, r32 "; 244 " 3B /r | CMP r32, r/m32 "; 245 " 89 /r | MOV r/m32,r32 "; 246 " 8B /r | MOV r32,r/m32 "; 247 " B8+rd id | MOV r32, imm32 "; 248 " C7 /0 id | MOV r/m32, imm32 "; 249 " 0D id | OR EAX, imm32 "; 250 " 81 /1 id | OR r/m32, imm32 "; 251 " 83 /1 ib | OR r/m32, imm8 "; 252 " 09 /r | OR r/m32, r32 "; 253 " 0B /r | OR r32, r/m32 "; 254 " 1D id | SBB EAX, imm32 "; 255 " 83 /3 ib | SBB r/m32, imm8 "; 256 " 81 /3 id | SBB r/m32, imm32 "; 257 " 19 /r | SBB r/m32, r32 "; 258 " 1B /r | SBB r32, r/m32 "; 259 " 2D id | SUB EAX, imm32 "; 260 " 83 /5 ib | SUB r/m32, imm8 "; 261 " 81 /5 id | SUB r/m32, imm32 "; 262 " 29 /r | SUB r/m32, r32 "; 263 " 2B /r | SUB r32, r/m32 "; 264 " A9 id | TEST EAX, imm32 "; 265 " F7 /0 id | TEST r/m32, imm32 "; 266 " 85 /r | TEST r/m32, r32 "; 267 " 35 id | XOR EAX, imm32 "; 268 " 81 /6 id | XOR r/m32, imm32 "; 269 " 31 /r | XOR r/m32, r32 "; 270 " 83 /6 ib | XOR r/m32, imm8 "; 271 " 33 /r | XOR r32, r/m32 "; 272 273 " 0F B1 /r | CMPXCHG r/m32, r32"; 274 " 0F C1 /r | XADD r/m32, r32 "; 275 " 90+rd | XCHG EAX, r32 "; 276 " 87 /r | XCHG r/m32, r32 "; 277 278 " FF /1 | DEC r/m32 "; 279 " 48+rd | DEC r32 "; 280 " FF /0 | INC r/m32 "; 281 " 40+rd | INC r32 "; 282 " F7 /3 | NEG r/m32 "; 283 " F7 /2 | NOT r/m32 "; 284 " 8F /0 | POP r/m32 "; 285 " 58+rd | POP r32 "; 286 " FF /6 | PUSH r/m32 "; 287 " 50+rd | PUSH r32 "; 288 289 " E8 cd | CALL rel32 "; 290 " FF /2 | CALL r/m32 "; 291 292 " 8D /r | LEA r32, m "; 293 " D1 /4 | SHL r/m32, 1 "; 294 " C1 /4 ib | SHL r/m32, imm8 "; 295 " D1 /5 | SHR r/m32, 1 "; 296 " C1 /5 ib | SHR r/m32, imm8 "; 297 " D1 /7 | SAR r/m32, 1 "; 298 " C1 /7 ib | SAR r/m32, imm8 "; 299 300 " FF /4 | JMP r/m32 "; 301 " EB cb | JMP rel8 "; 302 " E9 cd | JMP rel32 "; 303 " 74 cb | JE rel8 "; 304 " 75 cb | JNE rel8 "; 305 " 0F 84 cd | JE rel32 "; 306 " 0F 85 cd | JNE rel32 "; 307 " 78 cb | JS rel8 "; 308 " 79 cb | JNS rel8 "; 309 " 0F 88 cd | JS rel32 "; 310 " 0F 89 cd | JNS rel32 "; 311 " 77 cb | JA rel8 "; 312 " 76 cb | JNA rel8 "; 313 " 0F 87 cd | JA rel32 "; 314 " 0F 86 cd | JNA rel32 "; 315 " 72 cb | JB rel8 "; 316 " 73 cb | JNB rel8 "; 317 " 0F 82 cd | JB rel32 "; 318 " 0F 83 cd | JNB rel32 "; 319 320 " 0F 44 /r | CMOVE r32, r/m32 "; 321 " 0F 45 /r | CMOVNE r32, r/m32 "; 322 " 0F 48 /r | CMOVS r32, r/m32 "; 323 " 0F 49 /r | CMOVNS r32, r/m32 "; 324 " 0F 47 /r | CMOVA r32, r/m32 "; 325 " 0F 46 /r | CMOVNA r32, r/m32 "; 326 " 0F 42 /r | CMOVB r32, r/m32 "; 327 " 0F 43 /r | CMOVNB r32, r/m32 "; 328 329 " C3 | RET "; (* short for "RET 0" *) 330 " C2 iw | RET imm16 "; 331 " E2 cb | LOOP rel8 "; 332 " E1 cb | LOOPE rel8 "; 333 " E0 cb | LOOPNE rel8 "; 334 335 " 60 | PUSHAD "; 336 " 61 | POPAD "; 337 338 " F7 /6 | DIV r/m32 "; 339 " F7 /4 | MUL r/m32 "; 340 341 " C6 /0 ib | MOV r/m8, imm8 "; 342 " 88 /r | MOV r/m8, r8 "; 343 " 0F B6 /r | MOVZX r32, r/m8 "; 344 " 38 /r | CMP r/m8, r8 "; 345 " 3A /r | CMP r8, r/m8 "; 346 " 80 /7 ib | CMP r/m8, imm8 "; 347 " FE /1 | DEC r/m8 " 348 349 ] ``; 350 351val x86_decode_aux_def = zDefine ` 352 x86_decode_aux = (match_list x86_match_step tokenise (x86_syntax o tokenise) o 353 MAP (\s. let x = STR_SPLIT [#"|"] s in (EL 0 x, EL 1 x))) ^x86_syntax_list`; 354 355val x86_decode_prefixes_def = Define ` 356 x86_decode_prefixes w = 357 if LENGTH w < 16 then (Xg1_none,Xg2_none,w) else 358 let bt1 = (TAKE 8 w = n2bits 8 (hex2num "2E")) in 359 let bnt1 = (TAKE 8 w = n2bits 8 (hex2num "3E")) in 360 let l1 = (TAKE 8 w = n2bits 8 (hex2num "F0")) in 361 let bt2 = (TAKE 8 (DROP 8 w) = n2bits 8 (hex2num "2E")) /\ l1 in 362 let bnt2 = (TAKE 8 (DROP 8 w) = n2bits 8 (hex2num "3E")) /\ l1 in 363 let l2 = (TAKE 8 (DROP 8 w) = n2bits 8 (hex2num "F0")) /\ (bt1 \/ bnt1) in 364 let g1 = if l1 \/ l2 then Xlock else Xg1_none in 365 let g2 = if bt1 \/ bt2 then Xbranch_taken else Xg2_none in 366 let g2 = if bnt1 \/ bnt2 then Xbranch_not_taken else g2 in 367 let n = if bt1 \/ bnt1 \/ l1 then (if bt2 \/ bnt2 \/ l2 then 16 else 8) else 0 in 368 (g1,g2,DROP n w)`; 369 370val dest_accesses_memory_def = Define ` 371 (dest_accesses_memory (Xrm_i rm i) = rm_is_memory_access rm) /\ 372 (dest_accesses_memory (Xrm_r rm r) = rm_is_memory_access rm) /\ 373 (dest_accesses_memory (Xr_rm r rm) = F)`; 374 375val x86_lock_ok_def = Define ` 376 (x86_lock_ok (Xbinop binop_name ds) = 377 MEM binop_name [Xadd;Xand;Xor;Xsub;Xxor] /\ 378 dest_accesses_memory ds) /\ 379 (x86_lock_ok (Xmonop monop_name rm) = 380 MEM monop_name [Xdec;Xinc;Xneg;Xnot] /\ 381 rm_is_memory_access rm) /\ 382 (x86_lock_ok (Xxadd rm r) = rm_is_memory_access rm) /\ 383 (x86_lock_ok (Xxchg rm r) = rm_is_memory_access rm) /\ 384 (x86_lock_ok (Xcmpxchg rm r) = rm_is_memory_access rm) /\ 385 (x86_lock_ok (Xpop rm) = F) /\ 386 (x86_lock_ok (Xlea ds) = F) /\ 387 (x86_lock_ok (Xpush imm_rm) = F) /\ 388 (x86_lock_ok (Xcall imm_rm) = F) /\ 389 (x86_lock_ok (Xret imm) = F) /\ 390 (x86_lock_ok (Xmov c ds) = F) /\ 391 (x86_lock_ok (Xjcc c imm) = F) /\ 392 (x86_lock_ok (Xjmp rm) = F) /\ 393 (x86_lock_ok (Xloop c imm) = F) /\ 394 (x86_lock_ok (Xpushad) = F) /\ 395 (x86_lock_ok (Xpopad) = F)`; 396 397val x86_decode_def = Define ` 398 x86_decode w = 399 let (g1,g2,w) = x86_decode_prefixes w in 400 let result = x86_decode_aux w in 401 if result = NONE then NONE else 402 let (i,w) = THE result in 403 if ~(g1 = Xlock) \/ x86_lock_ok i then SOME (Xprefix g1 g2 i, w) else NONE`; 404 405val x86_decode_bytes_def = Define ` 406 x86_decode_bytes b = x86_decode (FOLDR APPEND [] (MAP w2bits b))`; 407 408val rm_args_ok_def = Define ` 409 (rm_args_ok (Xr r) = T) /\ 410 (rm_args_ok (Xm NONE NONE t3) = T) /\ 411 (rm_args_ok (Xm NONE (SOME br) t3) = T) /\ 412 (rm_args_ok (Xm (SOME (s,ir)) NONE t3) = ~(ir = ESP)) /\ 413 (rm_args_ok (Xm (SOME (s,ir)) (SOME br) t3) = ~(ir = ESP))`; 414 415val dest_src_args_ok_def = Define ` 416 (dest_src_args_ok (Xrm_i rm i) = rm_args_ok rm) /\ 417 (dest_src_args_ok (Xrm_r rm r) = rm_args_ok rm ) /\ 418 (dest_src_args_ok (Xr_rm r rm) = rm_args_ok rm )`; 419 420val imm_rm_args_ok_def = Define ` 421 (imm_rm_args_ok (Xi_rm rm) = rm_args_ok rm) /\ 422 (imm_rm_args_ok (Xi i) = T)`; 423 424val x86_args_ok_def = Define ` 425 (x86_args_ok (Xbinop binop_name ds) = dest_src_args_ok ds) /\ 426 (x86_args_ok (Xmonop monop_name rm) = rm_args_ok rm) /\ 427 (x86_args_ok (Xxadd rm r) = rm_args_ok rm ) /\ 428 (x86_args_ok (Xxchg rm r) = rm_args_ok rm ) /\ 429 (x86_args_ok (Xcmpxchg rm r) = rm_args_ok rm ) /\ 430 (x86_args_ok (Xpop rm) = rm_args_ok rm) /\ 431 (x86_args_ok (Xpush imm_rm) = imm_rm_args_ok imm_rm) /\ 432 (x86_args_ok (Xcall imm_rm) = imm_rm_args_ok imm_rm) /\ 433 (x86_args_ok (Xret imm) = w2n imm < 2**16) /\ 434 (x86_args_ok (Xmov c ds) = dest_src_args_ok ds /\ MEM c [X_NE;X_E;X_ALWAYS]) /\ (* partial list *) 435 (x86_args_ok (Xjcc c imm) = T) /\ 436 (x86_args_ok (Xjmp rm) = T) /\ 437 (x86_args_ok (Xloop c imm) = MEM c [X_NE;X_E;X_ALWAYS]) /\ 438 (x86_args_ok (Xpushad) = T) /\ 439 (x86_args_ok (Xpopad) = T)`; 440 441val x86_well_formed_instruction_def = Define ` 442 (x86_well_formed_instruction (Xprefix Xlock g2 i) = x86_lock_ok i /\ x86_args_ok i) /\ 443 (x86_well_formed_instruction (Xprefix Xg1_none g2 i) = x86_args_ok i)`; 444 445 446 447(* -- partially pre-evaluate x86_decode_aux -- *) 448 449val x86_decode_aux_thm = let 450 fun eval_term_ss tm_name tm = conv_ss 451 { name = tm_name, trace = 3, key = SOME ([],tm), conv = K (K EVAL) }; 452 val token_ss = eval_term_ss "tokenise" ``tokenise x``; 453 val if_ss = conv_ss { name = "if", trace = 3, 454 key = SOME ([],``if x then (y:'a) else z``), 455 conv = K (K ((RATOR_CONV o RATOR_CONV o RAND_CONV) EVAL)) }; 456 val hex_ss = eval_term_ss "hex" ``n2bits 8 (hex2num y)``; 457 val hex_add_ss = eval_term_ss "hex" ``n2bits 8 ((hex2num y) + k)``; 458 val n2bits_3_ss = eval_term_ss "n2bits_3" ``n2bits 3 y``; 459 val select_op_ss = eval_term_ss "select_op" ``x86_select_op x (y:('a#'b)list)``; 460 val EL_LEMMA = prove( 461 ``!x y z t. (EL 0 (x::t) = x) /\ (EL 1 (x::y::t) = y) /\ (EL 2 (x:'a::y::z::t) = z)``, 462 REPEAT STRIP_TAC THEN EVAL_TAC); 463 val SOME_LEMMA = prove(``!(f :'a -> 'b option) (g :'c) (h :'d). (SOME >> f = f)``, 464 SIMP_TAC std_ss [FUN_EQ_THM,option_then_def,LET_DEF]); 465 val th1 = REWRITE_CONV [MAP,x86_decode_aux_def,combinTheory.o_DEF] ``x86_decode_aux`` 466 val th2 = CONV_RULE (ONCE_DEPTH_CONV (BETA_CONV) THENC (RAND_CONV (RAND_CONV EVAL))) th1 467 val th3 = REWRITE_RULE [match_list_def,MAP] th2 468 val th4 = SIMP_RULE (std_ss++token_ss) [match_def] th3 469 val th5 = SIMP_RULE (bool_ss++if_ss) [MAP,x86_match_step_def] th4 470 val th6 = SIMP_RULE (bool_ss++if_ss++select_op_ss) [x86_syntax_def,EL_LEMMA,HD] th5 471 val th6a = SIMP_RULE (std_ss) [LET_DEF,process_hex_add_def] th6 472 val th7 = SIMP_RULE (bool_ss++if_ss++hex_ss++hex_add_ss++n2bits_3_ss) [decode_Xdest_src_def,decode_Xconst_def] th6a 473 val th8 = REWRITE_RULE [GSYM option_then_assoc,option_do_def,option_try_def,GSYM option_orelse_assoc] th7 474 val th9 = SIMP_RULE std_ss [option_orelse_SOME,GSYM option_orelse_assoc,X_SOME_def] th8 475 val thA = REWRITE_RULE [GSYM option_then_assoc,EL_LEMMA,drop_eq_thm,SOME_LEMMA,option_do_def] th9 476 val thB = REWRITE_RULE [option_then_assoc,SOME_LEMMA] thA 477 val thC = REWRITE_RULE [option_try_def,GSYM option_orelse_assoc] thB 478 val thD = REWRITE_RULE [option_then_OVER_orelse] thC 479 val thE = REWRITE_RULE [option_orelse_assoc] thD 480 val thX = REWRITE_RULE [DT_over_DF,option_then_assoc,option_orelse_assoc,option_then_OVER_orelse] thE 481 val thZ = REWRITE_RULE [DTF_THM] thX 482 in thZ end; 483 484val _ = save_thm("x86_decode_aux_thm",x86_decode_aux_thm); 485val _ = computeLib.add_persistent_funs ["x86_decode_aux_thm"]; 486 487 488(* x86 examples/tests: 489 490val _ = wordsLib.output_words_as_hex(); 491 492val th = EVAL ``x86_decode(bytebits "8D84B6EE711202")``; (* lea eax, [esi + 4*esi + 34763246] *) 493val th = EVAL ``x86_decode(bytebits "D1E1")``; (* shl ecx, 1 *) 494val th = EVAL ``x86_decode(bytebits "C1E108")``; (* shl ecx, 8 *) 495val th = EVAL ``x86_decode(bytebits "D1EB")``; (* shr ebx, 1 *) 496val th = EVAL ``x86_decode(bytebits "C1EB08")``; (* shr ebx, 8 *) 497val th = EVAL ``x86_decode(bytebits "D1F8")``; (* sar eax, 1 *) 498val th = EVAL ``x86_decode(bytebits "C1F808")``; (* sar eax, 8 *) 499val th = EVAL ``x86_decode(bytebits "0538000000")``; (* add eax,56 *) 500val th = EVAL ``x86_decode(bytebits "810037020000")``; (* add dword [eax],567 *) 501val th = EVAL ``x86_decode(bytebits "010B")``; (* add [ebx], ecx *) 502val th = EVAL ``x86_decode(bytebits "0119")``; (* add [ecx], ebx *) 503val th = EVAL ``x86_decode(bytebits "2538000000")``; (* and eax,56 *) 504val th = EVAL ``x86_decode(bytebits "812037020000")``; (* and dword [eax],567 *) 505val th = EVAL ``x86_decode(bytebits "210B")``; (* and [ebx], ecx *) 506val th = EVAL ``x86_decode(bytebits "2319")``; (* and ebx, [ecx] *) 507val th = EVAL ``x86_decode(bytebits "0F44C1")``; (* cmove eax, ecx *) 508val th = EVAL ``x86_decode(bytebits "0F454104")``; (* cmovne eax, [ecx+4] *) 509val th = EVAL ``x86_decode(bytebits "81FE38000000")``; (* cmp esi,56 *) 510val th = EVAL ``x86_decode(bytebits "813B37020000")``; (* cmp dword [ebx],567 *) 511val th = EVAL ``x86_decode(bytebits "390B")``; (* cmp [ebx], ecx *) 512val th = EVAL ``x86_decode(bytebits "3B19")``; (* cmp ebx, [ecx] *) 513val th = EVAL ``x86_decode(bytebits "893E")``; (* mov [esi],edi *) 514val th = EVAL ``x86_decode(bytebits "8B3E")``; (* mov edi,[esi] *) 515val th = EVAL ``x86_decode(bytebits "BC37020000")``; (* mov esp,567 *) 516val th = EVAL ``x86_decode(bytebits "81CE38000000")``; (* or esi,56 *) 517val th = EVAL ``x86_decode(bytebits "810B37020000")``; (* or dword [ebx],567 *) 518val th = EVAL ``x86_decode(bytebits "090B")``; (* or [ebx], ecx *) 519val th = EVAL ``x86_decode(bytebits "0B19")``; (* or ebx, [ecx] *) 520val th = EVAL ``x86_decode(bytebits "81EE38000000")``; (* sub esi,56 *) 521val th = EVAL ``x86_decode(bytebits "812B37020000")``; (* sub dword [ebx],567 *) 522val th = EVAL ``x86_decode(bytebits "290B")``; (* sub [ebx], ecx *) 523val th = EVAL ``x86_decode(bytebits "2B19")``; (* sub ebx, [ecx] *) 524val th = EVAL ``x86_decode(bytebits "F7C638000000")``; (* test esi,56 *) 525val th = EVAL ``x86_decode(bytebits "F70337020000")``; (* test dword [ebx],567 *) 526val th = EVAL ``x86_decode(bytebits "850B")``; (* test [ebx], ecx *) 527val th = EVAL ``x86_decode(bytebits "81F638000000")``; (* xor esi,56 *) 528val th = EVAL ``x86_decode(bytebits "813337020000")``; (* xor dword [ebx],567 *) 529val th = EVAL ``x86_decode(bytebits "310B")``; (* xor [ebx], ecx *) 530val th = EVAL ``x86_decode(bytebits "3319")``; (* xor ebx, [ecx] *) 531val th = EVAL ``x86_decode(bytebits "0FB110")``; (* cmpxchg [eax],edx *) 532val th = EVAL ``x86_decode(bytebits "0FC11E")``; (* xadd [esi],ebx *) 533val th = EVAL ``x86_decode(bytebits "FF4E3C")``; (* dec dword [esi+60] *) 534val th = EVAL ``x86_decode(bytebits "4C")``; (* dec esp *) 535val th = EVAL ``x86_decode(bytebits "FF80401F0000")``; (* inc dword [eax+8000] *) 536val th = EVAL ``x86_decode(bytebits "40")``; (* inc eax *) 537val th = EVAL ``x86_decode(bytebits "F750C8")``; (* not dword [eax-56] *) 538val th = EVAL ``x86_decode(bytebits "8F0590010000")``; (* pop dword [400] *) 539val th = EVAL ``x86_decode(bytebits "59")``; (* pop ecx *) 540val th = EVAL ``x86_decode(bytebits "FFB498C8010000")``; (* push dword [eax + 4*ebx + 456] *) 541val th = EVAL ``x86_decode(bytebits "FF749801")``; (* push dword [eax + 4*ebx + 1] *) 542val th = EVAL ``x86_decode(bytebits "FF74AD02")``; (* push dword [ebp + 4*ebp + 2] *) 543val th = EVAL ``x86_decode(bytebits "FF746D2D")``; (* push dword [ebp + 2*ebp + 45] *) 544val th = EVAL ``x86_decode(bytebits "FFB42DEA000000")``; (* push dword [ebp + ebp + 234] *) 545val th = EVAL ``x86_decode(bytebits "FFB4B6EE711202")``; (* push dword [esi + 4*esi + 34763246] *) 546val th = EVAL ``x86_decode(bytebits "50")``; (* push eax *) 547val th = EVAL ``x86_decode(bytebits "E8BDFFFFFF")``; (* call l1 *) 548val th = EVAL ``x86_decode(bytebits "FFD0")``; (* call eax *) 549val th = EVAL ``x86_decode(bytebits "EBB9")``; (* jmp l1 *) 550val th = EVAL ``x86_decode(bytebits "74B7")``; (* je l1 *) 551val th = EVAL ``x86_decode(bytebits "75B5")``; (* jne l1 *) 552val th = EVAL ``x86_decode(bytebits "C3")``; (* ret *) 553val th = EVAL ``x86_decode(bytebits "C20500")``; (* ret 5 *) 554val th = EVAL ``x86_decode(bytebits "E2AF")``; (* loop l1 *) 555val th = EVAL ``x86_decode(bytebits "E1AD")``; (* loope l1 *) 556val th = EVAL ``x86_decode(bytebits "E0AB")``; (* loopne l1 *) 557val th = EVAL ``x86_decode(bytebits "60")``; (* pushad *) 558val th = EVAL ``x86_decode(bytebits "61")``; (* popad *) 559val th = EVAL ``x86_decode(bytebits "0FAEF0")``; (* mfence *) 560val th = EVAL ``x86_decode(bytebits "0FAEF8")``; (* sfence *) 561val th = EVAL ``x86_decode(bytebits "0FAEE8")``; (* lfence *) 562val th = EVAL ``x86_decode(bytebits "93")``; (* xchg eax, ebx *) 563val th = EVAL ``x86_decode(bytebits "8731")``; (* xchg [ecx], esi *) 564val th = EVAL ``x86_decode(bytebits "F7583C")``; (* neg dword [eax+60] *) 565val th = EVAL ``x86_decode(bytebits "8325C49B040800")``; (* and [8049BC4], 0 *) 566val th = EVAL ``x86_decode(bytebits "8125C49B040800000000")``; (* and [8049BC4], 0 *) 567val th = EVAL ``x86_decode(bytebits "0FB1F8")``; (* cmpxchg eax, edi *) 568val th = EVAL ``x86_decode(bytebits "0FB1441500")``; (* cmpxchg [ebp + edx], eax *) 569val th = EVAL ``x86_decode(bytebits "56")``; 570val th = EVAL ``x86_decode(bytebits "5B")``; 571 572*) 573 574 575(* test whether decoding works, if this is slow then something's wrong *) 576 577val t1 = Time.now(); 578val th = EVAL ``x86_decode(bytebits "D1F8")``; (* sar eax, 1 *) 579val t2 = Time.now(); 580val elapsed_time = Time.toReal t2 - Time.toReal t1 581val _ = elapsed_time < 5.0 orelse failwith("Decoding failed to use compset properly.") 582(* The time difference should really be under a second, but 5 seconds 583 gives a bit of a margin. *) 584 585val _ = export_theory (); 586