1 2open HolKernel boolLib bossLib Parse; 3open wordsTheory stringTheory stringLib listTheory stringSimps listLib simpLib; 4open decoderTheory bit_listTheory opmonTheory; 5 6open x64_astTheory x64_coretypesTheory; 7 8val _ = new_theory "x64_decoder"; 9 10 11(* ---------------------------------------------------------------------------------- *> 12 13 A decoder for x86 instructions is defined and at the end pre-evaliuated for 14 fast execution with EVAL. 15 16 NOTES: 17 18 All instructions can have a REX prefix, making them have access to 19 16 registers. Some instructions require a REX prefix. The REX 20 prefic must appear immediately before the rest of opcode part of 21 the instruction, i.e. after all the other prefixes. 22 23 The presence of the REX prefix allows for 20 byte-valued registers 24 (AL, AH, CL, CH, DL, DH, BL, BH, SPL, BPL, SIL, DIL and 25 R8B-R15B). However, this implementation of the x86_64 semantics 26 disallows use of AH, CH, DH, BH in order to make the semantics a 27 bit more uniform. Formalised in instr_accesses_bad_byte_reg_def. 28 29 ( Addresses are by default 64-bit, but an address-size override 30 prefix (0x67) can turn the address size into 32-bits. This feature 31 is not supported by this implementation of the x86_64 semantics. ) 32 33 TODO: 34 - support address-size over-ride prefix 35 - make sure the 16-bit operations performs the appropriate sw2sw 36 37<* ---------------------------------------------------------------------------------- *) 38 39(* reading hex strings to bit lists *) 40 41val STR_SPACE_AUX_def = Define ` 42 (STR_SPACE_AUX n "" = "") /\ 43 (STR_SPACE_AUX n (STRING c s) = 44 if n = 0 then STRING #" " (STRING c (STR_SPACE_AUX 1 s)) 45 else STRING c (STR_SPACE_AUX (n-1) s))`; 46 47val bytebits_def = Define ` 48 bytebits = hex2bits o STR_SPACE_AUX 2`; 49 50(* interprete the IA-32 manuals' syntax *) 51 52val check_opcode_def = Define ` 53 check_opcode s = 54 let y = (n2bits 3 o char2num o HD o TL o EXPLODE) s in 55 assert (\g. g "Reg/Opcode" = y)`; 56 57val read_SIB_def = Define ` 58 read_SIB = 59 assign_drop "Base" 3 >> assign_drop "Index" 3 >> assign_drop "Scale" 2 >> 60 option_try [ 61 assert (\g. (g "Mod" = [F;F]) /\ (g "Base" = [T;F;T])) >> assign_drop "disp32" 32; 62 assert (\g. (g "Mod" = [F;F]) /\ ~(g "Base" = [T;F;T])); 63 assert (\g. (g "Mod" = [T;F])) >> assign_drop "disp8" 8; 64 assert (\g. (g "Mod" = [F;T])) >> assign_drop "disp32" 32]`; 65 66val read_ModRM_def = Define ` 67 read_ModRM = 68 assign_drop "R/M" 3 >> assign_drop "Reg/Opcode" 3 >> assign_drop "Mod" 2 >> 69 option_try [ 70 assert (\g. (g "Mod" = [T;T])); 71 assert (\g. (g "Mod" = [F;F]) /\ ~(g "R/M" = [F;F;T]) /\ ~(g "R/M" = [T;F;T])); 72 assert (\g. (g "Mod" = [F;F]) /\ (g "R/M" = [T;F;T])) >> assign_drop "disp32" 32; 73 assert (\g. (g "Mod" = [T;F]) /\ ~(g "R/M" = [F;F;T])) >> assign_drop "disp8" 8; 74 assert (\g. (g "Mod" = [F;T]) /\ ~(g "R/M" = [F;F;T])) >> assign_drop "disp32" 32; 75 assert (\g. ~(g "Mod" = [T;T]) /\ (g "R/M" = [F;F;T])) >> read_SIB]`; 76 77val is_hex_add_def = Define ` 78 is_hex_add x = ((IMPLODE o DROP 2 o EXPLODE) x = "+rd")`; 79 80val process_hex_add_def = Define ` 81 process_hex_add name = 82 let n = (hex2num o IMPLODE o TAKE 2 o EXPLODE) name in 83 option_try [drop_eq (n2bits 8 (n+0)) >> assign "reg" (n2bits 3 0); 84 drop_eq (n2bits 8 (n+1)) >> assign "reg" (n2bits 3 1); 85 drop_eq (n2bits 8 (n+2)) >> assign "reg" (n2bits 3 2); 86 drop_eq (n2bits 8 (n+3)) >> assign "reg" (n2bits 3 3); 87 drop_eq (n2bits 8 (n+4)) >> assign "reg" (n2bits 3 4); 88 drop_eq (n2bits 8 (n+5)) >> assign "reg" (n2bits 3 5); 89 drop_eq (n2bits 8 (n+6)) >> assign "reg" (n2bits 3 6); 90 drop_eq (n2bits 8 (n+7)) >> assign "reg" (n2bits 3 7)]`; 91 92val assign_drop_cond_def = Define ` 93 assign_drop_cond name i1 i2 c (g,bits) = 94 if c g then assign_drop name i1 (g,bits) 95 else assign_drop name i2 (g,bits)`; 96 97val x64_match_step_def = Define ` 98 x64_match_step name = 99 if is_hex name /\ (STRLEN name = 2) then (* opcode e.g. FE, 83 and 04 *) 100 drop_eq (n2bits 8 (hex2num name)) 101 else if is_hex_add name then (* opcode + rd, e.g. F8+rd *) 102 process_hex_add name 103 else if name = "1" then (* constant 1 *) 104 assign_drop name 0 105 else if MEM name ["ib";"cb";"rel8";"imm8"] then (* 8-bit immediate or address *) 106 assign_drop name 8 107 else if MEM name ["iw";"cw";"imm16"] then (* 16-bit immediate or address *) 108 assign_drop name 16 109 else if MEM name ["id";"cd";"rel32";"imm32"] then (* 32-bit immediate or address *) 110 assign_drop_cond name 16 32 (\g. g "16BIT" = [T]) 111 else if MEM name ["iq";"imm64"] then (* 64-bit immediate *) 112 assign_drop name 64 113 else if name = "/r" then (* normal reg + reg/mem *) 114 read_ModRM 115 else if MEM name ["/0";"/1";"/2";"/3";"/4";"/5";"/6";"/7"] then (* opcode extension in ModRM *) 116 read_ModRM >> check_opcode name 117 else if name = "REX.W" then 118 assert (\g. g "REX.W" = [T]) 119 else if name = "REX" then 120 assert (\g. g "REX.W" = [F]) 121 else if name = "+" then 122 assert (\g. T) 123 else 124 option_fail`; 125 126(* syntax classes *) 127 128val x64_binop_def = Define `x64_binop = 129 [("ADC",Zadc); ("ADD",Zadd); ("AND",Zand); ("CMP",Zcmp); 130 ("OR",Zor); ("SAR",Zsar); ("SHR",Zshr); ("SHL",Zshl); 131 ("SBB",Zsbb); ("SUB",Zsub); ("TEST",Ztest); ("XOR",Zxor)]`; 132 133val x64_monop_def = Define `x64_monop = 134 [("DEC",Zdec); ("INC",Zinc); ("NOT",Znot); ("NEG",Zneg)]`; 135 136 137 138(* x86 - decoder *) 139 140val b2reg_def = Define ` 141 b2reg g rex_name name = (EL (bits2num (g name ++ g rex_name)) 142 [RAX;RCX;RDX;RBX;RSP;RBP;RSI;RDI;zR8;zR9;zR10;zR11;zR12;zR13;zR14;zR15]):Zreg`; 143 144val decode_Zr32_def = Define ` 145 decode_Zr32 g name = 146 if name = "RAX" then RAX else 147 if g "reg" = [] then b2reg g "REX.R" "Reg/Opcode" (* "Reg/Opcode" comes from ModRM *) 148 else b2reg g "REX.B" "reg"`; (* "reg" comes from opcode byte, e.g. "rd" in B8+rd *) 149 150val decode_SIB_def = Define ` 151 decode_SIB g = 152 let scaled_index = (if b2reg g "REX.X" "Index" = RSP then NONE else SOME (b2w g "Scale",b2reg g "REX.X" "Index")) in 153 if b2reg g "REX.B" "Base" = RBP then (* the special case indicated by "[*]" *) 154 if g "Mod" = [F;F] then Zm scaled_index NONE (sw2sw ((b2w g "disp32"):word32)) else 155 if g "Mod" = [T;F] then Zm scaled_index (SOME RBP) (sw2sw ((b2w g "disp8"):word8)) else 156 (* g "Mod" = [F;T] *) Zm scaled_index (SOME RBP) (sw2sw ((b2w g "disp32"):word32)) 157 else (* normal cases, just need to read off the correct displacement *) 158 let disp = (if (g "Mod" = [T;F]) then sw2sw ((b2w g "disp8"):word8) else sw2sw ((b2w g "disp32"):word32)) in 159 let disp = (if (g "Mod" = [F;F]) then 0w else disp) in 160 Zm scaled_index (SOME (b2reg g "REX.B" "Base")) disp`; 161 162val decode_Zrm32_def = Define ` (* sw2sw = sign-extension *) 163 decode_Zrm32 g name = 164 if name = "RAX" then Zr RAX else 165 if (g "Mod" = [F;F]) /\ (g "R/M" = [T;F;T]) then Zm NONE NONE (sw2sw:word32->word64 (b2w g "disp32")) else 166 if ~(g "Mod" = [T;T]) /\ (g "R/M" = [F;F;T]) then decode_SIB g else 167 if (g "Mod" = [F;F]) then Zm NONE (SOME (b2reg g "REX.B" "R/M")) 0w else 168 if (g "Mod" = [T;F]) then Zm NONE (SOME (b2reg g "REX.B" "R/M")) (sw2sw:word8->word64 (b2w g "disp8")) else 169 if (g "Mod" = [F;T]) then Zm NONE (SOME (b2reg g "REX.B" "R/M")) (sw2sw:word32->word64 (b2w g "disp32")) else 170 if (g "Mod" = [T;T]) then Zr (b2reg g "REX.B" "R/M") else Zr (b2reg g "REX.B" "reg") `; 171 172val decode_Zconst_def = Define ` 173 decode_Zconst name g = 174 if name = "imm8" then sw2sw:word8 ->word64 (b2w g "ib") else 175 if name = "rel8" then sw2sw:word8 ->word64 (b2w g "cb") else 176 if name = "imm16" then sw2sw:word16->word64 (b2w g "iw") else 177 if name = "imm32" then sw2sw:word32->word64 (b2w g "id") else 178 if name = "rel32" then sw2sw:word32->word64 (b2w g "cd") else 179 if name = "imm64" then b2w g "iq" else 180 if name = "1" then 1w else 0w`; 181 182val decode_Zdest_src_def = Define ` 183 decode_Zdest_src g dest src = 184 if MEM src ["r64";"r32";"r16";"r8"] then 185 Zrm_r (decode_Zrm32 g dest) (decode_Zr32 g src) 186 else if MEM src ["r/m64";"r/m32";"r/m16";"r/m8"] then 187 Zr_rm (decode_Zr32 g dest) (decode_Zrm32 g src) 188 else if src = "m" then 189 Zr_rm (decode_Zr32 g dest) (decode_Zrm32 g src) 190 else 191 Zrm_i (decode_Zrm32 g dest) (decode_Zconst src g)`; 192 193val decode_Zconst_or_zero_def = Define ` 194 decode_Zconst_or_zero ts g = 195 if LENGTH ts < 2 then 0w else decode_Zconst (EL 1 ts) g`; 196 197val decode_Zimm_rm_def = Define ` 198 decode_Zimm_rm ts g = 199 if MEM (EL 1 ts) ["r/m32";"r32";"r/m8";"r8"] 200 then Zi_rm (decode_Zrm32 g (EL 1 ts)) 201 else Zi (decode_Zconst (EL 1 ts) g)`; 202 203val x64_select_op_def = Define ` 204 x64_select_op name list = SND (HD (FILTER (\x. FST x = name) list))`; 205 206val x86_size_def = Define ` 207 x86_size g name = 208 if MEM name ["r8";"m8";"r/m8";"AL"] then Z8 else 209 if MEM name ["r16";"m16";"r/m16";"AX"] then Z16 else 210 if MEM name ["r64";"m64";"r/m64";"RAX"] then Z64 else 211 if g "REX.W" = [T] then Z64 else 212 if g "16BIT" = [T] then Z16 else Z32`; 213 214val Zsize_tm = ``(x86_size g (EL 1 ts))`` 215 216val x64_syntax_binop = `` 217 Zbinop (x64_select_op (HD ts) x64_binop) ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))``; 218 219val x64_syntax_monop = `` 220 Zmonop (x64_select_op (HD ts) x64_monop) ^Zsize_tm (decode_Zrm32 g (EL 1 ts))``; 221 222val Z_SOME_def = Define `Z_SOME f = SOME o (\(g,w). (f g,w))`; 223 224val x64_syntax_def = Define ` 225 x64_syntax ts = 226 if LENGTH ts = 0 then option_fail else 227 if HD ts = "MOVZX" then Z_SOME (\g. Zmovzx ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts)) (if EL 2 ts = "r/m8" then Z8 else Z16)) else 228 if MEM (HD ts) (MAP FST x64_binop) then Z_SOME (\g. ^x64_syntax_binop) else 229 if MEM (HD ts) (MAP FST x64_monop) then Z_SOME (\g. ^x64_syntax_monop) else 230 if HD ts = "POP" then Z_SOME (\g. Zpop (decode_Zrm32 g (EL 1 ts))) else 231 if HD ts = "PUSH" then Z_SOME (\g. Zpush (decode_Zimm_rm ts g)) else 232 if HD ts = "CMPXCHG" then Z_SOME (\g. Zcmpxchg ^Zsize_tm (decode_Zrm32 g (EL 1 ts)) (decode_Zr32 g (EL 2 ts))) else 233 if HD ts = "XCHG" then Z_SOME (\g. Zxchg ^Zsize_tm (decode_Zrm32 g (EL 1 ts)) (decode_Zr32 g (EL 2 ts))) else 234 if HD ts = "XADD" then Z_SOME (\g. Zxadd ^Zsize_tm (decode_Zrm32 g (EL 1 ts)) (decode_Zr32 g (EL 2 ts))) else 235 if (HD ts = "JMP") /\ (TL ts = ["r/m32"]) then Z_SOME (\g. Zjmp (decode_Zrm32 g (EL 1 ts))) else 236 if HD ts = "JMP" then Z_SOME (\g. Zjcc Z_ALWAYS (decode_Zconst_or_zero ts g)) else 237 if HD ts = "JE" then Z_SOME (\g. Zjcc Z_E (decode_Zconst_or_zero ts g)) else 238 if HD ts = "JNE" then Z_SOME (\g. Zjcc Z_NE (decode_Zconst_or_zero ts g)) else 239 if HD ts = "JS" then Z_SOME (\g. Zjcc Z_S (decode_Zconst_or_zero ts g)) else 240 if HD ts = "JNS" then Z_SOME (\g. Zjcc Z_NS (decode_Zconst_or_zero ts g)) else 241 if HD ts = "JA" then Z_SOME (\g. Zjcc Z_A (decode_Zconst_or_zero ts g)) else 242 if HD ts = "JNA" then Z_SOME (\g. Zjcc Z_NA (decode_Zconst_or_zero ts g)) else 243 if HD ts = "JB" then Z_SOME (\g. Zjcc Z_B (decode_Zconst_or_zero ts g)) else 244 if HD ts = "JNB" then Z_SOME (\g. Zjcc Z_NB (decode_Zconst_or_zero ts g)) else 245 if HD ts = "LEA" then Z_SOME (\g. Zlea ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 246 if HD ts = "LOOP" then Z_SOME (\g. Zloop Z_ALWAYS (decode_Zconst_or_zero ts g)) else 247 if HD ts = "LOOPE" then Z_SOME (\g. Zloop Z_E (decode_Zconst_or_zero ts g)) else 248 if HD ts = "LOOPNE" then Z_SOME (\g. Zloop Z_NE (decode_Zconst_or_zero ts g)) else 249 if HD ts = "MOV" then Z_SOME (\g. Zmov Z_ALWAYS ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 250 if HD ts = "CMOVE" then Z_SOME (\g. Zmov Z_E ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 251 if HD ts = "CMOVNE" then Z_SOME (\g. Zmov Z_NE ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 252 if HD ts = "CMOVS" then Z_SOME (\g. Zmov Z_S ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 253 if HD ts = "CMOVNS" then Z_SOME (\g. Zmov Z_NS ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 254 if HD ts = "CMOVA" then Z_SOME (\g. Zmov Z_A ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 255 if HD ts = "CMOVNA" then Z_SOME (\g. Zmov Z_NA ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 256 if HD ts = "CMOVB" then Z_SOME (\g. Zmov Z_B ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 257 if HD ts = "CMOVNB" then Z_SOME (\g. Zmov Z_NB ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else 258 if HD ts = "MUL" then Z_SOME (\g. Zmul ^Zsize_tm (decode_Zrm32 g (EL 1 ts))) else 259 if HD ts = "DIV" then Z_SOME (\g. Zdiv ^Zsize_tm (decode_Zrm32 g (EL 1 ts))) else 260 if HD ts = "CALL" then Z_SOME (\g. Zcall (decode_Zimm_rm ts g)) else 261 if HD ts = "CPUID" then Z_SOME (\g. Zcpuid) else 262 if HD ts = "RET" then Z_SOME (\g. Zret (decode_Zconst_or_zero ts g)) else option_fail`; 263 264 265(* a list of x64 instructions, ordered by the combination of addressing modes they support *) 266 267(* Each line which produces an operation over size Z32 can be modified 268 into a size Z64 by setting REX.W. The Intel manual explicitly 269 lisits each such inferrable line, but we allow such lines to be 270 inferred from the 32-bit lines. For example, the line: 271 272 " 81 /0 id | ADD r/m32, imm32 "; 273 274 Immediately, represents three lines in the manual, namely: 275 276 " 81 /0 iw | ADD r/m16, imm16 "; 277 " 81 /0 id | ADD r/m32, imm32 "; 278 " REX.W + 81 /0 id | ADD r/m64, imm32 "; 279 280 Note that the size of the immediate constant does not change for 64-bit. *) 281 282val x64_syntax_list = `` [ 283 284 " 05 id | ADD EAX, imm32 "; 285 " 15 id | ADC EAX, imm32 "; 286 " 25 id | AND EAX, imm32 "; 287 " 35 id | XOR EAX, imm32 "; 288 " 0D id | OR EAX, imm32 "; 289 " 1D id | SBB EAX, imm32 "; 290 " 2D id | SUB EAX, imm32 "; 291 " 3D id | CMP EAX, imm32 "; 292 293 " 83 /0 ib | ADD r/m32, imm8 "; 294 " 83 /1 ib | OR r/m32, imm8 "; 295 " 83 /2 ib | ADC r/m32, imm8 "; 296 " 83 /3 ib | SBB r/m32, imm8 "; 297 " 83 /4 ib | AND r/m32, imm8 "; 298 " 83 /5 ib | SUB r/m32, imm8 "; 299 " 83 /6 ib | XOR r/m32, imm8 "; 300 " 83 /7 ib | CMP r/m32, imm8 "; 301 302 " 81 /0 id | ADD r/m32, imm32 "; 303 " 81 /1 id | OR r/m32, imm32 "; 304 " 81 /2 id | ADC r/m32, imm32 "; 305 " 81 /3 id | SBB r/m32, imm32 "; 306 " 81 /4 id | AND r/m32, imm32 "; 307 " 81 /5 id | SUB r/m32, imm32 "; 308 " 81 /6 id | XOR r/m32, imm32 "; 309 " 81 /7 id | CMP r/m32, imm32 "; 310 311 " 01 /r | ADD r/m32, r32 "; 312 " 11 /r | ADC r/m32, r32 "; 313 " 21 /r | AND r/m32, r32 "; 314 " 31 /r | XOR r/m32, r32 "; 315 " 09 /r | OR r/m32, r32 "; 316 " 19 /r | SBB r/m32, r32 "; 317 " 29 /r | SUB r/m32, r32 "; 318 " 39 /r | CMP r/m32, r32 "; 319 320 " 03 /r | ADD r32, r/m32 "; 321 " 13 /r | ADC r32, r/m32 "; 322 " 23 /r | AND r32, r/m32 "; 323 " 33 /r | XOR r32, r/m32 "; 324 " 0B /r | OR r32, r/m32 "; 325 " 1B /r | SBB r32, r/m32 "; 326 " 2B /r | SUB r32, r/m32 "; 327 " 3B /r | CMP r32, r/m32 "; 328 329 " A9 id | TEST EAX, imm32 "; 330 " F7 /0 id | TEST r/m32, imm32 "; 331 " 85 /r | TEST r/m32, r32 "; 332 333 " 38 /r | CMP r/m8, r8 "; 334 " 3A /r | CMP r8, r/m8 "; 335 " 80 /7 ib | CMP r/m8, imm8 "; 336 " FE /0 | INC r/m8 "; 337 " FE /1 | DEC r/m8 "; 338 339 " REX.W + B8+rd iq | MOV r64, imm64 "; (* need to mention this explicitly due to imm64 *) 340 " B8+rd id | MOV r32, imm32 "; (* REX.W variant must be above in the list *) 341 " C7 /0 id | MOV r/m32, imm32 "; 342 " C6 /0 ib | MOV r/m8, imm8 "; 343 " 8B /r | MOV r32,r/m32 "; 344 " 8A /r | MOV r8,r/m8 "; 345 " 89 /r | MOV r/m32,r32 "; 346 " 88 /r | MOV r/m8, r8 "; 347 348 " 0F B6 /r | MOVZX r32, r/m8 "; 349 " 0F B7 /r | MOVZX r32, r/m16 "; 350 351 " 0F B1 /r | CMPXCHG r/m32, r32"; 352 " 0F C1 /r | XADD r/m32, r32 "; 353 " 90+rd | XCHG EAX, r32 "; 354 " 87 /r | XCHG r/m32, r32 "; 355 356 " FF /1 | DEC r/m32 "; 357 " FF /0 | INC r/m32 "; 358 " F7 /3 | NEG r/m32 "; 359 " F7 /2 | NOT r/m32 "; 360 " 8F /0 | POP r/m32 "; 361 " 58+rd | POP r32 "; 362 " FF /6 | PUSH r/m32 "; 363 " 50+rd | PUSH r32 "; 364 365 " E8 cd | CALL rel32 "; 366 " FF /2 | CALL r/m32 "; 367 368 " 8D /r | LEA r32, m "; 369 " D1 /4 | SHL r/m32, 1 "; 370 " C1 /4 ib | SHL r/m32, imm8 "; 371 " D1 /5 | SHR r/m32, 1 "; 372 " C1 /5 ib | SHR r/m32, imm8 "; 373 " D1 /7 | SAR r/m32, 1 "; 374 " C1 /7 ib | SAR r/m32, imm8 "; 375 376 " FF /4 | JMP r/m32 "; 377 " EB cb | JMP rel8 "; 378 " E9 cd | JMP rel32 "; 379 " 74 cb | JE rel8 "; 380 " 75 cb | JNE rel8 "; 381 " 0F 84 cd | JE rel32 "; 382 " 0F 85 cd | JNE rel32 "; 383 " 78 cb | JS rel8 "; 384 " 79 cb | JNS rel8 "; 385 " 0F 88 cd | JS rel32 "; 386 " 0F 89 cd | JNS rel32 "; 387 " 77 cb | JA rel8 "; 388 " 76 cb | JNA rel8 "; 389 " 0F 87 cd | JA rel32 "; 390 " 0F 86 cd | JNA rel32 "; 391 " 72 cb | JB rel8 "; 392 " 73 cb | JNB rel8 "; 393 " 0F 82 cd | JB rel32 "; 394 " 0F 83 cd | JNB rel32 "; 395 396 " 0F 44 /r | CMOVE r32, r/m32 "; 397 " 0F 45 /r | CMOVNE r32, r/m32 "; 398 " 0F 48 /r | CMOVS r32, r/m32 "; 399 " 0F 49 /r | CMOVNS r32, r/m32 "; 400 " 0F 47 /r | CMOVA r32, r/m32 "; 401 " 0F 46 /r | CMOVNA r32, r/m32 "; 402 " 0F 42 /r | CMOVB r32, r/m32 "; 403 " 0F 43 /r | CMOVNB r32, r/m32 "; 404 405 " 0F A2 | CPUID "; 406 407 " C3 | RET "; (* short for "RET 0" *) 408 " C2 iw | RET imm16 "; 409 " E2 cb | LOOP rel8 "; 410 " E1 cb | LOOPE rel8 "; 411 " E0 cb | LOOPNE rel8 "; 412 413 " F7 /6 | DIV r/m32 "; 414 " F7 /4 | MUL r/m32 " 415 416 ]``; 417 418val x64_decode_aux_def = zDefine ` 419 x64_decode_aux g = 420 (match_list_raw g x64_match_step tokenise (x64_syntax o tokenise) o 421 MAP (\s. let x = STR_SPLIT [#"|"] s in (EL 0 x, EL 1 x))) ^x64_syntax_list`; 422 423val x64_decode_prefixes_def = tDefine "x64_decode_prefixes" ` 424 x64_decode_prefixes w = 425 if LENGTH w < 8 then ([],w) else 426 let b = n2w (bits2num (TAKE 8 w)):word8 in 427 if ~(MEM b [0x2Ew;0x3Ew;0xF0w;0x66w]) then ([],w) else 428 let (pres,v) = x64_decode_prefixes (DROP 8 w) in 429 let pre = (if b = 0x2Ew then Zbranch_not_taken else 430 if b = 0x3Ew then Zbranch_taken else 431 if b = 0xF0w then Zlock else 432 if b = 0x66w then Zoperand_size_override else ARB) in 433 (pre :: pres, v)` 434 (WF_REL_TAC `measure LENGTH` 435 THEN ASM_SIMP_TAC std_ss [LENGTH_DROP] THEN REPEAT STRIP_TAC THEN DECIDE_TAC); 436 437val x64_decode_REX_def = Define ` 438 x64_decode_REX w = 439 let g = (\n. []) in 440 if LENGTH w < 8 then (g,w) else 441 if ~(TAKE 4 (DROP 4 w) = [F;F;T;F]) then (g,w) else 442 let g = ("REX.B" =+ [EL 0 w]) g in 443 let g = ("REX.X" =+ [EL 1 w]) g in 444 let g = ("REX.R" =+ [EL 2 w]) g in 445 let g = ("REX.W" =+ [EL 3 w]) g in 446 let w = DROP 8 w in 447 (g,w)`; 448 449val dest_accesses_memory_def = Define ` 450 (dest_accesses_memory (Zrm_i rm i) = Zrm_is_memory_access rm) /\ 451 (dest_accesses_memory (Zrm_r rm r) = Zrm_is_memory_access rm) /\ 452 (dest_accesses_memory (Zr_rm r rm) = F)`; 453 454val x64_lock_ok_def = Define ` 455 (x64_lock_ok (Zbinop binop_name dsize ds) = 456 MEM binop_name [Zadd;Zand;Zor;Zsub;Zxor] /\ 457 dest_accesses_memory ds) /\ 458 (x64_lock_ok (Zmonop monop_name dsize rm) = 459 MEM monop_name [Zdec;Zinc;Zneg;Znot] /\ 460 Zrm_is_memory_access rm) /\ 461 (x64_lock_ok (Zxadd dsize rm r) = Zrm_is_memory_access rm) /\ 462 (x64_lock_ok (Zxchg dsize rm r) = Zrm_is_memory_access rm) /\ 463 (x64_lock_ok (Zcmpxchg dsize rm r) = Zrm_is_memory_access rm) /\ 464 (x64_lock_ok (Zpop rm) = F) /\ 465 (x64_lock_ok (Zlea dsize ds) = F) /\ 466 (x64_lock_ok (Zpush imm_rm) = F) /\ 467 (x64_lock_ok (Zcall imm_rm) = F) /\ 468 (x64_lock_ok (Zret imm) = F) /\ 469 (x64_lock_ok (Zmov c dsize ds) = F) /\ 470 (x64_lock_ok (Zjcc c imm) = F) /\ 471 (x64_lock_ok (Zjmp rm) = F) /\ 472 (x64_lock_ok (Zloop c imm) = F) /\ 473 (x64_lock_ok (Zpushad) = F) /\ 474 (x64_lock_ok (Zpopad) = F)`; 475 476val x64_is_jcc_def = Define ` 477 (x64_is_jcc (Zjcc c imm) = T) /\ 478 (x64_is_jcc _ = F)`; 479 480val byte_regs_in_rm_def = Define ` 481 (byte_regs_in_rm (Zr r) = [r]) /\ 482 (byte_regs_in_rm (Zm r1 r2 offset) = [])`; 483 484val byte_regs_in_ds_def = Define ` 485 (byte_regs_in_ds (Zrm_i rm i) = byte_regs_in_rm rm) /\ 486 (byte_regs_in_ds (Zrm_r rm r) = r::byte_regs_in_rm rm) /\ 487 (byte_regs_in_ds (Zr_rm r rm) = r::byte_regs_in_rm rm)`; 488 489val has_bad_byte_regs_def = Define ` 490 has_bad_byte_regs dsize xs = 491 (dsize = Z8) /\ ~(FILTER (\x. MEM x [RSP;RBP;RSI;RDI]) xs = [])`; 492 493val instr_accesses_bad_byte_reg_def = Define ` 494 (instr_accesses_bad_byte_reg (Zbinop binop_name dsize ds) = has_bad_byte_regs dsize (byte_regs_in_ds ds)) /\ 495 (instr_accesses_bad_byte_reg (Zmonop monop_name dsize rm) = has_bad_byte_regs dsize (byte_regs_in_rm rm)) /\ 496 (instr_accesses_bad_byte_reg (Zxadd dsize rm r) = has_bad_byte_regs dsize (r::byte_regs_in_rm rm)) /\ 497 (instr_accesses_bad_byte_reg (Zxchg dsize rm r) = has_bad_byte_regs dsize (r::byte_regs_in_rm rm)) /\ 498 (instr_accesses_bad_byte_reg (Zcmpxchg dsize rm r) = has_bad_byte_regs dsize (r::byte_regs_in_rm rm)) /\ 499 (instr_accesses_bad_byte_reg (Zpop rm) = F) /\ 500 (instr_accesses_bad_byte_reg (Zlea dsize ds) = has_bad_byte_regs dsize (byte_regs_in_ds ds)) /\ 501 (instr_accesses_bad_byte_reg (Zpush imm_rm) = F) /\ 502 (instr_accesses_bad_byte_reg (Zcall imm_rm) = F) /\ 503 (instr_accesses_bad_byte_reg (Zret imm) = F) /\ 504 (instr_accesses_bad_byte_reg (Zmov c dsize ds) = has_bad_byte_regs dsize (byte_regs_in_ds ds)) /\ 505 (instr_accesses_bad_byte_reg (Zjcc c imm) = F) /\ 506 (instr_accesses_bad_byte_reg (Zjmp rm) = F) /\ 507 (instr_accesses_bad_byte_reg (Zloop c imm) = F) /\ 508 (instr_accesses_bad_byte_reg (Zpushad) = F) /\ 509 (instr_accesses_bad_byte_reg (Zpopad) = F)`; 510 511val Zprefixes_ok_def = Define ` 512 Zprefixes_ok pres i = ALL_DISTINCT (MAP Zprefix_group pres) /\ 513 (MEM Zlock pres ==> x64_lock_ok i) /\ 514 (MEM Zbranch_taken pres ==> x64_is_jcc i) /\ 515 (MEM Zbranch_not_taken pres ==> x64_is_jcc i)`; 516 517val x64_decode_def = Define ` 518 x64_decode w = 519 let (pres,w) = x64_decode_prefixes w in 520 let (g,w) = x64_decode_REX w in 521 let g = (if MEM Zoperand_size_override pres then (("16BIT" =+ [T]) g) else g) in 522 let result = x64_decode_aux g w in 523 if result = NONE then NONE else 524 let (i,w) = THE result in 525 if (g "REX.W" = []) /\ instr_accesses_bad_byte_reg i then NONE else 526 if (g "REX.W" = [T]) /\ (g "16BIT" = [T]) then NONE else 527 if Zprefixes_ok pres i then SOME (Zfull_inst pres i, w) else NONE`; 528 529val padbyte_def = Define ` 530 (padbyte [] = [F;F;F;F;F;F;F;F]) /\ 531 (padbyte [x0] = [x0;F;F;F;F;F;F;F]) /\ 532 (padbyte [x0;x1] = [x0;x1;F;F;F;F;F;F]) /\ 533 (padbyte [x0;x1;x2] = [x0;x1;x2;F;F;F;F;F]) /\ 534 (padbyte [x0;x1;x2;x3] = [x0;x1;x2;x3;F;F;F;F]) /\ 535 (padbyte [x0;x1;x2;x3;x4] = [x0;x1;x2;x3;x4;F;F;F]) /\ 536 (padbyte [x0;x1;x2;x3;x4;x5] = [x0;x1;x2;x3;x4;x5;F;F]) /\ 537 (padbyte [x0;x1;x2;x3;x4;x5;x6] = [x0;x1;x2;x3;x4;x5;x6;F]) /\ 538 (padbyte [x0;x1;x2;x3;x4;x5;x6;x7] = [x0;x1;x2;x3;x4;x5;x6;x7])`; 539 540val word2byte_def = Define ` 541 word2byte (w:word8) = padbyte (MAP ($= 1) (word_to_bin_list w))`; 542 543val x64_decode_bytes_def = Define ` 544 x64_decode_bytes b = x64_decode (FLAT (MAP word2byte b))`; 545 546 547(* -- partially pre-evaluate x64_decode_aux -- *) 548 549val Zreg_distinct = save_thm("Zreg_distinct", 550 SIMP_RULE std_ss [GSYM CONJ_ASSOC,ALL_DISTINCT,MEM,REVERSE_DEF,APPEND] 551 (CONJ ALL_DISTINCT_Zreg (ONCE_REWRITE_RULE [GSYM ALL_DISTINCT_REVERSE] ALL_DISTINCT_Zreg))); 552 553val DTF_DTF = prove( 554 ``(DTF p1 p2 ++ DTF q1 q2 = DTF (p1 ++ q1) (p2 ++ q2))``, 555 SIMP_TAC std_ss [FUN_EQ_THM,option_orelse_def,LET_DEF] THEN REPEAT STRIP_TAC 556 THEN Cases_on `x` THEN Cases_on `r` THEN SIMP_TAC std_ss [DTF_def] THEN Cases_on `h` 557 THEN SIMP_TAC std_ss [DTF_def,option_orelse_def,LET_DEF]); 558 559val DTF_INTRO = prove( 560 ``(DF >> f = DTF (\x.NONE) f) /\ (DT >> f = DTF f (\x.NONE))``, 561 SIMP_TAC std_ss [FUN_EQ_THM,option_then_def,LET_DEF] THEN REPEAT STRIP_TAC 562 THEN Cases_on `x` THEN Cases_on `r` THEN SIMP_TAC std_ss [DTF_def,DF_def,DT_def] 563 THEN Cases_on `h` THEN SIMP_TAC std_ss [DTF_def,option_orelse_def,LET_DEF,DF_def,DT_def]); 564 565val ORELSE_NONE = prove( 566 ``((\x.NONE) ++ f = f) /\ (f ++ (\x.NONE) = f) /\ 567 ((\x.NONE) >> f = (\x.NONE)) /\ (f >> (\x.NONE) = (\x.NONE))``, 568 SIMP_TAC std_ss [FUN_EQ_THM,option_orelse_def,option_then_def,LET_DEF] THEN METIS_TAC []); 569 570val PUSH_assert_lemma = prove( 571 ``(assert k >> (DTF p q >> t) = DTF (assert k >> p >> t) (assert k >> q >> t))``, 572 SIMP_TAC std_ss [FUN_EQ_THM] THEN Cases THEN Cases_on `r` THEN REPEAT (Cases_on `h`) 573 THEN SIMP_TAC std_ss [assert_def,option_then_def,LET_DEF,DTF_def] 574 THEN SRW_TAC [] [] THEN FULL_SIMP_TAC std_ss [] THEN REV_FULL_SIMP_TAC std_ss []); 575 576val DTF_THEN = prove( 577 ``DTF p q >> t = DTF (p >> t) (q >> t)``, 578 SIMP_TAC std_ss [FUN_EQ_THM] THEN Cases THEN Cases_on `r` THEN REPEAT (Cases_on `h`) 579 THEN SIMP_TAC std_ss [assert_def,option_then_def,LET_DEF,DTF_def]); 580 581val PUSH_assert = prove( 582 ``(assert k >> (DF >> f) = DF >> (assert k >> f)) /\ 583 (assert k >> (DT >> f) = DT >> (assert k >> f)) /\ 584 (assert k >> (p ++ q) = (assert k >> p ++ assert k >> q)) /\ 585 (assert k >> DTF p q = DTF (assert k >> p) (assert k >> q))``, 586 SIMP_TAC std_ss [FUN_EQ_THM,option_then_def,option_orelse_def,LET_DEF,GSYM DTF_THM] 587 THEN REPEAT STRIP_TAC THEN Cases_on `x` THEN Cases_on `r` THEN REPEAT (Cases_on `h`) 588 THEN SIMP_TAC std_ss [assert_def,DF_def,DT_def] THEN 589 BasicProvers.EVERY_CASE_TAC THEN REV_FULL_SIMP_TAC std_ss []); 590 591val car = fst o dest_comb; 592val cdr = snd o dest_comb; 593 594val x64_decode_aux_thm = let 595 fun eval_term_ss tm_name tm = conv_ss 596 { name = tm_name, trace = 3, key = SOME ([],tm), conv = K (K EVAL) }; 597 val token_ss = eval_term_ss "tokenise" ``tokenise x``; 598 val if_ss = conv_ss { name = "if", trace = 3, 599 key = SOME ([],``if x then (y:'a) else z``), 600 conv = K (K ((RATOR_CONV o RATOR_CONV o RAND_CONV) EVAL)) }; 601 val hex_ss = eval_term_ss "hex" ``n2bits 8 (hex2num y)``; 602 val hex_add_ss = eval_term_ss "hex" ``n2bits 8 ((hex2num y) + k)``; 603 val n2bits_3_ss = eval_term_ss "n2bits_3" ``n2bits 3 y``; 604 val select_op_ss = eval_term_ss "select_op" ``x64_select_op x (y:('a#'b)list)``; 605 val EL_LEMMA = prove( 606 ``!x y z t. (EL 0 (x::t) = x) /\ (EL 1 (x::y::t) = y) /\ (EL 2 (x:'a::y::z::t) = z)``, 607 REPEAT STRIP_TAC THEN EVAL_TAC); 608 val SOME_LEMMA = prove(``!(f :'a -> 'b option) (g :'c) (h :'d). (SOME >> f = f)``, 609 SIMP_TAC std_ss [FUN_EQ_THM,option_then_def,LET_DEF]); 610 val DT_THEN = prove( 611 ``((DT >> f) (g,T::w) = f (g,w)) /\ ((DF >> f) (g,F::w) = f (g,w))``, 612 SIMP_TAC std_ss [option_then_def,DT_def,DF_def,LET_DEF]); 613 val tm = ``x64_decode_aux g`` 614 val th1 = REWRITE_CONV [MAP,x64_decode_aux_def,combinTheory.o_DEF] tm 615 val th2 = CONV_RULE (ONCE_DEPTH_CONV (BETA_CONV) THENC (RAND_CONV (RAND_CONV EVAL))) th1 616 val th3 = REWRITE_RULE [match_list_raw_def,MAP] th2 617 val th4 = SIMP_RULE (std_ss++token_ss) [match_def] th3 618 val th5 = SIMP_RULE (bool_ss++if_ss) [MAP,x64_match_step_def] th4 619 val th6 = SIMP_RULE (bool_ss++if_ss++select_op_ss) [x64_syntax_def,EL_LEMMA,HD] th5 620 val th6a = SIMP_RULE (std_ss) [LET_DEF,process_hex_add_def] th6 621 val th7 = SIMP_RULE (bool_ss++if_ss++hex_ss++hex_add_ss++n2bits_3_ss) [decode_Zdest_src_def,decode_Zconst_def] th6a 622 val th8 = REWRITE_RULE [GSYM option_then_assoc,option_do_def,option_try_def,GSYM option_orelse_assoc] th7 623 val th9 = SIMP_RULE std_ss [option_orelse_SOME,GSYM option_orelse_assoc,Z_SOME_def] th8 624 val thA = REWRITE_RULE [GSYM option_then_assoc,EL_LEMMA,drop_eq_thm,SOME_LEMMA,option_do_def] th9 625 val thB = thA |> REWRITE_RULE [assert_option_then_thm,option_then_assoc] 626 |> REWRITE_RULE [assert_option_then_thm,GSYM option_then_assoc,SOME_LEMMA] 627 val thC = REWRITE_RULE [option_try_def,GSYM option_orelse_assoc] thB 628 val thD = REWRITE_RULE [option_then_OVER_orelse] thC 629 val thE = REWRITE_RULE [option_orelse_assoc] thD 630 val thX = REWRITE_RULE [DT_over_DF,option_then_assoc,option_orelse_assoc,option_then_OVER_orelse] thE 631 val thZ = thX |> REWRITE_RULE [DTF_INTRO,DTF_DTF,DTF_THEN,ORELSE_NONE,PUSH_assert] 632 (* find pre-evaulatable prefixes *) 633 val DTF_tm = ``DTF p (q:'a # bool list -> 'b option)`` 634 fun dest_DTF tm = if can (match_term DTF_tm) tm then (cdr (car tm), cdr tm) else fail() 635 val w_var = mk_var("w",``:bool list``) 636 fun rec_dest_DTF tm = let 637 val (x1,x2) = dest_DTF tm 638 val xs1 = rec_dest_DTF x1 639 val xs2 = rec_dest_DTF x2 640 in (map (curry listSyntax.mk_cons T) xs1) @ (map (curry listSyntax.mk_cons F) xs2) end 641 handle HOL_ERR _ => [w_var] 642 val prefixes = thZ |> concl |> dest_eq |> snd |> cdr |> rec_dest_DTF 643 (* evaluate the prefixes *) 644 val LEMMA = prove( 645 ``!g w. ((\w. SOME (g,w)) >> f) w = f (g,w)``, 646 SIMP_TAC std_ss [LET_DEF,option_then_def]); 647 val c = ONCE_REWRITE_CONV [thZ] THENC REWRITE_CONV [LEMMA,DTF_def] 648 val decode_aux_tm = ``x64_decode_aux g`` 649 fun eval_for prefix = c (mk_comb(decode_aux_tm,prefix)) 650 val pre_evaluated_thm = LIST_CONJ (map eval_for prefixes) 651 in pre_evaluated_thm end; 652 653fun permanently_add_to_compset name thm = let 654 val _ = save_thm(name,thm) 655 val _ = computeLib.add_persistent_funs [name] 656 in print ("Permanently added to compset: "^name^"\n") end; 657 658val _ = permanently_add_to_compset "Zreg_distinct" Zreg_distinct; 659val _ = permanently_add_to_compset "x64_decode_aux_thm" x64_decode_aux_thm; 660 661(* 662 EVAL ``x64_decode_bytes [0x48w; 0x01w; 0xD1w]`` 663*) 664 665 666val _ = export_theory (); 667