1
2open HolKernel boolLib bossLib Parse;
3open wordsTheory stringTheory stringLib listTheory stringSimps listLib simpLib;
4open decoderTheory bit_listTheory opmonTheory;
5
6open x64_astTheory x64_coretypesTheory;
7
8val _ = new_theory "x64_decoder";
9
10
11(* ---------------------------------------------------------------------------------- *>
12
13  A decoder for x86 instructions is defined and at the end pre-evaliuated for
14  fast execution with EVAL.
15
16  NOTES:
17
18    All instructions can have a REX prefix, making them have access to
19    16 registers.  Some instructions require a REX prefix. The REX
20    prefic must appear immediately before the rest of opcode part of
21    the instruction, i.e. after all the other prefixes.
22
23    The presence of the REX prefix allows for 20 byte-valued registers
24    (AL, AH, CL, CH, DL, DH, BL, BH, SPL, BPL, SIL, DIL and
25    R8B-R15B). However, this implementation of the x86_64 semantics
26    disallows use of AH, CH, DH, BH in order to make the semantics a
27    bit more uniform. Formalised in instr_accesses_bad_byte_reg_def.
28
29  ( Addresses are by default 64-bit, but an address-size override
30    prefix (0x67) can turn the address size into 32-bits. This feature
31    is not supported by this implementation of the x86_64 semantics. )
32
33  TODO:
34   - support address-size over-ride prefix
35   - make sure the 16-bit operations performs the appropriate sw2sw
36
37<* ---------------------------------------------------------------------------------- *)
38
39(* reading hex strings to bit lists *)
40
41val STR_SPACE_AUX_def = Define `
42  (STR_SPACE_AUX n "" = "") /\
43  (STR_SPACE_AUX n (STRING c s) =
44     if n = 0 then STRING #" " (STRING c (STR_SPACE_AUX 1 s))
45              else STRING c (STR_SPACE_AUX (n-1) s))`;
46
47val bytebits_def = Define `
48  bytebits = hex2bits o STR_SPACE_AUX 2`;
49
50(* interprete the IA-32 manuals' syntax *)
51
52val check_opcode_def = Define `
53  check_opcode s =
54    let y = (n2bits 3 o char2num o HD o TL o EXPLODE) s in
55      assert (\g. g "Reg/Opcode" = y)`;
56
57val read_SIB_def = Define `
58  read_SIB =
59    assign_drop "Base" 3 >> assign_drop "Index" 3 >> assign_drop "Scale" 2 >>
60    option_try [
61      assert (\g. (g "Mod" = [F;F]) /\  (g "Base" = [T;F;T])) >> assign_drop "disp32" 32;
62      assert (\g. (g "Mod" = [F;F]) /\ ~(g "Base" = [T;F;T]));
63      assert (\g. (g "Mod" = [T;F])) >> assign_drop "disp8" 8;
64      assert (\g. (g "Mod" = [F;T])) >> assign_drop "disp32" 32]`;
65
66val read_ModRM_def = Define `
67  read_ModRM =
68    assign_drop "R/M" 3 >> assign_drop "Reg/Opcode" 3 >> assign_drop "Mod" 2 >>
69    option_try [
70      assert (\g.  (g "Mod" = [T;T]));
71      assert (\g.  (g "Mod" = [F;F]) /\ ~(g "R/M" = [F;F;T]) /\ ~(g "R/M" = [T;F;T]));
72      assert (\g.  (g "Mod" = [F;F]) /\  (g "R/M" = [T;F;T])) >> assign_drop "disp32" 32;
73      assert (\g.  (g "Mod" = [T;F]) /\ ~(g "R/M" = [F;F;T])) >> assign_drop "disp8" 8;
74      assert (\g.  (g "Mod" = [F;T]) /\ ~(g "R/M" = [F;F;T])) >> assign_drop "disp32" 32;
75      assert (\g. ~(g "Mod" = [T;T]) /\  (g "R/M" = [F;F;T])) >> read_SIB]`;
76
77val is_hex_add_def = Define `
78  is_hex_add x = ((IMPLODE o DROP 2 o EXPLODE) x = "+rd")`;
79
80val process_hex_add_def = Define `
81  process_hex_add name =
82    let n = (hex2num o IMPLODE o TAKE 2 o EXPLODE) name in
83      option_try [drop_eq (n2bits 8 (n+0)) >> assign "reg" (n2bits 3 0);
84                  drop_eq (n2bits 8 (n+1)) >> assign "reg" (n2bits 3 1);
85                  drop_eq (n2bits 8 (n+2)) >> assign "reg" (n2bits 3 2);
86                  drop_eq (n2bits 8 (n+3)) >> assign "reg" (n2bits 3 3);
87                  drop_eq (n2bits 8 (n+4)) >> assign "reg" (n2bits 3 4);
88                  drop_eq (n2bits 8 (n+5)) >> assign "reg" (n2bits 3 5);
89                  drop_eq (n2bits 8 (n+6)) >> assign "reg" (n2bits 3 6);
90                  drop_eq (n2bits 8 (n+7)) >> assign "reg" (n2bits 3 7)]`;
91
92val assign_drop_cond_def = Define `
93  assign_drop_cond name i1 i2 c (g,bits) =
94    if c g then assign_drop name i1 (g,bits)
95           else assign_drop name i2 (g,bits)`;
96
97val x64_match_step_def = Define `
98  x64_match_step name =
99    if is_hex name /\ (STRLEN name = 2) then     (* opcode e.g. FE, 83 and 04 *)
100      drop_eq (n2bits 8 (hex2num name))
101    else if is_hex_add name then                 (* opcode + rd, e.g. F8+rd *)
102      process_hex_add name
103    else if name = "1" then                           (* constant 1 *)
104      assign_drop name 0
105    else if MEM name ["ib";"cb";"rel8";"imm8"] then   (* 8-bit immediate or address *)
106      assign_drop name 8
107    else if MEM name ["iw";"cw";"imm16"] then         (* 16-bit immediate or address *)
108      assign_drop name 16
109    else if MEM name ["id";"cd";"rel32";"imm32"] then (* 32-bit immediate or address *)
110      assign_drop_cond name 16 32 (\g. g "16BIT" = [T])
111    else if MEM name ["iq";"imm64"] then              (* 64-bit immediate *)
112      assign_drop name 64
113    else if name = "/r" then                     (* normal reg + reg/mem *)
114      read_ModRM
115    else if MEM name ["/0";"/1";"/2";"/3";"/4";"/5";"/6";"/7"] then (* opcode extension in ModRM *)
116      read_ModRM >> check_opcode name
117    else if name = "REX.W" then
118      assert (\g. g "REX.W" = [T])
119    else if name = "REX" then
120      assert (\g. g "REX.W" = [F])
121    else if name = "+" then
122      assert (\g. T)
123    else
124      option_fail`;
125
126(* syntax classes *)
127
128val x64_binop_def = Define `x64_binop =
129  [("ADC",Zadc); ("ADD",Zadd); ("AND",Zand); ("CMP",Zcmp);
130   ("OR",Zor); ("SAR",Zsar); ("SHR",Zshr); ("SHL",Zshl);
131   ("SBB",Zsbb); ("SUB",Zsub); ("TEST",Ztest); ("XOR",Zxor)]`;
132
133val x64_monop_def = Define `x64_monop =
134  [("DEC",Zdec); ("INC",Zinc); ("NOT",Znot); ("NEG",Zneg)]`;
135
136
137
138(* x86 - decoder *)
139
140val b2reg_def = Define `
141  b2reg g rex_name name = (EL (bits2num (g name ++ g rex_name))
142    [RAX;RCX;RDX;RBX;RSP;RBP;RSI;RDI;zR8;zR9;zR10;zR11;zR12;zR13;zR14;zR15]):Zreg`;
143
144val decode_Zr32_def = Define `
145  decode_Zr32 g name =
146    if name = "RAX" then RAX else
147      if g "reg" = [] then b2reg g "REX.R" "Reg/Opcode"  (* "Reg/Opcode" comes from ModRM *)
148                      else b2reg g "REX.B" "reg"`;       (* "reg" comes from opcode byte, e.g. "rd" in B8+rd *)
149
150val decode_SIB_def = Define `
151  decode_SIB g =
152    let scaled_index = (if b2reg g "REX.X" "Index" = RSP then NONE else SOME (b2w g "Scale",b2reg g "REX.X" "Index")) in
153      if b2reg g "REX.B" "Base" = RBP then (* the special case indicated by "[*]" *)
154        if g "Mod" = [F;F] then Zm scaled_index NONE (sw2sw ((b2w g "disp32"):word32)) else
155        if g "Mod" = [T;F] then Zm scaled_index (SOME RBP) (sw2sw ((b2w g "disp8"):word8)) else
156        (* g "Mod" = [F;T] *)   Zm scaled_index (SOME RBP) (sw2sw ((b2w g "disp32"):word32))
157      else (* normal cases, just need to read off the correct displacement *)
158        let disp = (if (g "Mod" = [T;F]) then sw2sw ((b2w g "disp8"):word8) else sw2sw ((b2w g "disp32"):word32)) in
159        let disp = (if (g "Mod" = [F;F]) then 0w else disp) in
160          Zm scaled_index (SOME (b2reg g "REX.B" "Base")) disp`;
161
162val decode_Zrm32_def = Define `  (* sw2sw = sign-extension *)
163  decode_Zrm32 g name =
164    if name = "RAX" then Zr RAX else
165      if  (g "Mod" = [F;F]) /\ (g "R/M" = [T;F;T]) then Zm NONE NONE (sw2sw:word32->word64 (b2w g "disp32")) else
166      if ~(g "Mod" = [T;T]) /\ (g "R/M" = [F;F;T]) then decode_SIB g else
167      if  (g "Mod" = [F;F]) then Zm NONE (SOME (b2reg g "REX.B" "R/M")) 0w else
168      if  (g "Mod" = [T;F]) then Zm NONE (SOME (b2reg g "REX.B" "R/M")) (sw2sw:word8->word64 (b2w g "disp8")) else
169      if  (g "Mod" = [F;T]) then Zm NONE (SOME (b2reg g "REX.B" "R/M")) (sw2sw:word32->word64 (b2w g "disp32")) else
170      if  (g "Mod" = [T;T]) then Zr (b2reg g "REX.B" "R/M") else Zr (b2reg g "REX.B" "reg") `;
171
172val decode_Zconst_def = Define `
173  decode_Zconst name g =
174   if name = "imm8"  then sw2sw:word8 ->word64 (b2w g "ib") else
175   if name = "rel8"  then sw2sw:word8 ->word64 (b2w g "cb") else
176   if name = "imm16" then sw2sw:word16->word64 (b2w g "iw") else
177   if name = "imm32" then sw2sw:word32->word64 (b2w g "id") else
178   if name = "rel32" then sw2sw:word32->word64 (b2w g "cd") else
179   if name = "imm64" then b2w g "iq" else
180   if name = "1"     then 1w else 0w`;
181
182val decode_Zdest_src_def = Define `
183  decode_Zdest_src g dest src =
184    if MEM src ["r64";"r32";"r16";"r8"] then
185      Zrm_r (decode_Zrm32 g dest) (decode_Zr32 g src)
186    else if MEM src ["r/m64";"r/m32";"r/m16";"r/m8"] then
187      Zr_rm (decode_Zr32 g dest)  (decode_Zrm32 g src)
188    else if src = "m" then
189      Zr_rm (decode_Zr32 g dest)  (decode_Zrm32 g src)
190    else
191      Zrm_i (decode_Zrm32 g dest) (decode_Zconst src g)`;
192
193val decode_Zconst_or_zero_def = Define `
194  decode_Zconst_or_zero ts g =
195    if LENGTH ts < 2 then 0w else decode_Zconst (EL 1 ts) g`;
196
197val decode_Zimm_rm_def = Define `
198  decode_Zimm_rm ts g =
199    if MEM (EL 1 ts) ["r/m32";"r32";"r/m8";"r8"]
200    then Zi_rm (decode_Zrm32 g (EL 1 ts))
201    else Zi (decode_Zconst (EL 1 ts) g)`;
202
203val x64_select_op_def = Define `
204  x64_select_op name list = SND (HD (FILTER (\x. FST x = name) list))`;
205
206val x86_size_def = Define `
207  x86_size g name =
208    if MEM name ["r8";"m8";"r/m8";"AL"] then Z8 else
209    if MEM name ["r16";"m16";"r/m16";"AX"] then Z16 else
210    if MEM name ["r64";"m64";"r/m64";"RAX"] then Z64 else
211    if g "REX.W" = [T] then Z64 else
212    if g "16BIT" = [T] then Z16 else Z32`;
213
214val Zsize_tm = ``(x86_size g (EL 1 ts))``
215
216val x64_syntax_binop = ``
217  Zbinop (x64_select_op (HD ts) x64_binop) ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))``;
218
219val x64_syntax_monop = ``
220  Zmonop (x64_select_op (HD ts) x64_monop) ^Zsize_tm (decode_Zrm32 g (EL 1 ts))``;
221
222val Z_SOME_def = Define `Z_SOME f = SOME o (\(g,w). (f g,w))`;
223
224val x64_syntax_def = Define `
225  x64_syntax ts =
226    if LENGTH ts = 0 then option_fail else
227    if HD ts = "MOVZX" then Z_SOME (\g. Zmovzx ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts)) (if EL 2 ts = "r/m8" then Z8 else Z16)) else
228    if MEM (HD ts) (MAP FST x64_binop) then Z_SOME (\g. ^x64_syntax_binop) else
229    if MEM (HD ts) (MAP FST x64_monop) then Z_SOME (\g. ^x64_syntax_monop) else
230    if HD ts = "POP"  then Z_SOME (\g. Zpop  (decode_Zrm32 g (EL 1 ts))) else
231    if HD ts = "PUSH" then Z_SOME (\g. Zpush (decode_Zimm_rm ts g)) else
232    if HD ts = "CMPXCHG" then Z_SOME (\g. Zcmpxchg ^Zsize_tm (decode_Zrm32 g (EL 1 ts)) (decode_Zr32 g (EL 2 ts))) else
233    if HD ts = "XCHG"    then Z_SOME (\g. Zxchg ^Zsize_tm (decode_Zrm32 g (EL 1 ts)) (decode_Zr32 g (EL 2 ts))) else
234    if HD ts = "XADD"    then Z_SOME (\g. Zxadd ^Zsize_tm (decode_Zrm32 g (EL 1 ts)) (decode_Zr32 g (EL 2 ts))) else
235    if (HD ts = "JMP") /\ (TL ts = ["r/m32"]) then Z_SOME (\g. Zjmp (decode_Zrm32 g (EL 1 ts))) else
236    if HD ts = "JMP"     then Z_SOME (\g. Zjcc Z_ALWAYS (decode_Zconst_or_zero ts g)) else
237    if HD ts = "JE"      then Z_SOME (\g. Zjcc Z_E (decode_Zconst_or_zero ts g)) else
238    if HD ts = "JNE"     then Z_SOME (\g. Zjcc Z_NE (decode_Zconst_or_zero ts g)) else
239    if HD ts = "JS"      then Z_SOME (\g. Zjcc Z_S (decode_Zconst_or_zero ts g)) else
240    if HD ts = "JNS"     then Z_SOME (\g. Zjcc Z_NS (decode_Zconst_or_zero ts g)) else
241    if HD ts = "JA"      then Z_SOME (\g. Zjcc Z_A (decode_Zconst_or_zero ts g)) else
242    if HD ts = "JNA"     then Z_SOME (\g. Zjcc Z_NA (decode_Zconst_or_zero ts g)) else
243    if HD ts = "JB"      then Z_SOME (\g. Zjcc Z_B (decode_Zconst_or_zero ts g)) else
244    if HD ts = "JNB"     then Z_SOME (\g. Zjcc Z_NB (decode_Zconst_or_zero ts g)) else
245    if HD ts = "LEA"     then Z_SOME (\g. Zlea ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
246    if HD ts = "LOOP"    then Z_SOME (\g. Zloop Z_ALWAYS (decode_Zconst_or_zero ts g)) else
247    if HD ts = "LOOPE"   then Z_SOME (\g. Zloop Z_E (decode_Zconst_or_zero ts g)) else
248    if HD ts = "LOOPNE"  then Z_SOME (\g. Zloop Z_NE (decode_Zconst_or_zero ts g)) else
249    if HD ts = "MOV"     then Z_SOME (\g. Zmov Z_ALWAYS ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
250    if HD ts = "CMOVE"   then Z_SOME (\g. Zmov Z_E ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
251    if HD ts = "CMOVNE"  then Z_SOME (\g. Zmov Z_NE ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
252    if HD ts = "CMOVS"   then Z_SOME (\g. Zmov Z_S ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
253    if HD ts = "CMOVNS"  then Z_SOME (\g. Zmov Z_NS ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
254    if HD ts = "CMOVA"   then Z_SOME (\g. Zmov Z_A ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
255    if HD ts = "CMOVNA"  then Z_SOME (\g. Zmov Z_NA ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
256    if HD ts = "CMOVB"   then Z_SOME (\g. Zmov Z_B ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
257    if HD ts = "CMOVNB"  then Z_SOME (\g. Zmov Z_NB ^Zsize_tm (decode_Zdest_src g (EL 1 ts) (EL 2 ts))) else
258    if HD ts = "MUL"     then Z_SOME (\g. Zmul ^Zsize_tm (decode_Zrm32 g (EL 1 ts))) else
259    if HD ts = "DIV"     then Z_SOME (\g. Zdiv ^Zsize_tm (decode_Zrm32 g (EL 1 ts))) else
260    if HD ts = "CALL"    then Z_SOME (\g. Zcall (decode_Zimm_rm ts g)) else
261    if HD ts = "CPUID"   then Z_SOME (\g. Zcpuid) else
262    if HD ts = "RET"     then Z_SOME (\g. Zret (decode_Zconst_or_zero ts g)) else option_fail`;
263
264
265(* a list of x64 instructions, ordered by the combination of addressing modes they support *)
266
267(* Each line which produces an operation over size Z32 can be modified
268   into a size Z64 by setting REX.W. The Intel manual explicitly
269   lisits each such inferrable line, but we allow such lines to be
270   inferred from the 32-bit lines. For example, the line:
271
272    " 81 /0 id  | ADD r/m32, imm32  ";
273
274   Immediately, represents three lines in the manual, namely:
275
276    " 81 /0 iw          | ADD r/m16, imm16  ";
277    " 81 /0 id          | ADD r/m32, imm32  ";
278    " REX.W + 81 /0 id  | ADD r/m64, imm32  ";
279
280   Note that the size of the immediate constant does not change for 64-bit. *)
281
282val x64_syntax_list = `` [
283
284    " 05 id     | ADD EAX, imm32    ";
285    " 15 id     | ADC EAX, imm32    ";
286    " 25 id     | AND EAX, imm32    ";
287    " 35 id     | XOR EAX, imm32    ";
288    " 0D id     | OR EAX, imm32     ";
289    " 1D id     | SBB EAX, imm32    ";
290    " 2D id     | SUB EAX, imm32    ";
291    " 3D id     | CMP EAX, imm32    ";
292
293    " 83 /0 ib  | ADD r/m32, imm8   ";
294    " 83 /1 ib  | OR r/m32, imm8    ";
295    " 83 /2 ib  | ADC r/m32, imm8   ";
296    " 83 /3 ib  | SBB r/m32, imm8   ";
297    " 83 /4 ib  | AND r/m32, imm8   ";
298    " 83 /5 ib  | SUB r/m32, imm8   ";
299    " 83 /6 ib  | XOR r/m32, imm8   ";
300    " 83 /7 ib  | CMP r/m32, imm8   ";
301
302    " 81 /0 id  | ADD r/m32, imm32  ";
303    " 81 /1 id  | OR r/m32, imm32   ";
304    " 81 /2 id  | ADC r/m32, imm32  ";
305    " 81 /3 id  | SBB r/m32, imm32  ";
306    " 81 /4 id  | AND r/m32, imm32  ";
307    " 81 /5 id  | SUB r/m32, imm32  ";
308    " 81 /6 id  | XOR r/m32, imm32  ";
309    " 81 /7 id  | CMP r/m32, imm32  ";
310
311    " 01 /r     | ADD r/m32, r32    ";
312    " 11 /r     | ADC r/m32, r32    ";
313    " 21 /r     | AND r/m32, r32    ";
314    " 31 /r     | XOR r/m32, r32    ";
315    " 09 /r     | OR r/m32, r32     ";
316    " 19 /r     | SBB r/m32, r32    ";
317    " 29 /r     | SUB r/m32, r32    ";
318    " 39 /r     | CMP r/m32, r32    ";
319
320    " 03 /r     | ADD r32, r/m32    ";
321    " 13 /r     | ADC r32, r/m32    ";
322    " 23 /r     | AND r32, r/m32    ";
323    " 33 /r     | XOR r32, r/m32    ";
324    " 0B /r     | OR r32, r/m32     ";
325    " 1B /r     | SBB r32, r/m32    ";
326    " 2B /r     | SUB r32, r/m32    ";
327    " 3B /r     | CMP r32, r/m32    ";
328
329    " A9 id     | TEST EAX, imm32   ";
330    " F7 /0 id  | TEST r/m32, imm32 ";
331    " 85 /r     | TEST r/m32, r32   ";
332
333    " 38 /r     | CMP r/m8, r8      ";
334    " 3A /r     | CMP r8, r/m8      ";
335    " 80 /7 ib  | CMP r/m8, imm8    ";
336    " FE /0     | INC r/m8          ";
337    " FE /1     | DEC r/m8          ";
338
339    " REX.W + B8+rd iq | MOV r64, imm64    ";  (* need to mention this explicitly due to imm64 *)
340    " B8+rd id         | MOV r32, imm32    ";  (* REX.W variant must be above in the list *)
341    " C7 /0 id         | MOV r/m32, imm32  ";
342    " C6 /0 ib         | MOV r/m8, imm8    ";
343    " 8B /r            | MOV r32,r/m32     ";
344    " 8A /r            | MOV r8,r/m8       ";
345    " 89 /r            | MOV r/m32,r32     ";
346    " 88 /r            | MOV r/m8, r8      ";
347
348    " 0F B6 /r  | MOVZX r32, r/m8   ";
349    " 0F B7 /r  | MOVZX r32, r/m16  ";
350
351    " 0F B1 /r  | CMPXCHG r/m32, r32";
352    " 0F C1 /r  | XADD r/m32, r32   ";
353    " 90+rd     | XCHG EAX, r32     ";
354    " 87 /r     | XCHG r/m32, r32   ";
355
356    " FF /1     | DEC r/m32         ";
357    " FF /0     | INC r/m32         ";
358    " F7 /3     | NEG r/m32         ";
359    " F7 /2     | NOT r/m32         ";
360    " 8F /0     | POP r/m32         ";
361    " 58+rd     | POP r32           ";
362    " FF /6     | PUSH r/m32        ";
363    " 50+rd     | PUSH r32          ";
364
365    " E8 cd     | CALL rel32        ";
366    " FF /2     | CALL r/m32        ";
367
368    " 8D /r     | LEA r32, m        ";
369    " D1 /4     | SHL r/m32, 1      ";
370    " C1 /4 ib  | SHL r/m32, imm8   ";
371    " D1 /5     | SHR r/m32, 1      ";
372    " C1 /5 ib  | SHR r/m32, imm8   ";
373    " D1 /7     | SAR r/m32, 1      ";
374    " C1 /7 ib  | SAR r/m32, imm8   ";
375
376    " FF /4     | JMP r/m32         ";
377    " EB cb     | JMP rel8          ";
378    " E9 cd     | JMP rel32         ";
379    " 74 cb     | JE rel8           ";
380    " 75 cb     | JNE rel8          ";
381    " 0F 84 cd  | JE rel32          ";
382    " 0F 85 cd  | JNE rel32         ";
383    " 78 cb     | JS rel8           ";
384    " 79 cb     | JNS rel8          ";
385    " 0F 88 cd  | JS rel32          ";
386    " 0F 89 cd  | JNS rel32         ";
387    " 77 cb     | JA rel8           ";
388    " 76 cb     | JNA rel8          ";
389    " 0F 87 cd  | JA rel32          ";
390    " 0F 86 cd  | JNA rel32         ";
391    " 72 cb     | JB rel8           ";
392    " 73 cb     | JNB rel8          ";
393    " 0F 82 cd  | JB rel32          ";
394    " 0F 83 cd  | JNB rel32         ";
395
396    " 0F 44 /r  | CMOVE r32, r/m32  ";
397    " 0F 45 /r  | CMOVNE r32, r/m32 ";
398    " 0F 48 /r  | CMOVS r32, r/m32  ";
399    " 0F 49 /r  | CMOVNS r32, r/m32 ";
400    " 0F 47 /r  | CMOVA r32, r/m32  ";
401    " 0F 46 /r  | CMOVNA r32, r/m32 ";
402    " 0F 42 /r  | CMOVB r32, r/m32  ";
403    " 0F 43 /r  | CMOVNB r32, r/m32 ";
404
405    " 0F A2     | CPUID             ";
406
407    " C3        | RET               "; (* short for "RET 0" *)
408    " C2 iw     | RET imm16         ";
409    " E2 cb     | LOOP rel8         ";
410    " E1 cb     | LOOPE rel8        ";
411    " E0 cb     | LOOPNE rel8       ";
412
413    " F7 /6     | DIV r/m32         ";
414    " F7 /4     | MUL r/m32         "
415
416  ]``;
417
418val x64_decode_aux_def = zDefine `
419  x64_decode_aux g =
420    (match_list_raw g x64_match_step tokenise (x64_syntax o tokenise) o
421    MAP (\s. let x = STR_SPLIT [#"|"] s in (EL 0 x, EL 1 x))) ^x64_syntax_list`;
422
423val x64_decode_prefixes_def = tDefine "x64_decode_prefixes" `
424  x64_decode_prefixes w =
425    if LENGTH w < 8 then ([],w) else
426      let b = n2w (bits2num (TAKE 8 w)):word8 in
427        if ~(MEM b [0x2Ew;0x3Ew;0xF0w;0x66w]) then ([],w) else
428          let (pres,v) = x64_decode_prefixes (DROP 8 w) in
429          let pre = (if b = 0x2Ew then Zbranch_not_taken else
430                     if b = 0x3Ew then Zbranch_taken else
431                     if b = 0xF0w then Zlock else
432                     if b = 0x66w then Zoperand_size_override else ARB) in
433            (pre :: pres, v)`
434 (WF_REL_TAC `measure LENGTH`
435  THEN ASM_SIMP_TAC std_ss [LENGTH_DROP] THEN REPEAT STRIP_TAC THEN DECIDE_TAC);
436
437val x64_decode_REX_def = Define `
438  x64_decode_REX w =
439    let g = (\n. []) in
440      if LENGTH w < 8 then (g,w) else
441        if ~(TAKE 4 (DROP 4 w) = [F;F;T;F]) then (g,w) else
442          let g = ("REX.B" =+ [EL 0 w]) g in
443          let g = ("REX.X" =+ [EL 1 w]) g in
444          let g = ("REX.R" =+ [EL 2 w]) g in
445          let g = ("REX.W" =+ [EL 3 w]) g in
446          let w = DROP 8 w in
447            (g,w)`;
448
449val dest_accesses_memory_def = Define `
450  (dest_accesses_memory (Zrm_i rm i) = Zrm_is_memory_access rm) /\
451  (dest_accesses_memory (Zrm_r rm r) = Zrm_is_memory_access rm) /\
452  (dest_accesses_memory (Zr_rm r rm) = F)`;
453
454val x64_lock_ok_def = Define `
455  (x64_lock_ok (Zbinop binop_name dsize ds) =
456    MEM binop_name [Zadd;Zand;Zor;Zsub;Zxor] /\
457    dest_accesses_memory ds) /\
458  (x64_lock_ok (Zmonop monop_name dsize rm) =
459    MEM monop_name [Zdec;Zinc;Zneg;Znot] /\
460    Zrm_is_memory_access rm) /\
461  (x64_lock_ok (Zxadd dsize rm r) = Zrm_is_memory_access rm) /\
462  (x64_lock_ok (Zxchg dsize rm r) = Zrm_is_memory_access rm) /\
463  (x64_lock_ok (Zcmpxchg dsize rm r) = Zrm_is_memory_access rm) /\
464  (x64_lock_ok (Zpop rm) = F) /\
465  (x64_lock_ok (Zlea dsize ds) = F) /\
466  (x64_lock_ok (Zpush imm_rm) = F) /\
467  (x64_lock_ok (Zcall imm_rm) = F) /\
468  (x64_lock_ok (Zret imm) = F) /\
469  (x64_lock_ok (Zmov c dsize ds) = F) /\
470  (x64_lock_ok (Zjcc c imm) = F) /\
471  (x64_lock_ok (Zjmp rm) = F) /\
472  (x64_lock_ok (Zloop c imm) = F) /\
473  (x64_lock_ok (Zpushad) = F) /\
474  (x64_lock_ok (Zpopad) = F)`;
475
476val x64_is_jcc_def = Define `
477  (x64_is_jcc (Zjcc c imm) = T) /\
478  (x64_is_jcc _ = F)`;
479
480val byte_regs_in_rm_def = Define `
481  (byte_regs_in_rm (Zr r) = [r]) /\
482  (byte_regs_in_rm (Zm r1 r2 offset) = [])`;
483
484val byte_regs_in_ds_def = Define `
485  (byte_regs_in_ds (Zrm_i rm i) = byte_regs_in_rm rm) /\
486  (byte_regs_in_ds (Zrm_r rm r) = r::byte_regs_in_rm rm) /\
487  (byte_regs_in_ds (Zr_rm r rm) = r::byte_regs_in_rm rm)`;
488
489val has_bad_byte_regs_def = Define `
490  has_bad_byte_regs dsize xs =
491    (dsize = Z8) /\ ~(FILTER (\x. MEM x [RSP;RBP;RSI;RDI]) xs = [])`;
492
493val instr_accesses_bad_byte_reg_def = Define `
494  (instr_accesses_bad_byte_reg (Zbinop binop_name dsize ds) = has_bad_byte_regs dsize (byte_regs_in_ds ds)) /\
495  (instr_accesses_bad_byte_reg (Zmonop monop_name dsize rm) = has_bad_byte_regs dsize (byte_regs_in_rm rm)) /\
496  (instr_accesses_bad_byte_reg (Zxadd dsize rm r) = has_bad_byte_regs dsize (r::byte_regs_in_rm rm)) /\
497  (instr_accesses_bad_byte_reg (Zxchg dsize rm r) = has_bad_byte_regs dsize (r::byte_regs_in_rm rm)) /\
498  (instr_accesses_bad_byte_reg (Zcmpxchg dsize rm r) = has_bad_byte_regs dsize (r::byte_regs_in_rm rm)) /\
499  (instr_accesses_bad_byte_reg (Zpop rm) = F) /\
500  (instr_accesses_bad_byte_reg (Zlea dsize ds) = has_bad_byte_regs dsize (byte_regs_in_ds ds)) /\
501  (instr_accesses_bad_byte_reg (Zpush imm_rm) = F) /\
502  (instr_accesses_bad_byte_reg (Zcall imm_rm) = F) /\
503  (instr_accesses_bad_byte_reg (Zret imm) = F) /\
504  (instr_accesses_bad_byte_reg (Zmov c dsize ds) = has_bad_byte_regs dsize (byte_regs_in_ds ds)) /\
505  (instr_accesses_bad_byte_reg (Zjcc c imm) = F) /\
506  (instr_accesses_bad_byte_reg (Zjmp rm) = F) /\
507  (instr_accesses_bad_byte_reg (Zloop c imm) = F) /\
508  (instr_accesses_bad_byte_reg (Zpushad) = F) /\
509  (instr_accesses_bad_byte_reg (Zpopad) = F)`;
510
511val Zprefixes_ok_def = Define `
512  Zprefixes_ok pres i = ALL_DISTINCT (MAP Zprefix_group pres) /\
513                        (MEM Zlock pres ==> x64_lock_ok i) /\
514                        (MEM Zbranch_taken pres ==> x64_is_jcc i) /\
515                        (MEM Zbranch_not_taken pres ==> x64_is_jcc i)`;
516
517val x64_decode_def = Define `
518  x64_decode w =
519    let (pres,w) = x64_decode_prefixes w in
520    let (g,w) = x64_decode_REX w in
521    let g = (if MEM Zoperand_size_override pres then (("16BIT" =+ [T]) g) else g) in
522    let result = x64_decode_aux g w in
523      if result = NONE then NONE else
524        let (i,w) = THE result in
525          if (g "REX.W" = []) /\ instr_accesses_bad_byte_reg i then NONE else
526          if (g "REX.W" = [T]) /\ (g "16BIT" = [T]) then NONE else
527          if Zprefixes_ok pres i then SOME (Zfull_inst pres i, w) else NONE`;
528
529val padbyte_def = Define `
530  (padbyte [] = [F;F;F;F;F;F;F;F]) /\
531  (padbyte [x0] = [x0;F;F;F;F;F;F;F]) /\
532  (padbyte [x0;x1] = [x0;x1;F;F;F;F;F;F]) /\
533  (padbyte [x0;x1;x2] = [x0;x1;x2;F;F;F;F;F]) /\
534  (padbyte [x0;x1;x2;x3] = [x0;x1;x2;x3;F;F;F;F]) /\
535  (padbyte [x0;x1;x2;x3;x4] = [x0;x1;x2;x3;x4;F;F;F]) /\
536  (padbyte [x0;x1;x2;x3;x4;x5] = [x0;x1;x2;x3;x4;x5;F;F]) /\
537  (padbyte [x0;x1;x2;x3;x4;x5;x6] = [x0;x1;x2;x3;x4;x5;x6;F]) /\
538  (padbyte [x0;x1;x2;x3;x4;x5;x6;x7] = [x0;x1;x2;x3;x4;x5;x6;x7])`;
539
540val word2byte_def = Define `
541  word2byte (w:word8) = padbyte (MAP ($= 1) (word_to_bin_list w))`;
542
543val x64_decode_bytes_def = Define `
544  x64_decode_bytes b = x64_decode (FLAT (MAP word2byte b))`;
545
546
547(* -- partially pre-evaluate x64_decode_aux -- *)
548
549val Zreg_distinct = save_thm("Zreg_distinct",
550  SIMP_RULE std_ss [GSYM CONJ_ASSOC,ALL_DISTINCT,MEM,REVERSE_DEF,APPEND]
551    (CONJ ALL_DISTINCT_Zreg (ONCE_REWRITE_RULE [GSYM ALL_DISTINCT_REVERSE] ALL_DISTINCT_Zreg)));
552
553val DTF_DTF = prove(
554  ``(DTF p1 p2 ++ DTF q1 q2 = DTF (p1 ++ q1) (p2 ++ q2))``,
555  SIMP_TAC std_ss [FUN_EQ_THM,option_orelse_def,LET_DEF] THEN REPEAT STRIP_TAC
556  THEN Cases_on `x` THEN Cases_on `r` THEN SIMP_TAC std_ss [DTF_def] THEN Cases_on `h`
557  THEN SIMP_TAC std_ss [DTF_def,option_orelse_def,LET_DEF]);
558
559val DTF_INTRO = prove(
560  ``(DF >> f = DTF (\x.NONE) f) /\ (DT >> f = DTF f (\x.NONE))``,
561  SIMP_TAC std_ss [FUN_EQ_THM,option_then_def,LET_DEF] THEN REPEAT STRIP_TAC
562  THEN Cases_on `x` THEN Cases_on `r` THEN SIMP_TAC std_ss [DTF_def,DF_def,DT_def]
563  THEN Cases_on `h` THEN SIMP_TAC std_ss [DTF_def,option_orelse_def,LET_DEF,DF_def,DT_def]);
564
565val ORELSE_NONE = prove(
566  ``((\x.NONE) ++ f = f) /\ (f ++ (\x.NONE) = f) /\
567    ((\x.NONE) >> f = (\x.NONE)) /\ (f >> (\x.NONE) = (\x.NONE))``,
568  SIMP_TAC std_ss [FUN_EQ_THM,option_orelse_def,option_then_def,LET_DEF] THEN METIS_TAC []);
569
570val PUSH_assert_lemma = prove(
571  ``(assert k >> (DTF p q >> t) = DTF (assert k >> p >> t) (assert k >> q >> t))``,
572  SIMP_TAC std_ss [FUN_EQ_THM] THEN Cases THEN Cases_on `r` THEN REPEAT (Cases_on `h`)
573  THEN SIMP_TAC std_ss [assert_def,option_then_def,LET_DEF,DTF_def]
574  THEN SRW_TAC [] [] THEN FULL_SIMP_TAC std_ss [] THEN REV_FULL_SIMP_TAC std_ss []);
575
576val DTF_THEN = prove(
577  ``DTF p q >> t = DTF (p >> t) (q >> t)``,
578  SIMP_TAC std_ss [FUN_EQ_THM] THEN Cases THEN Cases_on `r` THEN REPEAT (Cases_on `h`)
579  THEN SIMP_TAC std_ss [assert_def,option_then_def,LET_DEF,DTF_def]);
580
581val PUSH_assert = prove(
582  ``(assert k >> (DF >> f) = DF >> (assert k >> f)) /\
583    (assert k >> (DT >> f) = DT >> (assert k >> f)) /\
584    (assert k >> (p ++ q) = (assert k >> p ++ assert k >> q)) /\
585    (assert k >> DTF p q = DTF (assert k >> p) (assert k >> q))``,
586  SIMP_TAC std_ss [FUN_EQ_THM,option_then_def,option_orelse_def,LET_DEF,GSYM DTF_THM]
587  THEN REPEAT STRIP_TAC THEN Cases_on `x` THEN Cases_on `r` THEN REPEAT (Cases_on `h`)
588  THEN SIMP_TAC std_ss [assert_def,DF_def,DT_def] THEN
589  BasicProvers.EVERY_CASE_TAC THEN REV_FULL_SIMP_TAC std_ss []);
590
591val car = fst o dest_comb;
592val cdr = snd o dest_comb;
593
594val x64_decode_aux_thm = let
595  fun eval_term_ss tm_name tm = conv_ss
596    { name = tm_name, trace = 3, key = SOME ([],tm), conv = K (K EVAL) };
597  val token_ss = eval_term_ss "tokenise" ``tokenise x``;
598  val if_ss = conv_ss { name = "if", trace = 3,
599    key = SOME ([],``if x then (y:'a) else z``),
600    conv = K (K ((RATOR_CONV o RATOR_CONV o RAND_CONV) EVAL)) };
601  val hex_ss = eval_term_ss "hex" ``n2bits 8 (hex2num y)``;
602  val hex_add_ss = eval_term_ss "hex" ``n2bits 8 ((hex2num y) + k)``;
603  val n2bits_3_ss = eval_term_ss "n2bits_3" ``n2bits 3 y``;
604  val select_op_ss = eval_term_ss "select_op" ``x64_select_op x (y:('a#'b)list)``;
605  val EL_LEMMA = prove(
606    ``!x y z t. (EL 0 (x::t) = x) /\ (EL 1 (x::y::t) = y) /\ (EL 2 (x:'a::y::z::t) = z)``,
607    REPEAT STRIP_TAC THEN EVAL_TAC);
608  val SOME_LEMMA = prove(``!(f :'a -> 'b option) (g :'c) (h :'d). (SOME >> f = f)``,
609    SIMP_TAC std_ss [FUN_EQ_THM,option_then_def,LET_DEF]);
610  val DT_THEN = prove(
611    ``((DT >> f) (g,T::w) = f (g,w)) /\ ((DF >> f) (g,F::w) = f (g,w))``,
612    SIMP_TAC std_ss [option_then_def,DT_def,DF_def,LET_DEF]);
613  val tm = ``x64_decode_aux g``
614  val th1 = REWRITE_CONV [MAP,x64_decode_aux_def,combinTheory.o_DEF] tm
615  val th2 = CONV_RULE (ONCE_DEPTH_CONV (BETA_CONV) THENC (RAND_CONV (RAND_CONV EVAL))) th1
616  val th3 = REWRITE_RULE [match_list_raw_def,MAP] th2
617  val th4 = SIMP_RULE (std_ss++token_ss) [match_def] th3
618  val th5 = SIMP_RULE (bool_ss++if_ss) [MAP,x64_match_step_def] th4
619  val th6 = SIMP_RULE (bool_ss++if_ss++select_op_ss) [x64_syntax_def,EL_LEMMA,HD] th5
620  val th6a = SIMP_RULE (std_ss) [LET_DEF,process_hex_add_def] th6
621  val th7 = SIMP_RULE (bool_ss++if_ss++hex_ss++hex_add_ss++n2bits_3_ss) [decode_Zdest_src_def,decode_Zconst_def] th6a
622  val th8 = REWRITE_RULE [GSYM option_then_assoc,option_do_def,option_try_def,GSYM option_orelse_assoc] th7
623  val th9 = SIMP_RULE std_ss [option_orelse_SOME,GSYM option_orelse_assoc,Z_SOME_def] th8
624  val thA = REWRITE_RULE [GSYM option_then_assoc,EL_LEMMA,drop_eq_thm,SOME_LEMMA,option_do_def] th9
625  val thB = thA |> REWRITE_RULE [assert_option_then_thm,option_then_assoc]
626                |> REWRITE_RULE [assert_option_then_thm,GSYM option_then_assoc,SOME_LEMMA]
627  val thC = REWRITE_RULE [option_try_def,GSYM option_orelse_assoc] thB
628  val thD = REWRITE_RULE [option_then_OVER_orelse] thC
629  val thE = REWRITE_RULE [option_orelse_assoc] thD
630  val thX = REWRITE_RULE [DT_over_DF,option_then_assoc,option_orelse_assoc,option_then_OVER_orelse] thE
631  val thZ = thX |> REWRITE_RULE [DTF_INTRO,DTF_DTF,DTF_THEN,ORELSE_NONE,PUSH_assert]
632  (* find pre-evaulatable prefixes *)
633  val DTF_tm = ``DTF p (q:'a # bool list -> 'b option)``
634  fun dest_DTF tm = if can (match_term DTF_tm) tm then (cdr (car tm), cdr tm) else fail()
635  val w_var = mk_var("w",``:bool list``)
636  fun rec_dest_DTF tm = let
637    val (x1,x2) = dest_DTF tm
638    val xs1 = rec_dest_DTF x1
639    val xs2 = rec_dest_DTF x2
640    in (map (curry listSyntax.mk_cons T) xs1) @ (map (curry listSyntax.mk_cons F) xs2) end
641    handle HOL_ERR _ => [w_var]
642  val prefixes = thZ |> concl |> dest_eq |> snd |> cdr |> rec_dest_DTF
643  (* evaluate the prefixes *)
644  val LEMMA = prove(
645    ``!g w. ((\w. SOME (g,w)) >> f) w = f (g,w)``,
646    SIMP_TAC std_ss [LET_DEF,option_then_def]);
647  val c = ONCE_REWRITE_CONV [thZ] THENC REWRITE_CONV [LEMMA,DTF_def]
648  val decode_aux_tm = ``x64_decode_aux g``
649  fun eval_for prefix = c (mk_comb(decode_aux_tm,prefix))
650  val pre_evaluated_thm = LIST_CONJ (map eval_for prefixes)
651  in pre_evaluated_thm end;
652
653fun permanently_add_to_compset name thm = let
654  val _ = save_thm(name,thm)
655  val _ = computeLib.add_persistent_funs [name]
656  in print ("Permanently added to compset: "^name^"\n") end;
657
658val _ = permanently_add_to_compset "Zreg_distinct" Zreg_distinct;
659val _ = permanently_add_to_compset "x64_decode_aux_thm" x64_decode_aux_thm;
660
661(*
662   EVAL ``x64_decode_bytes [0x48w; 0x01w; 0xD1w]``
663*)
664
665
666val _ = export_theory ();
667