1
2open HolKernel boolLib bossLib Parse;
3open wordsTheory stringTheory stringLib listTheory stringSimps listLib simpLib;
4open decoderTheory bit_listTheory opmonTheory;
5
6open x86_astTheory;
7
8val _ = new_theory "x86_decoder";
9val _ = ParseExtras.temp_loose_equality()
10
11
12(* ---------------------------------------------------------------------------------- *>
13
14  A decoder for x86 instructions is defined and at the end pre-evaliuated for
15  fast execution with EVAL.
16
17<* ---------------------------------------------------------------------------------- *)
18
19(* reading hex strings to bit lists *)
20
21val STR_SPACE_AUX_def = Define `
22  (STR_SPACE_AUX n "" = "") /\
23  (STR_SPACE_AUX n (STRING c s) =
24     if n = 0 then STRING #" " (STRING c (STR_SPACE_AUX 1 s))
25              else STRING c (STR_SPACE_AUX (n-1) s))`;
26
27val bytebits_def = Define `
28  bytebits = hex2bits o STR_SPACE_AUX 2`;
29
30(* interprete the IA-32 manuals' syntax *)
31
32val check_opcode_def = Define `
33  check_opcode s =
34    let y = (n2bits 3 o char2num o HD o TL o EXPLODE) s in
35      assert (\g. g "Reg/Opcode" = y)`;
36
37val read_SIB_def = Define `
38  read_SIB =
39    assign_drop "Base" 3 >> assign_drop "Index" 3 >> assign_drop "Scale" 2 >>
40    option_try [
41      assert (\g. (g "Mod" = [F;F]) /\  (g "Base" = [T;F;T])) >> assign_drop "disp32" 32;
42      assert (\g. (g "Mod" = [F;F]) /\ ~(g "Base" = [T;F;T]));
43      assert (\g. (g "Mod" = [T;F])) >> assign_drop "disp8" 8;
44      assert (\g. (g "Mod" = [F;T])) >> assign_drop "disp32" 32]`;
45
46val read_ModRM_def = Define `
47  read_ModRM =
48    assign_drop "R/M" 3 >> assign_drop "Reg/Opcode" 3 >> assign_drop "Mod" 2 >>
49    option_try [
50      assert (\g.  (g "Mod" = [T;T]));
51      assert (\g.  (g "Mod" = [F;F]) /\ ~(g "R/M" = [F;F;T]) /\ ~(g "R/M" = [T;F;T]));
52      assert (\g.  (g "Mod" = [F;F]) /\  (g "R/M" = [T;F;T])) >> assign_drop "disp32" 32;
53      assert (\g.  (g "Mod" = [T;F]) /\ ~(g "R/M" = [F;F;T])) >> assign_drop "disp8" 8;
54      assert (\g.  (g "Mod" = [F;T]) /\ ~(g "R/M" = [F;F;T])) >> assign_drop "disp32" 32;
55      assert (\g. ~(g "Mod" = [T;T]) /\  (g "R/M" = [F;F;T])) >> read_SIB]`;
56
57val is_hex_add_def = Define `
58  is_hex_add x = ((IMPLODE o DROP 2 o EXPLODE) x = "+rd")`;
59
60val process_hex_add_def = Define `
61  process_hex_add name =
62    let n = (hex2num o IMPLODE o TAKE 2 o EXPLODE) name in
63      option_try [drop_eq (n2bits 8 (n+0)) >> assign "reg" (n2bits 3 0);
64                  drop_eq (n2bits 8 (n+1)) >> assign "reg" (n2bits 3 1);
65                  drop_eq (n2bits 8 (n+2)) >> assign "reg" (n2bits 3 2);
66                  drop_eq (n2bits 8 (n+3)) >> assign "reg" (n2bits 3 3);
67                  drop_eq (n2bits 8 (n+4)) >> assign "reg" (n2bits 3 4);
68                  drop_eq (n2bits 8 (n+5)) >> assign "reg" (n2bits 3 5);
69                  drop_eq (n2bits 8 (n+6)) >> assign "reg" (n2bits 3 6);
70                  drop_eq (n2bits 8 (n+7)) >> assign "reg" (n2bits 3 7)]`;
71
72val x86_match_step_def = Define `
73  x86_match_step name =
74    if is_hex name /\ (STRLEN name = 2) then     (* opcode e.g. FE, 83 and 04 *)
75      drop_eq (n2bits 8 (hex2num name))
76    else if is_hex_add name then                 (* opcode + rd, e.g. F8+rd *)
77      process_hex_add name
78    else if name = "1" then                           (* constant 1 *)
79      assign_drop name 0
80    else if MEM name ["ib";"cb";"rel8";"imm8"] then   (* 8-bit immediate or address *)
81      assign_drop name 8
82    else if MEM name ["iw";"cw";"imm16"] then         (* 16-bit immediate or address *)
83      assign_drop name 16
84    else if MEM name ["id";"cd";"rel32";"imm32"] then (* 32-bit immediate or address *)
85      assign_drop name 32
86    else if name = "/r" then                     (* normal reg + reg/mem *)
87      read_ModRM
88    else if MEM name ["/0";"/1";"/2";"/3";"/4";"/5";"/6";"/7"] then (* opcode extension in ModRM *)
89      read_ModRM >> check_opcode name
90    else
91      option_fail`;
92
93(* syntax classes *)
94
95val x86_binop_def = Define `x86_binop =
96  [("ADC",Xadc); ("ADD",Xadd); ("AND",Xand); ("CMP",Xcmp);
97   ("OR",Xor); ("SAR",Xsar); ("SHR",Xshr); ("SHL",Xshl);
98   ("SBB",Xsbb); ("SUB",Xsub); ("TEST",Xtest); ("XOR",Xxor)]`;
99
100val x86_monop_def = Define `x86_monop =
101  [("DEC",Xdec); ("INC",Xinc); ("NOT",Xnot); ("NEG",Xneg)]`;
102
103(* x86 - decoder *)
104
105val b2reg_def = Define `
106  b2reg g name = (EL (bits2num (g name)) [EAX;ECX;EDX;EBX;ESP;EBP;ESI;EDI]):Xreg`;
107
108val decode_Xr32_def = Define `
109  decode_Xr32 g name =
110    if name = "EAX" then EAX else
111      if g "reg" = [] then b2reg g "Reg/Opcode" else b2reg g "reg"`;
112
113val decode_SIB_def = Define `
114  decode_SIB g =
115    let scaled_index = (if g "Index" = [F;F;T] then NONE else SOME (b2w g "Scale",b2reg g "Index")) in
116      if g "Base" = [T;F;T] then (* the special case indicated by "[*]" *)
117        if g "Mod" = [F;F] then Xm scaled_index NONE (b2w g "disp32") else
118        if g "Mod" = [T;F] then Xm scaled_index (SOME EBP) (b2w g "disp8") else
119        (* g "Mod" = [F;T] *)   Xm scaled_index (SOME EBP) (b2w g "disp32")
120      else (* normal cases, just need to read off the correct displacement *)
121        let disp = (if (g "Mod" = [T;F]) then sw2sw ((b2w g "disp8"):word8) else b2w g "disp32") in
122        let disp = (if (g "Mod" = [F;F]) then 0w else disp) in
123          Xm scaled_index (SOME (b2reg g "Base")) disp`;
124
125val decode_Xrm32_def = Define `  (* sw2sw = sign-extension *)
126  decode_Xrm32 g name =
127    if name = "EAX" then Xr EAX else
128      if  (g "Mod" = [F;F]) /\ (g "R/M" = [T;F;T]) then Xm NONE NONE (b2w g "disp32") else
129      if ~(g "Mod" = [T;T]) /\ (g "R/M" = [F;F;T]) then decode_SIB g else
130      if  (g "Mod" = [F;F]) then Xm NONE (SOME (b2reg g "R/M")) 0w else
131      if  (g "Mod" = [T;F]) then Xm NONE (SOME (b2reg g "R/M")) (sw2sw:word8->word32 (b2w g "disp8")) else
132      if  (g "Mod" = [F;T]) then Xm NONE (SOME (b2reg g "R/M")) (b2w g "disp32") else
133      if  (g "Mod" = [T;T]) then Xr (b2reg g "R/M") else Xr (b2reg g "reg") `;
134
135val decode_Xconst_def = Define `
136  decode_Xconst name g =
137   if name = "imm8"  then sw2sw:word8 ->word32 (b2w g "ib") else
138   if name = "rel8"  then sw2sw:word8 ->word32 (b2w g "cb") else
139   if name = "imm16" then sw2sw:word16->word32 (b2w g "iw") else
140   if name = "imm32" then b2w g "id" else
141   if name = "rel32" then b2w g "cd" else
142   if name = "1"     then 1w else 0w`;
143
144val decode_Xdest_src_def = Define `
145  decode_Xdest_src g dest src =
146    if MEM src ["r32";"r8"] then
147      Xrm_r (decode_Xrm32 g dest) (decode_Xr32 g src)
148    else if MEM src ["r/m32";"r/m8"] then
149      Xr_rm (decode_Xr32 g dest)  (decode_Xrm32 g src)
150    else if src = "m" then
151      Xr_rm (decode_Xr32 g dest)  (decode_Xrm32 g src)
152    else
153      Xrm_i (decode_Xrm32 g dest) (decode_Xconst src g)`;
154
155val decode_Xconst_or_zero_def = Define `
156  decode_Xconst_or_zero ts g =
157    if LENGTH ts < 2 then 0w else decode_Xconst (EL 1 ts) g`;
158
159val decode_Ximm_rm_def = Define `
160  decode_Ximm_rm ts g =
161    if MEM (EL 1 ts) ["r/m32";"r32";"r/m8";"r8"]
162    then Xi_rm (decode_Xrm32 g (EL 1 ts))
163    else Xi (decode_Xconst (EL 1 ts) g)`;
164
165val x86_select_op_def = Define `
166  x86_select_op name list = SND (HD (FILTER (\x. FST x = name) list))`;
167
168val x86_syntax_binop = ``
169  Xbinop (x86_select_op (HD ts) x86_binop) (decode_Xdest_src g (EL 1 ts) (EL 2 ts))``;
170
171val x86_syntax_monop = ``
172  Xmonop (x86_select_op (HD ts) x86_monop) (decode_Xrm32 g (EL 1 ts))``;
173
174val X_SOME_def = Define `X_SOME f = SOME o (\(g,w). (f g,w))`;
175
176val x86_syntax_def = Define `
177  x86_syntax ts =
178    if LENGTH ts = 0 then option_fail else
179    if (HD ts = "CMP") /\ MEM "r/m8" ts then X_SOME (\g. Xcmp_byte (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
180    if (HD ts = "MOV") /\ MEM "r/m8" ts then X_SOME (\g. Xmov_byte (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
181    if (HD ts = "DEC") /\ MEM "r/m8" ts then X_SOME (\g. Xdec_byte (decode_Xrm32 g (EL 1 ts))) else
182    if HD ts = "MOVZX" then X_SOME (\g. Xmovzx (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
183    if MEM (HD ts) (MAP FST x86_binop) then X_SOME (\g. ^x86_syntax_binop) else
184    if MEM (HD ts) (MAP FST x86_monop) then X_SOME (\g. ^x86_syntax_monop) else
185    if HD ts = "POP"  then X_SOME (\g. Xpop  (decode_Xrm32 g (EL 1 ts))) else
186    if HD ts = "PUSH" then X_SOME (\g. Xpush (decode_Ximm_rm ts g)) else
187    if HD ts = "PUSHAD"  then X_SOME (\g. Xpushad) else
188    if HD ts = "POPAD"   then X_SOME (\g. Xpopad)  else
189    if HD ts = "CMPXCHG" then X_SOME (\g. Xcmpxchg (decode_Xrm32 g (EL 1 ts)) (decode_Xr32 g (EL 2 ts))) else
190    if HD ts = "XCHG"    then X_SOME (\g. Xxchg (decode_Xrm32 g (EL 1 ts)) (decode_Xr32 g (EL 2 ts))) else
191    if HD ts = "XADD"    then X_SOME (\g. Xxadd (decode_Xrm32 g (EL 1 ts)) (decode_Xr32 g (EL 2 ts))) else
192    if (HD ts = "JMP") /\ (TL ts = ["r/m32"]) then X_SOME (\g. Xjmp (decode_Xrm32 g (EL 1 ts))) else
193    if HD ts = "JMP"     then X_SOME (\g. Xjcc X_ALWAYS (decode_Xconst_or_zero ts g)) else
194    if HD ts = "JE"      then X_SOME (\g. Xjcc X_E (decode_Xconst_or_zero ts g)) else
195    if HD ts = "JNE"     then X_SOME (\g. Xjcc X_NE (decode_Xconst_or_zero ts g)) else
196    if HD ts = "JS"      then X_SOME (\g. Xjcc X_S (decode_Xconst_or_zero ts g)) else
197    if HD ts = "JNS"     then X_SOME (\g. Xjcc X_NS (decode_Xconst_or_zero ts g)) else
198    if HD ts = "JA"      then X_SOME (\g. Xjcc X_A (decode_Xconst_or_zero ts g)) else
199    if HD ts = "JNA"     then X_SOME (\g. Xjcc X_NA (decode_Xconst_or_zero ts g)) else
200    if HD ts = "JB"      then X_SOME (\g. Xjcc X_B (decode_Xconst_or_zero ts g)) else
201    if HD ts = "JNB"     then X_SOME (\g. Xjcc X_NB (decode_Xconst_or_zero ts g)) else
202    if HD ts = "LEA"     then X_SOME (\g. Xlea (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
203    if HD ts = "LOOP"    then X_SOME (\g. Xloop X_ALWAYS (decode_Xconst_or_zero ts g)) else
204    if HD ts = "LOOPE"   then X_SOME (\g. Xloop X_E (decode_Xconst_or_zero ts g)) else
205    if HD ts = "LOOPNE"  then X_SOME (\g. Xloop X_NE (decode_Xconst_or_zero ts g)) else
206    if HD ts = "MOV"     then X_SOME (\g. Xmov X_ALWAYS (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
207    if HD ts = "CMOVE"   then X_SOME (\g. Xmov X_E (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
208    if HD ts = "CMOVNE"  then X_SOME (\g. Xmov X_NE (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
209    if HD ts = "CMOVS"   then X_SOME (\g. Xmov X_S (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
210    if HD ts = "CMOVNS"  then X_SOME (\g. Xmov X_NS (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
211    if HD ts = "CMOVA"   then X_SOME (\g. Xmov X_A (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
212    if HD ts = "CMOVNA"  then X_SOME (\g. Xmov X_NA (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
213    if HD ts = "CMOVB"   then X_SOME (\g. Xmov X_B (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
214    if HD ts = "CMOVNB"  then X_SOME (\g. Xmov X_NB (decode_Xdest_src g (EL 1 ts) (EL 2 ts))) else
215    if HD ts = "MUL"     then X_SOME (\g. Xmul  (decode_Xrm32 g (EL 1 ts))) else
216    if HD ts = "DIV"     then X_SOME (\g. Xdiv  (decode_Xrm32 g (EL 1 ts))) else
217    if HD ts = "CALL"    then X_SOME (\g. Xcall (decode_Ximm_rm ts g)) else
218    if HD ts = "RET"     then X_SOME (\g. Xret (decode_Xconst_or_zero ts g)) else option_fail`;
219
220
221(* a list of x86 instructions, ordered by the combination of addressing modes they support *)
222
223val x86_syntax_list = `` [
224
225    " 15 id     | ADC EAX, imm32    ";
226    " 83 /2 ib  | ADC r/m32, imm8   ";
227    " 81 /2 id  | ADC r/m32, imm32  ";
228    " 11 /r     | ADC r/m32, r32    ";
229    " 13 /r     | ADC r32, r/m32    ";
230    " 05 id     | ADD EAX, imm32    ";
231    " 83 /0 ib  | ADD r/m32, imm8   ";
232    " 81 /0 id  | ADD r/m32, imm32  ";
233    " 01 /r     | ADD r/m32, r32    ";
234    " 03 /r     | ADD r32, r/m32    ";
235    " 25 id     | AND EAX, imm32    ";
236    " 81 /4 id  | AND r/m32, imm32  ";
237    " 83 /4 ib  | AND r/m32, imm8   ";
238    " 21 /r     | AND r/m32, r32    ";
239    " 23 /r     | AND r32, r/m32    ";
240    " 3D id     | CMP EAX, imm32    ";
241    " 81 /7 id  | CMP r/m32, imm32  ";
242    " 83 /7 ib  | CMP r/m32, imm8   ";
243    " 39 /r     | CMP r/m32, r32    ";
244    " 3B /r     | CMP r32, r/m32    ";
245    " 89 /r     | MOV r/m32,r32     ";
246    " 8B /r     | MOV r32,r/m32     ";
247    " B8+rd id  | MOV r32, imm32    ";
248    " C7 /0 id  | MOV r/m32, imm32  ";
249    " 0D id     | OR EAX, imm32     ";
250    " 81 /1 id  | OR r/m32, imm32   ";
251    " 83 /1 ib  | OR r/m32, imm8    ";
252    " 09 /r     | OR r/m32, r32     ";
253    " 0B /r     | OR r32, r/m32     ";
254    " 1D id     | SBB EAX, imm32    ";
255    " 83 /3 ib  | SBB r/m32, imm8   ";
256    " 81 /3 id  | SBB r/m32, imm32  ";
257    " 19 /r     | SBB r/m32, r32    ";
258    " 1B /r     | SBB r32, r/m32    ";
259    " 2D id     | SUB EAX, imm32    ";
260    " 83 /5 ib  | SUB r/m32, imm8   ";
261    " 81 /5 id  | SUB r/m32, imm32  ";
262    " 29 /r     | SUB r/m32, r32    ";
263    " 2B /r     | SUB r32, r/m32    ";
264    " A9 id     | TEST EAX, imm32   ";
265    " F7 /0 id  | TEST r/m32, imm32 ";
266    " 85 /r     | TEST r/m32, r32   ";
267    " 35 id     | XOR EAX, imm32    ";
268    " 81 /6 id  | XOR r/m32, imm32  ";
269    " 31 /r     | XOR r/m32, r32    ";
270    " 83 /6 ib  | XOR r/m32, imm8   ";
271    " 33 /r     | XOR r32, r/m32    ";
272
273    " 0F B1 /r  | CMPXCHG r/m32, r32";
274    " 0F C1 /r  | XADD r/m32, r32   ";
275    " 90+rd     | XCHG EAX, r32     ";
276    " 87 /r     | XCHG r/m32, r32   ";
277
278    " FF /1     | DEC r/m32         ";
279    " 48+rd     | DEC r32           ";
280    " FF /0     | INC r/m32         ";
281    " 40+rd     | INC r32           ";
282    " F7 /3     | NEG r/m32         ";
283    " F7 /2     | NOT r/m32         ";
284    " 8F /0     | POP r/m32         ";
285    " 58+rd     | POP r32           ";
286    " FF /6     | PUSH r/m32        ";
287    " 50+rd     | PUSH r32          ";
288
289    " E8 cd     | CALL rel32        ";
290    " FF /2     | CALL r/m32        ";
291
292    " 8D /r     | LEA r32, m        ";
293    " D1 /4     | SHL r/m32, 1      ";
294    " C1 /4 ib  | SHL r/m32, imm8   ";
295    " D1 /5     | SHR r/m32, 1      ";
296    " C1 /5 ib  | SHR r/m32, imm8   ";
297    " D1 /7     | SAR r/m32, 1      ";
298    " C1 /7 ib  | SAR r/m32, imm8   ";
299
300    " FF /4     | JMP r/m32         ";
301    " EB cb     | JMP rel8          ";
302    " E9 cd     | JMP rel32         ";
303    " 74 cb     | JE rel8           ";
304    " 75 cb     | JNE rel8          ";
305    " 0F 84 cd  | JE rel32          ";
306    " 0F 85 cd  | JNE rel32         ";
307    " 78 cb     | JS rel8           ";
308    " 79 cb     | JNS rel8          ";
309    " 0F 88 cd  | JS rel32          ";
310    " 0F 89 cd  | JNS rel32         ";
311    " 77 cb     | JA rel8           ";
312    " 76 cb     | JNA rel8          ";
313    " 0F 87 cd  | JA rel32          ";
314    " 0F 86 cd  | JNA rel32         ";
315    " 72 cb     | JB rel8           ";
316    " 73 cb     | JNB rel8          ";
317    " 0F 82 cd  | JB rel32          ";
318    " 0F 83 cd  | JNB rel32         ";
319
320    " 0F 44 /r  | CMOVE r32, r/m32  ";
321    " 0F 45 /r  | CMOVNE r32, r/m32 ";
322    " 0F 48 /r  | CMOVS r32, r/m32  ";
323    " 0F 49 /r  | CMOVNS r32, r/m32 ";
324    " 0F 47 /r  | CMOVA r32, r/m32  ";
325    " 0F 46 /r  | CMOVNA r32, r/m32 ";
326    " 0F 42 /r  | CMOVB r32, r/m32  ";
327    " 0F 43 /r  | CMOVNB r32, r/m32 ";
328
329    " C3        | RET               "; (* short for "RET 0" *)
330    " C2 iw     | RET imm16         ";
331    " E2 cb     | LOOP rel8         ";
332    " E1 cb     | LOOPE rel8        ";
333    " E0 cb     | LOOPNE rel8       ";
334
335    " 60        | PUSHAD            ";
336    " 61        | POPAD             ";
337
338    " F7 /6     | DIV r/m32         ";
339    " F7 /4     | MUL r/m32         ";
340
341    " C6 /0 ib  | MOV r/m8, imm8    ";
342    " 88 /r     | MOV r/m8, r8      ";
343    " 0F B6 /r  | MOVZX r32, r/m8   ";
344    " 38 /r     | CMP r/m8, r8      ";
345    " 3A /r     | CMP r8, r/m8      ";
346    " 80 /7 ib  | CMP r/m8, imm8    ";
347    " FE /1     | DEC r/m8          "
348
349  ] ``;
350
351val x86_decode_aux_def = zDefine `
352  x86_decode_aux = (match_list x86_match_step tokenise (x86_syntax o tokenise) o
353                     MAP (\s. let x = STR_SPLIT [#"|"] s in (EL 0 x, EL 1 x))) ^x86_syntax_list`;
354
355val x86_decode_prefixes_def = Define `
356  x86_decode_prefixes w =
357    if LENGTH w < 16 then (Xg1_none,Xg2_none,w) else
358      let bt1  = (TAKE 8 w = n2bits 8 (hex2num "2E")) in
359      let bnt1 = (TAKE 8 w = n2bits 8 (hex2num "3E")) in
360      let l1   = (TAKE 8 w = n2bits 8 (hex2num "F0")) in
361      let bt2  = (TAKE 8 (DROP 8 w) = n2bits 8 (hex2num "2E")) /\ l1 in
362      let bnt2 = (TAKE 8 (DROP 8 w) = n2bits 8 (hex2num "3E")) /\ l1 in
363      let l2   = (TAKE 8 (DROP 8 w) = n2bits 8 (hex2num "F0")) /\ (bt1 \/ bnt1) in
364      let g1   = if l1 \/ l2 then Xlock else Xg1_none in
365      let g2   = if bt1 \/ bt2 then Xbranch_taken else Xg2_none in
366      let g2   = if bnt1 \/ bnt2 then Xbranch_not_taken else g2 in
367      let n    = if bt1 \/ bnt1 \/ l1 then (if bt2 \/ bnt2 \/ l2 then 16 else 8) else 0 in
368        (g1,g2,DROP n w)`;
369
370val dest_accesses_memory_def = Define `
371  (dest_accesses_memory (Xrm_i rm i) = rm_is_memory_access rm) /\
372  (dest_accesses_memory (Xrm_r rm r) = rm_is_memory_access rm) /\
373  (dest_accesses_memory (Xr_rm r rm) = F)`;
374
375val x86_lock_ok_def = Define `
376  (x86_lock_ok (Xbinop binop_name ds) =
377    MEM binop_name [Xadd;Xand;Xor;Xsub;Xxor] /\
378    dest_accesses_memory ds) /\
379  (x86_lock_ok (Xmonop monop_name rm) =
380    MEM monop_name [Xdec;Xinc;Xneg;Xnot] /\
381    rm_is_memory_access rm) /\
382  (x86_lock_ok (Xxadd rm r) = rm_is_memory_access rm) /\
383  (x86_lock_ok (Xxchg rm r) = rm_is_memory_access rm) /\
384  (x86_lock_ok (Xcmpxchg rm r) = rm_is_memory_access rm) /\
385  (x86_lock_ok (Xpop rm) = F) /\
386  (x86_lock_ok (Xlea ds) = F) /\
387  (x86_lock_ok (Xpush imm_rm) = F) /\
388  (x86_lock_ok (Xcall imm_rm) = F) /\
389  (x86_lock_ok (Xret imm) = F) /\
390  (x86_lock_ok (Xmov c ds) = F) /\
391  (x86_lock_ok (Xjcc c imm) = F) /\
392  (x86_lock_ok (Xjmp rm) = F) /\
393  (x86_lock_ok (Xloop c imm) = F) /\
394  (x86_lock_ok (Xpushad) = F) /\
395  (x86_lock_ok (Xpopad) = F)`;
396
397val x86_decode_def = Define `
398  x86_decode w =
399    let (g1,g2,w) = x86_decode_prefixes w in
400    let result = x86_decode_aux w in
401      if result = NONE then NONE else
402        let (i,w) = THE result in
403          if ~(g1 = Xlock) \/ x86_lock_ok i then SOME (Xprefix g1 g2 i, w) else NONE`;
404
405val x86_decode_bytes_def = Define `
406  x86_decode_bytes b = x86_decode (FOLDR APPEND [] (MAP w2bits b))`;
407
408val rm_args_ok_def = Define `
409  (rm_args_ok (Xr r) = T) /\
410  (rm_args_ok (Xm NONE NONE t3) = T) /\
411  (rm_args_ok (Xm NONE (SOME br) t3) = T) /\
412  (rm_args_ok (Xm (SOME (s,ir)) NONE t3) = ~(ir = ESP)) /\
413  (rm_args_ok (Xm (SOME (s,ir)) (SOME br) t3) = ~(ir = ESP))`;
414
415val dest_src_args_ok_def = Define `
416  (dest_src_args_ok (Xrm_i rm i) = rm_args_ok rm) /\
417  (dest_src_args_ok (Xrm_r rm r) = rm_args_ok rm ) /\
418  (dest_src_args_ok (Xr_rm r rm) = rm_args_ok rm )`;
419
420val imm_rm_args_ok_def = Define `
421  (imm_rm_args_ok (Xi_rm rm) = rm_args_ok rm) /\
422  (imm_rm_args_ok (Xi i) = T)`;
423
424val x86_args_ok_def = Define `
425  (x86_args_ok (Xbinop binop_name ds) = dest_src_args_ok ds) /\
426  (x86_args_ok (Xmonop monop_name rm) = rm_args_ok rm) /\
427  (x86_args_ok (Xxadd rm r) = rm_args_ok rm ) /\
428  (x86_args_ok (Xxchg rm r) = rm_args_ok rm ) /\
429  (x86_args_ok (Xcmpxchg rm r) = rm_args_ok rm ) /\
430  (x86_args_ok (Xpop rm) = rm_args_ok rm) /\
431  (x86_args_ok (Xpush imm_rm) = imm_rm_args_ok imm_rm) /\
432  (x86_args_ok (Xcall imm_rm) = imm_rm_args_ok imm_rm) /\
433  (x86_args_ok (Xret imm) = w2n imm < 2**16) /\
434  (x86_args_ok (Xmov c ds) = dest_src_args_ok ds /\ MEM c [X_NE;X_E;X_ALWAYS]) /\ (* partial list *)
435  (x86_args_ok (Xjcc c imm) = T) /\
436  (x86_args_ok (Xjmp rm) = T) /\
437  (x86_args_ok (Xloop c imm) = MEM c [X_NE;X_E;X_ALWAYS]) /\
438  (x86_args_ok (Xpushad) = T) /\
439  (x86_args_ok (Xpopad) = T)`;
440
441val x86_well_formed_instruction_def = Define `
442  (x86_well_formed_instruction (Xprefix Xlock g2 i) = x86_lock_ok i /\ x86_args_ok i) /\
443  (x86_well_formed_instruction (Xprefix Xg1_none g2 i) = x86_args_ok i)`;
444
445
446
447(* -- partially pre-evaluate x86_decode_aux -- *)
448
449val x86_decode_aux_thm = let
450  fun eval_term_ss tm_name tm = conv_ss
451    { name = tm_name, trace = 3, key = SOME ([],tm), conv = K (K EVAL) };
452  val token_ss = eval_term_ss "tokenise" ``tokenise x``;
453  val if_ss = conv_ss { name = "if", trace = 3,
454    key = SOME ([],``if x then (y:'a) else z``),
455    conv = K (K ((RATOR_CONV o RATOR_CONV o RAND_CONV) EVAL)) };
456  val hex_ss = eval_term_ss "hex" ``n2bits 8 (hex2num y)``;
457  val hex_add_ss = eval_term_ss "hex" ``n2bits 8 ((hex2num y) + k)``;
458  val n2bits_3_ss = eval_term_ss "n2bits_3" ``n2bits 3 y``;
459  val select_op_ss = eval_term_ss "select_op" ``x86_select_op x (y:('a#'b)list)``;
460  val EL_LEMMA = prove(
461    ``!x y z t. (EL 0 (x::t) = x) /\ (EL 1 (x::y::t) = y) /\ (EL 2 (x:'a::y::z::t) = z)``,
462    REPEAT STRIP_TAC THEN EVAL_TAC);
463  val SOME_LEMMA = prove(``!(f :'a -> 'b option) (g :'c) (h :'d). (SOME >> f = f)``,
464    SIMP_TAC std_ss [FUN_EQ_THM,option_then_def,LET_DEF]);
465  val th1 = REWRITE_CONV [MAP,x86_decode_aux_def,combinTheory.o_DEF] ``x86_decode_aux``
466  val th2 = CONV_RULE (ONCE_DEPTH_CONV (BETA_CONV) THENC (RAND_CONV (RAND_CONV EVAL))) th1
467  val th3 = REWRITE_RULE [match_list_def,MAP] th2
468  val th4 = SIMP_RULE (std_ss++token_ss) [match_def] th3
469  val th5 = SIMP_RULE (bool_ss++if_ss) [MAP,x86_match_step_def] th4
470  val th6 = SIMP_RULE (bool_ss++if_ss++select_op_ss) [x86_syntax_def,EL_LEMMA,HD] th5
471  val th6a = SIMP_RULE (std_ss) [LET_DEF,process_hex_add_def] th6
472  val th7 = SIMP_RULE (bool_ss++if_ss++hex_ss++hex_add_ss++n2bits_3_ss) [decode_Xdest_src_def,decode_Xconst_def] th6a
473  val th8 = REWRITE_RULE [GSYM option_then_assoc,option_do_def,option_try_def,GSYM option_orelse_assoc] th7
474  val th9 = SIMP_RULE std_ss [option_orelse_SOME,GSYM option_orelse_assoc,X_SOME_def] th8
475  val thA = REWRITE_RULE [GSYM option_then_assoc,EL_LEMMA,drop_eq_thm,SOME_LEMMA,option_do_def] th9
476  val thB = REWRITE_RULE [option_then_assoc,SOME_LEMMA] thA
477  val thC = REWRITE_RULE [option_try_def,GSYM option_orelse_assoc] thB
478  val thD = REWRITE_RULE [option_then_OVER_orelse] thC
479  val thE = REWRITE_RULE [option_orelse_assoc] thD
480  val thX = REWRITE_RULE [DT_over_DF,option_then_assoc,option_orelse_assoc,option_then_OVER_orelse] thE
481  val thZ = REWRITE_RULE [DTF_THM] thX
482  in thZ end;
483
484val _ = save_thm("x86_decode_aux_thm",x86_decode_aux_thm);
485val _ = computeLib.add_persistent_funs ["x86_decode_aux_thm"];
486
487
488(* x86 examples/tests:
489
490val _ = wordsLib.output_words_as_hex();
491
492val th = EVAL ``x86_decode(bytebits "8D84B6EE711202")``;  (* lea eax, [esi + 4*esi + 34763246] *)
493val th = EVAL ``x86_decode(bytebits "D1E1")``;            (* shl ecx, 1 *)
494val th = EVAL ``x86_decode(bytebits "C1E108")``;          (* shl ecx, 8 *)
495val th = EVAL ``x86_decode(bytebits "D1EB")``;            (* shr ebx, 1 *)
496val th = EVAL ``x86_decode(bytebits "C1EB08")``;          (* shr ebx, 8 *)
497val th = EVAL ``x86_decode(bytebits "D1F8")``;            (* sar eax, 1 *)
498val th = EVAL ``x86_decode(bytebits "C1F808")``;          (* sar eax, 8 *)
499val th = EVAL ``x86_decode(bytebits "0538000000")``;      (* add eax,56 *)
500val th = EVAL ``x86_decode(bytebits "810037020000")``;    (* add dword [eax],567 *)
501val th = EVAL ``x86_decode(bytebits "010B")``;            (* add [ebx], ecx *)
502val th = EVAL ``x86_decode(bytebits "0119")``;            (* add [ecx], ebx *)
503val th = EVAL ``x86_decode(bytebits "2538000000")``;      (* and eax,56 *)
504val th = EVAL ``x86_decode(bytebits "812037020000")``;    (* and dword [eax],567 *)
505val th = EVAL ``x86_decode(bytebits "210B")``;            (* and [ebx], ecx *)
506val th = EVAL ``x86_decode(bytebits "2319")``;            (* and ebx, [ecx] *)
507val th = EVAL ``x86_decode(bytebits "0F44C1")``;          (* cmove eax, ecx *)
508val th = EVAL ``x86_decode(bytebits "0F454104")``;        (* cmovne eax, [ecx+4] *)
509val th = EVAL ``x86_decode(bytebits "81FE38000000")``;    (* cmp esi,56 *)
510val th = EVAL ``x86_decode(bytebits "813B37020000")``;    (* cmp dword [ebx],567 *)
511val th = EVAL ``x86_decode(bytebits "390B")``;            (* cmp [ebx], ecx *)
512val th = EVAL ``x86_decode(bytebits "3B19")``;            (* cmp ebx, [ecx] *)
513val th = EVAL ``x86_decode(bytebits "893E")``;            (* mov [esi],edi *)
514val th = EVAL ``x86_decode(bytebits "8B3E")``;            (* mov edi,[esi] *)
515val th = EVAL ``x86_decode(bytebits "BC37020000")``;      (* mov esp,567 *)
516val th = EVAL ``x86_decode(bytebits "81CE38000000")``;    (* or  esi,56 *)
517val th = EVAL ``x86_decode(bytebits "810B37020000")``;    (* or  dword [ebx],567 *)
518val th = EVAL ``x86_decode(bytebits "090B")``;            (* or  [ebx], ecx *)
519val th = EVAL ``x86_decode(bytebits "0B19")``;            (* or  ebx, [ecx] *)
520val th = EVAL ``x86_decode(bytebits "81EE38000000")``;    (* sub esi,56 *)
521val th = EVAL ``x86_decode(bytebits "812B37020000")``;    (* sub dword [ebx],567 *)
522val th = EVAL ``x86_decode(bytebits "290B")``;            (* sub [ebx], ecx *)
523val th = EVAL ``x86_decode(bytebits "2B19")``;            (* sub ebx, [ecx] *)
524val th = EVAL ``x86_decode(bytebits "F7C638000000")``;    (* test esi,56 *)
525val th = EVAL ``x86_decode(bytebits "F70337020000")``;    (* test dword [ebx],567 *)
526val th = EVAL ``x86_decode(bytebits "850B")``;            (* test [ebx], ecx *)
527val th = EVAL ``x86_decode(bytebits "81F638000000")``;    (* xor esi,56 *)
528val th = EVAL ``x86_decode(bytebits "813337020000")``;    (* xor dword [ebx],567 *)
529val th = EVAL ``x86_decode(bytebits "310B")``;            (* xor [ebx], ecx *)
530val th = EVAL ``x86_decode(bytebits "3319")``;            (* xor ebx, [ecx] *)
531val th = EVAL ``x86_decode(bytebits "0FB110")``;          (* cmpxchg [eax],edx *)
532val th = EVAL ``x86_decode(bytebits "0FC11E")``;          (* xadd [esi],ebx *)
533val th = EVAL ``x86_decode(bytebits "FF4E3C")``;          (* dec dword [esi+60] *)
534val th = EVAL ``x86_decode(bytebits "4C")``;              (* dec esp *)
535val th = EVAL ``x86_decode(bytebits "FF80401F0000")``;    (* inc dword [eax+8000] *)
536val th = EVAL ``x86_decode(bytebits "40")``;              (* inc eax *)
537val th = EVAL ``x86_decode(bytebits "F750C8")``;          (* not dword [eax-56] *)
538val th = EVAL ``x86_decode(bytebits "8F0590010000")``;    (* pop dword [400] *)
539val th = EVAL ``x86_decode(bytebits "59")``;              (* pop ecx *)
540val th = EVAL ``x86_decode(bytebits "FFB498C8010000")``;  (* push dword [eax + 4*ebx + 456] *)
541val th = EVAL ``x86_decode(bytebits "FF749801")``;        (* push dword [eax + 4*ebx + 1] *)
542val th = EVAL ``x86_decode(bytebits "FF74AD02")``;        (* push dword [ebp + 4*ebp + 2] *)
543val th = EVAL ``x86_decode(bytebits "FF746D2D")``;        (* push dword [ebp + 2*ebp + 45] *)
544val th = EVAL ``x86_decode(bytebits "FFB42DEA000000")``;  (* push dword [ebp + ebp + 234] *)
545val th = EVAL ``x86_decode(bytebits "FFB4B6EE711202")``;  (* push dword [esi + 4*esi + 34763246] *)
546val th = EVAL ``x86_decode(bytebits "50")``;              (* push eax *)
547val th = EVAL ``x86_decode(bytebits "E8BDFFFFFF")``;      (* call l1 *)
548val th = EVAL ``x86_decode(bytebits "FFD0")``;            (* call eax *)
549val th = EVAL ``x86_decode(bytebits "EBB9")``;            (* jmp l1 *)
550val th = EVAL ``x86_decode(bytebits "74B7")``;            (* je l1 *)
551val th = EVAL ``x86_decode(bytebits "75B5")``;            (* jne l1 *)
552val th = EVAL ``x86_decode(bytebits "C3")``;              (* ret *)
553val th = EVAL ``x86_decode(bytebits "C20500")``;          (* ret 5 *)
554val th = EVAL ``x86_decode(bytebits "E2AF")``;            (* loop l1 *)
555val th = EVAL ``x86_decode(bytebits "E1AD")``;            (* loope l1 *)
556val th = EVAL ``x86_decode(bytebits "E0AB")``;            (* loopne l1 *)
557val th = EVAL ``x86_decode(bytebits "60")``;              (* pushad *)
558val th = EVAL ``x86_decode(bytebits "61")``;              (* popad *)
559val th = EVAL ``x86_decode(bytebits "0FAEF0")``;          (* mfence *)
560val th = EVAL ``x86_decode(bytebits "0FAEF8")``;          (* sfence *)
561val th = EVAL ``x86_decode(bytebits "0FAEE8")``;          (* lfence *)
562val th = EVAL ``x86_decode(bytebits "93")``;              (* xchg eax, ebx *)
563val th = EVAL ``x86_decode(bytebits "8731")``;            (* xchg [ecx], esi *)
564val th = EVAL ``x86_decode(bytebits "F7583C")``;          (* neg dword [eax+60] *)
565val th = EVAL ``x86_decode(bytebits "8325C49B040800")``;  (* and [8049BC4], 0 *)
566val th = EVAL ``x86_decode(bytebits "8125C49B040800000000")``;  (* and [8049BC4], 0 *)
567val th = EVAL ``x86_decode(bytebits "0FB1F8")``;          (* cmpxchg eax, edi *)
568val th = EVAL ``x86_decode(bytebits "0FB1441500")``;      (* cmpxchg [ebp + edx], eax *)
569val th = EVAL ``x86_decode(bytebits "56")``;
570val th = EVAL ``x86_decode(bytebits "5B")``;
571
572*)
573
574
575(* test whether decoding works, if this is slow then something's wrong *)
576
577val t1 = Time.now();
578val th = EVAL ``x86_decode(bytebits "D1F8")``;  (* sar eax, 1 *)
579val t2 = Time.now();
580val elapsed_time = Time.toReal t2 - Time.toReal t1
581val _ = elapsed_time < 5.0 orelse failwith("Decoding failed to use compset properly.")
582(* The time difference should really be under a second, but 5 seconds
583   gives a bit of a margin. *)
584
585val _ = export_theory ();
586