1structure x64_encodeLib :> x64_encodeLib =
2struct
3
4open HolKernel boolLib bossLib;
5open x64_decoderTheory;
6
7val car = fst o dest_comb;
8val cdr = snd o dest_comb;
9
10val instructions = let
11  fun div_at [] ys = (ys,[])
12    | div_at (x::xs) ys = if x = "|" then (rev ys,xs) else
13      div_at xs (x::ys)
14  fun d xs = div_at xs []
15  val t = String.tokens (fn x => mem x [#" ",#","])
16  in (map (d o t) o map stringSyntax.fromHOLstring o fst o
17     listSyntax.dest_list o cdr o cdr o concl o SPEC_ALL) x64_decode_aux_def end;
18
19fun parse_address s = let
20  val xs = explode s
21  val _ = if hd xs = #"[" then () else fail()
22  val _ = if last xs = #"]" then () else fail()
23  fun f c = String.tokens (fn x => x = c)
24  val ts = f #"+" (implode (butlast (tl xs)))
25  fun append_lists [] = []
26    | append_lists (x::xs) = x @ append_lists xs
27  fun minus [x] = [("+",x)]
28    | minus ["",x] = [("-",x)]
29    | minus [x,y] = [("+",x),("-",y)]
30    | minus (x::xs) = minus [x] @ minus xs
31    | minus _ = fail()
32  val zs = append_lists (map (minus o f #"-") ts)
33  fun prod [x] = (1,x)
34    | prod [x,y] = if can string_to_int x then (string_to_int x, y)
35                                          else (string_to_int y, x)
36    | prod _ = fail()
37  val qs = map (fn (s,n) => (s,prod (f #"*" n))) zs
38  fun g (c,(3,x)) = [(c,(1,x)),(c,(2,x))]
39    | g (c,(5,x)) = [(c,(1,x)),(c,(4,x))]
40    | g (c,(9,x)) = [(c,(1,x)),(c,(8,x))]
41    | g y = [y]
42  val us = append_lists (map g qs)
43  fun extract_int ("-",(1,x)) = Arbint.-(Arbint.zero,Arbint.fromString x)
44    | extract_int ("+",(1,x)) = Arbint.fromString x
45    | extract_int _ = fail()
46  fun sum [] = Arbint.zero | sum (i::is) = Arbint.+(i,sum is)
47  val imm = sum (map extract_int (filter (can extract_int) us))
48  val vs = filter (not o can extract_int) us
49  val _ = if filter (fn (x,y) => not (x = "+")) vs = [] then () else fail()
50  val ks = map snd vs
51  val base = SOME ((snd o hd o filter (fn (x,y) => x = 1)) ks) handle e => NONE
52  fun delete_base NONE xs = xs
53    | delete_base (SOME b) [] = []
54    | delete_base (SOME b) ((n,x)::xs) = if (1,b) = (n,x) then xs else (n,x) :: delete_base (SOME b) xs
55  val index = delete_base base ks
56  val index = SOME (hd index) handle e => NONE
57  fun sp_as_scale NONE = false
58    | sp_as_scale (SOME (0,x)) = false
59    | sp_as_scale (SOME (1,x)) = false
60    | sp_as_scale (SOME (_,x)) = (x = "RSP")
61  val _ = not (sp_as_scale index) orelse fail()
62  fun scale_by_2 (SOME (2,x)) = true
63    | scale_by_2 _ = false
64  fun get_scale_reg (SOME (_,r)) = r
65    | get_scale_reg _ = fail()
66  val (index,base) = if base = NONE andalso scale_by_2 index then
67                       (SOME (1,get_scale_reg index), SOME (get_scale_reg index))
68                     else (index,base)
69  fun swap_rsp (SOME (1, "RSP"), SOME r) = (SOME (1, r), SOME "RSP")
70    | swap_rsp (x,y) = (x,y)
71  val (index,base) = swap_rsp (index,base)
72  val _ = not ((index,base) = (SOME (1,"RSP"),SOME "RSP")) orelse fail()
73  in (index,base,imm) end;
74
75fun fill n s = if size s < n then fill n ("0" ^ s) else s
76
77fun unsigned_hex n s = let
78  val i = fill n (Arbnum.toHexString (Arbnum.fromString s))
79  val i = (substring(i,14,2) ^ substring(i,12,2) ^ substring(i,10,2) ^ substring(i,8,2) ^
80           substring(i,6,2) ^ substring(i,4,2) ^ substring(i,2,2) ^ substring(i,0,2)) handle e =>
81          (substring(i,6,2) ^ substring(i,4,2) ^ substring(i,2,2) ^ substring(i,0,2)) handle e => i
82  in if size i = n then i else fail() end
83
84fun signed_hex n i = let
85  val m = Arbnum.pow(Arbnum.fromInt 2, Arbnum.fromInt (4 * n))
86  val m2 = Arbnum.pow(Arbnum.fromInt 2, Arbnum.fromInt (4 * n - 1))
87  val j = Arbint.+(i,Arbint.fromNat m)
88  val i = if not (Arbint.<(i,Arbint.fromNat m2)) then fail() else
89          if not (Arbint.<=(Arbint.fromNat m2, j)) then fail() else
90          if Arbint.<=(Arbint.zero,i) then fill n (Arbnum.toHexString(Arbint.toNat i))
91                                      else fill n (Arbnum.toHexString(Arbint.toNat j))
92  val i = (substring(i,6,2) ^ substring(i,4,2) ^ substring(i,2,2) ^
93           substring(i,0,2)) handle e => i
94  in i end
95
96local
97  fun add_reg_type ty xs = map (fn x => (x,ty)) xs
98  val x64_regs =
99    zip ["AL","CL","DL","BL","SPL","BPL","SIL","DIL"] (add_reg_type 8 [0,1,2,3,4,5,6,7]) @
100    zip ["AX","CX","DX","BX","SP","BP","SI","DI"] (add_reg_type 16 [0,1,2,3,4,5,6,7]) @
101    zip ["EAX","ECX","EDX","EBX","ESP","EBP","ESI","EDI"] (add_reg_type 32 [0,1,2,3,4,5,6,7]) @
102    zip ["RAX","RCX","RDX","RBX","RSP","RBP","RSI","RDI"] (add_reg_type 64 [0,1,2,3,4,5,6,7]) @
103    zip ["R8B","R9B","R10B","R11B","R12B","R13B","R14B","R15B"] (add_reg_type 8 [8,9,10,11,12,13,14,15]) @
104    zip ["R8W","R9W","R10W","R11W","R12W","R13W","R14W","R15W"] (add_reg_type 16 [8,9,10,11,12,13,14,15]) @
105    zip ["R8D","R9D","R10D","R11D","R12D","R13D","R14D","R15D"] (add_reg_type 32 [8,9,10,11,12,13,14,15]) @
106    zip ["R8","R9","R10","R11","R12","R13","R14","R15"] (add_reg_type 64 [8,9,10,11,12,13,14,15]) @
107    zip ["R0B","R1B","R2B","R3B","R4B","R5B","R6B","R7B"] (add_reg_type 8 [0,1,2,3,4,5,6,7]) @
108    zip ["R0W","R1W","R2W","R3W","R4W","R5W","R6W","R7W"] (add_reg_type 16 [0,1,2,3,4,5,6,7]) @
109    zip ["R0D","R1D","R2D","R3D","R4D","R5D","R6D","R7D"] (add_reg_type 32 [0,1,2,3,4,5,6,7]) @
110    zip ["R0","R1","R2","R3","R4","R5","R6","R7"] (add_reg_type 64 [0,1,2,3,4,5,6,7])
111  fun x64_reg_info r = let
112    fun find x [] = fail()
113      | find x ((y,z)::ys) = if x = y then z else find x ys
114    val r = String.translate (fn x => implode [Char.toUpper x]) r
115    in find r x64_regs end
116in
117  fun x64_reg2int r = fst (x64_reg_info r);
118  fun x64_reg_size r = snd (x64_reg_info r);
119  fun x64_is_reg r = can x64_reg_info r;
120end;
121
122(*
123val s = last xs
124*)
125
126fun x64_address_encode s =
127  if mem s ["[R13]","[0+R13]","[R13+0]"] then
128    (* work around suggested by Intel manual for avoiding
129       the encoding of RIP-relative addressing in 64-bit x86 *)
130    [("R/M", "13"), ("Mod", "1"), ("disp", "00")]
131  else let
132  val (index,base,disp) = parse_address s
133  val rr = int_to_string o x64_reg2int
134  fun the (SOME x) = x | the NONE = fail()
135  fun get_scale (SOME (1,i)) = [("Scale","0"),("Index",rr i)]
136    | get_scale (SOME (2,i)) = [("Scale","1"),("Index",rr i)]
137    | get_scale (SOME (4,i)) = [("Scale","2"),("Index",rr i)]
138    | get_scale (SOME (8,i)) = [("Scale","3"),("Index",rr i)]
139    | get_scale _ = fail()
140  fun is_SP (SOME s) = (x64_reg2int s = 4) | is_SP _ = false
141  fun is_BP (SOME s) = (x64_reg2int s = 5) | is_BP _ = false
142  fun is_R12 (SOME s) = (x64_reg2int s = 12) | is_R12 _ = false
143  fun is_R13 (SOME s) = (x64_reg2int s = 13) | is_R13 _ = false
144  fun f (index,base,disp) =
145    if index = NONE then
146      if base = NONE then
147        [("R/M","5"),("Mod","0"),("disp",signed_hex 8 disp)]
148      else if is_SP base orelse is_R12 base then
149        [("R/M",rr (the base)),("Base","4"),("Index","4"),("Scale","0")] @
150        (if disp = Arbint.fromInt 0 then [("Mod","0")]
151         else if can (signed_hex 2) disp
152         then [("Mod","1"),("disp",signed_hex 2 disp)]
153         else [("Mod","2"),("disp",signed_hex 8 disp)])
154      else if (disp = Arbint.fromInt 0) andalso not (is_BP base) then
155        [("R/M",rr (the base)),("Mod","0")]
156      else if can (signed_hex 2) disp then
157        [("R/M",rr (the base)),("Mod","1"),("disp",signed_hex 2 disp)]
158      else
159        [("R/M",rr (the base)),("Mod","2"),("disp",signed_hex 8 disp)]
160    else
161      if base = NONE then
162        [("R/M",rr "ESP"),("Mod","0"),("Base","5"),("disp",signed_hex 8 disp)]
163        @ get_scale index
164      else if (disp = Arbint.fromInt 0) andalso not (is_BP base) andalso not (is_R13 base) then
165        [("R/M",rr "ESP"),("Mod","0"),("Base",rr (the base))] @ get_scale index
166      else if can (signed_hex 2) disp then
167        [("R/M",rr "ESP"),("Mod","1"),("Base",rr (the base))] @ get_scale index
168        @ [("disp",signed_hex 2 disp)]
169      else
170        [("R/M",rr "ESP"),("Mod","2"),("Base",rr (the base))] @ get_scale index
171        @ [("disp",signed_hex 8 disp)]
172  in f (index,base,disp) end
173
174fun x64_encode s = let
175  fun rem [] b ys = rev ys
176    | rem (x::xs) b ys =
177         if x = #"[" then rem xs true (x::ys) else
178         if x = #"]" then rem xs false (x::ys) else
179         if x = #" " then if b then rem xs b ys else rem xs b (x::ys) else
180           rem xs b (x::ys)
181  val s = implode (rem (explode s) false [])
182  val s = String.translate (fn x => implode [Char.toUpper x]) s
183  val xs = String.tokens (fn x => mem x [#" ",#","]) s
184  val ys = filter (fn (x,y) => ((hd y = hd xs) handle _ => false)) instructions
185  fun token_data_size t =
186    if mem t ["r8","r/m8"] then 8 else
187    if mem t ["r16","r/m16"] then 16 else
188    if mem t ["r32","r/m32"] then 32 else
189    if mem t ["r64","r/m64"] then 64 else
190    if x64_is_reg t then x64_reg_size t else fail();
191  fun every p [] = true | every p (x::xs) = p x andalso every p xs
192  val use_default_size = every (fn (x,y) => every (fn t => not (can token_data_size t)) y) ys
193                         orelse mem (hd xs) ["CALL","JMP"]
194  val zsize = if use_default_size then 64 else
195              x64_reg_size (hd (filter x64_is_reg xs))
196              handle Empty => if mem "BYTE" xs then 8 else
197                              if mem "WORD" xs then 16 else
198                              if mem "DWORD" xs then 32 else
199                              if mem "QWORD" xs then 64 else
200                                (print ("\n\nERROR: Specify data size using BYTE, WORD, DWORD or QWORD in "^s^".\n\n"); fail())
201  val ys = if not (hd xs = "MOVZX") then ys else let
202             val x = el 3 xs
203             val s = if x64_is_reg x then x64_reg_size x else
204                       if x = "WORD" then 16 else
205                       if x = "BYTE" then 8 else fail()
206             val _ = if mem s [8,16] then () else fail()
207             in filter (fn (x,y) => token_data_size (el 3 y) = s) ys end
208  val ys = if not ((hd xs = "MOV") andalso (zsize = 64) andalso (can string_to_int (last xs))) then ys else
209             ys |> filter (fn (x,y) => last y = "imm64")
210                |> map (fn (x,y) => (filter (fn t => not (mem t ["REX.W","+"])) x,y))
211  val xs = filter (fn x => not (mem x ["BYTE","WORD","DWORD","QWORD"])) xs
212  val prefixes = if zsize = 16 then "66" else ""
213  fun intro_eax s = (if (x64_reg2int s = 0) then "EAX" else s) handle HOL_ERR _ => s
214  val xs = map intro_eax xs
215  fun pos x [] = fail()
216    | pos x (y::ys) = if x = y then 0 else 1 + pos x ys
217  fun find x [] = fail()
218    | find x ((y,z)::ys) = if x = y then z else find x ys
219  fun try_all f [] = []
220    | try_all f (x::xs) = f x :: try_all f xs handle e => try_all f xs
221  val ys = if zsize = 8 then filter (fn (x,y) => (token_data_size (el 2 y) = 8) handle HOL_ERR _ => true) ys
222                        else filter (fn (x,y) => not (token_data_size (el 2 y) = 8) handle HOL_ERR _ => true) ys
223  fun simplify_token t =
224    if mem t ["r8","r16","r32","r64"] then "r32" else
225    if mem t ["r/m8","r/m16","r/m32","r/m64"] then "r/m32" else t;
226  val ys = map (fn (x,y) => (x,map simplify_token y)) ys
227(*
228  val (zs,ys) = el 3 ys
229*)
230  fun use_encoding (zs,ys) = let
231    val rex = ref (if zsize = 64 then [#"W"] else [])
232    fun rex_prefix() =
233      if (!rex) = [] then "" else
234        signed_hex 2 (Arbint.fromInt (4 * 16 + (if mem #"W" (!rex) then 8 else 0)
235                                             + (if mem #"R" (!rex) then 4 else 0)
236                                             + (if mem #"X" (!rex) then 2 else 0)
237                                             + (if mem #"B" (!rex) then 1 else 0)))
238    val l = filter (fn (x,y) => not (x = y)) (zip xs ys)
239    fun foo (y,x) =
240      if x = "imm64" then
241        [("iq",unsigned_hex 16 y)]
242      else if x = "imm32" then
243        [("id",signed_hex 8 (Arbint.fromString y))]
244      else if x = "imm16" then
245        [("iw",signed_hex 4 (Arbint.fromString y))]
246      else if x = "imm8" then
247        [("ib",signed_hex 2 (Arbint.fromString y))]
248      else if x = "rel8" then
249        [("cb",signed_hex 2 (Arbint.fromString y))]
250      else if x = "rel32" then
251        [("cd",signed_hex 8 (Arbint.fromString y))]
252      else if x = "r32" then
253        [("Reg/Opcode",int_to_string (x64_reg2int y))]
254      else if (x = "r/m32") andalso (x64_is_reg y) then
255        [("R/M",int_to_string (x64_reg2int y)),("Mod","3")]
256      else if ((x = "r/m32") orelse (x = "m")) andalso can x64_address_encode y then
257        x64_address_encode y
258      else fail()
259    fun list_append [] = [] | list_append (x::xs) = x @ list_append xs
260    val ts = list_append (map foo l)
261    fun string_to_int_8 s c = let
262      val n = string_to_int s
263      val _ = (if 8 <= n then (rex := c::(!rex)) else ())
264      val _ = (if 4 <= n andalso (zsize = 8) then (rex := #" "::(!rex)) else ())
265      in n mod 8 end;
266    fun create_SIB xs =
267      (fill 2 o Arbnum.toHexString o Arbnum.fromInt)
268      (string_to_int_8 (find "Base" xs) #"B" +
269       string_to_int_8 (find "Index" xs) #"X" * 8 +
270       string_to_int (find "Scale" xs) * 8 * 8) handle _ => ""
271    fun create_ModRM xs =
272      (fill 2 o Arbnum.toHexString o Arbnum.fromInt)
273      (string_to_int_8 (find "R/M" xs) #"B" +
274       string_to_int_8 (find "Reg/Opcode" xs) #"R" * 8 +
275       string_to_int (find "Mod" xs) * 8 * 8) ^
276      create_SIB xs ^ (find "disp" ts handle e => "")
277    fun is_plus x = (implode (tl (tl (explode x))) = "+rd") handle _ => false
278    fun do_replace x =
279      if mem x ["iq","id","iw","ib","cb","cw","cd"] then find x ts else
280      if is_plus x then let
281        val reg_n = string_to_int_8 (find "Reg/Opcode" ts) #"B"
282        val reg = Arbnum.fromInt (reg_n)
283        val s = implode [hd (explode x), hd (tl (explode x))]
284        in Arbnum.toHexString(Arbnum.+(Arbnum.fromHexString s, reg)) end else
285      if can Arbnum.fromHexString x then x else
286      if x = "/r" then create_ModRM ts else
287      if hd (explode x) = #"/" then
288        create_ModRM (("Reg/Opcode",(implode o tl o explode) x)::ts)
289      else fail()
290    val e = concat (map do_replace zs)
291    val e = prefixes ^ (rex_prefix()) ^ e
292    in e end;
293  val all = try_all use_encoding ys
294  val e = hd (sort (fn x => fn y => size x <= size y) all)
295  in e end
296
297fun x64_supported_instructions () = let
298  fun all_distinct [] = []
299    | all_distinct (x::xs) = x::all_distinct (filter (fn y => not ((x:string) = y)) xs)
300  in all_distinct (map (fn (x,y) => hd y) instructions) end;
301
302
303end
304