1structure x64_encodeLib :> x64_encodeLib = 2struct 3 4open HolKernel boolLib bossLib; 5open x64_decoderTheory; 6 7val car = fst o dest_comb; 8val cdr = snd o dest_comb; 9 10val instructions = let 11 fun div_at [] ys = (ys,[]) 12 | div_at (x::xs) ys = if x = "|" then (rev ys,xs) else 13 div_at xs (x::ys) 14 fun d xs = div_at xs [] 15 val t = String.tokens (fn x => mem x [#" ",#","]) 16 in (map (d o t) o map stringSyntax.fromHOLstring o fst o 17 listSyntax.dest_list o cdr o cdr o concl o SPEC_ALL) x64_decode_aux_def end; 18 19fun parse_address s = let 20 val xs = explode s 21 val _ = if hd xs = #"[" then () else fail() 22 val _ = if last xs = #"]" then () else fail() 23 fun f c = String.tokens (fn x => x = c) 24 val ts = f #"+" (implode (butlast (tl xs))) 25 fun append_lists [] = [] 26 | append_lists (x::xs) = x @ append_lists xs 27 fun minus [x] = [("+",x)] 28 | minus ["",x] = [("-",x)] 29 | minus [x,y] = [("+",x),("-",y)] 30 | minus (x::xs) = minus [x] @ minus xs 31 | minus _ = fail() 32 val zs = append_lists (map (minus o f #"-") ts) 33 fun prod [x] = (1,x) 34 | prod [x,y] = if can string_to_int x then (string_to_int x, y) 35 else (string_to_int y, x) 36 | prod _ = fail() 37 val qs = map (fn (s,n) => (s,prod (f #"*" n))) zs 38 fun g (c,(3,x)) = [(c,(1,x)),(c,(2,x))] 39 | g (c,(5,x)) = [(c,(1,x)),(c,(4,x))] 40 | g (c,(9,x)) = [(c,(1,x)),(c,(8,x))] 41 | g y = [y] 42 val us = append_lists (map g qs) 43 fun extract_int ("-",(1,x)) = Arbint.-(Arbint.zero,Arbint.fromString x) 44 | extract_int ("+",(1,x)) = Arbint.fromString x 45 | extract_int _ = fail() 46 fun sum [] = Arbint.zero | sum (i::is) = Arbint.+(i,sum is) 47 val imm = sum (map extract_int (filter (can extract_int) us)) 48 val vs = filter (not o can extract_int) us 49 val _ = if filter (fn (x,y) => not (x = "+")) vs = [] then () else fail() 50 val ks = map snd vs 51 val base = SOME ((snd o hd o filter (fn (x,y) => x = 1)) ks) handle e => NONE 52 fun delete_base NONE xs = xs 53 | delete_base (SOME b) [] = [] 54 | delete_base (SOME b) ((n,x)::xs) = if (1,b) = (n,x) then xs else (n,x) :: delete_base (SOME b) xs 55 val index = delete_base base ks 56 val index = SOME (hd index) handle e => NONE 57 fun sp_as_scale NONE = false 58 | sp_as_scale (SOME (0,x)) = false 59 | sp_as_scale (SOME (1,x)) = false 60 | sp_as_scale (SOME (_,x)) = (x = "RSP") 61 val _ = not (sp_as_scale index) orelse fail() 62 fun scale_by_2 (SOME (2,x)) = true 63 | scale_by_2 _ = false 64 fun get_scale_reg (SOME (_,r)) = r 65 | get_scale_reg _ = fail() 66 val (index,base) = if base = NONE andalso scale_by_2 index then 67 (SOME (1,get_scale_reg index), SOME (get_scale_reg index)) 68 else (index,base) 69 fun swap_rsp (SOME (1, "RSP"), SOME r) = (SOME (1, r), SOME "RSP") 70 | swap_rsp (x,y) = (x,y) 71 val (index,base) = swap_rsp (index,base) 72 val _ = not ((index,base) = (SOME (1,"RSP"),SOME "RSP")) orelse fail() 73 in (index,base,imm) end; 74 75fun fill n s = if size s < n then fill n ("0" ^ s) else s 76 77fun unsigned_hex n s = let 78 val i = fill n (Arbnum.toHexString (Arbnum.fromString s)) 79 val i = (substring(i,14,2) ^ substring(i,12,2) ^ substring(i,10,2) ^ substring(i,8,2) ^ 80 substring(i,6,2) ^ substring(i,4,2) ^ substring(i,2,2) ^ substring(i,0,2)) handle e => 81 (substring(i,6,2) ^ substring(i,4,2) ^ substring(i,2,2) ^ substring(i,0,2)) handle e => i 82 in if size i = n then i else fail() end 83 84fun signed_hex n i = let 85 val m = Arbnum.pow(Arbnum.fromInt 2, Arbnum.fromInt (4 * n)) 86 val m2 = Arbnum.pow(Arbnum.fromInt 2, Arbnum.fromInt (4 * n - 1)) 87 val j = Arbint.+(i,Arbint.fromNat m) 88 val i = if not (Arbint.<(i,Arbint.fromNat m2)) then fail() else 89 if not (Arbint.<=(Arbint.fromNat m2, j)) then fail() else 90 if Arbint.<=(Arbint.zero,i) then fill n (Arbnum.toHexString(Arbint.toNat i)) 91 else fill n (Arbnum.toHexString(Arbint.toNat j)) 92 val i = (substring(i,6,2) ^ substring(i,4,2) ^ substring(i,2,2) ^ 93 substring(i,0,2)) handle e => i 94 in i end 95 96local 97 fun add_reg_type ty xs = map (fn x => (x,ty)) xs 98 val x64_regs = 99 zip ["AL","CL","DL","BL","SPL","BPL","SIL","DIL"] (add_reg_type 8 [0,1,2,3,4,5,6,7]) @ 100 zip ["AX","CX","DX","BX","SP","BP","SI","DI"] (add_reg_type 16 [0,1,2,3,4,5,6,7]) @ 101 zip ["EAX","ECX","EDX","EBX","ESP","EBP","ESI","EDI"] (add_reg_type 32 [0,1,2,3,4,5,6,7]) @ 102 zip ["RAX","RCX","RDX","RBX","RSP","RBP","RSI","RDI"] (add_reg_type 64 [0,1,2,3,4,5,6,7]) @ 103 zip ["R8B","R9B","R10B","R11B","R12B","R13B","R14B","R15B"] (add_reg_type 8 [8,9,10,11,12,13,14,15]) @ 104 zip ["R8W","R9W","R10W","R11W","R12W","R13W","R14W","R15W"] (add_reg_type 16 [8,9,10,11,12,13,14,15]) @ 105 zip ["R8D","R9D","R10D","R11D","R12D","R13D","R14D","R15D"] (add_reg_type 32 [8,9,10,11,12,13,14,15]) @ 106 zip ["R8","R9","R10","R11","R12","R13","R14","R15"] (add_reg_type 64 [8,9,10,11,12,13,14,15]) @ 107 zip ["R0B","R1B","R2B","R3B","R4B","R5B","R6B","R7B"] (add_reg_type 8 [0,1,2,3,4,5,6,7]) @ 108 zip ["R0W","R1W","R2W","R3W","R4W","R5W","R6W","R7W"] (add_reg_type 16 [0,1,2,3,4,5,6,7]) @ 109 zip ["R0D","R1D","R2D","R3D","R4D","R5D","R6D","R7D"] (add_reg_type 32 [0,1,2,3,4,5,6,7]) @ 110 zip ["R0","R1","R2","R3","R4","R5","R6","R7"] (add_reg_type 64 [0,1,2,3,4,5,6,7]) 111 fun x64_reg_info r = let 112 fun find x [] = fail() 113 | find x ((y,z)::ys) = if x = y then z else find x ys 114 val r = String.translate (fn x => implode [Char.toUpper x]) r 115 in find r x64_regs end 116in 117 fun x64_reg2int r = fst (x64_reg_info r); 118 fun x64_reg_size r = snd (x64_reg_info r); 119 fun x64_is_reg r = can x64_reg_info r; 120end; 121 122(* 123val s = last xs 124*) 125 126fun x64_address_encode s = 127 if mem s ["[R13]","[0+R13]","[R13+0]"] then 128 (* work around suggested by Intel manual for avoiding 129 the encoding of RIP-relative addressing in 64-bit x86 *) 130 [("R/M", "13"), ("Mod", "1"), ("disp", "00")] 131 else let 132 val (index,base,disp) = parse_address s 133 val rr = int_to_string o x64_reg2int 134 fun the (SOME x) = x | the NONE = fail() 135 fun get_scale (SOME (1,i)) = [("Scale","0"),("Index",rr i)] 136 | get_scale (SOME (2,i)) = [("Scale","1"),("Index",rr i)] 137 | get_scale (SOME (4,i)) = [("Scale","2"),("Index",rr i)] 138 | get_scale (SOME (8,i)) = [("Scale","3"),("Index",rr i)] 139 | get_scale _ = fail() 140 fun is_SP (SOME s) = (x64_reg2int s = 4) | is_SP _ = false 141 fun is_BP (SOME s) = (x64_reg2int s = 5) | is_BP _ = false 142 fun is_R12 (SOME s) = (x64_reg2int s = 12) | is_R12 _ = false 143 fun is_R13 (SOME s) = (x64_reg2int s = 13) | is_R13 _ = false 144 fun f (index,base,disp) = 145 if index = NONE then 146 if base = NONE then 147 [("R/M","5"),("Mod","0"),("disp",signed_hex 8 disp)] 148 else if is_SP base orelse is_R12 base then 149 [("R/M",rr (the base)),("Base","4"),("Index","4"),("Scale","0")] @ 150 (if disp = Arbint.fromInt 0 then [("Mod","0")] 151 else if can (signed_hex 2) disp 152 then [("Mod","1"),("disp",signed_hex 2 disp)] 153 else [("Mod","2"),("disp",signed_hex 8 disp)]) 154 else if (disp = Arbint.fromInt 0) andalso not (is_BP base) then 155 [("R/M",rr (the base)),("Mod","0")] 156 else if can (signed_hex 2) disp then 157 [("R/M",rr (the base)),("Mod","1"),("disp",signed_hex 2 disp)] 158 else 159 [("R/M",rr (the base)),("Mod","2"),("disp",signed_hex 8 disp)] 160 else 161 if base = NONE then 162 [("R/M",rr "ESP"),("Mod","0"),("Base","5"),("disp",signed_hex 8 disp)] 163 @ get_scale index 164 else if (disp = Arbint.fromInt 0) andalso not (is_BP base) andalso not (is_R13 base) then 165 [("R/M",rr "ESP"),("Mod","0"),("Base",rr (the base))] @ get_scale index 166 else if can (signed_hex 2) disp then 167 [("R/M",rr "ESP"),("Mod","1"),("Base",rr (the base))] @ get_scale index 168 @ [("disp",signed_hex 2 disp)] 169 else 170 [("R/M",rr "ESP"),("Mod","2"),("Base",rr (the base))] @ get_scale index 171 @ [("disp",signed_hex 8 disp)] 172 in f (index,base,disp) end 173 174fun x64_encode s = let 175 fun rem [] b ys = rev ys 176 | rem (x::xs) b ys = 177 if x = #"[" then rem xs true (x::ys) else 178 if x = #"]" then rem xs false (x::ys) else 179 if x = #" " then if b then rem xs b ys else rem xs b (x::ys) else 180 rem xs b (x::ys) 181 val s = implode (rem (explode s) false []) 182 val s = String.translate (fn x => implode [Char.toUpper x]) s 183 val xs = String.tokens (fn x => mem x [#" ",#","]) s 184 val ys = filter (fn (x,y) => ((hd y = hd xs) handle _ => false)) instructions 185 fun token_data_size t = 186 if mem t ["r8","r/m8"] then 8 else 187 if mem t ["r16","r/m16"] then 16 else 188 if mem t ["r32","r/m32"] then 32 else 189 if mem t ["r64","r/m64"] then 64 else 190 if x64_is_reg t then x64_reg_size t else fail(); 191 fun every p [] = true | every p (x::xs) = p x andalso every p xs 192 val use_default_size = every (fn (x,y) => every (fn t => not (can token_data_size t)) y) ys 193 orelse mem (hd xs) ["CALL","JMP"] 194 val zsize = if use_default_size then 64 else 195 x64_reg_size (hd (filter x64_is_reg xs)) 196 handle Empty => if mem "BYTE" xs then 8 else 197 if mem "WORD" xs then 16 else 198 if mem "DWORD" xs then 32 else 199 if mem "QWORD" xs then 64 else 200 (print ("\n\nERROR: Specify data size using BYTE, WORD, DWORD or QWORD in "^s^".\n\n"); fail()) 201 val ys = if not (hd xs = "MOVZX") then ys else let 202 val x = el 3 xs 203 val s = if x64_is_reg x then x64_reg_size x else 204 if x = "WORD" then 16 else 205 if x = "BYTE" then 8 else fail() 206 val _ = if mem s [8,16] then () else fail() 207 in filter (fn (x,y) => token_data_size (el 3 y) = s) ys end 208 val ys = if not ((hd xs = "MOV") andalso (zsize = 64) andalso (can string_to_int (last xs))) then ys else 209 ys |> filter (fn (x,y) => last y = "imm64") 210 |> map (fn (x,y) => (filter (fn t => not (mem t ["REX.W","+"])) x,y)) 211 val xs = filter (fn x => not (mem x ["BYTE","WORD","DWORD","QWORD"])) xs 212 val prefixes = if zsize = 16 then "66" else "" 213 fun intro_eax s = (if (x64_reg2int s = 0) then "EAX" else s) handle HOL_ERR _ => s 214 val xs = map intro_eax xs 215 fun pos x [] = fail() 216 | pos x (y::ys) = if x = y then 0 else 1 + pos x ys 217 fun find x [] = fail() 218 | find x ((y,z)::ys) = if x = y then z else find x ys 219 fun try_all f [] = [] 220 | try_all f (x::xs) = f x :: try_all f xs handle e => try_all f xs 221 val ys = if zsize = 8 then filter (fn (x,y) => (token_data_size (el 2 y) = 8) handle HOL_ERR _ => true) ys 222 else filter (fn (x,y) => not (token_data_size (el 2 y) = 8) handle HOL_ERR _ => true) ys 223 fun simplify_token t = 224 if mem t ["r8","r16","r32","r64"] then "r32" else 225 if mem t ["r/m8","r/m16","r/m32","r/m64"] then "r/m32" else t; 226 val ys = map (fn (x,y) => (x,map simplify_token y)) ys 227(* 228 val (zs,ys) = el 3 ys 229*) 230 fun use_encoding (zs,ys) = let 231 val rex = ref (if zsize = 64 then [#"W"] else []) 232 fun rex_prefix() = 233 if (!rex) = [] then "" else 234 signed_hex 2 (Arbint.fromInt (4 * 16 + (if mem #"W" (!rex) then 8 else 0) 235 + (if mem #"R" (!rex) then 4 else 0) 236 + (if mem #"X" (!rex) then 2 else 0) 237 + (if mem #"B" (!rex) then 1 else 0))) 238 val l = filter (fn (x,y) => not (x = y)) (zip xs ys) 239 fun foo (y,x) = 240 if x = "imm64" then 241 [("iq",unsigned_hex 16 y)] 242 else if x = "imm32" then 243 [("id",signed_hex 8 (Arbint.fromString y))] 244 else if x = "imm16" then 245 [("iw",signed_hex 4 (Arbint.fromString y))] 246 else if x = "imm8" then 247 [("ib",signed_hex 2 (Arbint.fromString y))] 248 else if x = "rel8" then 249 [("cb",signed_hex 2 (Arbint.fromString y))] 250 else if x = "rel32" then 251 [("cd",signed_hex 8 (Arbint.fromString y))] 252 else if x = "r32" then 253 [("Reg/Opcode",int_to_string (x64_reg2int y))] 254 else if (x = "r/m32") andalso (x64_is_reg y) then 255 [("R/M",int_to_string (x64_reg2int y)),("Mod","3")] 256 else if ((x = "r/m32") orelse (x = "m")) andalso can x64_address_encode y then 257 x64_address_encode y 258 else fail() 259 fun list_append [] = [] | list_append (x::xs) = x @ list_append xs 260 val ts = list_append (map foo l) 261 fun string_to_int_8 s c = let 262 val n = string_to_int s 263 val _ = (if 8 <= n then (rex := c::(!rex)) else ()) 264 val _ = (if 4 <= n andalso (zsize = 8) then (rex := #" "::(!rex)) else ()) 265 in n mod 8 end; 266 fun create_SIB xs = 267 (fill 2 o Arbnum.toHexString o Arbnum.fromInt) 268 (string_to_int_8 (find "Base" xs) #"B" + 269 string_to_int_8 (find "Index" xs) #"X" * 8 + 270 string_to_int (find "Scale" xs) * 8 * 8) handle _ => "" 271 fun create_ModRM xs = 272 (fill 2 o Arbnum.toHexString o Arbnum.fromInt) 273 (string_to_int_8 (find "R/M" xs) #"B" + 274 string_to_int_8 (find "Reg/Opcode" xs) #"R" * 8 + 275 string_to_int (find "Mod" xs) * 8 * 8) ^ 276 create_SIB xs ^ (find "disp" ts handle e => "") 277 fun is_plus x = (implode (tl (tl (explode x))) = "+rd") handle _ => false 278 fun do_replace x = 279 if mem x ["iq","id","iw","ib","cb","cw","cd"] then find x ts else 280 if is_plus x then let 281 val reg_n = string_to_int_8 (find "Reg/Opcode" ts) #"B" 282 val reg = Arbnum.fromInt (reg_n) 283 val s = implode [hd (explode x), hd (tl (explode x))] 284 in Arbnum.toHexString(Arbnum.+(Arbnum.fromHexString s, reg)) end else 285 if can Arbnum.fromHexString x then x else 286 if x = "/r" then create_ModRM ts else 287 if hd (explode x) = #"/" then 288 create_ModRM (("Reg/Opcode",(implode o tl o explode) x)::ts) 289 else fail() 290 val e = concat (map do_replace zs) 291 val e = prefixes ^ (rex_prefix()) ^ e 292 in e end; 293 val all = try_all use_encoding ys 294 val e = hd (sort (fn x => fn y => size x <= size y) all) 295 in e end 296 297fun x64_supported_instructions () = let 298 fun all_distinct [] = [] 299 | all_distinct (x::xs) = x::all_distinct (filter (fn y => not ((x:string) = y)) xs) 300 in all_distinct (map (fn (x,y) => hd y) instructions) end; 301 302 303end 304