1(* x64 - generated by L3 - Thu Dec 21 10:59:16 2017 *)
2
3structure x64 :> x64 =
4struct
5
6structure Map = MutableMap
7
8(* -------------------------------------------------------------------------
9   Type declarations
10   ------------------------------------------------------------------------- *)
11
12datatype Zreg
13  = RAX | RCX | RDX | RBX | RSP | RBP | RSI | RDI | zR8 | zR9 | zR10
14  | zR11 | zR12 | zR13 | zR14 | zR15
15
16type MXCSR =
17  { DAZ: bool, DE: bool, DM: bool, FZ: bool, IE: bool, IM: bool, OE: bool,
18    OM: bool, PE: bool, PM: bool, RC: BitsN.nbit, Reserved: BitsN.nbit,
19    UE: bool, UM: bool, ZE: bool, ZM: bool }
20
21datatype Zeflags = Z_CF | Z_PF | Z_AF | Z_ZF | Z_SF | Z_OF
22
23datatype Zsize = Z16 | Z32 | Z64 | Z8 of bool
24
25datatype Zbase = ZnoBase | ZregBase of Zreg | ZripBase
26
27datatype Zrm
28  = Zm of ((BitsN.nbit * Zreg) option) * (Zbase * BitsN.nbit) | Zr of Zreg
29
30datatype Zdest_src
31  = Zr_rm of Zreg * Zrm | Zrm_i of Zrm * BitsN.nbit | Zrm_r of Zrm * Zreg
32
33datatype Zimm_rm = Zimm of BitsN.nbit | Zrm of Zrm
34
35datatype Zmonop_name = Zdec | Zinc | Znot | Zneg
36
37datatype Zbinop_name
38  = Zadd | Zor | Zadc | Zsbb | Zand | Zsub | Zxor | Zcmp | Zrol | Zror
39  | Zrcl | Zrcr | Zshl | Zshr | Ztest | Zsar
40
41datatype Zbit_test_name = Zbt | Zbts | Zbtr | Zbtc
42
43datatype Zcond
44  = Z_O | Z_NO | Z_B | Z_NB | Z_E | Z_NE | Z_NA | Z_A | Z_S | Z_NS | Z_P
45  | Z_NP | Z_L | Z_NL | Z_NG | Z_G | Z_ALWAYS
46
47datatype Zea
48  = Zea_i of Zsize * BitsN.nbit
49  | Zea_m of Zsize * BitsN.nbit
50  | Zea_r of Zsize * Zreg
51
52datatype sse_binop
53  = sse_add | sse_sub | sse_mul | sse_div | sse_max | sse_min
54
55datatype sse_logic = sse_and | sse_andn | sse_or | sse_xor
56
57datatype sse_compare
58  = sse_eq_oq | sse_lt_os | sse_le_os | sse_unord_q | sse_neq_uq
59  | sse_nlt_us | sse_nle_us | sse_ord_q
60
61datatype xmm_mem
62  = xmm_mem of ((BitsN.nbit * Zreg) option) * (Zbase * BitsN.nbit)
63  | xmm_reg of BitsN.nbit
64
65datatype SSE
66  = CMPPD of sse_compare * (BitsN.nbit * xmm_mem)
67  | CMPPS of sse_compare * (BitsN.nbit * xmm_mem)
68  | CMPSD of sse_compare * (BitsN.nbit * xmm_mem)
69  | CMPSS of sse_compare * (BitsN.nbit * xmm_mem)
70  | COMISD of BitsN.nbit * xmm_mem
71  | COMISS of BitsN.nbit * xmm_mem
72  | CVTDQ2PD of BitsN.nbit * xmm_mem
73  | CVTDQ2PS of BitsN.nbit * xmm_mem
74  | CVTPD2DQ of bool * (BitsN.nbit * xmm_mem)
75  | CVTPD2PS of BitsN.nbit * xmm_mem
76  | CVTPS2DQ of bool * (BitsN.nbit * xmm_mem)
77  | CVTPS2PD of BitsN.nbit * xmm_mem
78  | CVTSD2SI of bool * (bool * (Zreg * xmm_mem))
79  | CVTSD2SS of BitsN.nbit * xmm_mem
80  | CVTSI2SD of bool * (BitsN.nbit * Zrm)
81  | CVTSI2SS of bool * (BitsN.nbit * Zrm)
82  | CVTSS2SD of BitsN.nbit * xmm_mem
83  | CVTSS2SI of bool * (bool * (Zreg * xmm_mem))
84  | MOVAP_D_S of bool * (xmm_mem * xmm_mem)
85  | MOVQ of xmm_mem * xmm_mem
86  | MOVSD of xmm_mem * xmm_mem
87  | MOVSS of xmm_mem * xmm_mem
88  | MOVUP_D_S of bool * (xmm_mem * xmm_mem)
89  | MOV_D_Q of bool * (bool * (BitsN.nbit * Zrm))
90  | PCMPEQQ of BitsN.nbit * xmm_mem
91  | PSLLDQ of BitsN.nbit * BitsN.nbit
92  | PSLLD_imm of BitsN.nbit * BitsN.nbit
93  | PSLLQ_imm of BitsN.nbit * BitsN.nbit
94  | PSLLW_imm of BitsN.nbit * BitsN.nbit
95  | PSRAD_imm of BitsN.nbit * BitsN.nbit
96  | PSRAW_imm of BitsN.nbit * BitsN.nbit
97  | PSRLDQ of BitsN.nbit * BitsN.nbit
98  | PSRLD_imm of BitsN.nbit * BitsN.nbit
99  | PSRLQ_imm of BitsN.nbit * BitsN.nbit
100  | PSRLW_imm of BitsN.nbit * BitsN.nbit
101  | SQRTPD of BitsN.nbit * xmm_mem
102  | SQRTPS of BitsN.nbit * xmm_mem
103  | SQRTSD of BitsN.nbit * xmm_mem
104  | SQRTSS of BitsN.nbit * xmm_mem
105  | bin_PD of sse_binop * (BitsN.nbit * xmm_mem)
106  | bin_PS of sse_binop * (BitsN.nbit * xmm_mem)
107  | bin_SD of sse_binop * (BitsN.nbit * xmm_mem)
108  | bin_SS of sse_binop * (BitsN.nbit * xmm_mem)
109  | logic_PD of sse_logic * (BitsN.nbit * xmm_mem)
110  | logic_PS of sse_logic * (BitsN.nbit * xmm_mem)
111
112datatype instruction
113  = SSE of SSE
114  | Zbinop of Zbinop_name * (Zsize * Zdest_src)
115  | Zbit_test of Zbit_test_name * (Zsize * Zdest_src)
116  | Zcall of Zimm_rm
117  | Zclc
118  | Zcmc
119  | Zcmpxchg of Zsize * (Zrm * Zreg)
120  | Zdiv of Zsize * Zrm
121  | Zidiv of Zsize * Zrm
122  | Zimul of Zsize * Zrm
123  | Zimul2 of Zsize * (Zreg * Zrm)
124  | Zimul3 of Zsize * (Zreg * (Zrm * BitsN.nbit))
125  | Zjcc of Zcond * BitsN.nbit
126  | Zjmp of Zrm
127  | Zlea of Zsize * Zdest_src
128  | Zleave
129  | Zloop of Zcond * BitsN.nbit
130  | Zmonop of Zmonop_name * (Zsize * Zrm)
131  | Zmov of Zcond * (Zsize * Zdest_src)
132  | Zmovsx of Zsize * (Zdest_src * Zsize)
133  | Zmovzx of Zsize * (Zdest_src * Zsize)
134  | Zmul of Zsize * Zrm
135  | Znop of Nat.nat
136  | Zpop of Zrm
137  | Zpush of Zimm_rm
138  | Zret of BitsN.nbit
139  | Zset of Zcond * (bool * Zrm)
140  | Zstc
141  | Zxadd of Zsize * (Zrm * Zreg)
142  | Zxchg of Zsize * (Zrm * Zreg)
143
144datatype Zinst
145  = Zdec_fail of string
146  | Zfull_inst of
147      (BitsN.nbit list) * (instruction * ((BitsN.nbit list) option))
148
149type REX = { B: bool, R: bool, W: bool, X: bool }
150
151datatype maybe_instruction
152  = FAIL of string
153  | OK of instruction
154  | PENDING of string * instruction
155  | STREAM of BitsN.nbit list
156
157(* -------------------------------------------------------------------------
158   Casting maps (for enumerated types)
159   ------------------------------------------------------------------------- *)
160
161structure Cast =
162struct
163fun natToZreg x =
164  case Nat.toInt x of
165     0 => RAX
166   | 1 => RCX
167   | 2 => RDX
168   | 3 => RBX
169   | 4 => RSP
170   | 5 => RBP
171   | 6 => RSI
172   | 7 => RDI
173   | 8 => zR8
174   | 9 => zR9
175   | 10 => zR10
176   | 11 => zR11
177   | 12 => zR12
178   | 13 => zR13
179   | 14 => zR14
180   | 15 => zR15
181   | _ => raise Fail "natToZreg"
182
183fun natToZeflags x =
184  case Nat.toInt x of
185     0 => Z_CF
186   | 1 => Z_PF
187   | 2 => Z_AF
188   | 3 => Z_ZF
189   | 4 => Z_SF
190   | 5 => Z_OF
191   | _ => raise Fail "natToZeflags"
192
193fun natToZmonop_name x =
194  case Nat.toInt x of
195     0 => Zdec
196   | 1 => Zinc
197   | 2 => Znot
198   | 3 => Zneg
199   | _ => raise Fail "natToZmonop_name"
200
201fun natToZbinop_name x =
202  case Nat.toInt x of
203     0 => Zadd
204   | 1 => Zor
205   | 2 => Zadc
206   | 3 => Zsbb
207   | 4 => Zand
208   | 5 => Zsub
209   | 6 => Zxor
210   | 7 => Zcmp
211   | 8 => Zrol
212   | 9 => Zror
213   | 10 => Zrcl
214   | 11 => Zrcr
215   | 12 => Zshl
216   | 13 => Zshr
217   | 14 => Ztest
218   | 15 => Zsar
219   | _ => raise Fail "natToZbinop_name"
220
221fun natToZbit_test_name x =
222  case Nat.toInt x of
223     0 => Zbt
224   | 1 => Zbts
225   | 2 => Zbtr
226   | 3 => Zbtc
227   | _ => raise Fail "natToZbit_test_name"
228
229fun natToZcond x =
230  case Nat.toInt x of
231     0 => Z_O
232   | 1 => Z_NO
233   | 2 => Z_B
234   | 3 => Z_NB
235   | 4 => Z_E
236   | 5 => Z_NE
237   | 6 => Z_NA
238   | 7 => Z_A
239   | 8 => Z_S
240   | 9 => Z_NS
241   | 10 => Z_P
242   | 11 => Z_NP
243   | 12 => Z_L
244   | 13 => Z_NL
245   | 14 => Z_NG
246   | 15 => Z_G
247   | 16 => Z_ALWAYS
248   | _ => raise Fail "natToZcond"
249
250fun natTosse_binop x =
251  case Nat.toInt x of
252     0 => sse_add
253   | 1 => sse_sub
254   | 2 => sse_mul
255   | 3 => sse_div
256   | 4 => sse_max
257   | 5 => sse_min
258   | _ => raise Fail "natTosse_binop"
259
260fun natTosse_logic x =
261  case Nat.toInt x of
262     0 => sse_and
263   | 1 => sse_andn
264   | 2 => sse_or
265   | 3 => sse_xor
266   | _ => raise Fail "natTosse_logic"
267
268fun natTosse_compare x =
269  case Nat.toInt x of
270     0 => sse_eq_oq
271   | 1 => sse_lt_os
272   | 2 => sse_le_os
273   | 3 => sse_unord_q
274   | 4 => sse_neq_uq
275   | 5 => sse_nlt_us
276   | 6 => sse_nle_us
277   | 7 => sse_ord_q
278   | _ => raise Fail "natTosse_compare"
279
280fun ZregToNat x =
281  case x of
282     RAX => 0
283   | RCX => 1
284   | RDX => 2
285   | RBX => 3
286   | RSP => 4
287   | RBP => 5
288   | RSI => 6
289   | RDI => 7
290   | zR8 => 8
291   | zR9 => 9
292   | zR10 => 10
293   | zR11 => 11
294   | zR12 => 12
295   | zR13 => 13
296   | zR14 => 14
297   | zR15 => 15
298
299fun ZeflagsToNat x =
300  case x of
301     Z_CF => 0 | Z_PF => 1 | Z_AF => 2 | Z_ZF => 3 | Z_SF => 4 | Z_OF => 5
302
303fun Zmonop_nameToNat x =
304  case x of
305     Zdec => 0 | Zinc => 1 | Znot => 2 | Zneg => 3
306
307fun Zbinop_nameToNat x =
308  case x of
309     Zadd => 0
310   | Zor => 1
311   | Zadc => 2
312   | Zsbb => 3
313   | Zand => 4
314   | Zsub => 5
315   | Zxor => 6
316   | Zcmp => 7
317   | Zrol => 8
318   | Zror => 9
319   | Zrcl => 10
320   | Zrcr => 11
321   | Zshl => 12
322   | Zshr => 13
323   | Ztest => 14
324   | Zsar => 15
325
326fun Zbit_test_nameToNat x =
327  case x of
328     Zbt => 0 | Zbts => 1 | Zbtr => 2 | Zbtc => 3
329
330fun ZcondToNat x =
331  case x of
332     Z_O => 0
333   | Z_NO => 1
334   | Z_B => 2
335   | Z_NB => 3
336   | Z_E => 4
337   | Z_NE => 5
338   | Z_NA => 6
339   | Z_A => 7
340   | Z_S => 8
341   | Z_NS => 9
342   | Z_P => 10
343   | Z_NP => 11
344   | Z_L => 12
345   | Z_NL => 13
346   | Z_NG => 14
347   | Z_G => 15
348   | Z_ALWAYS => 16
349
350fun sse_binopToNat x =
351  case x of
352     sse_add => 0
353   | sse_sub => 1
354   | sse_mul => 2
355   | sse_div => 3
356   | sse_max => 4
357   | sse_min => 5
358
359fun sse_logicToNat x =
360  case x of
361     sse_and => 0 | sse_andn => 1 | sse_or => 2 | sse_xor => 3
362
363fun sse_compareToNat x =
364  case x of
365     sse_eq_oq => 0
366   | sse_lt_os => 1
367   | sse_le_os => 2
368   | sse_unord_q => 3
369   | sse_neq_uq => 4
370   | sse_nlt_us => 5
371   | sse_nle_us => 6
372   | sse_ord_q => 7
373
374fun ZregToString x =
375  case x of
376     RAX => "RAX"
377   | RCX => "RCX"
378   | RDX => "RDX"
379   | RBX => "RBX"
380   | RSP => "RSP"
381   | RBP => "RBP"
382   | RSI => "RSI"
383   | RDI => "RDI"
384   | zR8 => "zR8"
385   | zR9 => "zR9"
386   | zR10 => "zR10"
387   | zR11 => "zR11"
388   | zR12 => "zR12"
389   | zR13 => "zR13"
390   | zR14 => "zR14"
391   | zR15 => "zR15"
392
393fun ZeflagsToString x =
394  case x of
395     Z_CF => "Z_CF"
396   | Z_PF => "Z_PF"
397   | Z_AF => "Z_AF"
398   | Z_ZF => "Z_ZF"
399   | Z_SF => "Z_SF"
400   | Z_OF => "Z_OF"
401
402fun Zmonop_nameToString x =
403  case x of
404     Zdec => "Zdec" | Zinc => "Zinc" | Znot => "Znot" | Zneg => "Zneg"
405
406fun Zbinop_nameToString x =
407  case x of
408     Zadd => "Zadd"
409   | Zor => "Zor"
410   | Zadc => "Zadc"
411   | Zsbb => "Zsbb"
412   | Zand => "Zand"
413   | Zsub => "Zsub"
414   | Zxor => "Zxor"
415   | Zcmp => "Zcmp"
416   | Zrol => "Zrol"
417   | Zror => "Zror"
418   | Zrcl => "Zrcl"
419   | Zrcr => "Zrcr"
420   | Zshl => "Zshl"
421   | Zshr => "Zshr"
422   | Ztest => "Ztest"
423   | Zsar => "Zsar"
424
425fun Zbit_test_nameToString x =
426  case x of
427     Zbt => "Zbt" | Zbts => "Zbts" | Zbtr => "Zbtr" | Zbtc => "Zbtc"
428
429fun ZcondToString x =
430  case x of
431     Z_O => "Z_O"
432   | Z_NO => "Z_NO"
433   | Z_B => "Z_B"
434   | Z_NB => "Z_NB"
435   | Z_E => "Z_E"
436   | Z_NE => "Z_NE"
437   | Z_NA => "Z_NA"
438   | Z_A => "Z_A"
439   | Z_S => "Z_S"
440   | Z_NS => "Z_NS"
441   | Z_P => "Z_P"
442   | Z_NP => "Z_NP"
443   | Z_L => "Z_L"
444   | Z_NL => "Z_NL"
445   | Z_NG => "Z_NG"
446   | Z_G => "Z_G"
447   | Z_ALWAYS => "Z_ALWAYS"
448
449fun sse_binopToString x =
450  case x of
451     sse_add => "sse_add"
452   | sse_sub => "sse_sub"
453   | sse_mul => "sse_mul"
454   | sse_div => "sse_div"
455   | sse_max => "sse_max"
456   | sse_min => "sse_min"
457
458fun sse_logicToString x =
459  case x of
460     sse_and => "sse_and"
461   | sse_andn => "sse_andn"
462   | sse_or => "sse_or"
463   | sse_xor => "sse_xor"
464
465fun sse_compareToString x =
466  case x of
467     sse_eq_oq => "sse_eq_oq"
468   | sse_lt_os => "sse_lt_os"
469   | sse_le_os => "sse_le_os"
470   | sse_unord_q => "sse_unord_q"
471   | sse_neq_uq => "sse_neq_uq"
472   | sse_nlt_us => "sse_nlt_us"
473   | sse_nle_us => "sse_nle_us"
474   | sse_ord_q => "sse_ord_q"
475
476fun stringToZreg x =
477  case x of
478     "RAX" => RAX
479   | "RCX" => RCX
480   | "RDX" => RDX
481   | "RBX" => RBX
482   | "RSP" => RSP
483   | "RBP" => RBP
484   | "RSI" => RSI
485   | "RDI" => RDI
486   | "zR8" => zR8
487   | "zR9" => zR9
488   | "zR10" => zR10
489   | "zR11" => zR11
490   | "zR12" => zR12
491   | "zR13" => zR13
492   | "zR14" => zR14
493   | "zR15" => zR15
494   | _ => raise Fail "stringToZreg"
495
496fun stringToZeflags x =
497  case x of
498     "Z_CF" => Z_CF
499   | "Z_PF" => Z_PF
500   | "Z_AF" => Z_AF
501   | "Z_ZF" => Z_ZF
502   | "Z_SF" => Z_SF
503   | "Z_OF" => Z_OF
504   | _ => raise Fail "stringToZeflags"
505
506fun stringToZmonop_name x =
507  case x of
508     "Zdec" => Zdec
509   | "Zinc" => Zinc
510   | "Znot" => Znot
511   | "Zneg" => Zneg
512   | _ => raise Fail "stringToZmonop_name"
513
514fun stringToZbinop_name x =
515  case x of
516     "Zadd" => Zadd
517   | "Zor" => Zor
518   | "Zadc" => Zadc
519   | "Zsbb" => Zsbb
520   | "Zand" => Zand
521   | "Zsub" => Zsub
522   | "Zxor" => Zxor
523   | "Zcmp" => Zcmp
524   | "Zrol" => Zrol
525   | "Zror" => Zror
526   | "Zrcl" => Zrcl
527   | "Zrcr" => Zrcr
528   | "Zshl" => Zshl
529   | "Zshr" => Zshr
530   | "Ztest" => Ztest
531   | "Zsar" => Zsar
532   | _ => raise Fail "stringToZbinop_name"
533
534fun stringToZbit_test_name x =
535  case x of
536     "Zbt" => Zbt
537   | "Zbts" => Zbts
538   | "Zbtr" => Zbtr
539   | "Zbtc" => Zbtc
540   | _ => raise Fail "stringToZbit_test_name"
541
542fun stringToZcond x =
543  case x of
544     "Z_O" => Z_O
545   | "Z_NO" => Z_NO
546   | "Z_B" => Z_B
547   | "Z_NB" => Z_NB
548   | "Z_E" => Z_E
549   | "Z_NE" => Z_NE
550   | "Z_NA" => Z_NA
551   | "Z_A" => Z_A
552   | "Z_S" => Z_S
553   | "Z_NS" => Z_NS
554   | "Z_P" => Z_P
555   | "Z_NP" => Z_NP
556   | "Z_L" => Z_L
557   | "Z_NL" => Z_NL
558   | "Z_NG" => Z_NG
559   | "Z_G" => Z_G
560   | "Z_ALWAYS" => Z_ALWAYS
561   | _ => raise Fail "stringToZcond"
562
563fun stringTosse_binop x =
564  case x of
565     "sse_add" => sse_add
566   | "sse_sub" => sse_sub
567   | "sse_mul" => sse_mul
568   | "sse_div" => sse_div
569   | "sse_max" => sse_max
570   | "sse_min" => sse_min
571   | _ => raise Fail "stringTosse_binop"
572
573fun stringTosse_logic x =
574  case x of
575     "sse_and" => sse_and
576   | "sse_andn" => sse_andn
577   | "sse_or" => sse_or
578   | "sse_xor" => sse_xor
579   | _ => raise Fail "stringTosse_logic"
580
581fun stringTosse_compare x =
582  case x of
583     "sse_eq_oq" => sse_eq_oq
584   | "sse_lt_os" => sse_lt_os
585   | "sse_le_os" => sse_le_os
586   | "sse_unord_q" => sse_unord_q
587   | "sse_neq_uq" => sse_neq_uq
588   | "sse_nlt_us" => sse_nlt_us
589   | "sse_nle_us" => sse_nle_us
590   | "sse_ord_q" => sse_ord_q
591   | _ => raise Fail "stringTosse_compare"
592end
593
594(* -------------------------------------------------------------------------
595   Record update functions
596   ------------------------------------------------------------------------- *)
597
598fun MXCSR_DAZ_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC,
599   Reserved, UE, UM, ZE, ZM}: MXCSR, x') =
600  {DAZ = x', DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
601   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
602   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
603
604fun MXCSR_DE_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
605   UE, UM, ZE, ZM}: MXCSR, x') =
606  {DAZ = DAZ, DE = x', DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
607   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
608   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
609
610fun MXCSR_DM_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
611   UE, UM, ZE, ZM}: MXCSR, x') =
612  {DAZ = DAZ, DE = DE, DM = x', FZ = FZ, IE = IE, IM = IM, OE = OE,
613   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
614   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
615
616fun MXCSR_FZ_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
617   UE, UM, ZE, ZM}: MXCSR, x') =
618  {DAZ = DAZ, DE = DE, DM = DM, FZ = x', IE = IE, IM = IM, OE = OE,
619   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
620   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
621
622fun MXCSR_IE_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
623   UE, UM, ZE, ZM}: MXCSR, x') =
624  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = x', IM = IM, OE = OE,
625   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
626   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
627
628fun MXCSR_IM_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
629   UE, UM, ZE, ZM}: MXCSR, x') =
630  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = x', OE = OE,
631   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
632   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
633
634fun MXCSR_OE_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
635   UE, UM, ZE, ZM}: MXCSR, x') =
636  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = x',
637   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
638   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
639
640fun MXCSR_OM_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
641   UE, UM, ZE, ZM}: MXCSR, x') =
642  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
643   OM = x', PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
644   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
645
646fun MXCSR_PE_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
647   UE, UM, ZE, ZM}: MXCSR, x') =
648  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
649   OM = OM, PE = x', PM = PM, RC = RC, Reserved = Reserved, UE = UE,
650   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
651
652fun MXCSR_PM_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
653   UE, UM, ZE, ZM}: MXCSR, x') =
654  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
655   OM = OM, PE = PE, PM = x', RC = RC, Reserved = Reserved, UE = UE,
656   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
657
658fun MXCSR_RC_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
659   UE, UM, ZE, ZM}: MXCSR, x') =
660  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
661   OM = OM, PE = PE, PM = PM, RC = x', Reserved = Reserved, UE = UE,
662   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
663
664fun MXCSR_Reserved_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC,
665   Reserved, UE, UM, ZE, ZM}: MXCSR, x') =
666  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
667   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = x', UE = UE, UM = UM,
668   ZE = ZE, ZM = ZM}: MXCSR
669
670fun MXCSR_UE_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
671   UE, UM, ZE, ZM}: MXCSR, x') =
672  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
673   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = x',
674   UM = UM, ZE = ZE, ZM = ZM}: MXCSR
675
676fun MXCSR_UM_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
677   UE, UM, ZE, ZM}: MXCSR, x') =
678  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
679   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
680   UM = x', ZE = ZE, ZM = ZM}: MXCSR
681
682fun MXCSR_ZE_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
683   UE, UM, ZE, ZM}: MXCSR, x') =
684  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
685   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
686   UM = UM, ZE = x', ZM = ZM}: MXCSR
687
688fun MXCSR_ZM_rupd ({DAZ, DE, DM, FZ, IE, IM, OE, OM, PE, PM, RC, Reserved,
689   UE, UM, ZE, ZM}: MXCSR, x') =
690  {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
691   OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
692   UM = UM, ZE = ZE, ZM = x'}: MXCSR
693
694fun REX_B_rupd ({B, R, W, X}: REX, x') = {B = x', R = R, W = W, X = X}
695  : REX
696
697fun REX_R_rupd ({B, R, W, X}: REX, x') = {B = B, R = x', W = W, X = X}
698  : REX
699
700fun REX_W_rupd ({B, R, W, X}: REX, x') = {B = B, R = R, W = x', X = X}
701  : REX
702
703fun REX_X_rupd ({B, R, W, X}: REX, x') = {B = B, R = R, W = W, X = x'}
704  : REX
705
706(* -------------------------------------------------------------------------
707   Exceptions
708   ------------------------------------------------------------------------- *)
709
710exception BadFlagAccess of string
711
712exception FAILURE of string
713
714exception INTERRUPT_EXCEPTION of BitsN.nbit
715
716(* -------------------------------------------------------------------------
717   Global variables (state)
718   ------------------------------------------------------------------------- *)
719
720val EFLAGS = ref (Map.mkMap(SOME 6,NONE)): ((bool option) Map.map) ref
721
722val MEM = ref (Map.mkMap(SOME 18446744073709551616,BitsN.B(0x0,8)))
723  : (BitsN.nbit Map.map) ref
724
725val MXCSR = ref
726  ({DAZ = false, DE = false, DM = false, FZ = false, IE = false,
727    IM = false, OE = false, OM = false, PE = false, PM = false,
728    RC = BitsN.B(0x0,2), Reserved = BitsN.B(0x0,16), UE = false,
729    UM = false, ZE = false, ZM = false}): MXCSR ref
730
731val REG = ref (Map.mkMap(SOME 16,BitsN.B(0x0,64)))
732  : (BitsN.nbit Map.map) ref
733
734val RIP = ref (BitsN.B(0x0,64)): BitsN.nbit ref
735
736val XMM_REG = ref (Map.mkMap(SOME 8,BitsN.B(0x0,128)))
737  : (BitsN.nbit Map.map) ref
738
739(* -------------------------------------------------------------------------
740   Main specification
741   ------------------------------------------------------------------------- *)
742
743local
744  fun tuple'3 [t0,t1,t2] = (t0,(t1,t2))
745    | tuple'3 (_: bool list) = raise Fail "tuple'3"
746in
747  val boolify'3 = tuple'3 o BitsN.toList
748end
749
750local
751  fun tuple'8 [t0,t1,t2,t3,t4,t5,t6,t7] =
752    (t0,(t1,(t2,(t3,(t4,(t5,(t6,t7)))))))
753    | tuple'8 (_: bool list) = raise Fail "tuple'8"
754in
755  val boolify'8 = tuple'8 o BitsN.toList
756end
757
758fun DE_exception () = raise INTERRUPT_EXCEPTION (BitsN.B(0x0,8));
759
760fun UD_exception () = raise INTERRUPT_EXCEPTION (BitsN.B(0x6,8));
761
762fun GP_exception () = raise INTERRUPT_EXCEPTION (BitsN.B(0xD,8));
763
764fun XM_exception () = raise INTERRUPT_EXCEPTION (BitsN.B(0x13,8));
765
766fun rec'MXCSR x =
767  {DAZ = BitsN.bit(x,6), DE = BitsN.bit(x,1), DM = BitsN.bit(x,8),
768   FZ = BitsN.bit(x,15), IE = BitsN.bit(x,0), IM = BitsN.bit(x,7),
769   OE = BitsN.bit(x,3), OM = BitsN.bit(x,10), PE = BitsN.bit(x,5),
770   PM = BitsN.bit(x,12), RC = BitsN.bits(14,13) x,
771   Reserved = BitsN.bits(31,16) x, UE = BitsN.bit(x,4),
772   UM = BitsN.bit(x,11), ZE = BitsN.bit(x,2), ZM = BitsN.bit(x,9)};
773
774fun reg'MXCSR x =
775  case x of
776     {DAZ = DAZ, DE = DE, DM = DM, FZ = FZ, IE = IE, IM = IM, OE = OE,
777      OM = OM, PE = PE, PM = PM, RC = RC, Reserved = Reserved, UE = UE,
778      UM = UM, ZE = ZE, ZM = ZM} =>
779       BitsN.concat
780         [Reserved,BitsN.fromBit FZ,RC,BitsN.fromBit PM,BitsN.fromBit UM,
781          BitsN.fromBit OM,BitsN.fromBit ZM,BitsN.fromBit DM,
782          BitsN.fromBit IM,BitsN.fromBit DAZ,BitsN.fromBit PE,
783          BitsN.fromBit UE,BitsN.fromBit OE,BitsN.fromBit ZE,
784          BitsN.fromBit DE,BitsN.fromBit IE];
785
786fun write'rec'MXCSR (_,x) = reg'MXCSR x;
787
788fun write'reg'MXCSR (_,x) = rec'MXCSR x;
789
790fun mem8 addr = Map.lookup((!MEM),BitsN.toNat addr);
791
792fun write'mem8 (b,addr) = MEM := (Map.update((!MEM),BitsN.toNat addr,b));
793
794fun mem16 addr = BitsN.@@(mem8(BitsN.+(addr,BitsN.B(0x1,64))),mem8 addr);
795
796fun write'mem16 (w,addr) =
797  ( write'mem8(BitsN.bits(7,0) w,addr)
798  ; let
799      val x = BitsN.+(addr,BitsN.B(0x1,64))
800    in
801      write'mem8(BitsN.bits(15,8) w,x)
802    end
803  );
804
805fun mem32 addr =
806  BitsN.@@(mem16(BitsN.+(addr,BitsN.B(0x2,64))),mem16 addr);
807
808fun write'mem32 (w,addr) =
809  ( write'mem16(BitsN.bits(15,0) w,addr)
810  ; let
811      val x = BitsN.+(addr,BitsN.B(0x2,64))
812    in
813      write'mem16(BitsN.bits(31,16) w,x)
814    end
815  );
816
817fun mem64 addr =
818  BitsN.@@(mem32(BitsN.+(addr,BitsN.B(0x4,64))),mem32 addr);
819
820fun write'mem64 (w,addr) =
821  ( write'mem32(BitsN.bits(31,0) w,addr)
822  ; let
823      val x = BitsN.+(addr,BitsN.B(0x4,64))
824    in
825      write'mem32(BitsN.bits(63,32) w,x)
826    end
827  );
828
829fun mem128 addr =
830  BitsN.@@(mem64(BitsN.+(addr,BitsN.B(0x8,64))),mem64 addr);
831
832fun write'mem128 (w,addr) =
833  ( write'mem64(BitsN.bits(63,0) w,addr)
834  ; let
835      val x = BitsN.+(addr,BitsN.B(0x8,64))
836    in
837      write'mem64(BitsN.bits(127,64) w,x)
838    end
839  );
840
841fun Eflag flag =
842  case Map.lookup((!EFLAGS),Cast.ZeflagsToNat flag) of
843     Option.SOME b => b
844   | NONE => raise BadFlagAccess (Cast.ZeflagsToString flag);
845
846fun write'Eflag (b,flag) =
847  EFLAGS := (Map.update((!EFLAGS),Cast.ZeflagsToNat flag,Option.SOME b));
848
849fun FlagUnspecified flag =
850  EFLAGS := (Map.update((!EFLAGS),Cast.ZeflagsToNat flag,NONE));
851
852fun CF () = Eflag Z_CF;
853
854fun write'CF b = write'Eflag(b,Z_CF);
855
856fun PF () = Eflag Z_PF;
857
858fun write'PF b = write'Eflag(b,Z_PF);
859
860fun AF () = Eflag Z_AF;
861
862fun write'AF b = write'Eflag(b,Z_AF);
863
864fun ZF () = Eflag Z_ZF;
865
866fun write'ZF b = write'Eflag(b,Z_ZF);
867
868fun SF () = Eflag Z_SF;
869
870fun write'SF b = write'Eflag(b,Z_SF);
871
872fun OF () = Eflag Z_OF;
873
874fun write'OF b = write'Eflag(b,Z_OF);
875
876fun ea_index index =
877  case index of
878     NONE => BitsN.B(0x0,64)
879   | Option.SOME(scale,idx) =>
880     BitsN.*
881       (BitsN.<<(BitsN.B(0x1,64),BitsN.toNat scale),
882        Map.lookup((!REG),Cast.ZregToNat idx));
883
884fun ea_base base =
885  case base of
886     ZnoBase => BitsN.B(0x0,64)
887   | ZripBase => (!RIP)
888   | ZregBase b => Map.lookup((!REG),Cast.ZregToNat b);
889
890fun mem_addr m =
891  let
892    val (index,(base,displacement)) = m
893  in
894    BitsN.+(BitsN.+(ea_index index,ea_base base),displacement)
895  end;
896
897fun ea_Zrm (size,rm) =
898  case rm of Zr r => Zea_r(size,r) | Zm m => Zea_m(size,mem_addr m);
899
900fun ea_Zdest (size,ds) =
901  case ds of
902     Zrm_i(rm,_) => ea_Zrm(size,rm)
903   | Zrm_r(rm,_) => ea_Zrm(size,rm)
904   | Zr_rm(r,_) => Zea_r(size,r);
905
906fun ea_Zsrc (size,ds) =
907  case ds of
908     Zrm_i(_,i) => Zea_i(size,i)
909   | Zrm_r(_,r) => Zea_r(size,r)
910   | Zr_rm(_,rm) => ea_Zrm(size,rm);
911
912fun ea_Zimm_rm imm_rm =
913  case imm_rm of Zrm rm => ea_Zrm(Z64,rm) | Zimm imm => Zea_i(Z64,imm);
914
915fun modSize (size,imm) =
916  case size of
917     Z8 _ => BitsN.mod(imm,BitsN.B(0x8,64))
918   | Z16 => BitsN.mod(imm,BitsN.B(0x10,64))
919   | Z32 => BitsN.mod(imm,BitsN.B(0x20,64))
920   | Z64 => BitsN.mod(imm,BitsN.B(0x40,64));
921
922fun restrictSize (size,imm) =
923  case size of
924     Z8 _ => BitsN.&&(imm,BitsN.B(0xFF,64))
925   | Z16 => BitsN.&&(imm,BitsN.B(0xFFFF,64))
926   | Z32 => BitsN.&&(imm,BitsN.B(0xFFFFFFFF,64))
927   | Z64 => imm;
928
929fun EA ea =
930  case ea of
931     Zea_i i => restrictSize i
932   | Zea_r(Z8 have_rex,r) =>
933     BitsN.&&
934       (if have_rex orelse (not(Set.mem(r,[RSP,RBP,RSI,RDI])))
935          then Map.lookup((!REG),Cast.ZregToNat r)
936        else BitsN.>>+
937               (Map.lookup
938                  ((!REG),
939                   Cast.ZregToNat
940                     (Cast.natToZreg(Nat.-(Cast.ZregToNat r,4)))),8),
941        BitsN.B(0xFF,64))
942   | Zea_r(s,r) => restrictSize(s,Map.lookup((!REG),Cast.ZregToNat r))
943   | Zea_m(Z8 _,a) => BitsN.fromNat(BitsN.toNat(mem8 a),64)
944   | Zea_m(Z16,a) => BitsN.fromNat(BitsN.toNat(mem16 a),64)
945   | Zea_m(Z32,a) => BitsN.fromNat(BitsN.toNat(mem32 a),64)
946   | Zea_m(Z64,a) => mem64 a;
947
948fun write'EA (w,ea) =
949  case ea of
950     Zea_i i => raise FAILURE "write to constant"
951   | Zea_r(Z8 have_rex,r) =>
952     (if have_rex orelse (not(Set.mem(r,[RSP,RBP,RSI,RDI])))
953        then let
954               val w0 = Map.lookup((!REG),Cast.ZregToNat r)
955             in
956               REG :=
957               (Map.update
958                  ((!REG),Cast.ZregToNat r,
959                   BitsN.bitFieldInsert(7,0) (w0,BitsN.bits(7,0) w)))
960             end
961      else let
962             val x = Cast.natToZreg(Nat.-(Cast.ZregToNat r,4))
963             val w0 = Map.lookup((!REG),Cast.ZregToNat x)
964           in
965             REG :=
966             (Map.update
967                ((!REG),Cast.ZregToNat x,
968                 BitsN.bitFieldInsert(15,8) (w0,BitsN.bits(7,0) w)))
969           end)
970   | Zea_r(Z16,r) =>
971     let
972       val w0 = Map.lookup((!REG),Cast.ZregToNat r)
973     in
974       REG :=
975       (Map.update
976          ((!REG),Cast.ZregToNat r,
977           BitsN.bitFieldInsert(15,0) (w0,BitsN.bits(15,0) w)))
978     end
979   | Zea_r(Z32,r) =>
980     REG :=
981     (Map.update
982        ((!REG),Cast.ZregToNat r,BitsN.zeroExtend 64 (BitsN.bits(31,0) w)))
983   | Zea_r(Z64,r) => REG := (Map.update((!REG),Cast.ZregToNat r,w))
984   | Zea_m(Z8 _,a) => write'mem8(BitsN.bits(7,0) w,a)
985   | Zea_m(Z16,a) => write'mem16(BitsN.bits(15,0) w,a)
986   | Zea_m(Z32,a) => write'mem32(BitsN.bits(31,0) w,a)
987   | Zea_m(Z64,a) => write'mem64(w,a);
988
989fun read_dest_src_ea sd =
990  let val ea = ea_Zdest sd in (ea,(EA ea,EA(ea_Zsrc sd))) end;
991
992fun call_dest_from_ea ea =
993  case ea of
994     Zea_i(_,i) => BitsN.+((!RIP),i)
995   | Zea_r(_,r) => Map.lookup((!REG),Cast.ZregToNat r)
996   | Zea_m(_,a) => mem64 a;
997
998fun get_ea_address ea =
999  case ea of
1000     Zea_i(_,i) => BitsN.B(0x0,64)
1001   | Zea_r(_,r) => BitsN.B(0x0,64)
1002   | Zea_m(_,a) => a;
1003
1004fun jump_to_ea ea = RIP := (call_dest_from_ea ea);
1005
1006fun ByteParity b =
1007  let
1008    val count =
1009      Nat.+
1010        (Nat.+
1011           (Nat.+
1012              (Nat.+
1013                 (Nat.+
1014                    (Nat.+
1015                       (Nat.+
1016                          (Nat.fromBool(BitsN.bit(b,7)),
1017                           Nat.fromBool(BitsN.bit(b,6))),
1018                        Nat.fromBool(BitsN.bit(b,5))),
1019                     Nat.fromBool(BitsN.bit(b,4))),
1020                  Nat.fromBool(BitsN.bit(b,3))),
1021               Nat.fromBool(BitsN.bit(b,2))),Nat.fromBool(BitsN.bit(b,1))),
1022         Nat.fromBool(BitsN.bit(b,0)))
1023  in
1024    (Nat.mod(count,2)) = 0
1025  end;
1026
1027fun Zsize_width size =
1028  case size of Z8 _ => 8 | Z16 => 16 | Z32 => 32 | Z64 => 64;
1029
1030fun word_size_msb (size,w) = BitsN.bit(w,Nat.-(Zsize_width size,1));
1031
1032fun write_PF w = write'PF(ByteParity(BitsN.bits(7,0) w));
1033
1034fun write_SF s_w = write'SF(word_size_msb s_w);
1035
1036fun write_ZF (size,w) =
1037  write'ZF
1038    (case size of
1039        Z8 _ => (BitsN.fromNat(BitsN.toNat w,8)) = (BitsN.B(0x0,8))
1040      | Z16 => (BitsN.fromNat(BitsN.toNat w,16)) = (BitsN.B(0x0,16))
1041      | Z32 => (BitsN.fromNat(BitsN.toNat w,32)) = (BitsN.B(0x0,32))
1042      | Z64 => w = (BitsN.B(0x0,64)));
1043
1044fun write_arith_eflags_except_CF_OF (size,w) =
1045  ( write_PF w; write_SF(size,w); write_ZF(size,w); FlagUnspecified Z_AF );
1046
1047fun write_arith_eflags (size,(w,(c,x))) =
1048  ( write'CF c; write'OF x; write_arith_eflags_except_CF_OF(size,w) );
1049
1050fun write_logical_eflags (size,w) =
1051  write_arith_eflags(size,(w,(false,false)));
1052
1053fun erase_eflags () = EFLAGS := (Map.mkMap(SOME 6,NONE));
1054
1055fun value_width s = Nat.pow(2,Zsize_width s);
1056
1057fun word_signed_overflow_add (size,(a,b)) =
1058  ((word_size_msb(size,a)) = (word_size_msb(size,b))) andalso
1059  (not((word_size_msb(size,BitsN.+(a,b))) = (word_size_msb(size,a))));
1060
1061fun word_signed_overflow_sub (size,(a,b)) =
1062  (not((word_size_msb(size,a)) = (word_size_msb(size,b)))) andalso
1063  (not((word_size_msb(size,BitsN.-(a,b))) = (word_size_msb(size,a))));
1064
1065fun add_with_carry_out (size,(x,y)) =
1066  (BitsN.+(x,y),
1067   (Nat.<=(value_width size,Nat.+(BitsN.toNat x,BitsN.toNat y)),
1068    word_signed_overflow_add(size,(x,y))));
1069
1070fun sub_with_borrow (size,(x,y)) =
1071  (BitsN.-(x,y),(BitsN.<+(x,y),word_signed_overflow_sub(size,(x,y))));
1072
1073fun write_arith_result (size,(r,ea)) =
1074  ( write_arith_eflags(size,r); write'EA(L3.fst r,ea) );
1075
1076fun write_arith_result_no_CF_OF (size,(w,ea)) =
1077  ( write_arith_eflags_except_CF_OF(size,w); write'EA(w,ea) );
1078
1079fun write_logical_result (size,(w,ea)) =
1080  ( write_logical_eflags(size,w); write'EA(w,ea) );
1081
1082fun write_result_erase_eflags (w,ea) =
1083  ( erase_eflags (); write'EA(w,ea) );
1084
1085fun SignExtension (w,(size1,size2)) =
1086  let
1087    val v = ref w
1088  in
1089    ( case (size1,size2) of
1090         (Z8 _,Z16) =>
1091           v :=
1092           (BitsN.bitFieldInsert(15,0)
1093              ((!v),BitsN.signExtend 16 (BitsN.bits(7,0) w)))
1094       | (Z8 _,Z32) =>
1095         v :=
1096         (BitsN.bitFieldInsert(31,0)
1097            ((!v),BitsN.signExtend 32 (BitsN.bits(7,0) w)))
1098       | (Z8 _,Z64) => v := (BitsN.signExtend 64 (BitsN.bits(7,0) w))
1099       | (Z16,Z32) =>
1100         v :=
1101         (BitsN.bitFieldInsert(31,0)
1102            ((!v),BitsN.signExtend 32 (BitsN.bits(15,0) w)))
1103       | (Z16,Z64) => v := (BitsN.signExtend 64 (BitsN.bits(15,0) w))
1104       | (Z32,Z64) => v := (BitsN.signExtend 64 (BitsN.bits(31,0) w))
1105       | _ => raise FAILURE "SignExtension"
1106    ; (!v)
1107    )
1108  end;
1109
1110fun SignExtension64 (w,size) =
1111  if size = Z64 then w else SignExtension(w,(size,Z64));
1112
1113fun maskShift (size,w) =
1114  if size = Z64
1115    then BitsN.toNat(BitsN.bits(5,0) w)
1116  else BitsN.toNat(BitsN.bits(4,0) w);
1117
1118fun ROL (size,(x,y)) =
1119  case size of
1120     Z8 _ =>
1121       BitsN.fromNat
1122         (BitsN.toNat
1123            (BitsN.#<<(BitsN.bits(7,0) x,BitsN.toNat(BitsN.bits(4,0) y))),
1124          64)
1125   | Z16 =>
1126     BitsN.fromNat
1127       (BitsN.toNat
1128          (BitsN.#<<(BitsN.bits(15,0) x,BitsN.toNat(BitsN.bits(4,0) y))),
1129        64)
1130   | Z32 =>
1131     BitsN.fromNat
1132       (BitsN.toNat
1133          (BitsN.#<<(BitsN.bits(31,0) x,BitsN.toNat(BitsN.bits(4,0) y))),
1134        64)
1135   | Z64 => BitsN.#<<(x,BitsN.toNat(BitsN.bits(5,0) y));
1136
1137fun ROR (size,(x,y)) =
1138  case size of
1139     Z8 _ =>
1140       BitsN.fromNat
1141         (BitsN.toNat
1142            (BitsN.#>>(BitsN.bits(7,0) x,BitsN.toNat(BitsN.bits(4,0) y))),
1143          64)
1144   | Z16 =>
1145     BitsN.fromNat
1146       (BitsN.toNat
1147          (BitsN.#>>(BitsN.bits(15,0) x,BitsN.toNat(BitsN.bits(4,0) y))),
1148        64)
1149   | Z32 =>
1150     BitsN.fromNat
1151       (BitsN.toNat
1152          (BitsN.#>>(BitsN.bits(31,0) x,BitsN.toNat(BitsN.bits(4,0) y))),
1153        64)
1154   | Z64 => BitsN.#>>(x,BitsN.toNat(BitsN.bits(5,0) y));
1155
1156fun SAR (size,(x,y)) =
1157  case size of
1158     Z8 _ =>
1159       BitsN.fromNat
1160         (BitsN.toNat
1161            (BitsN.>>(BitsN.bits(7,0) x,BitsN.toNat(BitsN.bits(4,0) y))),
1162          64)
1163   | Z16 =>
1164     BitsN.fromNat
1165       (BitsN.toNat
1166          (BitsN.>>(BitsN.bits(15,0) x,BitsN.toNat(BitsN.bits(4,0) y))),64)
1167   | Z32 =>
1168     BitsN.fromNat
1169       (BitsN.toNat
1170          (BitsN.>>(BitsN.bits(31,0) x,BitsN.toNat(BitsN.bits(4,0) y))),64)
1171   | Z64 => BitsN.>>(x,BitsN.toNat(BitsN.bits(5,0) y));
1172
1173fun write_binop (s,(bop,(x,(y,ea)))) =
1174  case bop of
1175     Zadd => write_arith_result(s,(add_with_carry_out(s,(x,y)),ea))
1176   | Zsub => write_arith_result(s,(sub_with_borrow(s,(x,y)),ea))
1177   | Zcmp => write_arith_eflags(s,sub_with_borrow(s,(x,y)))
1178   | Ztest => write_logical_eflags(s,BitsN.&&(x,y))
1179   | Zand => write_logical_result(s,(BitsN.&&(x,y),ea))
1180   | Zxor => write_logical_result(s,(BitsN.??(x,y),ea))
1181   | Zor => write_logical_result(s,(BitsN.||(x,y),ea))
1182   | Zrol => write_result_erase_eflags(ROL(s,(x,y)),ea)
1183   | Zror => write_result_erase_eflags(ROR(s,(x,y)),ea)
1184   | Zsar => write_result_erase_eflags(SAR(s,(x,y)),ea)
1185   | Zshl => write_result_erase_eflags(BitsN.<<(x,maskShift(s,y)),ea)
1186   | Zshr => write_result_erase_eflags(BitsN.>>+(x,maskShift(s,y)),ea)
1187   | Zadc =>
1188     let
1189       val carry = CF ()
1190       val result = BitsN.+(BitsN.+(x,y),BitsN.fromBool 64 carry)
1191     in
1192       ( write'CF
1193           (Nat.<=
1194              (value_width s,
1195               Nat.+
1196                 (Nat.+(BitsN.toNat x,BitsN.toNat y),Nat.fromBool carry)))
1197       ; FlagUnspecified Z_OF
1198       ; write_arith_result_no_CF_OF(s,(result,ea))
1199       )
1200     end
1201   | Zsbb =>
1202     let
1203       val carry = CF ()
1204       val result = BitsN.-(x,BitsN.+(y,BitsN.fromBool 64 carry))
1205     in
1206       ( write'CF
1207           (Nat.<(BitsN.toNat x,Nat.+(BitsN.toNat y,Nat.fromBool carry)))
1208       ; FlagUnspecified Z_OF
1209       ; write_arith_result_no_CF_OF(s,(result,ea))
1210       )
1211     end
1212   | _ =>
1213     raise FAILURE
1214       ("Binary op not implemented: " ^ (Cast.Zbinop_nameToString bop));
1215
1216fun write_monop (s,(mop,(x,ea))) =
1217  case mop of
1218     Znot => write'EA(BitsN.~ x,ea)
1219   | Zdec =>
1220     write_arith_result_no_CF_OF(s,(BitsN.-(x,BitsN.B(0x1,64)),ea))
1221   | Zinc =>
1222     write_arith_result_no_CF_OF(s,(BitsN.+(x,BitsN.B(0x1,64)),ea))
1223   | Zneg =>
1224     ( write_arith_result_no_CF_OF(s,(BitsN.neg x,ea))
1225     ; FlagUnspecified Z_CF
1226     );
1227
1228fun bit_test (bt,(base,offset)) =
1229  let
1230    val bit = BitsN.bit(EA base,offset)
1231  in
1232    ( write'CF bit
1233    ; case bt of
1234         Zbt => ()
1235       | Zbtc =>
1236         let
1237           val w = EA base
1238         in
1239           write'EA
1240             (BitsN.bitFieldInsert(offset,offset)
1241                (w,BitsN.fromBit(not bit)),base)
1242         end
1243       | Zbtr =>
1244         let
1245           val w = EA base
1246         in
1247           write'EA
1248             (BitsN.bitFieldInsert(offset,offset) (w,BitsN.fromBit false),
1249              base)
1250         end
1251       | Zbts =>
1252         let
1253           val w = EA base
1254         in
1255           write'EA
1256             (BitsN.bitFieldInsert(offset,offset) (w,BitsN.fromBit true),
1257              base)
1258         end
1259    )
1260  end;
1261
1262fun read_cond c =
1263  case c of
1264     Z_A =>
1265       (case (Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_CF),
1266         Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_ZF)) of
1267           (Option.SOME false,Option.SOME false) => true
1268         | (Option.SOME true,_) => false
1269         | (_,Option.SOME true) => false
1270         | _ =>
1271           raise BadFlagAccess ("read_cond: " ^ (Cast.ZcondToString c)))
1272   | Z_NB => not(CF ())
1273   | Z_B => CF ()
1274   | Z_NA =>
1275     (case (Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_CF),
1276       Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_ZF)) of
1277         (Option.SOME true,_) => true
1278       | (_,Option.SOME true) => true
1279       | (Option.SOME false,Option.SOME false) => false
1280       | _ => raise BadFlagAccess ("read_cond: " ^ (Cast.ZcondToString c)))
1281   | Z_E => ZF ()
1282   | Z_G =>
1283     (case (Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_SF),
1284       Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_OF)) of
1285         (Option.SOME a,Option.SOME b) => (a = b) andalso (not(ZF ()))
1286       | _ =>
1287         (case Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_ZF) of
1288             Option.SOME true => false
1289           | _ =>
1290             raise BadFlagAccess ("read_cond: " ^ (Cast.ZcondToString c))))
1291   | Z_NL => (SF ()) = (OF ())
1292   | Z_L => not((SF ()) = (OF ()))
1293   | Z_NG =>
1294     (case (Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_SF),
1295       Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_OF)) of
1296         (Option.SOME a,Option.SOME b) => (not(a = b)) orelse (ZF ())
1297       | _ =>
1298         (case Map.lookup((!EFLAGS),Cast.ZeflagsToNat Z_ZF) of
1299             Option.SOME true => true
1300           | _ =>
1301             raise BadFlagAccess ("read_cond: " ^ (Cast.ZcondToString c))))
1302   | Z_NE => not(ZF ())
1303   | Z_NO => not(OF ())
1304   | Z_NP => not(PF ())
1305   | Z_NS => not(SF ())
1306   | Z_O => OF ()
1307   | Z_P => PF ()
1308   | Z_S => SF ()
1309   | Z_ALWAYS => true;
1310
1311fun x64_pop_aux () =
1312  let
1313    val rsp = Map.lookup((!REG),Cast.ZregToNat RSP)
1314    val top = mem64 rsp
1315  in
1316    ( REG :=
1317      (Map.update((!REG),Cast.ZregToNat RSP,BitsN.+(rsp,BitsN.B(0x8,64))))
1318    ; top
1319    )
1320  end;
1321
1322fun x64_pop rm =
1323  let val x = ea_Zrm(Z64,rm) in write'EA(x64_pop_aux (),x) end;
1324
1325fun x64_pop_rip () = RIP := (x64_pop_aux ());
1326
1327fun x64_push_aux w =
1328  let
1329    val rsp =
1330      BitsN.-(Map.lookup((!REG),Cast.ZregToNat RSP),BitsN.B(0x8,64))
1331  in
1332    ( REG := (Map.update((!REG),Cast.ZregToNat RSP,rsp))
1333    ; write'mem64(w,rsp)
1334    )
1335  end;
1336
1337fun x64_push imm_rm = x64_push_aux(EA(ea_Zimm_rm imm_rm));
1338
1339fun x64_push_rip () = x64_push_aux (!RIP);
1340
1341fun x64_drop imm =
1342  ( if not((BitsN.bits(7,0) imm) = (BitsN.B(0x0,8)))
1343      then raise FAILURE "x64_drop"
1344    else ()
1345  ; REG :=
1346    (Map.update
1347       ((!REG),Cast.ZregToNat RSP,
1348        BitsN.+(Map.lookup((!REG),Cast.ZregToNat RSP),imm)))
1349  );
1350
1351fun initial_ieee_flags invalid =
1352  let
1353    val flags = ref {DivideByZero = false, InvalidOp = false,
1354     Overflow = false, Precision = false, Underflow = false}
1355  in
1356    ( flags := (SSE.ieee_flags_DivideByZero_rupd((!flags),false))
1357    ; flags := (SSE.ieee_flags_InvalidOp_rupd((!flags),invalid))
1358    ; flags := (SSE.ieee_flags_Overflow_rupd((!flags),false))
1359    ; flags := (SSE.ieee_flags_Precision_rupd((!flags),false))
1360    ; flags := (SSE.ieee_flags_Underflow_rupd((!flags),false))
1361    ; (!flags)
1362    )
1363  end;
1364
1365fun set_precision (flags,b) =
1366  let
1367    val f = ref flags
1368  in
1369    ( f := (SSE.ieee_flags_Precision_rupd((!f),b)); (!f) )
1370  end;
1371
1372fun zero32 a = if BitsN.msb a then FP32.negZero else FP32.posZero;
1373
1374fun zero64 a = if BitsN.msb a then FP64.negZero else FP64.posZero;
1375
1376fun denormal_to_zero32 a =
1377  if (#DAZ((!MXCSR) : MXCSR)) andalso (FP32.isSubnormal a)
1378    then zero32 a
1379  else a;
1380
1381fun denormal_to_zero64 a =
1382  if (#DAZ((!MXCSR) : MXCSR)) andalso (FP64.isSubnormal a)
1383    then zero64 a
1384  else a;
1385
1386fun flush_to_zero32 (flags,a) =
1387  if (#FZ((!MXCSR) : MXCSR)) andalso
1388     ((#UM((!MXCSR) : MXCSR)) andalso (#Underflow(flags : SSE.ieee_flags)))
1389    then (set_precision(flags,true),zero32 a)
1390  else (flags,a);
1391
1392fun flush_to_zero64 (flags,a) =
1393  if (#FZ((!MXCSR) : MXCSR)) andalso
1394     ((#UM((!MXCSR) : MXCSR)) andalso (#Underflow(flags : SSE.ieee_flags)))
1395    then (set_precision(flags,true),zero64 a)
1396  else (flags,a);
1397
1398fun sse_from_int64 N (m,w) =
1399  let
1400    val i = BitsN.toInt w
1401    val q = FP64.fromInt(m,i)
1402  in
1403    (set_precision
1404       (initial_ieee_flags false,not((FP64.toInt(m,q)) = (Option.SOME i))),
1405     q)
1406  end;
1407
1408fun sse_from_int32 N (m,w) =
1409  let
1410    val i = BitsN.toInt w
1411    val d = FP32.fromInt(m,i)
1412  in
1413    (set_precision
1414       (initial_ieee_flags false,not((FP32.toInt(m,d)) = (Option.SOME i))),
1415     d)
1416  end;
1417
1418fun sse_to_int64 N (m,a) =
1419  case FP64.toInt(m,a) of
1420     Option.SOME i =>
1421       (if (IntInf.<=
1422              (IntInf.~
1423                 (IntExtra.pow(2,Nat.-(BitsN.size(BitsN.BV(0x0,N)),1))),i)) andalso
1424           (IntInf.<=
1425              (i,
1426               IntInf.-
1427                 (IntExtra.pow(2,Nat.-(BitsN.size(BitsN.BV(0x0,N)),1)),1)))
1428          then let
1429                 val w = BitsN.fromInt(i,N)
1430                 val f =
1431                   set_precision
1432                     (initial_ieee_flags false,
1433                      not(FP64.equal(FP64.fromInt(m,BitsN.toInt w),a)))
1434               in
1435                 (f,w)
1436               end
1437        else (initial_ieee_flags true,BitsN.#>>(BitsN.BV(0x1,N),1)))
1438   | NONE => (initial_ieee_flags true,BitsN.#>>(BitsN.BV(0x1,N),1));
1439
1440fun sse_to_int32 N (m,a) =
1441  case FP32.toInt(m,a) of
1442     Option.SOME i =>
1443       (if (IntInf.<=
1444              (IntInf.~
1445                 (IntExtra.pow(2,Nat.-(BitsN.size(BitsN.BV(0x0,N)),1))),i)) andalso
1446           (IntInf.<=
1447              (i,
1448               IntInf.-
1449                 (IntExtra.pow(2,Nat.-(BitsN.size(BitsN.BV(0x0,N)),1)),1)))
1450          then let
1451                 val w = BitsN.fromInt(i,N)
1452                 val f =
1453                   set_precision
1454                     (initial_ieee_flags false,
1455                      not(FP32.equal(FP32.fromInt(m,BitsN.toInt w),a)))
1456               in
1457                 (f,w)
1458               end
1459        else (initial_ieee_flags true,BitsN.#>>(BitsN.BV(0x1,N),1)))
1460   | NONE => (initial_ieee_flags true,BitsN.#>>(BitsN.BV(0x1,N),1));
1461
1462fun float_min32 (a,b) =
1463  let
1464    val flags =
1465      initial_ieee_flags((FP32.compare(a,b)) = IEEEReal.UNORDERED)
1466  in
1467    (flags,
1468     if (not(#InvalidOp(flags : SSE.ieee_flags))) andalso
1469        (FP32.lessThan(a,b))
1470       then a
1471     else b)
1472  end;
1473
1474fun float_max32 (a,b) =
1475  let
1476    val flags =
1477      initial_ieee_flags((FP32.compare(a,b)) = IEEEReal.UNORDERED)
1478  in
1479    (flags,
1480     if (#InvalidOp(flags : SSE.ieee_flags)) orelse (FP32.lessEqual(a,b))
1481       then b
1482     else a)
1483  end;
1484
1485fun float_min64 (a,b) =
1486  let
1487    val flags =
1488      initial_ieee_flags((FP64.compare(a,b)) = IEEEReal.UNORDERED)
1489  in
1490    (flags,
1491     if (not(#InvalidOp(flags : SSE.ieee_flags))) andalso
1492        (FP64.lessThan(a,b))
1493       then a
1494     else b)
1495  end;
1496
1497fun float_max64 (a,b) =
1498  let
1499    val flags =
1500      initial_ieee_flags((FP64.compare(a,b)) = IEEEReal.UNORDERED)
1501  in
1502    (flags,
1503     if (#InvalidOp(flags : SSE.ieee_flags)) orelse (FP64.lessEqual(a,b))
1504       then b
1505     else a)
1506  end;
1507
1508fun process_float_flags l =
1509  let
1510    val denorm = ref false
1511  in
1512    let
1513      val f = ref (initial_ieee_flags false)
1514    in
1515      ( L3.foreach
1516          (l,
1517           fn x =>
1518             let
1519               val (d,flags) = x
1520             in
1521               ( denorm := ((!denorm) orelse d)
1522               ; f :=
1523                 (SSE.ieee_flags_DivideByZero_rupd
1524                    ((!f),
1525                     (#DivideByZero((!f) : SSE.ieee_flags)) orelse
1526                     (#DivideByZero(flags : SSE.ieee_flags))))
1527               ; f :=
1528                 (SSE.ieee_flags_InvalidOp_rupd
1529                    ((!f),
1530                     (#InvalidOp((!f) : SSE.ieee_flags)) orelse
1531                     (#InvalidOp(flags : SSE.ieee_flags))))
1532               ; f :=
1533                 (SSE.ieee_flags_Overflow_rupd
1534                    ((!f),
1535                     (#Overflow((!f) : SSE.ieee_flags)) orelse
1536                     (#Overflow(flags : SSE.ieee_flags))))
1537               ; f :=
1538                 (SSE.ieee_flags_Precision_rupd
1539                    ((!f),
1540                     (#Precision((!f) : SSE.ieee_flags)) orelse
1541                     (#Precision(flags : SSE.ieee_flags))))
1542               ; f :=
1543                 (SSE.ieee_flags_Underflow_rupd
1544                    ((!f),
1545                     (#Underflow((!f) : SSE.ieee_flags)) orelse
1546                     (#Underflow(flags : SSE.ieee_flags))))
1547               )
1548             end)
1549      ; MXCSR :=
1550        (MXCSR_IE_rupd
1551           ((!MXCSR),
1552            (#IE((!MXCSR) : MXCSR)) orelse
1553            (#InvalidOp((!f) : SSE.ieee_flags))))
1554      ; MXCSR :=
1555        (MXCSR_DE_rupd((!MXCSR),(#DE((!MXCSR) : MXCSR)) orelse (!denorm)))
1556      ; MXCSR :=
1557        (MXCSR_ZE_rupd
1558           ((!MXCSR),
1559            (#ZE((!MXCSR) : MXCSR)) orelse
1560            (#DivideByZero((!f) : SSE.ieee_flags))))
1561      ; if ((#InvalidOp((!f) : SSE.ieee_flags)) andalso
1562            (not(#IM((!MXCSR) : MXCSR)))) orelse
1563           (((!denorm) andalso (not(#DM((!MXCSR) : MXCSR)))) orelse
1564            ((#DivideByZero((!f) : SSE.ieee_flags)) andalso
1565             (not(#ZM((!MXCSR) : MXCSR)))))
1566          then XM_exception ()
1567        else ()
1568      ; MXCSR :=
1569        (MXCSR_OE_rupd
1570           ((!MXCSR),
1571            (#OE((!MXCSR) : MXCSR)) orelse
1572            (#Overflow((!f) : SSE.ieee_flags))))
1573      ; MXCSR :=
1574        (MXCSR_PE_rupd
1575           ((!MXCSR),
1576            (#PE((!MXCSR) : MXCSR)) orelse
1577            (#Precision((!f) : SSE.ieee_flags))))
1578      ; MXCSR :=
1579        (MXCSR_UE_rupd
1580           ((!MXCSR),
1581            (#UE((!MXCSR) : MXCSR)) orelse
1582            (#Underflow((!f) : SSE.ieee_flags))))
1583      ; if ((#Overflow((!f) : SSE.ieee_flags)) andalso
1584            (not(#OM((!MXCSR) : MXCSR)))) orelse
1585           (((#Precision((!f) : SSE.ieee_flags)) andalso
1586             (not(#PM((!MXCSR) : MXCSR)))) orelse
1587            ((#Underflow((!f) : SSE.ieee_flags)) andalso
1588             (not(#UM((!MXCSR) : MXCSR)))))
1589          then XM_exception ()
1590        else ()
1591      )
1592    end
1593  end;
1594
1595fun RoundingMode () =
1596  case #RC((!MXCSR) : MXCSR) of
1597     BitsN.B(0x0,_) => IEEEReal.TO_NEAREST
1598   | BitsN.B(0x1,_) => IEEEReal.TO_NEGINF
1599   | BitsN.B(0x2,_) => IEEEReal.TO_POSINF
1600   | BitsN.B(0x3,_) => IEEEReal.TO_ZERO
1601   | _ => raise General.Bind;
1602
1603fun sse_binop32 (bop,(a,b)) =
1604  let
1605    val denorm = (FP32.isSubnormal a) orelse (FP32.isSubnormal b)
1606    val a = denormal_to_zero32 a
1607    val b = denormal_to_zero32 b
1608    val (f,r) =
1609      case bop of
1610         sse_add => flush_to_zero32(FP32.add(RoundingMode (),(a,b)))
1611       | sse_sub => flush_to_zero32(FP32.sub(RoundingMode (),(a,b)))
1612       | sse_mul => flush_to_zero32(FP32.mul(RoundingMode (),(a,b)))
1613       | sse_div => flush_to_zero32(FP32.div(RoundingMode (),(a,b)))
1614       | sse_max => float_max32(a,b)
1615       | sse_min => float_min32(a,b)
1616  in
1617    ((denorm,f),r)
1618  end;
1619
1620fun sse_binop64 (bop,(a,b)) =
1621  let
1622    val denorm = (FP64.isSubnormal a) orelse (FP64.isSubnormal b)
1623    val a = denormal_to_zero64 a
1624    val b = denormal_to_zero64 b
1625    val (f,r) =
1626      case bop of
1627         sse_add => flush_to_zero64(FP64.add(RoundingMode (),(a,b)))
1628       | sse_sub => flush_to_zero64(FP64.sub(RoundingMode (),(a,b)))
1629       | sse_mul => flush_to_zero64(FP64.mul(RoundingMode (),(a,b)))
1630       | sse_div => flush_to_zero64(FP64.div(RoundingMode (),(a,b)))
1631       | sse_max => float_max64(a,b)
1632       | sse_min => float_min64(a,b)
1633  in
1634    ((denorm,f),r)
1635  end;
1636
1637fun sse_sqrt32 a =
1638  let
1639    val (f,r) = FP32.sqrt(RoundingMode (),denormal_to_zero32 a)
1640  in
1641    ((FP32.isSubnormal a,f),r)
1642  end;
1643
1644fun sse_sqrt64 a =
1645  let
1646    val (f,r) = FP64.sqrt(RoundingMode (),denormal_to_zero64 a)
1647  in
1648    ((FP64.isSubnormal a,f),r)
1649  end;
1650
1651fun sse_logic N (bop,(a,b)) =
1652  case bop of
1653     sse_and => BitsN.&&(a,b)
1654   | sse_or => BitsN.||(a,b)
1655   | sse_xor => BitsN.??(a,b)
1656   | sse_andn => BitsN.&&(BitsN.~ a,b);
1657
1658fun sse_compare_signalling cmp =
1659  Set.mem(cmp,[sse_lt_os,sse_le_os,sse_nlt_us,sse_nle_us]);
1660
1661fun sse_compare32 (cmp,(a,b)) =
1662  let
1663    val unordered = (FP32.compare(a,b)) = IEEEReal.UNORDERED
1664    val c =
1665      case cmp of
1666         sse_eq_oq => FP32.equal(a,b)
1667       | sse_lt_os => FP32.lessThan(a,b)
1668       | sse_le_os => FP32.lessEqual(a,b)
1669       | sse_unord_q => unordered
1670       | sse_neq_uq => unordered orelse (not(FP32.equal(a,b)))
1671       | sse_nlt_us => unordered orelse (not(FP32.lessThan(a,b)))
1672       | sse_nle_us => unordered orelse (not(FP32.lessEqual(a,b)))
1673       | sss_ord_q => not unordered
1674    val flags =
1675      initial_ieee_flags
1676        ((FP32.isSignallingNan a) orelse
1677         ((FP32.isSignallingNan b) orelse
1678          (unordered andalso (sse_compare_signalling cmp))))
1679  in
1680    (((FP32.isSubnormal a) orelse (FP32.isSubnormal b),flags),
1681     BitsN.signExtend 32 (BitsN.fromBit c))
1682  end;
1683
1684fun sse_compare64 (cmp,(a,b)) =
1685  let
1686    val unordered = (FP64.compare(a,b)) = IEEEReal.UNORDERED
1687    val c =
1688      case cmp of
1689         sse_eq_oq => FP64.equal(a,b)
1690       | sse_lt_os => FP64.lessThan(a,b)
1691       | sse_le_os => FP64.lessEqual(a,b)
1692       | sse_unord_q => unordered
1693       | sse_neq_uq => unordered orelse (not(FP64.equal(a,b)))
1694       | sse_nlt_us => unordered orelse (not(FP64.lessThan(a,b)))
1695       | sse_nle_us => unordered orelse (not(FP64.lessEqual(a,b)))
1696       | sss_ord_q => not unordered
1697    val flags =
1698      initial_ieee_flags
1699        ((FP64.isSignallingNan a) orelse
1700         ((FP64.isSignallingNan b) orelse
1701          (unordered andalso (sse_compare_signalling cmp))))
1702  in
1703    (((FP64.isSubnormal a) orelse (FP64.isSubnormal b),flags),
1704     BitsN.signExtend 64 (BitsN.fromBit c))
1705  end;
1706
1707fun rm_to_xmm_mem rm =
1708  case rm of
1709     Zr r => xmm_reg(BitsN.fromNat(Cast.ZregToNat r,3))
1710   | Zm m => xmm_mem m;
1711
1712fun XMM xm =
1713  case xm of
1714     xmm_reg r => Map.lookup((!XMM_REG),BitsN.toNat r)
1715   | xmm_mem m => mem128(mem_addr m);
1716
1717fun write'XMM (dqw,xm) =
1718  case xm of
1719     xmm_reg r => XMM_REG := (Map.update((!XMM_REG),BitsN.toNat r,dqw))
1720   | xmm_mem m => let val x = mem_addr m in write'mem128(dqw,x) end;
1721
1722fun CheckAlignedXMM (xm,n) =
1723  case xm of
1724     xmm_reg r => ()
1725   | xmm_mem m =>
1726     let
1727       val a = mem_addr m
1728     in
1729       if not((BitsN.<<(BitsN.>>+(a,n),n)) = a)
1730         then GP_exception ()
1731       else ()
1732     end;
1733
1734fun dfn'bin_PD (bop,(dst,src)) =
1735  let
1736    val dst = xmm_reg dst
1737    val x = XMM dst
1738    val x0 = XMM src
1739    val (f1,r1) =
1740      sse_binop64(bop,(BitsN.bits(127,64) x,BitsN.bits(127,64) x0))
1741    val (f2,r2) =
1742      sse_binop64(bop,(BitsN.bits(63,0) x,BitsN.bits(63,0) x0))
1743  in
1744    ( process_float_flags[f1,f2]; write'XMM(BitsN.@@(r1,r2),dst) )
1745  end;
1746
1747fun dfn'bin_PS (bop,(dst,src)) =
1748  let
1749    val dst = xmm_reg dst
1750    val x = XMM dst
1751    val x0 = XMM src
1752    val (f1,r1) =
1753      sse_binop32(bop,(BitsN.bits(127,96) x,BitsN.bits(127,96) x0))
1754    val (f2,r2) =
1755      sse_binop32(bop,(BitsN.bits(95,64) x,BitsN.bits(95,64) x0))
1756    val (f3,r3) =
1757      sse_binop32(bop,(BitsN.bits(63,32) x,BitsN.bits(63,32) x0))
1758    val (f4,r4) =
1759      sse_binop32(bop,(BitsN.bits(31,0) x,BitsN.bits(31,0) x0))
1760  in
1761    ( process_float_flags[f1,f2,f3,f4]
1762    ; write'XMM(BitsN.concat[r1,r2,r3,r4],dst)
1763    )
1764  end;
1765
1766fun dfn'bin_SD (bop,(dst,src)) =
1767  let
1768    val dst = xmm_reg dst
1769    val x = XMM dst
1770    val x0 = XMM src
1771    val (f,r) = sse_binop64(bop,(BitsN.bits(63,0) x,BitsN.bits(63,0) x0))
1772  in
1773    ( process_float_flags[f]
1774    ; let
1775        val w = XMM dst
1776      in
1777        write'XMM(BitsN.bitFieldInsert(63,0) (w,r),dst)
1778      end
1779    )
1780  end;
1781
1782fun dfn'bin_SS (bop,(dst,src)) =
1783  let
1784    val dst = xmm_reg dst
1785    val x = XMM dst
1786    val x0 = XMM src
1787    val (f,r) = sse_binop32(bop,(BitsN.bits(31,0) x,BitsN.bits(31,0) x0))
1788  in
1789    ( process_float_flags[f]
1790    ; let
1791        val w = XMM dst
1792      in
1793        write'XMM(BitsN.bitFieldInsert(31,0) (w,r),dst)
1794      end
1795    )
1796  end;
1797
1798fun dfn'logic_PD (bop,(dst,src)) =
1799  let
1800    val dst = xmm_reg dst
1801    val x = XMM dst
1802    val x0 = XMM src
1803    val r1 =
1804      sse_logic 64 (bop,(BitsN.bits(127,64) x,BitsN.bits(127,64) x0))
1805    val r2 = sse_logic 64 (bop,(BitsN.bits(63,0) x,BitsN.bits(63,0) x0))
1806  in
1807    write'XMM(BitsN.@@(r1,r2),dst)
1808  end;
1809
1810fun dfn'logic_PS (bop,(dst,src)) =
1811  let
1812    val dst = xmm_reg dst
1813    val x = XMM dst
1814    val x0 = XMM src
1815    val r1 =
1816      sse_logic 32 (bop,(BitsN.bits(127,96) x,BitsN.bits(127,96) x0))
1817    val r2 = sse_logic 32 (bop,(BitsN.bits(95,64) x,BitsN.bits(95,64) x0))
1818    val r3 = sse_logic 32 (bop,(BitsN.bits(63,32) x,BitsN.bits(63,32) x0))
1819    val r4 = sse_logic 32 (bop,(BitsN.bits(31,0) x,BitsN.bits(31,0) x0))
1820  in
1821    write'XMM(BitsN.concat[r1,r2,r3,r4],dst)
1822  end;
1823
1824fun dfn'CMPPD (cmp,(dst,src)) =
1825  let
1826    val dst = xmm_reg dst
1827    val x = XMM dst
1828    val x0 = XMM src
1829    val (f1,r1) =
1830      sse_compare64(cmp,(BitsN.bits(127,64) x,BitsN.bits(127,64) x0))
1831    val (f2,r2) =
1832      sse_compare64(cmp,(BitsN.bits(63,0) x,BitsN.bits(63,0) x0))
1833  in
1834    ( process_float_flags[f1,f2]; write'XMM(BitsN.@@(r1,r2),dst) )
1835  end;
1836
1837fun dfn'CMPPS (cmp,(dst,src)) =
1838  let
1839    val dst = xmm_reg dst
1840    val x = XMM dst
1841    val x0 = XMM src
1842    val (f1,r1) =
1843      sse_compare32(cmp,(BitsN.bits(127,96) x,BitsN.bits(127,96) x0))
1844    val (f2,r2) =
1845      sse_compare32(cmp,(BitsN.bits(95,64) x,BitsN.bits(95,64) x0))
1846    val (f3,r3) =
1847      sse_compare32(cmp,(BitsN.bits(63,32) x,BitsN.bits(63,32) x0))
1848    val (f4,r4) =
1849      sse_compare32(cmp,(BitsN.bits(31,0) x,BitsN.bits(31,0) x0))
1850  in
1851    ( process_float_flags[f1,f2,f3,f4]
1852    ; write'XMM(BitsN.concat[r1,r2,r3,r4],dst)
1853    )
1854  end;
1855
1856fun dfn'CMPSD (cmp,(dst,src)) =
1857  let
1858    val dst = xmm_reg dst
1859    val x = XMM dst
1860    val x0 = XMM src
1861    val (f,r) =
1862      sse_compare64(cmp,(BitsN.bits(63,0) x,BitsN.bits(63,0) x0))
1863  in
1864    ( process_float_flags[f]
1865    ; let
1866        val w = XMM dst
1867      in
1868        write'XMM(BitsN.bitFieldInsert(63,0) (w,r),dst)
1869      end
1870    )
1871  end;
1872
1873fun dfn'CMPSS (cmp,(dst,src)) =
1874  let
1875    val dst = xmm_reg dst
1876    val x = XMM dst
1877    val x0 = XMM src
1878    val (f,r) =
1879      sse_compare32(cmp,(BitsN.bits(31,0) x,BitsN.bits(31,0) x0))
1880  in
1881    ( process_float_flags[f]
1882    ; let
1883        val w = XMM dst
1884      in
1885        write'XMM(BitsN.bitFieldInsert(31,0) (w,r),dst)
1886      end
1887    )
1888  end;
1889
1890fun dfn'COMISD (src1,src2) =
1891  let
1892    val x = XMM(xmm_reg src1)
1893    val x0 = XMM src2
1894  in
1895    ( case FP64.compare(BitsN.bits(63,0) x,BitsN.bits(63,0) x0) of
1896         IEEEReal.UNORDERED =>
1897           ( write'ZF true; write'PF true; write'CF true )
1898       | IEEEReal.GREATER =>
1899         ( write'ZF false; write'PF false; write'CF false )
1900       | IEEEReal.LESS =>
1901         ( write'ZF false; write'PF false; write'CF true )
1902       | IEEEReal.EQUAL =>
1903         ( write'ZF true; write'PF false; write'CF false )
1904    ; write'OF false
1905    ; write'AF false
1906    ; write'SF false
1907    )
1908  end;
1909
1910fun dfn'COMISS (src1,src2) =
1911  let
1912    val x = XMM(xmm_reg src1)
1913    val x0 = XMM src2
1914  in
1915    ( case FP32.compare(BitsN.bits(31,0) x,BitsN.bits(31,0) x0) of
1916         IEEEReal.UNORDERED =>
1917           ( write'ZF true; write'PF true; write'CF true )
1918       | IEEEReal.GREATER =>
1919         ( write'ZF false; write'PF false; write'CF false )
1920       | IEEEReal.LESS =>
1921         ( write'ZF false; write'PF false; write'CF true )
1922       | IEEEReal.EQUAL =>
1923         ( write'ZF true; write'PF false; write'CF false )
1924    ; write'OF false
1925    ; write'AF false
1926    ; write'SF false
1927    )
1928  end;
1929
1930fun dfn'CVTDQ2PD (dst,src) =
1931  let
1932    val x = XMM src
1933    val mode = RoundingMode ()
1934    val x0 = xmm_reg dst
1935  in
1936    write'XMM
1937      (BitsN.@@
1938         (FP64.fromInt(mode,BitsN.toInt(BitsN.bits(63,32) x)),
1939          FP64.fromInt(mode,BitsN.toInt(BitsN.bits(31,0) x))),x0)
1940  end;
1941
1942fun dfn'CVTDQ2PS (dst,src) =
1943  let
1944    val x = XMM src
1945    val mode = RoundingMode ()
1946    val (f1,d1) = sse_from_int32 32 (mode,BitsN.bits(127,96) x)
1947    val (f2,d2) = sse_from_int32 32 (mode,BitsN.bits(95,64) x)
1948    val (f3,d3) = sse_from_int32 32 (mode,BitsN.bits(63,32) x)
1949    val (f4,d4) = sse_from_int32 32 (mode,BitsN.bits(31,0) x)
1950  in
1951    ( process_float_flags[(false,f1),(false,f2),(false,f3),(false,f4)]
1952    ; let
1953        val x = xmm_reg dst
1954      in
1955        write'XMM(BitsN.concat[d1,d2,d3,d4],x)
1956      end
1957    )
1958  end;
1959
1960fun dfn'CVTPD2DQ (truncate,(dst,src)) =
1961  let
1962    val x = XMM src
1963    val mode = if truncate then IEEEReal.TO_ZERO else RoundingMode ()
1964    val (f1,w1) = sse_to_int64 32 (mode,BitsN.bits(127,64) x)
1965    val (f2,w2) = sse_to_int64 32 (mode,BitsN.bits(63,0) x)
1966  in
1967    ( process_float_flags[(false,f1),(false,f2)]
1968    ; let
1969        val x = xmm_reg dst
1970        val w = XMM x
1971      in
1972        write'XMM(BitsN.bitFieldInsert(63,0) (w,BitsN.@@(w1,w2)),x)
1973      end
1974    )
1975  end;
1976
1977fun dfn'CVTPD2PS (dst,src) =
1978  let
1979    val x = XMM src
1980    val q1 = BitsN.bits(127,64) x
1981    val mode = RoundingMode ()
1982    val (f1,r1) = FPConvert.fp64_to_fp32_with_flags(mode,q1)
1983    val (f2,r2) = FPConvert.fp64_to_fp32_with_flags(mode,q1)
1984    val f1 = (FP64.isSubnormal q1,f1)
1985    val f2 = (FP64.isSubnormal(BitsN.bits(63,0) x),f2)
1986  in
1987    ( process_float_flags[f1,f2]
1988    ; let
1989        val x = xmm_reg dst
1990        val w = XMM x
1991      in
1992        write'XMM(BitsN.bitFieldInsert(63,0) (w,BitsN.@@(r1,r2)),x)
1993      end
1994    )
1995  end;
1996
1997fun dfn'CVTPS2DQ (truncate,(dst,src)) =
1998  let
1999    val x = XMM src
2000    val mode = if truncate then IEEEReal.TO_ZERO else RoundingMode ()
2001    val (f1,w1) = sse_to_int32 32 (mode,BitsN.bits(127,96) x)
2002    val (f2,w2) = sse_to_int32 32 (mode,BitsN.bits(95,64) x)
2003    val (f3,w3) = sse_to_int32 32 (mode,BitsN.bits(63,32) x)
2004    val (f4,w4) = sse_to_int32 32 (mode,BitsN.bits(31,0) x)
2005  in
2006    ( process_float_flags[(false,f1),(false,f2),(false,f3),(false,f4)]
2007    ; let
2008        val x = xmm_reg dst
2009      in
2010        write'XMM(BitsN.concat[w1,w2,w3,w4],x)
2011      end
2012    )
2013  end;
2014
2015fun dfn'CVTPS2PD (dst,src) =
2016  let
2017    val x = XMM src
2018    val d1 = BitsN.bits(63,32) x
2019    val d2 = BitsN.bits(31,0) x
2020    val f1 =
2021      (FP32.isSubnormal d1,initial_ieee_flags(FP32.isSignallingNan d1))
2022    val f2 =
2023      (FP32.isSubnormal d2,initial_ieee_flags(FP32.isSignallingNan d2))
2024  in
2025    ( process_float_flags[f1,f2]
2026    ; let
2027        val x = xmm_reg dst
2028      in
2029        write'XMM
2030          (BitsN.@@(FPConvert.fp32_to_fp64 d1,FPConvert.fp32_to_fp64 d2),x)
2031      end
2032    )
2033  end;
2034
2035fun dfn'CVTSD2SI (truncate,(quad,(dst,src))) =
2036  let
2037    val x = XMM src
2038    val q = BitsN.bits(63,0) x
2039    val mode = if truncate then IEEEReal.TO_ZERO else RoundingMode ()
2040  in
2041    if quad
2042      then let
2043             val (f,w) = sse_to_int64 64 (mode,q)
2044           in
2045             ( process_float_flags[(false,f)]
2046             ; let val x = Zea_r(Z64,dst) in write'EA(w,x) end
2047             )
2048           end
2049    else let
2050           val (f,w) = sse_to_int64 32 (mode,q)
2051         in
2052           ( process_float_flags[(false,f)]
2053           ; let
2054               val x = Zea_r(Z32,dst)
2055             in
2056               write'EA(BitsN.zeroExtend 64 w,x)
2057             end
2058           )
2059         end
2060  end;
2061
2062fun dfn'CVTSD2SS (dst,src) =
2063  let
2064    val x = XMM src
2065    val q = BitsN.bits(63,0) x
2066    val (f,r) = FPConvert.fp64_to_fp32_with_flags(RoundingMode (),q)
2067    val f = (FP64.isSubnormal q,f)
2068  in
2069    ( process_float_flags[f]
2070    ; let
2071        val x = xmm_reg dst
2072        val w = XMM x
2073      in
2074        write'XMM(BitsN.bitFieldInsert(31,0) (w,r),x)
2075      end
2076    )
2077  end;
2078
2079fun dfn'CVTSI2SD (quad,(dst,src)) =
2080  let
2081    val ea_src = ea_Zrm(if quad then Z64 else Z32,src)
2082    val (f,q) = sse_from_int64 64 (RoundingMode (),EA ea_src)
2083  in
2084    ( process_float_flags[(false,f)]
2085    ; let
2086        val x = xmm_reg dst
2087        val w = XMM x
2088      in
2089        write'XMM(BitsN.bitFieldInsert(63,0) (w,q),x)
2090      end
2091    )
2092  end;
2093
2094fun dfn'CVTSI2SS (quad,(dst,src)) =
2095  let
2096    val ea_src = ea_Zrm(if quad then Z64 else Z32,src)
2097    val (f,d) = sse_from_int32 64 (RoundingMode (),EA ea_src)
2098  in
2099    ( process_float_flags[(false,f)]
2100    ; let
2101        val x = xmm_reg dst
2102        val w = XMM x
2103      in
2104        write'XMM(BitsN.bitFieldInsert(31,0) (w,d),x)
2105      end
2106    )
2107  end;
2108
2109fun dfn'CVTSS2SD (dst,src) =
2110  let
2111    val x = XMM src
2112    val d = BitsN.bits(31,0) x
2113    val f =
2114      (FP32.isSubnormal d,initial_ieee_flags(FP32.isSignallingNan d))
2115  in
2116    ( process_float_flags[f]
2117    ; let
2118        val x = xmm_reg dst
2119        val w = XMM x
2120      in
2121        write'XMM
2122          (BitsN.bitFieldInsert(63,0) (w,FPConvert.fp32_to_fp64 d),x)
2123      end
2124    )
2125  end;
2126
2127fun dfn'CVTSS2SI (truncate,(quad,(dst,src))) =
2128  let
2129    val x = XMM src
2130    val d = BitsN.bits(31,0) x
2131    val mode = if truncate then IEEEReal.TO_ZERO else RoundingMode ()
2132  in
2133    if quad
2134      then let
2135             val (f,w) = sse_to_int32 64 (mode,d)
2136           in
2137             ( process_float_flags[(false,f)]
2138             ; let val x = Zea_r(Z64,dst) in write'EA(w,x) end
2139             )
2140           end
2141    else let
2142           val (f,w) = sse_to_int32 32 (mode,d)
2143         in
2144           ( process_float_flags[(false,f)]
2145           ; let
2146               val x = Zea_r(Z32,dst)
2147             in
2148               write'EA(BitsN.zeroExtend 64 w,x)
2149             end
2150           )
2151         end
2152  end;
2153
2154fun dfn'MOVAP_D_S (double,(dst,src)) =
2155  ( CheckAlignedXMM(dst,4)
2156  ; CheckAlignedXMM(src,4)
2157  ; write'XMM(XMM src,dst)
2158  );
2159
2160fun dfn'MOVUP_D_S (double,(dst,src)) = write'XMM(XMM src,dst);
2161
2162fun dfn'MOV_D_Q (to_xmm,(quad,(xmm,rm))) =
2163  case (to_xmm,quad) of
2164     (false,false) =>
2165       let
2166         val x = ea_Zrm(Z32,rm)
2167       in
2168         write'EA
2169           (BitsN.zeroExtend 64 (BitsN.bits(31,0) (XMM(xmm_reg xmm))),x)
2170       end
2171   | (false,true) =>
2172     let
2173       val x = ea_Zrm(Z64,rm)
2174     in
2175       write'EA(BitsN.bits(63,0) (XMM(xmm_reg xmm)),x)
2176     end
2177   | (true,false) =>
2178     let
2179       val x = xmm_reg xmm
2180     in
2181       write'XMM
2182         (BitsN.zeroExtend 128 (BitsN.bits(31,0) (EA(ea_Zrm(Z32,rm)))),x)
2183     end
2184   | (true,true) =>
2185     let
2186       val x = xmm_reg xmm
2187     in
2188       write'XMM(BitsN.zeroExtend 128 (EA(ea_Zrm(Z64,rm))),x)
2189     end;
2190
2191fun dfn'MOVQ (dst,src) =
2192  case dst of
2193     xmm_reg _ =>
2194       write'XMM(BitsN.zeroExtend 128 (BitsN.bits(63,0) (XMM src)),dst)
2195   | xmm_mem m =>
2196     let
2197       val x = ea_Zrm(Z64,Zm m)
2198     in
2199       write'EA(BitsN.bits(63,0) (XMM src),x)
2200     end;
2201
2202fun dfn'MOVSD (dst,src) =
2203  case src of
2204     xmm_reg _ =>
2205       let
2206         val w = XMM dst
2207       in
2208         write'XMM
2209           (BitsN.bitFieldInsert(63,0) (w,BitsN.bits(63,0) (XMM src)),dst)
2210       end
2211   | xmm_mem _ =>
2212     write'XMM(BitsN.zeroExtend 128 (BitsN.bits(63,0) (XMM src)),dst);
2213
2214fun dfn'MOVSS (dst,src) =
2215  case src of
2216     xmm_reg _ =>
2217       let
2218         val w = XMM dst
2219       in
2220         write'XMM
2221           (BitsN.bitFieldInsert(31,0) (w,BitsN.bits(31,0) (XMM src)),dst)
2222       end
2223   | xmm_mem _ =>
2224     write'XMM(BitsN.zeroExtend 128 (BitsN.bits(31,0) (XMM src)),dst);
2225
2226fun dfn'PCMPEQQ (dst,src) =
2227  let
2228    val dst = xmm_reg dst
2229    val x = XMM dst
2230    val x0 = XMM src
2231  in
2232    write'XMM
2233      (BitsN.@@
2234         (if (BitsN.bits(127,64) x) = (BitsN.bits(127,64) x0)
2235            then BitsN.neg(BitsN.B(0x1,64))
2236          else BitsN.B(0x0,64),
2237          if (BitsN.bits(63,0) x) = (BitsN.bits(63,0) x0)
2238            then BitsN.neg(BitsN.B(0x1,64))
2239          else BitsN.B(0x0,64)),dst)
2240  end;
2241
2242fun dfn'PSLLDQ (dst,imm) =
2243  let
2244    val dst = xmm_reg dst
2245  in
2246    write'XMM(BitsN.<<(XMM dst,Nat.*(Nat.min(16,BitsN.toNat imm),8)),dst)
2247  end;
2248
2249fun dfn'PSLLD_imm (dst,imm) =
2250  let
2251    val dst = xmm_reg dst
2252    val x = XMM dst
2253    val i = BitsN.toNat imm
2254  in
2255    write'XMM
2256      (BitsN.concat
2257         [BitsN.<<(BitsN.bits(127,96) x,i),
2258          BitsN.<<(BitsN.bits(95,64) x,i),BitsN.<<(BitsN.bits(63,32) x,i),
2259          BitsN.<<(BitsN.bits(31,0) x,i)],dst)
2260  end;
2261
2262fun dfn'PSLLQ_imm (dst,imm) =
2263  let
2264    val dst = xmm_reg dst
2265    val x = XMM dst
2266    val i = BitsN.toNat imm
2267  in
2268    write'XMM
2269      (BitsN.@@
2270         (BitsN.<<(BitsN.bits(127,64) x,i),BitsN.<<(BitsN.bits(63,0) x,i)),
2271       dst)
2272  end;
2273
2274fun dfn'PSLLW_imm (dst,imm) =
2275  let
2276    val dst = xmm_reg dst
2277    val x = XMM dst
2278    val i = BitsN.toNat imm
2279  in
2280    write'XMM
2281      (BitsN.concat
2282         [BitsN.<<(BitsN.bits(127,112) x,i),
2283          BitsN.<<(BitsN.bits(111,96) x,i),
2284          BitsN.<<(BitsN.bits(95,80) x,i),BitsN.<<(BitsN.bits(79,64) x,i),
2285          BitsN.<<(BitsN.bits(63,48) x,i),BitsN.<<(BitsN.bits(47,32) x,i),
2286          BitsN.<<(BitsN.bits(31,16) x,i),BitsN.<<(BitsN.bits(15,0) x,i)],
2287       dst)
2288  end;
2289
2290fun dfn'PSRAD_imm (dst,imm) =
2291  let
2292    val dst = xmm_reg dst
2293    val x = XMM dst
2294    val i = BitsN.toNat imm
2295  in
2296    write'XMM
2297      (BitsN.concat
2298         [BitsN.>>(BitsN.bits(127,96) x,i),
2299          BitsN.>>(BitsN.bits(95,64) x,i),BitsN.>>(BitsN.bits(63,32) x,i),
2300          BitsN.>>(BitsN.bits(31,0) x,i)],dst)
2301  end;
2302
2303fun dfn'PSRAW_imm (dst,imm) =
2304  let
2305    val dst = xmm_reg dst
2306    val x = XMM dst
2307    val i = BitsN.toNat imm
2308  in
2309    write'XMM
2310      (BitsN.concat
2311         [BitsN.>>(BitsN.bits(127,112) x,i),
2312          BitsN.>>(BitsN.bits(111,96) x,i),
2313          BitsN.>>(BitsN.bits(95,80) x,i),BitsN.>>(BitsN.bits(79,64) x,i),
2314          BitsN.>>(BitsN.bits(63,48) x,i),BitsN.>>(BitsN.bits(47,32) x,i),
2315          BitsN.>>(BitsN.bits(31,16) x,i),BitsN.>>(BitsN.bits(15,0) x,i)],
2316       dst)
2317  end;
2318
2319fun dfn'PSRLDQ (dst,imm) =
2320  let
2321    val dst = xmm_reg dst
2322  in
2323    write'XMM(BitsN.>>+(XMM dst,Nat.*(Nat.min(16,BitsN.toNat imm),8)),dst)
2324  end;
2325
2326fun dfn'PSRLD_imm (dst,imm) =
2327  let
2328    val dst = xmm_reg dst
2329    val x = XMM dst
2330    val i = BitsN.toNat imm
2331  in
2332    write'XMM
2333      (BitsN.concat
2334         [BitsN.>>+(BitsN.bits(127,96) x,i),
2335          BitsN.>>+(BitsN.bits(95,64) x,i),
2336          BitsN.>>+(BitsN.bits(63,32) x,i),BitsN.>>+(BitsN.bits(31,0) x,i)],
2337       dst)
2338  end;
2339
2340fun dfn'PSRLQ_imm (dst,imm) =
2341  let
2342    val dst = xmm_reg dst
2343    val x = XMM dst
2344    val i = BitsN.toNat imm
2345  in
2346    write'XMM
2347      (BitsN.@@
2348         (BitsN.>>+(BitsN.bits(127,64) x,i),
2349          BitsN.>>+(BitsN.bits(63,0) x,i)),dst)
2350  end;
2351
2352fun dfn'PSRLW_imm (dst,imm) =
2353  let
2354    val dst = xmm_reg dst
2355    val x = XMM dst
2356    val i = BitsN.toNat imm
2357  in
2358    write'XMM
2359      (BitsN.concat
2360         [BitsN.>>+(BitsN.bits(127,112) x,i),
2361          BitsN.>>+(BitsN.bits(111,96) x,i),
2362          BitsN.>>+(BitsN.bits(95,80) x,i),
2363          BitsN.>>+(BitsN.bits(79,64) x,i),
2364          BitsN.>>+(BitsN.bits(63,48) x,i),
2365          BitsN.>>+(BitsN.bits(47,32) x,i),
2366          BitsN.>>+(BitsN.bits(31,16) x,i),BitsN.>>+(BitsN.bits(15,0) x,i)],
2367       dst)
2368  end;
2369
2370fun dfn'SQRTPD (dst,src) =
2371  let
2372    val x = XMM src
2373    val (f1,r1) = sse_sqrt64(BitsN.bits(127,64) x)
2374    val (f2,r2) = sse_sqrt64(BitsN.bits(63,0) x)
2375  in
2376    ( process_float_flags[f1,f2]
2377    ; let val x = xmm_reg dst in write'XMM(BitsN.@@(r1,r2),x) end
2378    )
2379  end;
2380
2381fun dfn'SQRTSD (dst,src) =
2382  let
2383    val x = XMM src
2384    val (f,r) = sse_sqrt64(BitsN.bits(63,0) x)
2385  in
2386    ( process_float_flags[f]
2387    ; let
2388        val x = xmm_reg dst
2389        val w = XMM x
2390      in
2391        write'XMM(BitsN.bitFieldInsert(63,0) (w,r),x)
2392      end
2393    )
2394  end;
2395
2396fun dfn'SQRTPS (dst,src) =
2397  let
2398    val x = XMM src
2399    val (f1,r1) = sse_sqrt32(BitsN.bits(127,96) x)
2400    val (f2,r2) = sse_sqrt32(BitsN.bits(95,64) x)
2401    val (f3,r3) = sse_sqrt32(BitsN.bits(63,32) x)
2402    val (f4,r4) = sse_sqrt32(BitsN.bits(31,0) x)
2403  in
2404    ( process_float_flags[f1,f2,f3,f4]
2405    ; let
2406        val x = xmm_reg dst
2407      in
2408        write'XMM(BitsN.concat[r1,r2,r3,r4],x)
2409      end
2410    )
2411  end;
2412
2413fun dfn'SQRTSS (dst,src) =
2414  let
2415    val x = XMM src
2416    val (f,r) = sse_sqrt32(BitsN.bits(31,0) x)
2417  in
2418    ( process_float_flags[f]
2419    ; let
2420        val x = xmm_reg dst
2421        val w = XMM x
2422      in
2423        write'XMM(BitsN.bitFieldInsert(31,0) (w,r),x)
2424      end
2425    )
2426  end;
2427
2428fun dfn'Zbinop (bop,(size,dst_src)) =
2429  let
2430    val (ea,(val_dst,val_src)) = read_dest_src_ea(size,dst_src)
2431  in
2432    write_binop(size,(bop,(val_dst,(val_src,ea))))
2433  end;
2434
2435fun dfn'Zbit_test (bt,(size,dst_src)) =
2436  let
2437    val ea_src = ea_Zsrc(size,dst_src)
2438    val offset = EA ea_src
2439  in
2440    case dst_src of
2441       Zr_rm _ => UD_exception ()
2442     | Zrm_r(Zr _,_) =>
2443       bit_test
2444         (bt,(ea_Zdest(size,dst_src),BitsN.toNat(modSize(size,offset))))
2445     | Zrm_i(Zr _,_) =>
2446       bit_test
2447         (bt,(ea_Zdest(size,dst_src),BitsN.toNat(modSize(size,offset))))
2448     | Zrm_i(Zm(si,(base,disp)),_) =>
2449       let
2450         val offset = modSize(size,offset)
2451         val bit_base =
2452           ea_Zrm
2453             (Z8 false,
2454              Zm(si,
2455                 (base,BitsN.+(disp,BitsN.sdiv(offset,BitsN.B(0x8,64))))))
2456       in
2457         bit_test
2458           (bt,(bit_base,BitsN.toNat(BitsN.smod(offset,BitsN.B(0x8,64)))))
2459       end
2460     | Zrm_r(Zm(si,(base,disp)),_) =>
2461       let
2462         val offset = SignExtension64(offset,size)
2463         val bit_base =
2464           ea_Zrm
2465             (Z8 false,
2466              Zm(si,
2467                 (base,BitsN.+(disp,BitsN.sdiv(offset,BitsN.B(0x8,64))))))
2468       in
2469         bit_test
2470           (bt,(bit_base,BitsN.toNat(BitsN.smod(offset,BitsN.B(0x8,64)))))
2471       end
2472  end;
2473
2474fun dfn'Zcall imm_rm = ( x64_push_rip (); jump_to_ea(ea_Zimm_rm imm_rm) );
2475
2476fun dfn'Zcmpxchg (size,(rm,r)) =
2477  let
2478    val ea_src = Zea_r(size,r)
2479    val ea_acc = Zea_r(size,RAX)
2480    val ea_dst = ea_Zrm(size,rm)
2481    val val_dst = EA ea_dst
2482    val acc = EA ea_src
2483  in
2484    ( write_binop(size,(Zcmp,(acc,(val_dst,ea_src))))
2485    ; if acc = val_dst
2486        then write'EA(EA ea_src,ea_dst)
2487      else write'EA(val_dst,ea_acc)
2488    )
2489  end;
2490
2491fun dfn'Zdiv (size,rm) =
2492  let
2493    val w = value_width size
2494    val ea_eax = Zea_r(size,RAX)
2495    val ea_edx = Zea_r(size,RDX)
2496    val ea_rm = ea_Zrm(size,rm)
2497    val n = Nat.+(Nat.*(BitsN.toNat(EA ea_edx),w),BitsN.toNat(EA ea_eax))
2498    val d = BitsN.toNat(EA ea_rm)
2499    val q = Nat.div(n,d)
2500    val r = Nat.mod(n,d)
2501  in
2502    ( if (d = 0) orelse (Nat.<=(w,q)) then DE_exception () else ()
2503    ; write'EA(BitsN.fromNat(q,64),ea_eax)
2504    ; write'EA(BitsN.fromNat(r,64),ea_edx)
2505    ; erase_eflags ()
2506    )
2507  end;
2508
2509fun dfn'Zidiv (size,rm) =
2510  let
2511    val w = Nat.toInt(value_width size)
2512    val ea_eax = Zea_r(size,RAX)
2513    val ea_edx = Zea_r(size,RDX)
2514    val ea_rm = ea_Zrm(size,rm)
2515    val n =
2516      IntInf.+
2517        (IntInf.*(BitsN.toInt(SignExtension64(EA ea_edx,size)),w),
2518         Nat.toInt(BitsN.toNat(EA ea_eax)))
2519    val d = BitsN.toInt(SignExtension64(EA ea_rm,size))
2520    val q = IntInf.quot(n,d)
2521    val r = IntInf.rem(n,d)
2522  in
2523    ( if (d = 0) orelse
2524         ((IntInf.<(q,IntInf.~(IntInf.div(w,2)))) orelse
2525          (IntInf.<=(IntInf.div(w,2),q)))
2526        then DE_exception ()
2527      else ()
2528    ; write'EA(BitsN.fromInt(q,64),ea_eax)
2529    ; write'EA(BitsN.fromInt(r,64),ea_edx)
2530    ; erase_eflags ()
2531    )
2532  end;
2533
2534fun dfn'Zjcc (cond,imm) =
2535  if read_cond cond then RIP := (BitsN.+((!RIP),imm)) else ();
2536
2537fun dfn'Zjmp rm = RIP := (EA(ea_Zrm(Z64,rm)));
2538
2539fun dfn'Zlea (size,dst_src) =
2540  let
2541    val ea_src = ea_Zsrc(size,dst_src)
2542    val ea_dst = ea_Zdest(size,dst_src)
2543  in
2544    write'EA(get_ea_address ea_src,ea_dst)
2545  end;
2546
2547fun dfn'Zleave () =
2548  ( REG :=
2549    (Map.update
2550       ((!REG),Cast.ZregToNat RSP,Map.lookup((!REG),Cast.ZregToNat RBP)))
2551  ; x64_pop(Zr RBP)
2552  );
2553
2554fun dfn'Zloop (cond,imm) =
2555  let
2556    val ecx1 =
2557      BitsN.-(Map.lookup((!REG),Cast.ZregToNat RCX),BitsN.B(0x1,64))
2558  in
2559    ( REG := (Map.update((!REG),Cast.ZregToNat RCX,ecx1))
2560    ; if (not(ecx1 = (BitsN.B(0x0,64)))) andalso (read_cond cond)
2561        then RIP := (BitsN.+((!RIP),imm))
2562      else ()
2563    )
2564  end;
2565
2566fun dfn'Zmonop (mop,(size,rm)) =
2567  let val ea = ea_Zrm(size,rm) in write_monop(size,(mop,(EA ea,ea))) end;
2568
2569fun dfn'Zmov (cond,(size,dst_src)) =
2570  if read_cond cond
2571    then let
2572           val ea_src = ea_Zsrc(size,dst_src)
2573           val ea_dst = ea_Zdest(size,dst_src)
2574         in
2575           write'EA(EA ea_src,ea_dst)
2576         end
2577  else ();
2578
2579fun dfn'Zmovsx (size1,(dst_src,size2)) =
2580  let
2581    val x = ea_Zdest(size2,dst_src)
2582  in
2583    write'EA(SignExtension(EA(ea_Zsrc(size1,dst_src)),(size1,size2)),x)
2584  end;
2585
2586fun dfn'Zmovzx (size1,(dst_src,size2)) =
2587  let
2588    val x = ea_Zdest(size2,dst_src)
2589  in
2590    write'EA(EA(ea_Zsrc(size1,dst_src)),x)
2591  end;
2592
2593fun dfn'Zmul (size,rm) =
2594  let
2595    val ea_eax = Zea_r(size,RAX)
2596    val eax = EA ea_eax
2597    val val_src = EA(ea_Zrm(size,rm))
2598  in
2599    ( case size of
2600         Z8 _ =>
2601           let
2602             val x = Zea_r(Z16,RAX)
2603           in
2604             write'EA(BitsN.*(eax,val_src),x)
2605           end
2606       | _ =>
2607         ( write'EA(BitsN.*(eax,val_src),ea_eax)
2608         ; let
2609             val ea_edx = Zea_r(size,RDX)
2610           in
2611             write'EA
2612               (BitsN.fromNat
2613                  (Nat.div
2614                     (Nat.*(BitsN.toNat eax,BitsN.toNat val_src),
2615                      value_width size),64),ea_edx)
2616           end
2617         )
2618    ; erase_eflags ()
2619    )
2620  end;
2621
2622fun dfn'Zimul (size,rm) =
2623  let
2624    val ea_eax = Zea_r(size,RAX)
2625    val eax = SignExtension64(EA ea_eax,size)
2626    val val_src = SignExtension64(EA(ea_Zrm(size,rm)),size)
2627    val product = IntInf.*(BitsN.toInt eax,BitsN.toInt val_src)
2628    val product64 = BitsN.fromInt(product,64)
2629  in
2630    ( erase_eflags ()
2631    ; let
2632        val overflow =
2633          not(if size = Z64
2634                then (BitsN.toInt product64) = product
2635              else (SignExtension64(product64,size)) = product64)
2636      in
2637        ( write'CF overflow
2638        ; write'OF overflow
2639        ; case size of
2640             Z8 _ =>
2641               let val x = Zea_r(Z16,RAX) in write'EA(product64,x) end
2642           | _ =>
2643             ( write'EA(product64,ea_eax)
2644             ; let
2645                 val ea_edx = Zea_r(size,RDX)
2646               in
2647                 write'EA
2648                   (BitsN.fromInt
2649                      (IntInf.div(product,Nat.toInt(value_width size)),64),
2650                    ea_edx)
2651               end
2652             )
2653        )
2654      end
2655    )
2656  end;
2657
2658fun dfn'Zimul2 (size,(r,rm)) =
2659  let
2660    val ea_dst = Zea_r(size,r)
2661    val val_dst = SignExtension64(EA ea_dst,size)
2662    val val_src = SignExtension64(EA(ea_Zrm(size,rm)),size)
2663    val product = IntInf.*(BitsN.toInt val_dst,BitsN.toInt val_src)
2664    val product64 = BitsN.fromInt(product,64)
2665  in
2666    ( erase_eflags ()
2667    ; let
2668        val overflow =
2669          not(if size = Z64
2670                then (BitsN.toInt product64) = product
2671              else (SignExtension64(product64,size)) = product64)
2672      in
2673        ( write'CF overflow
2674        ; write'OF overflow
2675        ; write'EA(product64,ea_dst)
2676        )
2677      end
2678    )
2679  end;
2680
2681fun dfn'Zimul3 (size,(r,(rm,imm))) =
2682  let
2683    val val_src = SignExtension64(EA(ea_Zrm(size,rm)),size)
2684    val product = IntInf.*(BitsN.toInt val_src,BitsN.toInt imm)
2685    val product64 = BitsN.fromInt(product,64)
2686  in
2687    ( erase_eflags ()
2688    ; let
2689        val overflow =
2690          not(if size = Z64
2691                then (BitsN.toInt product64) = product
2692              else (SignExtension64(product64,size)) = product64)
2693      in
2694        ( write'CF overflow
2695        ; write'OF overflow
2696        ; let val x = Zea_r(size,r) in write'EA(product64,x) end
2697        )
2698      end
2699    )
2700  end;
2701
2702fun dfn'Znop n = ();
2703
2704fun dfn'Zpop rm = x64_pop rm;
2705
2706fun dfn'Zpush imm_rm = x64_push imm_rm;
2707
2708fun dfn'Zret imm = ( x64_pop_rip (); x64_drop imm );
2709
2710fun dfn'Zset (cond,(have_rex,rm)) =
2711  let
2712    val x = ea_Zrm(Z8 have_rex,rm)
2713  in
2714    write'EA(BitsN.fromBool 64 (read_cond cond),x)
2715  end;
2716
2717fun dfn'Zxadd (size,(rm,r)) =
2718  let
2719    val ea_src = Zea_r(size,r)
2720    val ea_dst = ea_Zrm(size,rm)
2721    val val_src = EA ea_src
2722    val val_dst = EA ea_dst
2723  in
2724    ( write'EA(val_dst,ea_src)
2725    ; write_binop(size,(Zadd,(val_src,(val_dst,ea_dst))))
2726    )
2727  end;
2728
2729fun dfn'Zxchg (size,(rm,r)) =
2730  let
2731    val ea_src = Zea_r(size,r)
2732    val ea_dst = ea_Zrm(size,rm)
2733    val val_src = EA ea_src
2734    val val_dst = EA ea_dst
2735  in
2736    ( write'EA(val_dst,ea_src); write'EA(val_src,ea_dst) )
2737  end;
2738
2739fun dfn'Zcmc () = write'CF(not(CF ()));
2740
2741fun dfn'Zclc () = write'CF false;
2742
2743fun dfn'Zstc () = write'CF true;
2744
2745fun Run v0 =
2746  case v0 of
2747     Zclc => dfn'Zclc ()
2748   | Zcmc => dfn'Zcmc ()
2749   | Zleave => dfn'Zleave ()
2750   | Zstc => dfn'Zstc ()
2751   | Zbinop v47 => dfn'Zbinop v47
2752   | Zbit_test v48 => dfn'Zbit_test v48
2753   | Zcall v49 => dfn'Zcall v49
2754   | Zcmpxchg v50 => dfn'Zcmpxchg v50
2755   | Zdiv v51 => dfn'Zdiv v51
2756   | Zidiv v52 => dfn'Zidiv v52
2757   | Zimul v53 => dfn'Zimul v53
2758   | Zimul2 v54 => dfn'Zimul2 v54
2759   | Zimul3 v55 => dfn'Zimul3 v55
2760   | Zjcc v56 => dfn'Zjcc v56
2761   | Zjmp v57 => dfn'Zjmp v57
2762   | Zlea v58 => dfn'Zlea v58
2763   | Zloop v59 => dfn'Zloop v59
2764   | Zmonop v60 => dfn'Zmonop v60
2765   | Zmov v61 => dfn'Zmov v61
2766   | Zmovsx v62 => dfn'Zmovsx v62
2767   | Zmovzx v63 => dfn'Zmovzx v63
2768   | Zmul v64 => dfn'Zmul v64
2769   | Znop v65 => dfn'Znop v65
2770   | Zpop v66 => dfn'Zpop v66
2771   | Zpush v67 => dfn'Zpush v67
2772   | Zret v68 => dfn'Zret v68
2773   | Zset v69 => dfn'Zset v69
2774   | Zxadd v70 => dfn'Zxadd v70
2775   | Zxchg v71 => dfn'Zxchg v71
2776   | SSE v1 =>
2777     (case v1 of
2778         CMPPD v2 => dfn'CMPPD v2
2779       | CMPPS v3 => dfn'CMPPS v3
2780       | CMPSD v4 => dfn'CMPSD v4
2781       | CMPSS v5 => dfn'CMPSS v5
2782       | COMISD v6 => dfn'COMISD v6
2783       | COMISS v7 => dfn'COMISS v7
2784       | CVTDQ2PD v8 => dfn'CVTDQ2PD v8
2785       | CVTDQ2PS v9 => dfn'CVTDQ2PS v9
2786       | CVTPD2DQ v10 => dfn'CVTPD2DQ v10
2787       | CVTPD2PS v11 => dfn'CVTPD2PS v11
2788       | CVTPS2DQ v12 => dfn'CVTPS2DQ v12
2789       | CVTPS2PD v13 => dfn'CVTPS2PD v13
2790       | CVTSD2SI v14 => dfn'CVTSD2SI v14
2791       | CVTSD2SS v15 => dfn'CVTSD2SS v15
2792       | CVTSI2SD v16 => dfn'CVTSI2SD v16
2793       | CVTSI2SS v17 => dfn'CVTSI2SS v17
2794       | CVTSS2SD v18 => dfn'CVTSS2SD v18
2795       | CVTSS2SI v19 => dfn'CVTSS2SI v19
2796       | MOVAP_D_S v20 => dfn'MOVAP_D_S v20
2797       | MOVQ v21 => dfn'MOVQ v21
2798       | MOVSD v22 => dfn'MOVSD v22
2799       | MOVSS v23 => dfn'MOVSS v23
2800       | MOVUP_D_S v24 => dfn'MOVUP_D_S v24
2801       | MOV_D_Q v25 => dfn'MOV_D_Q v25
2802       | PCMPEQQ v26 => dfn'PCMPEQQ v26
2803       | PSLLDQ v27 => dfn'PSLLDQ v27
2804       | PSLLD_imm v28 => dfn'PSLLD_imm v28
2805       | PSLLQ_imm v29 => dfn'PSLLQ_imm v29
2806       | PSLLW_imm v30 => dfn'PSLLW_imm v30
2807       | PSRAD_imm v31 => dfn'PSRAD_imm v31
2808       | PSRAW_imm v32 => dfn'PSRAW_imm v32
2809       | PSRLDQ v33 => dfn'PSRLDQ v33
2810       | PSRLD_imm v34 => dfn'PSRLD_imm v34
2811       | PSRLQ_imm v35 => dfn'PSRLQ_imm v35
2812       | PSRLW_imm v36 => dfn'PSRLW_imm v36
2813       | SQRTPD v37 => dfn'SQRTPD v37
2814       | SQRTPS v38 => dfn'SQRTPS v38
2815       | SQRTSD v39 => dfn'SQRTSD v39
2816       | SQRTSS v40 => dfn'SQRTSS v40
2817       | bin_PD v41 => dfn'bin_PD v41
2818       | bin_PS v42 => dfn'bin_PS v42
2819       | bin_SD v43 => dfn'bin_SD v43
2820       | bin_SS v44 => dfn'bin_SS v44
2821       | logic_PD v45 => dfn'logic_PD v45
2822       | logic_PS v46 => dfn'logic_PS v46);
2823
2824fun oimmediate8 strm =
2825  case strm of
2826     Option.SOME(b :: t) => (BitsN.signExtend 64 b,Option.SOME t)
2827   | _ => (BitsN.B(0x0,64),NONE);
2828
2829fun immediate8 strm = oimmediate8(Option.SOME strm);
2830
2831fun immediate16 strm =
2832  case strm of
2833     b1 :: (b2 :: t) =>
2834       (BitsN.signExtend 64 (BitsN.@@(b2,b1)),Option.SOME t)
2835   | _ => (BitsN.B(0x0,64),NONE);
2836
2837fun immediate32 strm =
2838  case strm of
2839     b1 :: (b2 :: (b3 :: (b4 :: t))) =>
2840       (BitsN.signExtend 64 (BitsN.concat[b4,b3,b2,b1]),Option.SOME t)
2841   | _ => (BitsN.B(0x0,64),NONE);
2842
2843fun immediate64 strm =
2844  case strm of
2845     b1 :: (b2 :: (b3 :: (b4 :: (b5 :: (b6 :: (b7 :: (b8 :: t))))))) =>
2846       (BitsN.concat[b8,b7,b6,b5,b4,b3,b2,b1],Option.SOME t)
2847   | _ => (BitsN.B(0x0,64),NONE);
2848
2849fun immediate (size,strm) =
2850  case size of
2851     Z8 _ => immediate8 strm
2852   | Z16 => immediate16 strm
2853   | _ => immediate32 strm;
2854
2855fun oimmediate (size,strm) =
2856  case strm of
2857     Option.SOME s => immediate(size,s)
2858   | NONE => (BitsN.B(0x0,64),NONE);
2859
2860fun full_immediate (size,strm) =
2861  if size = Z64 then immediate64 strm else immediate(size,strm);
2862
2863fun rec'REX x =
2864  {B = BitsN.bit(x,0), R = BitsN.bit(x,2), W = BitsN.bit(x,3),
2865   X = BitsN.bit(x,1)};
2866
2867fun reg'REX x =
2868  case x of
2869     {B = B, R = R, W = W, X = X} =>
2870       BitsN.concat
2871         [BitsN.fromBit W,BitsN.fromBit R,BitsN.fromBit X,BitsN.fromBit B];
2872
2873fun write'rec'REX (_,x) = reg'REX x;
2874
2875fun write'reg'REX (_,x) = rec'REX x;
2876
2877fun RexReg (b,r) =
2878  (Cast.natToZreg o BitsN.toNat) (BitsN.@@(BitsN.fromBit b,r));
2879
2880fun readDisplacement (Mod,strm) =
2881  if Mod = (BitsN.B(0x1,2))
2882    then immediate8 strm
2883  else if Mod = (BitsN.B(0x2,2))
2884    then immediate32 strm
2885  else (BitsN.B(0x0,64),Option.SOME strm);
2886
2887fun readSibDisplacement (Mod,strm) =
2888  if Mod = (BitsN.B(0x1,2)) then immediate8 strm else immediate32 strm;
2889
2890fun readSIB (REX,(Mod,strm)) =
2891  case strm of
2892     v'0 :: v'1 =>
2893       (case (boolify'8 v'0,v'1) of
2894           ((SS'1,
2895             (SS'0,(Index'2,(Index'1,(Index'0,(Base'2,(Base'1,Base'0))))))),
2896            strm1) =>
2897             let
2898               val base =
2899                 RexReg
2900                   (#B(REX : REX),
2901                    BitsN.fromBitstring([Base'2,Base'1,Base'0],3))
2902               val index =
2903                 RexReg
2904                   (#X(REX : REX),
2905                    BitsN.fromBitstring([Index'2,Index'1,Index'0],3))
2906               val scaled_index =
2907                 if index = RSP
2908                   then NONE
2909                 else Option.SOME
2910                        (BitsN.fromBitstring([SS'1,SS'0],2),index)
2911             in
2912               if base = RBP
2913                 then let
2914                        val (displacement,strm2) =
2915                          readSibDisplacement(Mod,strm1)
2916                        val base =
2917                          if Mod = (BitsN.B(0x0,2))
2918                            then ZnoBase
2919                          else ZregBase base
2920                      in
2921                        (Zm(scaled_index,(base,displacement)),strm2)
2922                      end
2923               else let
2924                      val (displacement,strm2) =
2925                        readDisplacement(Mod,strm1)
2926                    in
2927                      (Zm(scaled_index,(ZregBase base,displacement)),strm2)
2928                    end
2929             end)
2930   | _ => (Zm(NONE,(ZnoBase,BitsN.B(0x0,64))),NONE);
2931
2932fun readModRM (REX,strm) =
2933  case strm of
2934     v'0 :: v'1 =>
2935       (case (boolify'8 v'0,v'1) of
2936           ((false,
2937             (false,(RegOpc'2,(RegOpc'1,(RegOpc'0,(true,(false,true))))))),
2938            strm1) =>
2939             let
2940               val (displacement,strm2) = immediate32 strm1
2941             in
2942               (RexReg
2943                  (#R(REX : REX),
2944                   BitsN.fromBitstring([RegOpc'2,RegOpc'1,RegOpc'0],3)),
2945                (Zm(NONE,(ZripBase,displacement)),strm2))
2946             end
2947         | ((true,(true,(REG'2,(REG'1,(REG'0,(RM'2,(RM'1,RM'0))))))),strm1) =>
2948           (RexReg
2949              (#R(REX : REX),BitsN.fromBitstring([REG'2,REG'1,REG'0],3)),
2950            (Zr(RexReg
2951                  (#B(REX : REX),BitsN.fromBitstring([RM'2,RM'1,RM'0],3))),
2952             Option.SOME strm1))
2953         | ((Mod'1,
2954           (Mod'0,(RegOpc'2,(RegOpc'1,(RegOpc'0,(true,(false,false))))))),
2955          strm1) =>
2956           let
2957             val (sib,strm2) =
2958               readSIB(REX,(BitsN.fromBitstring([Mod'1,Mod'0],2),strm1))
2959           in
2960             (RexReg
2961                (#R(REX : REX),
2962                 BitsN.fromBitstring([RegOpc'2,RegOpc'1,RegOpc'0],3)),
2963              (sib,strm2))
2964           end
2965         | ((Mod'1,
2966           (Mod'0,(RegOpc'2,(RegOpc'1,(RegOpc'0,(RM'2,(RM'1,RM'0))))))),
2967          strm1) =>
2968           let
2969             val (displacement,strm2) =
2970               readDisplacement
2971                 (BitsN.fromBitstring([Mod'1,Mod'0],2),strm1)
2972           in
2973             (RexReg
2974                (#R(REX : REX),
2975                 BitsN.fromBitstring([RegOpc'2,RegOpc'1,RegOpc'0],3)),
2976              (Zm(NONE,
2977                  (ZregBase
2978                     (RexReg
2979                        (#B(REX : REX),
2980                         BitsN.fromBitstring([RM'2,RM'1,RM'0],3))),
2981                   displacement)),strm2))
2982           end)
2983   | _ => (RAX,(Zm(NONE,(ZnoBase,BitsN.B(0x0,64))),NONE));
2984
2985fun readOpcodeModRM (REX,strm) =
2986  let
2987    val (opcode,(rm,strm1)) = readModRM(REX,strm)
2988  in
2989    (BitsN.fromNat(Nat.mod(Cast.ZregToNat opcode,8),3),(rm,strm1))
2990  end;
2991
2992fun prefixGroup b =
2993  case b of
2994     BitsN.B(0xF0,_) => 1
2995   | BitsN.B(0xF2,_) => 1
2996   | BitsN.B(0xF3,_) => 1
2997   | BitsN.B(0x26,_) => 2
2998   | BitsN.B(0x2E,_) => 2
2999   | BitsN.B(0x36,_) => 2
3000   | BitsN.B(0x3E,_) => 2
3001   | BitsN.B(0x64,_) => 2
3002   | BitsN.B(0x65,_) => 2
3003   | BitsN.B(0x66,_) => 3
3004   | BitsN.B(0x67,_) => 4
3005   | _ => (if (BitsN.bits(7,4) b) = (BitsN.B(0x4,4)) then 5 else 0);
3006
3007fun readPrefix (s,(p,strm)) =
3008  case strm of
3009     h :: strm1 =>
3010       let
3011         val group = prefixGroup h
3012       in
3013         if group = 0
3014           then Option.SOME(p,(false,(rec'REX(BitsN.B(0x0,4)),strm)))
3015         else if group = 5
3016           then Option.SOME(p,(true,(rec'REX(BitsN.bits(3,0) h),strm1)))
3017         else if Set.mem(group,s)
3018           then NONE
3019         else readPrefix(Set.insert(group,s),(h :: p,strm1))
3020       end
3021   | [] =>
3022     Option.SOME
3023       (p,(false,({B = false, R = false, W = false, X = false},strm)));
3024
3025fun readPrefixes strm = readPrefix([],([],strm));
3026
3027fun OpSize (have_rex,(w,(v,override))) =
3028  if v = (BitsN.B(0x0,1))
3029    then Z8 have_rex
3030  else if w then Z64 else if override then Z16 else Z32;
3031
3032fun isZm rm = case rm of Zm _ => true | _ => false;
3033
3034fun x64_decode strm =
3035  case readPrefixes strm of
3036     NONE => Zdec_fail "Bad prefix"
3037   | Option.SOME(p,(have_rex,(REX,strm1))) =>
3038     let
3039       val prefixes = Set.mk p
3040       val op_size_override = Set.mem(BitsN.B(0x66,8),prefixes)
3041     in
3042       if Set.mem(BitsN.B(0x67,8),prefixes)
3043         then Zdec_fail "address override prefix not supported"
3044       else case strm1 of
3045               v'0 :: v'1 =>
3046                 (case (boolify'8 v'0,v'1) of
3047                     ((false,
3048                       (false,(opc'2,(opc'1,(opc'0,(false,(x'0,v'0))))))),
3049                      strm2) =>
3050                       let
3051                         val (reg,(rm,strm3)) = readModRM(REX,strm2)
3052                         val size =
3053                           OpSize
3054                             (have_rex,
3055                              (#W(REX : REX),
3056                               (BitsN.fromBitstring([v'0],1),
3057                                op_size_override)))
3058                         val binop =
3059                           (Cast.natToZbinop_name o BitsN.toNat)
3060                             (BitsN.fromBitstring([opc'2,opc'1,opc'0],3))
3061                         val src_dst =
3062                           if (BitsN.fromBitstring([x'0],1)) =
3063                              (BitsN.B(0x0,1))
3064                             then Zrm_r(rm,reg)
3065                           else Zr_rm(reg,rm)
3066                       in
3067                         Zfull_inst
3068                           (p,(Zbinop(binop,(size,src_dst)),strm3))
3069                       end
3070                   | ((false,
3071                     (false,(opc'2,(opc'1,(opc'0,(true,(false,v'0))))))),
3072                    strm2) =>
3073                     let
3074                       val size =
3075                         OpSize
3076                           (have_rex,
3077                            (#W(REX : REX),
3078                             (BitsN.fromBitstring([v'0],1),
3079                              op_size_override)))
3080                       val (imm,strm3) = immediate(size,strm2)
3081                     in
3082                       Zfull_inst
3083                         (p,
3084                          (Zbinop
3085                             ((Cast.natToZbinop_name o BitsN.toNat)
3086                                (BitsN.fromBitstring
3087                                   ([opc'2,opc'1,opc'0],3)),
3088                              (size,Zrm_i(Zr RAX,imm))),strm3))
3089                     end
3090                   | ((false,(true,(false,(true,(b'0,(r'2,(r'1,r'0))))))),
3091                    strm2) =>
3092                     let
3093                       val reg =
3094                         Zr((Cast.natToZreg o BitsN.toNat)
3095                              (BitsN.@@
3096                                 (BitsN.fromBit(#B(REX : REX)),
3097                                  BitsN.fromBitstring([r'2,r'1,r'0],3))))
3098                     in
3099                       Zfull_inst
3100                         (p,
3101                          (if (BitsN.fromBitstring([b'0],1)) =
3102                              (BitsN.B(0x0,1))
3103                             then Zpush(Zrm reg)
3104                           else Zpop reg,Option.SOME strm2))
3105                     end
3106                   | ((false,
3107                     (true,(true,(false,(false,(false,(true,true))))))),
3108                    strm2) =>
3109                     let
3110                       val (reg,(rm,strm3)) = readModRM(REX,strm2)
3111                     in
3112                       Zfull_inst
3113                         (p,(Zmovsx(Z32,(Zr_rm(reg,rm),Z64)),strm3))
3114                     end
3115                   | ((false,
3116                     (true,(true,(false,(true,(false,(b'0,false))))))),
3117                    strm2) =>
3118                     let
3119                       val (imm,strm3) =
3120                         if (BitsN.fromBitstring([b'0],1)) =
3121                            (BitsN.B(0x1,1))
3122                           then immediate8 strm2
3123                         else immediate32 strm2
3124                     in
3125                       Zfull_inst(p,(Zpush(Zimm imm),strm3))
3126                     end
3127                   | ((false,
3128                     (true,(true,(false,(true,(false,(b'0,true))))))),
3129                    strm2) =>
3130                     let
3131                       val size =
3132                         OpSize
3133                           (have_rex,
3134                            (#W(REX : REX),
3135                             (BitsN.B(0x1,1),op_size_override)))
3136                       val (reg,(rm,strm3)) = readModRM(REX,strm2)
3137                     in
3138                       case strm3 of
3139                          NONE => Zdec_fail "not enough bytes"
3140                        | Option.SOME s =>
3141                          let
3142                            val (imm,strm4) =
3143                              if (BitsN.fromBitstring([b'0],1)) =
3144                                 (BitsN.B(0x1,1))
3145                                then immediate8 s
3146                              else immediate32 s
3147                          in
3148                            Zfull_inst
3149                              (p,(Zimul3(size,(reg,(rm,imm))),strm4))
3150                          end
3151                     end
3152                   | ((false,(true,(true,(true,(c'3,(c'2,(c'1,c'0))))))),
3153                    strm2) =>
3154                     let
3155                       val (imm,strm3) = immediate8 strm2
3156                     in
3157                       Zfull_inst
3158                         (p,
3159                          (Zjcc
3160                             ((Cast.natToZcond o BitsN.toNat)
3161                                (BitsN.fromBitstring([c'3,c'2,c'1,c'0],4)),
3162                              imm),strm3))
3163                     end
3164                   | ((true,
3165                     (false,(false,(false,(false,(false,(false,v'0))))))),
3166                    strm2) =>
3167                     let
3168                       val size =
3169                         OpSize
3170                           (have_rex,
3171                            (#W(REX : REX),
3172                             (BitsN.fromBitstring([v'0],1),
3173                              op_size_override)))
3174                       val (opcode,(rm,strm3)) =
3175                         readOpcodeModRM(REX,strm2)
3176                       val (imm,strm4) = oimmediate(size,strm3)
3177                       val binop =
3178                         (Cast.natToZbinop_name o BitsN.toNat) opcode
3179                     in
3180                       Zfull_inst
3181                         (p,(Zbinop(binop,(size,Zrm_i(rm,imm))),strm4))
3182                     end
3183                   | ((true,
3184                     (false,(false,(false,(false,(false,(true,true))))))),
3185                    strm2) =>
3186                     let
3187                       val size =
3188                         OpSize
3189                           (false,
3190                            (#W(REX : REX),
3191                             (BitsN.B(0x1,1),op_size_override)))
3192                       val (opcode,(rm,strm3)) =
3193                         readOpcodeModRM(REX,strm2)
3194                       val (imm,strm4) = oimmediate8 strm3
3195                       val binop =
3196                         (Cast.natToZbinop_name o BitsN.toNat) opcode
3197                     in
3198                       Zfull_inst
3199                         (p,(Zbinop(binop,(size,Zrm_i(rm,imm))),strm4))
3200                     end
3201                   | ((true,
3202                     (false,(false,(false,(false,(true,(false,v'0))))))),
3203                    strm2) =>
3204                     let
3205                       val size =
3206                         OpSize
3207                           (have_rex,
3208                            (#W(REX : REX),
3209                             (BitsN.fromBitstring([v'0],1),
3210                              op_size_override)))
3211                       val (reg,(rm,strm3)) = readModRM(REX,strm2)
3212                     in
3213                       Zfull_inst
3214                         (p,(Zbinop(Ztest,(size,Zrm_r(rm,reg))),strm3))
3215                     end
3216                   | ((true,
3217                     (false,(false,(false,(false,(true,(true,v'0))))))),
3218                    strm2) =>
3219                     let
3220                       val size =
3221                         OpSize
3222                           (have_rex,
3223                            (#W(REX : REX),
3224                             (BitsN.fromBitstring([v'0],1),
3225                              op_size_override)))
3226                       val (reg,(rm,strm3)) = readModRM(REX,strm2)
3227                     in
3228                       Zfull_inst(p,(Zxchg(size,(rm,reg)),strm3))
3229                     end
3230                   | ((true,
3231                     (false,(false,(false,(true,(false,(x'0,v'0))))))),
3232                    strm2) =>
3233                     let
3234                       val (reg,(rm,strm3)) = readModRM(REX,strm2)
3235                       val size =
3236                         OpSize
3237                           (have_rex,
3238                            (#W(REX : REX),
3239                             (BitsN.fromBitstring([v'0],1),
3240                              op_size_override)))
3241                       val src_dst =
3242                         if (BitsN.fromBitstring([x'0],1)) =
3243                            (BitsN.B(0x0,1))
3244                           then Zrm_r(rm,reg)
3245                         else Zr_rm(reg,rm)
3246                     in
3247                       Zfull_inst(p,(Zmov(Z_ALWAYS,(size,src_dst)),strm3))
3248                     end
3249                   | ((true,
3250                     (false,(false,(false,(true,(true,(false,true))))))),
3251                    strm2) =>
3252                     let
3253                       val size =
3254                         OpSize
3255                           (true,
3256                            (#W(REX : REX),
3257                             (BitsN.B(0x1,1),op_size_override)))
3258                       val (reg,(rm,strm3)) = readModRM(REX,strm2)
3259                     in
3260                       if isZm rm
3261                         then Zfull_inst
3262                                (p,(Zlea(size,Zr_rm(reg,rm)),strm3))
3263                       else Zdec_fail "LEA with register argument"
3264                     end
3265                   | ((true,
3266                     (false,(false,(false,(true,(true,(true,true))))))),
3267                    strm2) =>
3268                     let
3269                       val (opcode,(rm,strm3)) =
3270                         readOpcodeModRM(REX,strm2)
3271                     in
3272                       if opcode = (BitsN.B(0x0,3))
3273                         then Zfull_inst(p,(Zpop rm,strm3))
3274                       else Zdec_fail "Unsupported opcode: Group 1a"
3275                     end
3276                   | ((true,(false,(false,(true,(false,(r'2,(r'1,r'0))))))),
3277                    strm2) =>
3278                     let
3279                       val size =
3280                         OpSize
3281                           (true,
3282                            (#W(REX : REX),
3283                             (BitsN.B(0x1,1),op_size_override)))
3284                       val reg =
3285                         RexReg
3286                           (#B(REX : REX),
3287                            BitsN.fromBitstring([r'2,r'1,r'0],3))
3288                     in
3289                       if reg = RAX
3290                         then Zfull_inst
3291                                (p,
3292                                 (Znop(L3.length strm),Option.SOME strm2))
3293                       else Zfull_inst
3294                              (p,
3295                               (Zxchg(size,(Zr RAX,reg)),Option.SOME strm2))
3296                     end
3297                   | ((true,
3298                     (false,(true,(false,(true,(false,(false,v'0))))))),
3299                    strm2) =>
3300                     let
3301                       val size =
3302                         OpSize
3303                           (true,
3304                            (#W(REX : REX),
3305                             (BitsN.fromBitstring([v'0],1),
3306                              op_size_override)))
3307                       val (imm,strm3) = immediate(size,strm2)
3308                     in
3309                       Zfull_inst
3310                         (p,(Zbinop(Ztest,(size,Zrm_i(Zr RAX,imm))),strm3))
3311                     end
3312                   | ((true,(false,(true,(true,(v'0,(r'2,(r'1,r'0))))))),
3313                    strm2) =>
3314                     let
3315                       val size =
3316                         OpSize
3317                           (have_rex,
3318                            (#W(REX : REX),
3319                             (BitsN.fromBitstring([v'0],1),
3320                              op_size_override)))
3321                       val (imm,strm3) = full_immediate(size,strm2)
3322                       val reg =
3323                         RexReg
3324                           (#B(REX : REX),
3325                            BitsN.fromBitstring([r'2,r'1,r'0],3))
3326                     in
3327                       Zfull_inst
3328                         (p,
3329                          (Zmov(Z_ALWAYS,(size,Zrm_i(Zr reg,imm))),strm3))
3330                     end
3331                   | ((true,
3332                     (true,(false,(false,(false,(false,(false,v'0))))))),
3333                    strm2) =>
3334                     let
3335                       val size =
3336                         OpSize
3337                           (have_rex,
3338                            (#W(REX : REX),
3339                             (BitsN.fromBitstring([v'0],1),
3340                              op_size_override)))
3341                       val (opcode,(rm,strm3)) =
3342                         readOpcodeModRM(REX,strm2)
3343                       val (imm,strm4) = oimmediate8 strm3
3344                       val binop =
3345                         Cast.natToZbinop_name
3346                           (Nat.+(BitsN.toNat opcode,8))
3347                     in
3348                       if opcode = (BitsN.B(0x6,3))
3349                         then Zdec_fail
3350                                "Unsupported opcode: Shift Group 2"
3351                       else Zfull_inst
3352                              (p,
3353                               (Zbinop(binop,(size,Zrm_i(rm,imm))),strm4))
3354                     end
3355                   | ((true,
3356                     (true,(false,(false,(false,(false,(true,v'0))))))),
3357                    strm2) =>
3358                     (if (BitsN.fromBitstring([v'0],1)) = (BitsN.B(0x0,1))
3359                        then let
3360                               val (imm,strm3) = immediate16 strm2
3361                             in
3362                               Zfull_inst(p,(Zret imm,strm3))
3363                             end
3364                      else Zfull_inst
3365                             (p,(Zret(BitsN.B(0x0,64)),Option.SOME strm2)))
3366                   | ((true,
3367                     (true,(false,(false,(false,(true,(true,v'0))))))),
3368                    strm2) =>
3369                     let
3370                       val size =
3371                         OpSize
3372                           (have_rex,
3373                            (#W(REX : REX),
3374                             (BitsN.fromBitstring([v'0],1),
3375                              op_size_override)))
3376                       val (opcode,(rm,strm3)) =
3377                         readOpcodeModRM(REX,strm2)
3378                       val (imm,strm4) = oimmediate(size,strm3)
3379                     in
3380                       if opcode = (BitsN.B(0x0,3))
3381                         then Zfull_inst
3382                                (p,
3383                                 (Zmov(Z_ALWAYS,(size,Zrm_i(rm,imm))),
3384                                  strm4))
3385                       else Zdec_fail "Unsupported opcode: Group 11"
3386                     end
3387                   | ((true,
3388                     (true,(false,(false,(true,(false,(false,true))))))),
3389                    strm2) => Zfull_inst(p,(Zleave,Option.SOME strm2))
3390                   | ((true,
3391                     (true,(false,(true,(false,(false,(b'0,v'0))))))),
3392                    strm2) =>
3393                     let
3394                       val size =
3395                         OpSize
3396                           (have_rex,
3397                            (#W(REX : REX),
3398                             (BitsN.fromBitstring([v'0],1),
3399                              op_size_override)))
3400                       val (opcode,(rm,strm3)) =
3401                         readOpcodeModRM(REX,strm2)
3402                       val shift =
3403                         if (BitsN.fromBitstring([b'0],1)) =
3404                            (BitsN.B(0x0,1))
3405                           then Zrm_i(rm,BitsN.B(0x1,64))
3406                         else Zrm_r(rm,RCX)
3407                       val binop =
3408                         Cast.natToZbinop_name
3409                           (Nat.+(BitsN.toNat opcode,8))
3410                     in
3411                       if opcode = (BitsN.B(0x6,3))
3412                         then Zdec_fail
3413                                "Unsupported opcode: Shift Group 2"
3414                       else Zfull_inst
3415                              (p,(Zbinop(binop,(size,shift)),strm3))
3416                     end
3417                   | ((true,
3418                     (true,(true,(false,(false,(false,(false,b'0))))))),
3419                    strm2) =>
3420                     let
3421                       val (imm,strm3) = immediate8 strm2
3422                       val cond =
3423                         if (BitsN.fromBitstring([b'0],1)) =
3424                            (BitsN.B(0x0,1))
3425                           then Z_NE
3426                         else Z_E
3427                     in
3428                       Zfull_inst(p,(Zloop(cond,imm),strm3))
3429                     end
3430                   | ((true,
3431                     (true,(true,(false,(false,(false,(true,false))))))),
3432                    strm2) =>
3433                     let
3434                       val (imm,strm3) = immediate8 strm2
3435                     in
3436                       Zfull_inst(p,(Zloop(Z_ALWAYS,imm),strm3))
3437                     end
3438                   | ((true,
3439                     (true,(true,(false,(true,(false,(false,false))))))),
3440                    strm2) =>
3441                     let
3442                       val (imm,strm3) = immediate32 strm2
3443                     in
3444                       Zfull_inst(p,(Zcall(Zimm imm),strm3))
3445                     end
3446                   | ((true,
3447                     (true,(true,(false,(true,(false,(b'0,true))))))),
3448                    strm2) =>
3449                     let
3450                       val (imm,strm3) =
3451                         if (BitsN.fromBitstring([b'0],1)) =
3452                            (BitsN.B(0x0,1))
3453                           then immediate32 strm2
3454                         else immediate8 strm2
3455                     in
3456                       Zfull_inst(p,(Zjcc(Z_ALWAYS,imm),strm3))
3457                     end
3458                   | ((true,
3459                     (true,(true,(true,(false,(true,(false,true))))))),
3460                    strm2) => Zfull_inst(p,(Zcmc,Option.SOME strm2))
3461                   | ((true,(true,(true,(true,(false,(true,(true,v'0))))))),
3462                    strm2) =>
3463                     let
3464                       val size =
3465                         OpSize
3466                           (have_rex,
3467                            (#W(REX : REX),
3468                             (BitsN.fromBitstring([v'0],1),
3469                              op_size_override)))
3470                       val (opcode,(rm,strm3)) =
3471                         readOpcodeModRM(REX,strm2)
3472                     in
3473                       case opcode of
3474                          BitsN.B(0x0,_) =>
3475                            let
3476                              val (imm,strm4) = oimmediate(size,strm3)
3477                            in
3478                              Zfull_inst
3479                                (p,
3480                                 (Zbinop(Ztest,(size,Zrm_i(rm,imm))),strm4))
3481                            end
3482                        | BitsN.B(0x2,_) =>
3483                          Zfull_inst(p,(Zmonop(Znot,(size,rm)),strm3))
3484                        | BitsN.B(0x3,_) =>
3485                          Zfull_inst(p,(Zmonop(Zneg,(size,rm)),strm3))
3486                        | BitsN.B(0x4,_) =>
3487                          Zfull_inst(p,(Zmul(size,rm),strm3))
3488                        | BitsN.B(0x5,_) =>
3489                          Zfull_inst(p,(Zimul(size,rm),strm3))
3490                        | BitsN.B(0x6,_) =>
3491                          Zfull_inst(p,(Zdiv(size,rm),strm3))
3492                        | BitsN.B(0x7,_) =>
3493                          Zfull_inst(p,(Zidiv(size,rm),strm3))
3494                        | _ =>
3495                          Zdec_fail "Unsupported opcode: Unary Group 3"
3496                     end
3497                   | ((true,
3498                     (true,(true,(true,(true,(false,(false,false))))))),
3499                    strm2) => Zfull_inst(p,(Zclc,Option.SOME strm2))
3500                   | ((true,
3501                     (true,(true,(true,(true,(false,(false,true))))))),
3502                    strm2) => Zfull_inst(p,(Zstc,Option.SOME strm2))
3503                   | ((true,
3504                     (true,(true,(true,(true,(true,(true,false))))))),
3505                    strm2) =>
3506                     let
3507                       val (opcode,(rm,strm3)) =
3508                         readOpcodeModRM(REX,strm2)
3509                     in
3510                       if opcode = (BitsN.B(0x0,3))
3511                         then Zfull_inst
3512                                (p,(Zmonop(Zinc,(Z8 have_rex,rm)),strm3))
3513                       else if opcode = (BitsN.B(0x1,3))
3514                         then Zfull_inst
3515                                (p,(Zmonop(Zdec,(Z8 have_rex,rm)),strm3))
3516                       else Zdec_fail
3517                              "Unsupported opcode: INC/DEC Group 4"
3518                     end
3519                   | ((true,(true,(true,(true,(true,(true,(true,true))))))),
3520                    strm2) =>
3521                     let
3522                       val size =
3523                         OpSize
3524                           (have_rex,
3525                            (#W(REX : REX),
3526                             (BitsN.B(0x1,1),op_size_override)))
3527                       val (opcode,(rm,strm3)) =
3528                         readOpcodeModRM(REX,strm2)
3529                     in
3530                       case opcode of
3531                          BitsN.B(0x0,_) =>
3532                            Zfull_inst(p,(Zmonop(Zinc,(size,rm)),strm3))
3533                        | BitsN.B(0x1,_) =>
3534                          Zfull_inst(p,(Zmonop(Zdec,(size,rm)),strm3))
3535                        | BitsN.B(0x2,_) =>
3536                          Zfull_inst(p,(Zcall(Zrm rm),strm3))
3537                        | BitsN.B(0x4,_) => Zfull_inst(p,(Zjmp rm,strm3))
3538                        | BitsN.B(0x6,_) =>
3539                          Zfull_inst(p,(Zpush(Zrm rm),strm3))
3540                        | _ =>
3541                          Zdec_fail "Unsupported opcode: INC/DEC Group 5"
3542                     end
3543                   | ((false,
3544                     (false,(false,(false,(true,(true,(true,true))))))),
3545                    (BitsN.B(0x38,_)) :: (opc :: strm2)) =>
3546                     (case opc of
3547                         BitsN.B(0x29,_) =>
3548                           if p = [BitsN.B(0x66,8)]
3549                             then let
3550                                    val (reg,(rm,strm3)) =
3551                                      readOpcodeModRM(REX,strm2)
3552                                    val a = (reg,rm_to_xmm_mem rm)
3553                                  in
3554                                    Zfull_inst(p,(SSE(PCMPEQQ a),strm3))
3555                                  end
3556                           else Zdec_fail
3557                                  ("Unsupported opcode: 0F 38 "
3558                                     ^
3559                                     (BitsN.toHexString opc))
3560                       | _ =>
3561                         Zdec_fail
3562                           ("Unsupported opcode: 0F 38 "
3563                              ^
3564                              (BitsN.toHexString opc)))
3565                   | ((false,
3566                     (false,(false,(false,(true,(true,(true,true))))))),
3567                    (BitsN.B(0x3A,_)) :: (opc :: _)) =>
3568                     Zdec_fail
3569                       ("Unsupported opcode: 0F 3A "
3570                          ^
3571                          (BitsN.toHexString opc))
3572                   | ((false,
3573                     (false,(false,(false,(true,(true,(true,true))))))),
3574                    opc :: strm2) =>
3575                     (case boolify'8 opc of
3576                         (false,
3577                          (false,
3578                           (false,(true,(false,(false,(false,false))))))) =>
3579                           let
3580                             val (reg,(rm,strm3)) =
3581                               readOpcodeModRM(REX,strm2)
3582                             val a = (xmm_reg reg,rm_to_xmm_mem rm)
3583                           in
3584                             case p of
3585                                [BitsN.B(0x66,_)] =>
3586                                  Zfull_inst
3587                                    (p,(SSE(MOVUP_D_S(true,a)),strm3))
3588                              | [BitsN.B(0xF2,_)] =>
3589                                Zfull_inst(p,(SSE(MOVSD a),strm3))
3590                              | [BitsN.B(0xF3,_)] =>
3591                                Zfull_inst(p,(SSE(MOVSS a),strm3))
3592                              | _ =>
3593                                Zfull_inst
3594                                  (p,(SSE(MOVUP_D_S(false,a)),strm3))
3595                           end
3596                       | (false,
3597                        (false,(false,(true,(false,(false,(false,true))))))) =>
3598                         let
3599                           val (reg,(rm,strm3)) =
3600                             readOpcodeModRM(REX,strm2)
3601                           val a = (rm_to_xmm_mem rm,xmm_reg reg)
3602                         in
3603                           case p of
3604                              [BitsN.B(0x66,_)] =>
3605                                Zfull_inst
3606                                  (p,(SSE(MOVUP_D_S(true,a)),strm3))
3607                            | [BitsN.B(0xF2,_)] =>
3608                              Zfull_inst(p,(SSE(MOVSD a),strm3))
3609                            | [BitsN.B(0xF3,_)] =>
3610                              Zfull_inst(p,(SSE(MOVSS a),strm3))
3611                            | _ =>
3612                              Zfull_inst
3613                                (p,(SSE(MOVUP_D_S(false,a)),strm3))
3614                         end
3615                       | (false,
3616                        (false,(false,(true,(true,(true,(true,true))))))) =>
3617                         let
3618                           val (_,(_,strm3)) = readOpcodeModRM(REX,strm2)
3619                         in
3620                           Zfull_inst(p,(Znop(L3.length strm),strm3))
3621                         end
3622                       | (false,
3623                        (false,(true,(false,(true,(false,(false,false))))))) =>
3624                         let
3625                           val (reg,(rm,strm3)) =
3626                             readOpcodeModRM(REX,strm2)
3627                           val double = p = [BitsN.B(0x66,8)]
3628                         in
3629                           Zfull_inst
3630                             (p,
3631                              (SSE(MOVAP_D_S
3632                                     (double,
3633                                      (xmm_reg reg,rm_to_xmm_mem rm))),
3634                               strm3))
3635                         end
3636                       | (false,
3637                        (false,(true,(false,(true,(false,(false,true))))))) =>
3638                         let
3639                           val (reg,(rm,strm3)) =
3640                             readOpcodeModRM(REX,strm2)
3641                           val double = p = [BitsN.B(0x66,8)]
3642                         in
3643                           Zfull_inst
3644                             (p,
3645                              (SSE(MOVAP_D_S
3646                                     (double,
3647                                      (rm_to_xmm_mem rm,xmm_reg reg))),
3648                               strm3))
3649                         end
3650                       | (false,
3651                        (false,(true,(false,(true,(false,(true,false))))))) =>
3652                         let
3653                           val (reg,(rm,strm3)) =
3654                             readOpcodeModRM(REX,strm2)
3655                           val a = (#W(REX : REX),(reg,rm))
3656                         in
3657                           case p of
3658                              [BitsN.B(0xF3,_)] =>
3659                                Zfull_inst(p,(SSE(CVTSI2SS a),strm3))
3660                            | [BitsN.B(0xF2,_)] =>
3661                              Zfull_inst(p,(SSE(CVTSI2SD a),strm3))
3662                            | _ =>
3663                              Zdec_fail
3664                                ("Unsupported opcode: 0F "
3665                                   ^
3666                                   (BitsN.toHexString opc))
3667                         end
3668                       | (false,
3669                        (false,(true,(false,(true,(true,(false,b'0))))))) =>
3670                         let
3671                           val (reg,(rm,strm3)) = readModRM(REX,strm2)
3672                           val a =
3673                             (not((not o L3.equal (BitsN.zero (1)))
3674                                    (BitsN.fromBitstring([b'0],1))),
3675                              (#W(REX : REX),(reg,rm_to_xmm_mem rm)))
3676                         in
3677                           case p of
3678                              [BitsN.B(0xF3,_)] =>
3679                                Zfull_inst(p,(SSE(CVTSS2SI a),strm3))
3680                            | [BitsN.B(0xF2,_)] =>
3681                              Zfull_inst(p,(SSE(CVTSD2SI a),strm3))
3682                            | _ =>
3683                              Zdec_fail
3684                                ("Unsupported opcode: 0F "
3685                                   ^
3686                                   (BitsN.toHexString opc))
3687                         end
3688                       | (false,
3689                        (false,(true,(false,(true,(true,(true,true))))))) =>
3690                         let
3691                           val (reg,(rm,strm3)) =
3692                             readOpcodeModRM(REX,strm2)
3693                           val a = (reg,rm_to_xmm_mem rm)
3694                         in
3695                           case p of
3696                              [BitsN.B(0x66,_)] =>
3697                                Zfull_inst(p,(SSE(COMISD a),strm3))
3698                            | _ => Zfull_inst(p,(SSE(COMISS a),strm3))
3699                         end
3700                       | (false,
3701                        (true,(false,(false,(c'3,(c'2,(c'1,c'0))))))) =>
3702                         let
3703                           val size =
3704                             OpSize
3705                               (true,
3706                                (#W(REX : REX),
3707                                 (BitsN.B(0x1,1),op_size_override)))
3708                           val (reg,(rm,strm3)) = readModRM(REX,strm2)
3709                         in
3710                           Zfull_inst
3711                             (p,
3712                              (Zmov
3713                                 ((Cast.natToZcond o BitsN.toNat)
3714                                    (BitsN.fromBitstring
3715                                       ([c'3,c'2,c'1,c'0],4)),
3716                                  (size,Zr_rm(reg,rm))),strm3))
3717                         end
3718                       | (false,
3719                        (true,(false,(true,(false,(false,(false,true))))))) =>
3720                         let
3721                           val (reg,(rm,strm3)) =
3722                             readOpcodeModRM(REX,strm2)
3723                           val a = (reg,rm_to_xmm_mem rm)
3724                         in
3725                           case p of
3726                              [BitsN.B(0x66,_)] =>
3727                                Zfull_inst(p,(SSE(SQRTPD a),strm3))
3728                            | [BitsN.B(0xF3,_)] =>
3729                              Zfull_inst(p,(SSE(SQRTSS a),strm3))
3730                            | [BitsN.B(0xF2,_)] =>
3731                              Zfull_inst(p,(SSE(SQRTSD a),strm3))
3732                            | _ => Zfull_inst(p,(SSE(SQRTPS a),strm3))
3733                         end
3734                       | (false,
3735                        (true,(false,(true,(false,(true,(c'1,c'0))))))) =>
3736                         let
3737                           val (reg,(rm,strm3)) =
3738                             readOpcodeModRM(REX,strm2)
3739                           val a =
3740                             ((Cast.natTosse_logic o BitsN.toNat)
3741                                (BitsN.fromBitstring([c'1,c'0],2)),
3742                              (reg,rm_to_xmm_mem rm))
3743                         in
3744                           case p of
3745                              [BitsN.B(0x66,_)] =>
3746                                Zfull_inst(p,(SSE(logic_PD a),strm3))
3747                            | _ => Zfull_inst(p,(SSE(logic_PS a),strm3))
3748                         end
3749                       | (false,
3750                        (true,(false,(true,(true,(c'2,(c'1,c'0))))))) =>
3751                         let
3752                           val (reg,(rm,strm3)) =
3753                             readOpcodeModRM(REX,strm2)
3754                           val a = (reg,rm_to_xmm_mem rm)
3755                         in
3756                           case (BitsN.fromBitstring([c'2,c'1,c'0],3),p) of
3757                              (BitsN.B(0x0,_),[BitsN.B(0x66,_)]) =>
3758                                Zfull_inst
3759                                  (p,(SSE(bin_PD(sse_add,a)),strm3))
3760                            | (BitsN.B(0x0,_),[BitsN.B(0xF3,_)]) =>
3761                              Zfull_inst(p,(SSE(bin_SS(sse_add,a)),strm3))
3762                            | (BitsN.B(0x0,_),[BitsN.B(0xF2,_)]) =>
3763                              Zfull_inst(p,(SSE(bin_SD(sse_add,a)),strm3))
3764                            | (BitsN.B(0x0,_),_) =>
3765                              Zfull_inst(p,(SSE(bin_PS(sse_add,a)),strm3))
3766                            | (BitsN.B(0x1,_),[BitsN.B(0x66,_)]) =>
3767                              Zfull_inst(p,(SSE(bin_PD(sse_mul,a)),strm3))
3768                            | (BitsN.B(0x1,_),[BitsN.B(0xF3,_)]) =>
3769                              Zfull_inst(p,(SSE(bin_SS(sse_mul,a)),strm3))
3770                            | (BitsN.B(0x1,_),[BitsN.B(0xF2,_)]) =>
3771                              Zfull_inst(p,(SSE(bin_SD(sse_mul,a)),strm3))
3772                            | (BitsN.B(0x1,_),_) =>
3773                              Zfull_inst(p,(SSE(bin_PS(sse_mul,a)),strm3))
3774                            | (BitsN.B(0x2,_),[BitsN.B(0x66,_)]) =>
3775                              Zfull_inst(p,(SSE(CVTPD2PS a),strm3))
3776                            | (BitsN.B(0x2,_),[BitsN.B(0xF3,_)]) =>
3777                              Zfull_inst(p,(SSE(CVTSS2SD a),strm3))
3778                            | (BitsN.B(0x2,_),[BitsN.B(0xF2,_)]) =>
3779                              Zfull_inst(p,(SSE(CVTSD2SS a),strm3))
3780                            | (BitsN.B(0x2,_),_) =>
3781                              Zfull_inst(p,(SSE(CVTPS2PD a),strm3))
3782                            | (BitsN.B(0x3,_),[BitsN.B(0x66,_)]) =>
3783                              Zfull_inst(p,(SSE(CVTPS2DQ(false,a)),strm3))
3784                            | (BitsN.B(0x3,_),[BitsN.B(0xF3,_)]) =>
3785                              Zfull_inst(p,(SSE(CVTPS2DQ(true,a)),strm3))
3786                            | (BitsN.B(0x3,_),[BitsN.B(0xF2,_)]) =>
3787                              Zdec_fail
3788                                ("Unsupported opcode: 0F "
3789                                   ^
3790                                   (BitsN.toHexString opc))
3791                            | (BitsN.B(0x3,_),_) =>
3792                              Zfull_inst(p,(SSE(CVTDQ2PS a),strm3))
3793                            | (BitsN.B(0x4,_),[BitsN.B(0x66,_)]) =>
3794                              Zfull_inst(p,(SSE(bin_PD(sse_sub,a)),strm3))
3795                            | (BitsN.B(0x4,_),[BitsN.B(0xF3,_)]) =>
3796                              Zfull_inst(p,(SSE(bin_SS(sse_sub,a)),strm3))
3797                            | (BitsN.B(0x4,_),[BitsN.B(0xF2,_)]) =>
3798                              Zfull_inst(p,(SSE(bin_SD(sse_sub,a)),strm3))
3799                            | (BitsN.B(0x4,_),_) =>
3800                              Zfull_inst(p,(SSE(bin_PS(sse_sub,a)),strm3))
3801                            | (BitsN.B(0x5,_),[BitsN.B(0x66,_)]) =>
3802                              Zfull_inst(p,(SSE(bin_PD(sse_min,a)),strm3))
3803                            | (BitsN.B(0x5,_),[BitsN.B(0xF3,_)]) =>
3804                              Zfull_inst(p,(SSE(bin_SS(sse_min,a)),strm3))
3805                            | (BitsN.B(0x5,_),[BitsN.B(0xF2,_)]) =>
3806                              Zfull_inst(p,(SSE(bin_SD(sse_min,a)),strm3))
3807                            | (BitsN.B(0x5,_),_) =>
3808                              Zfull_inst(p,(SSE(bin_PS(sse_min,a)),strm3))
3809                            | (BitsN.B(0x6,_),[BitsN.B(0x66,_)]) =>
3810                              Zfull_inst(p,(SSE(bin_PD(sse_div,a)),strm3))
3811                            | (BitsN.B(0x6,_),[BitsN.B(0xF3,_)]) =>
3812                              Zfull_inst(p,(SSE(bin_SS(sse_div,a)),strm3))
3813                            | (BitsN.B(0x6,_),[BitsN.B(0xF2,_)]) =>
3814                              Zfull_inst(p,(SSE(bin_SD(sse_div,a)),strm3))
3815                            | (BitsN.B(0x6,_),_) =>
3816                              Zfull_inst(p,(SSE(bin_PS(sse_div,a)),strm3))
3817                            | (BitsN.B(0x7,_),[BitsN.B(0x66,_)]) =>
3818                              Zfull_inst(p,(SSE(bin_PD(sse_max,a)),strm3))
3819                            | (BitsN.B(0x7,_),[BitsN.B(0xF3,_)]) =>
3820                              Zfull_inst(p,(SSE(bin_SS(sse_max,a)),strm3))
3821                            | (BitsN.B(0x7,_),[BitsN.B(0xF2,_)]) =>
3822                              Zfull_inst(p,(SSE(bin_SD(sse_max,a)),strm3))
3823                            | (BitsN.B(0x7,_),_) =>
3824                              Zfull_inst(p,(SSE(bin_PS(sse_max,a)),strm3))
3825                         end
3826                       | (false,
3827                        (true,(true,(true,(false,(false,(false,true))))))) =>
3828                         if p = [BitsN.B(0x66,8)]
3829                           then let
3830                                  val (opcode,(rm,strm3)) =
3831                                    readOpcodeModRM(REX,strm2)
3832                                  val (imm,strm4) = oimmediate8 strm3
3833                                in
3834                                  case (opcode,rm) of
3835                                     (BitsN.B(0x2,_),Zr r) =>
3836                                       Zfull_inst
3837                                         (p,
3838                                          (SSE(PSRLW_imm
3839                                                 (BitsN.fromNat
3840                                                    (Cast.ZregToNat r,3),
3841                                                  BitsN.fromNat
3842                                                    (BitsN.toNat imm,8))),
3843                                           strm4))
3844                                   | (BitsN.B(0x4,_),Zr r) =>
3845                                     Zfull_inst
3846                                       (p,
3847                                        (SSE(PSRAW_imm
3848                                               (BitsN.fromNat
3849                                                  (Cast.ZregToNat r,3),
3850                                                BitsN.fromNat
3851                                                  (BitsN.toNat imm,8))),
3852                                         strm4))
3853                                   | (BitsN.B(0x6,_),Zr r) =>
3854                                     Zfull_inst
3855                                       (p,
3856                                        (SSE(PSLLW_imm
3857                                               (BitsN.fromNat
3858                                                  (Cast.ZregToNat r,3),
3859                                                BitsN.fromNat
3860                                                  (BitsN.toNat imm,8))),
3861                                         strm4))
3862                                   | _ =>
3863                                     Zdec_fail
3864                                       "Unsupported group 12 opcode: 0F 71"
3865                                end
3866                         else Zdec_fail
3867                                "Unsupported group 12 opcode: 0F 71"
3868                       | (false,
3869                        (true,(true,(true,(false,(false,(true,false))))))) =>
3870                         if p = [BitsN.B(0x66,8)]
3871                           then let
3872                                  val (opcode,(rm,strm3)) =
3873                                    readOpcodeModRM(REX,strm2)
3874                                  val (imm,strm4) = oimmediate8 strm3
3875                                in
3876                                  case (opcode,rm) of
3877                                     (BitsN.B(0x2,_),Zr r) =>
3878                                       Zfull_inst
3879                                         (p,
3880                                          (SSE(PSRLD_imm
3881                                                 (BitsN.fromNat
3882                                                    (Cast.ZregToNat r,3),
3883                                                  BitsN.fromNat
3884                                                    (BitsN.toNat imm,8))),
3885                                           strm4))
3886                                   | (BitsN.B(0x4,_),Zr r) =>
3887                                     Zfull_inst
3888                                       (p,
3889                                        (SSE(PSRAD_imm
3890                                               (BitsN.fromNat
3891                                                  (Cast.ZregToNat r,3),
3892                                                BitsN.fromNat
3893                                                  (BitsN.toNat imm,8))),
3894                                         strm4))
3895                                   | (BitsN.B(0x6,_),Zr r) =>
3896                                     Zfull_inst
3897                                       (p,
3898                                        (SSE(PSLLD_imm
3899                                               (BitsN.fromNat
3900                                                  (Cast.ZregToNat r,3),
3901                                                BitsN.fromNat
3902                                                  (BitsN.toNat imm,8))),
3903                                         strm4))
3904                                   | _ =>
3905                                     Zdec_fail
3906                                       "Unsupported group 13 opcode: 0F 72"
3907                                end
3908                         else Zdec_fail
3909                                "Unsupported group 13 opcode: 0F 71"
3910                       | (false,
3911                        (true,(true,(true,(false,(false,(true,true))))))) =>
3912                         if p = [BitsN.B(0x66,8)]
3913                           then let
3914                                  val (opcode,(rm,strm3)) =
3915                                    readOpcodeModRM(REX,strm2)
3916                                  val (imm,strm4) = oimmediate8 strm3
3917                                in
3918                                  case (opcode,rm) of
3919                                     (BitsN.B(0x2,_),Zr r) =>
3920                                       Zfull_inst
3921                                         (p,
3922                                          (SSE(PSRLQ_imm
3923                                                 (BitsN.fromNat
3924                                                    (Cast.ZregToNat r,3),
3925                                                  BitsN.fromNat
3926                                                    (BitsN.toNat imm,8))),
3927                                           strm4))
3928                                   | (BitsN.B(0x3,_),Zr r) =>
3929                                     Zfull_inst
3930                                       (p,
3931                                        (SSE(PSRLDQ
3932                                               (BitsN.fromNat
3933                                                  (Cast.ZregToNat r,3),
3934                                                BitsN.fromNat
3935                                                  (BitsN.toNat imm,8))),
3936                                         strm4))
3937                                   | (BitsN.B(0x6,_),Zr r) =>
3938                                     Zfull_inst
3939                                       (p,
3940                                        (SSE(PSLLQ_imm
3941                                               (BitsN.fromNat
3942                                                  (Cast.ZregToNat r,3),
3943                                                BitsN.fromNat
3944                                                  (BitsN.toNat imm,8))),
3945                                         strm4))
3946                                   | (BitsN.B(0x7,_),Zr r) =>
3947                                     Zfull_inst
3948                                       (p,
3949                                        (SSE(PSLLDQ
3950                                               (BitsN.fromNat
3951                                                  (Cast.ZregToNat r,3),
3952                                                BitsN.fromNat
3953                                                  (BitsN.toNat imm,8))),
3954                                         strm4))
3955                                   | _ =>
3956                                     Zdec_fail
3957                                       "Unsupported group 14 opcode: 0F 73"
3958                                end
3959                         else Zdec_fail
3960                                "Unsupported group 14 opcode: 0F 71"
3961                       | (true,
3962                        (true,(false,(true,(false,(true,(true,false))))))) =>
3963                         let
3964                           val (reg,(rm,strm3)) =
3965                             readOpcodeModRM(REX,strm2)
3966                           val a = (rm_to_xmm_mem rm,xmm_reg reg)
3967                         in
3968                           case p of
3969                              [BitsN.B(0x66,_)] =>
3970                                Zfull_inst(p,(SSE(MOVQ a),strm3))
3971                            | _ =>
3972                              Zdec_fail
3973                                ("Unsupported opcode: 0F "
3974                                   ^
3975                                   (BitsN.toHexString opc))
3976                         end
3977                       | (false,
3978                        (true,(true,(b'0,(true,(true,(true,false))))))) =>
3979                         let
3980                           val b = BitsN.fromBitstring([b'0],1)
3981                           val (reg,(rm,strm3)) =
3982                             readOpcodeModRM(REX,strm2)
3983                           val a =
3984                             (not((not o L3.equal (BitsN.zero (1))) b),
3985                              (#W(REX : REX),(reg,rm)))
3986                         in
3987                           case p of
3988                              [BitsN.B(0x66,_)] =>
3989                                Zfull_inst(p,(SSE(MOV_D_Q a),strm3))
3990                            | [BitsN.B(0xF3,_)] =>
3991                              if b = (BitsN.B(0x1,1))
3992                                then Zfull_inst
3993                                       (p,
3994                                        (SSE(MOVQ
3995                                               (xmm_reg reg,
3996                                                rm_to_xmm_mem rm)),strm3))
3997                              else Zdec_fail
3998                                     ("Unsupported opcode: 0F "
3999                                        ^
4000                                        (BitsN.toHexString opc))
4001                            | _ =>
4002                              Zdec_fail
4003                                ("Unsupported opcode: 0F "
4004                                   ^
4005                                   (BitsN.toHexString opc))
4006                         end
4007                       | (true,
4008                        (false,(false,(false,(c'3,(c'2,(c'1,c'0))))))) =>
4009                         let
4010                           val (imm,strm3) = immediate32 strm2
4011                         in
4012                           Zfull_inst
4013                             (p,
4014                              (Zjcc
4015                                 ((Cast.natToZcond o BitsN.toNat)
4016                                    (BitsN.fromBitstring
4017                                       ([c'3,c'2,c'1,c'0],4)),imm),strm3))
4018                         end
4019                       | (true,
4020                        (false,(false,(true,(c'3,(c'2,(c'1,c'0))))))) =>
4021                         let
4022                           val (_,(rm,strm3)) = readModRM(REX,strm2)
4023                         in
4024                           Zfull_inst
4025                             (p,
4026                              (Zset
4027                                 ((Cast.natToZcond o BitsN.toNat)
4028                                    (BitsN.fromBitstring
4029                                       ([c'3,c'2,c'1,c'0],4)),
4030                                  (have_rex,rm)),strm3))
4031                         end
4032                       | (true,
4033                        (false,(true,(x'1,(x'0,(false,(true,true))))))) =>
4034                         let
4035                           val size =
4036                             OpSize
4037                               (have_rex,
4038                                (#W(REX : REX),
4039                                 (BitsN.B(0x1,1),op_size_override)))
4040                           val (reg,(rm,strm3)) = readModRM(REX,strm2)
4041                         in
4042                           Zfull_inst
4043                             (p,
4044                              (Zbit_test
4045                                 ((Cast.natToZbit_test_name o BitsN.toNat)
4046                                    (BitsN.fromBitstring([x'1,x'0],2)),
4047                                  (size,Zrm_r(rm,reg))),strm3))
4048                         end
4049                       | (true,
4050                        (false,(true,(true,(true,(false,(true,false))))))) =>
4051                         let
4052                           val size =
4053                             OpSize
4054                               (have_rex,
4055                                (#W(REX : REX),
4056                                 (BitsN.B(0x1,1),op_size_override)))
4057                           val (opcode,(rm,strm3)) =
4058                             readOpcodeModRM(REX,strm2)
4059                           val (imm,strm4) = oimmediate8 strm3
4060                         in
4061                           case boolify'3 opcode of
4062                              (true,(x'1,x'0)) =>
4063                                Zfull_inst
4064                                  (p,
4065                                   (Zbit_test
4066                                      ((Cast.natToZbit_test_name o BitsN.toNat)
4067                                         (BitsN.fromBitstring([x'1,x'0],2)),
4068                                       (size,Zrm_i(rm,imm))),strm4))
4069                            | _ => Zdec_fail "Unsupported opcode: Group 8"
4070                         end
4071                       | (true,
4072                        (false,(true,(false,(true,(true,(true,true))))))) =>
4073                         let
4074                           val size =
4075                             OpSize
4076                               (have_rex,
4077                                (#W(REX : REX),
4078                                 (BitsN.B(0x1,1),op_size_override)))
4079                           val (reg,(rm,strm3)) = readModRM(REX,strm2)
4080                         in
4081                           Zfull_inst(p,(Zimul2(size,(reg,rm)),strm3))
4082                         end
4083                       | (true,
4084                        (false,(true,(true,(false,(false,(false,v'0))))))) =>
4085                         let
4086                           val size =
4087                             OpSize
4088                               (have_rex,
4089                                (#W(REX : REX),
4090                                 (BitsN.fromBitstring([v'0],1),
4091                                  op_size_override)))
4092                           val (reg,(rm,strm3)) = readModRM(REX,strm2)
4093                         in
4094                           Zfull_inst(p,(Zcmpxchg(size,(rm,reg)),strm3))
4095                         end
4096                       | (true,
4097                        (false,(true,(true,(s'0,(true,(true,v'0))))))) =>
4098                         let
4099                           val size2 =
4100                             OpSize
4101                               (have_rex,
4102                                (#W(REX : REX),
4103                                 (BitsN.B(0x1,1),op_size_override)))
4104                           val size =
4105                             if (BitsN.fromBitstring([v'0],1)) =
4106                                (BitsN.B(0x1,1))
4107                               then Z16
4108                             else Z8 have_rex
4109                           val (reg,(rm,strm3)) = readModRM(REX,strm2)
4110                           val arg = (size,(Zr_rm(reg,rm),size2))
4111                           val instr =
4112                             if (BitsN.fromBitstring([s'0],1)) =
4113                                (BitsN.B(0x1,1))
4114                               then Zmovsx arg
4115                             else Zmovzx arg
4116                         in
4117                           Zfull_inst(p,(instr,strm3))
4118                         end
4119                       | (true,
4120                        (true,(false,(false,(false,(false,(false,v'0))))))) =>
4121                         let
4122                           val size =
4123                             OpSize
4124                               (have_rex,
4125                                (#W(REX : REX),
4126                                 (BitsN.fromBitstring([v'0],1),
4127                                  op_size_override)))
4128                           val (reg,(rm,strm3)) = readModRM(REX,strm2)
4129                         in
4130                           Zfull_inst(p,(Zxadd(size,(rm,reg)),strm3))
4131                         end
4132                       | (true,
4133                        (true,(false,(false,(false,(false,(true,false))))))) =>
4134                         let
4135                           val (reg,(rm,strm3)) =
4136                             readOpcodeModRM(REX,strm2)
4137                           val (imm,strm4) = oimmediate8 strm3
4138                           val a =
4139                             ((Cast.natTosse_compare o BitsN.toNat) imm,
4140                              (reg,rm_to_xmm_mem rm))
4141                         in
4142                           case p of
4143                              [BitsN.B(0x66,_)] =>
4144                                Zfull_inst(p,(SSE(CMPPD a),strm4))
4145                            | [BitsN.B(0xF3,_)] =>
4146                              Zfull_inst(p,(SSE(CMPSS a),strm4))
4147                            | [BitsN.B(0xF2,_)] =>
4148                              Zfull_inst(p,(SSE(CMPSD a),strm4))
4149                            | _ => Zfull_inst(p,(SSE(CMPPS a),strm4))
4150                         end
4151                       | (true,
4152                        (true,(true,(false,(false,(true,(true,false))))))) =>
4153                         let
4154                           val (reg,(rm,strm3)) =
4155                             readOpcodeModRM(REX,strm2)
4156                           val a = (reg,rm_to_xmm_mem rm)
4157                         in
4158                           case p of
4159                              [BitsN.B(0x66,_)] =>
4160                                Zfull_inst
4161                                  (p,(SSE(CVTPD2DQ(true,a)),strm3))
4162                            | [BitsN.B(0xF3,_)] =>
4163                              Zfull_inst(p,(SSE(CVTDQ2PD a),strm3))
4164                            | [BitsN.B(0xF2,_)] =>
4165                              Zfull_inst(p,(SSE(CVTPD2DQ(false,a)),strm3))
4166                            | _ =>
4167                              Zdec_fail
4168                                ("Unsupported opcode: "
4169                                   ^
4170                                   (BitsN.toHexString opc))
4171                         end
4172                       | _ =>
4173                         Zdec_fail
4174                           ("Unsupported opcode: 0F "
4175                              ^
4176                              (BitsN.toHexString opc)))
4177                   | ((opc'7,
4178                     (opc'6,(opc'5,(opc'4,(opc'3,(opc'2,(opc'1,opc'0))))))),
4179                    _) =>
4180                     Zdec_fail
4181                       ("Unsupported opcode: "
4182                          ^
4183                          (BitsN.toHexString
4184                             (BitsN.fromBitstring
4185                                ([opc'7,opc'6,opc'5,opc'4,opc'3,opc'2,
4186                                  opc'1,opc'0],8)))))
4187             | [] => Zdec_fail "No opcode"
4188     end;
4189
4190fun x64_fetch () =
4191  let
4192    val strm = ref []
4193  in
4194    ( L3.for
4195        (19,0,
4196         fn i =>
4197           strm :=
4198           ((Map.lookup
4199               ((!MEM),BitsN.toNat(BitsN.+((!RIP),BitsN.fromNat(i,64)))))
4200              ::
4201              (!strm)))
4202    ; (!strm)
4203    )
4204  end;
4205
4206fun x64_next () =
4207  case x64_decode(x64_fetch ()) of
4208     Zfull_inst(_,(i,Option.SOME strm1)) =>
4209       let
4210         val len = Nat.-(20,L3.length strm1)
4211       in
4212         ( RIP := (BitsN.+((!RIP),BitsN.fromNat(len,64))); Run i )
4213       end
4214   | Zfull_inst(_,(_,NONE)) => raise FAILURE "not enough bytes"
4215   | Zdec_fail s => raise FAILURE s;
4216
4217fun e_imm8 imm =
4218  if (BitsN.<=(BitsN.B(0xFFFFFFFFFFFFFF80,64),imm)) andalso
4219     (BitsN.<=(imm,BitsN.B(0x7F,64)))
4220    then [BitsN.bits(7,0) imm]
4221  else [];
4222
4223fun e_imm16 imm =
4224  if (BitsN.<=(BitsN.B(0xFFFFFFFFFFFF8000,64),imm)) andalso
4225     (BitsN.<=(imm,BitsN.B(0x7FFF,64)))
4226    then [BitsN.bits(7,0) imm,BitsN.bits(15,8) imm]
4227  else [];
4228
4229fun e_imm32 imm =
4230  if (BitsN.<=(BitsN.B(0xFFFFFFFF80000000,64),imm)) andalso
4231     (BitsN.<=(imm,BitsN.B(0x7FFFFFFF,64)))
4232    then [BitsN.bits(7,0) imm,BitsN.bits(15,8) imm,BitsN.bits(23,16) imm,
4233          BitsN.bits(31,24) imm]
4234  else [];
4235
4236fun e_imm64 imm =
4237  [BitsN.bits(7,0) imm,BitsN.bits(15,8) imm,BitsN.bits(23,16) imm,
4238   BitsN.bits(31,24) imm,BitsN.bits(39,32) imm,BitsN.bits(47,40) imm,
4239   BitsN.bits(55,48) imm,BitsN.bits(63,56) imm];
4240
4241fun e_imm imm =
4242  case e_imm8 imm of
4243     [] =>
4244       (case e_imm16 imm of
4245           [] => (case e_imm32 imm of [] => e_imm64 imm | l => l)
4246         | l => l)
4247   | l => l;
4248
4249fun e_imm_8_32 imm =
4250  case e_imm8 imm of
4251     [] => (case e_imm32 imm of [] => (8,[]) | l => (4,l))
4252   | l => (1,l);
4253
4254fun e_ModRM (r,rm) =
4255  case rm of
4256     Zm(NONE,(ZripBase,displacement)) =>
4257       Option.SOME
4258         (BitsN.concat[BitsN.B(0x0,1),BitsN.bits(3,3) r,BitsN.B(0x0,2)],
4259          (BitsN.concat[BitsN.B(0x0,2),BitsN.bits(2,0) r,BitsN.B(0x5,3)])
4260            ::
4261            (e_imm32 displacement))
4262   | Zr rm =>
4263     let
4264       val rm = BitsN.fromNat(Cast.ZregToNat rm,4)
4265     in
4266       Option.SOME
4267         (BitsN.concat
4268            [BitsN.B(0x0,1),BitsN.bits(3,3) r,BitsN.B(0x0,1),
4269             BitsN.bits(3,3) rm],
4270          [BitsN.concat
4271             [BitsN.B(0x3,2),BitsN.bits(2,0) r,BitsN.bits(2,0) rm]])
4272     end
4273   | Zm(NONE,(ZnoBase,imm)) =>
4274     (case e_imm32 imm of
4275         [] => NONE
4276       | l =>
4277         Option.SOME
4278           (BitsN.concat[BitsN.B(0x0,1),BitsN.bits(3,3) r,BitsN.B(0x0,2)],
4279            [BitsN.concat[BitsN.B(0x0,2),BitsN.bits(2,0) r,BitsN.B(0x4,3)],
4280             BitsN.B(0x25,8)]
4281              @
4282              l))
4283   | Zm(Option.SOME(ss,index),(ZnoBase,imm)) =>
4284     let
4285       val i = BitsN.fromNat(Cast.ZregToNat index,4)
4286     in
4287       case e_imm32 imm of
4288          [] => NONE
4289        | l =>
4290          Option.SOME
4291            (BitsN.concat
4292               [BitsN.B(0x0,1),BitsN.bits(3,3) r,BitsN.bits(3,3) i,
4293                BitsN.B(0x0,1)],
4294             [BitsN.concat
4295                [BitsN.B(0x0,2),BitsN.bits(2,0) r,BitsN.B(0x4,3)],
4296              BitsN.concat[ss,BitsN.bits(2,0) i,BitsN.B(0x5,3)]]
4297               @
4298               l)
4299     end
4300   | Zm(Option.SOME(ss,index),(ZregBase base,imm)) =>
4301     let
4302       val b = BitsN.fromNat(Cast.ZregToNat base,4)
4303       val i = BitsN.fromNat(Cast.ZregToNat index,4)
4304       val b20 = BitsN.bits(2,0) b
4305       val (s,l) =
4306         if (imm = (BitsN.B(0x0,64))) andalso
4307            (not(b20 = (BitsN.B(0x5,3))))
4308           then (0,[])
4309         else e_imm_8_32 imm
4310     in
4311       if Set.mem(s,[0,1,4])
4312         then Option.SOME
4313                (BitsN.concat
4314                   [BitsN.B(0x0,1),BitsN.bits(3,3) r,BitsN.bits(3,3) i,
4315                    BitsN.bits(3,3) b],
4316                 [BitsN.concat
4317                    [case s of
4318                        0 => BitsN.B(0x0,2)
4319                      | 1 => BitsN.B(0x1,2)
4320                      | _ => BitsN.B(0x2,2),BitsN.bits(2,0) r,
4321                     BitsN.B(0x4,3)],
4322                  BitsN.concat[ss,BitsN.bits(2,0) i,b20]]
4323                   @
4324                   l)
4325       else NONE
4326     end
4327   | Zm(NONE,(ZregBase base,imm)) =>
4328     let
4329       val base = BitsN.fromNat(Cast.ZregToNat base,4)
4330       val base20 = BitsN.bits(2,0) base
4331       val (s,l) =
4332         if (imm = (BitsN.B(0x0,64))) andalso
4333            (not(base20 = (BitsN.B(0x5,3))))
4334           then (0,[])
4335         else e_imm_8_32 imm
4336     in
4337       if Set.mem(s,[0,1,4])
4338         then Option.SOME
4339                (BitsN.concat
4340                   [BitsN.B(0x0,1),BitsN.bits(3,3) r,BitsN.B(0x0,1),
4341                    BitsN.bits(3,3) base],
4342                 ((BitsN.concat
4343                     [case s of
4344                         0 => BitsN.B(0x0,2)
4345                       | 1 => BitsN.B(0x1,2)
4346                       | _ => BitsN.B(0x2,2),BitsN.bits(2,0) r,base20])
4347                    ::
4348                    (if base20 = (BitsN.B(0x4,3))
4349                       then [BitsN.@@(BitsN.B(0x4,5),base20)]
4350                     else []))
4351                   @
4352                   l)
4353       else NONE
4354     end
4355   | _ => NONE;
4356
4357fun rex_prefix rex =
4358  if rex = (BitsN.B(0x0,4)) then [] else [BitsN.@@(BitsN.B(0x4,4),rex)];
4359
4360fun e_opsize (sz,rex) =
4361  case sz of
4362     Z8 have_rex =>
4363       let
4364         val p = rex_prefix rex
4365       in
4366         (if have_rex andalso (p = []) then [BitsN.B(0x40,8)] else p,
4367          BitsN.B(0x0,8))
4368       end
4369   | Z16 =>
4370     ([BitsN.B(0x66,8)] @ (rex_prefix(BitsN.&&(rex,BitsN.B(0x7,4)))),
4371      BitsN.B(0x1,8))
4372   | Z32 => (rex_prefix(BitsN.&&(rex,BitsN.B(0x7,4))),BitsN.B(0x1,8))
4373   | Z64 => (rex_prefix(BitsN.||(rex,BitsN.B(0x8,4))),BitsN.B(0x1,8));
4374
4375fun e_opsize_imm (sz,(rex,(imm,normal))) =
4376  let
4377    val (prefixes,v) = e_opsize(sz,rex)
4378  in
4379    case if (sz = Z64) andalso normal then Z32 else sz of
4380       Z8 _ =>
4381         (case e_imm8 imm of
4382             [] => NONE
4383           | l =>
4384             (if v = (BitsN.B(0x0,8))
4385                then Option.SOME(prefixes,(v,l))
4386              else NONE))
4387     | Z16 =>
4388       (case e_imm16 imm of
4389           [] => NONE
4390         | l =>
4391           (if v = (BitsN.B(0x1,8))
4392              then Option.SOME(prefixes,(v,l))
4393            else NONE))
4394     | Z32 =>
4395       (case e_imm32 imm of
4396           [] => NONE
4397         | l =>
4398           (if v = (BitsN.B(0x1,8))
4399              then Option.SOME(prefixes,(v,l))
4400            else NONE))
4401     | Z64 =>
4402       if v = (BitsN.B(0x1,8))
4403         then Option.SOME(prefixes,(v,e_imm64 imm))
4404       else NONE
4405  end;
4406
4407fun e_opc (opc1,(opc2,rm)) =
4408  case e_ModRM(BitsN.fromNat(BitsN.toNat opc2,4),rm) of
4409     NONE => []
4410   | Option.SOME(rex,strm) => (rex_prefix rex) @ (opc1 :: strm);
4411
4412fun e_gen_rm_reg (sz,(rm,(r,(p,(opc,mo))))) =
4413  case e_ModRM(r,rm) of
4414     NONE => []
4415   | Option.SOME(rex,strm) =>
4416     let
4417       val (prefixes,v) = e_opsize(sz,rex)
4418       val m = case mo of Option.SOME x => x | NONE => v
4419     in
4420       List.concat[prefixes,p,[BitsN.||(opc,m)],strm]
4421     end;
4422
4423fun e_rm_imm (sz,(rm,(imm,(opc1,opc2)))) =
4424  case e_ModRM(opc1,rm) of
4425     NONE => []
4426   | Option.SOME(rex,strm) =>
4427     (case e_opsize_imm(sz,(rex,(imm,true))) of
4428         Option.SOME(prefixes,(v,l)) =>
4429           List.concat[prefixes,[BitsN.||(opc2,v)],strm,l]
4430       | NONE => []);
4431
4432fun e_rm_imm8 (sz,(rm,(imm,(opc1,opc2)))) =
4433  case e_ModRM(opc1,rm) of
4434     NONE => []
4435   | Option.SOME(rex,strm) =>
4436     let
4437       val (prefixes,v) = e_opsize(sz,rex)
4438     in
4439       case e_imm8 imm of
4440          [] => []
4441        | l => List.concat[prefixes,[BitsN.||(opc2,v)],strm,l]
4442     end;
4443
4444fun e_rm_imm8b (sz,(rm,(imm,(opc1,opc2)))) =
4445  case e_ModRM(opc1,rm) of
4446     Option.SOME(rex,s :: t) =>
4447       let
4448         val prefixes = L3.fst(e_opsize(sz,rex))
4449       in
4450         case e_imm8 imm of
4451            [] => []
4452          | l =>
4453            List.concat[prefixes,opc2,[BitsN.||(s,BitsN.B(0x20,8))],t,l]
4454       end
4455   | _ => [];
4456
4457fun e_rax_imm (sz,(imm,opc)) =
4458  case e_opsize_imm(sz,(BitsN.B(0x0,4),(imm,true))) of
4459     Option.SOME(prefixes,(v,l)) =>
4460       List.concat[prefixes,[BitsN.||(opc,v)],l]
4461   | NONE => [];
4462
4463fun e_jcc_rel32 i =
4464  case i of
4465     Zjcc(cond,imm) =>
4466       (case (e_imm32 imm,cond) of
4467           ([],_) => []
4468         | (l,Z_ALWAYS) => (BitsN.B(0xE9,8)) :: l
4469         | (l,_) =>
4470           [BitsN.B(0xF,8),
4471            BitsN.@@(BitsN.B(0x8,4),BitsN.fromNat(Cast.ZcondToNat cond,4))]
4472             @
4473             l)
4474   | _ => [];
4475
4476fun not_byte sz = case sz of Z8 _ => false | _ => true;
4477
4478fun is_rax rm = case rm of Zr RAX => true | _ => false;
4479
4480fun xmm_mem_to_rm x =
4481  case x of
4482     xmm_reg r => Zr((Cast.natToZreg o BitsN.toNat) r)
4483   | xmm_mem m => Zm m;
4484
4485fun encode_sse_binop bop =
4486  case bop of
4487     sse_add => BitsN.B(0x0,3)
4488   | sse_mul => BitsN.B(0x1,3)
4489   | sse_sub => BitsN.B(0x4,3)
4490   | sse_min => BitsN.B(0x5,3)
4491   | sse_div => BitsN.B(0x6,3)
4492   | sse_max => BitsN.B(0x7,3);
4493
4494fun encode_sse i =
4495  case i of
4496     bin_PD(bop,(dst,src)) =>
4497       (BitsN.B(0x66,8))
4498         ::
4499         (e_gen_rm_reg
4500            (Z32,
4501             (xmm_mem_to_rm src,
4502              (BitsN.fromNat(BitsN.toNat dst,4),
4503               ([BitsN.B(0xF,8)],
4504                (BitsN.concat
4505                   [BitsN.B(0x5,4),BitsN.B(0x1,1),encode_sse_binop bop],
4506                 Option.SOME(BitsN.B(0x0,8))))))))
4507   | bin_PS(bop,(dst,src)) =>
4508     e_gen_rm_reg
4509       (Z32,
4510        (xmm_mem_to_rm src,
4511         (BitsN.fromNat(BitsN.toNat dst,4),
4512          ([BitsN.B(0xF,8)],
4513           (BitsN.concat
4514              [BitsN.B(0x5,4),BitsN.B(0x1,1),encode_sse_binop bop],
4515            Option.SOME(BitsN.B(0x0,8)))))))
4516   | bin_SD(bop,(dst,src)) =>
4517     (BitsN.B(0xF2,8))
4518       ::
4519       (e_gen_rm_reg
4520          (Z32,
4521           (xmm_mem_to_rm src,
4522            (BitsN.fromNat(BitsN.toNat dst,4),
4523             ([BitsN.B(0xF,8)],
4524              (BitsN.concat
4525                 [BitsN.B(0x5,4),BitsN.B(0x1,1),encode_sse_binop bop],
4526               Option.SOME(BitsN.B(0x0,8))))))))
4527   | bin_SS(bop,(dst,src)) =>
4528     (BitsN.B(0xF3,8))
4529       ::
4530       (e_gen_rm_reg
4531          (Z32,
4532           (xmm_mem_to_rm src,
4533            (BitsN.fromNat(BitsN.toNat dst,4),
4534             ([BitsN.B(0xF,8)],
4535              (BitsN.concat
4536                 [BitsN.B(0x5,4),BitsN.B(0x1,1),encode_sse_binop bop],
4537               Option.SOME(BitsN.B(0x0,8))))))))
4538   | logic_PD(bop,(dst,src)) =>
4539     (BitsN.B(0x66,8))
4540       ::
4541       (e_gen_rm_reg
4542          (Z32,
4543           (xmm_mem_to_rm src,
4544            (BitsN.fromNat(BitsN.toNat dst,4),
4545             ([BitsN.B(0xF,8)],
4546              (BitsN.concat
4547                 [BitsN.B(0x5,4),BitsN.B(0x1,2),
4548                  BitsN.fromNat(Cast.sse_logicToNat bop,2)],
4549               Option.SOME(BitsN.B(0x0,8))))))))
4550   | logic_PS(bop,(dst,src)) =>
4551     e_gen_rm_reg
4552       (Z32,
4553        (xmm_mem_to_rm src,
4554         (BitsN.fromNat(BitsN.toNat dst,4),
4555          ([BitsN.B(0xF,8)],
4556           (BitsN.concat
4557              [BitsN.B(0x5,4),BitsN.B(0x1,2),
4558               BitsN.fromNat(Cast.sse_logicToNat bop,2)],
4559            Option.SOME(BitsN.B(0x0,8)))))))
4560   | CMPPD(bop,(dst,src)) =>
4561     ((BitsN.B(0x66,8))
4562        ::
4563        (e_gen_rm_reg
4564           (Z32,
4565            (xmm_mem_to_rm src,
4566             (BitsN.fromNat(BitsN.toNat dst,4),
4567              ([BitsN.B(0xF,8)],
4568               (BitsN.B(0xC2,8),Option.SOME(BitsN.B(0x0,8)))))))))
4569       @
4570       [BitsN.fromNat(Cast.sse_compareToNat bop,8)]
4571   | CMPPS(bop,(dst,src)) =>
4572     (e_gen_rm_reg
4573        (Z32,
4574         (xmm_mem_to_rm src,
4575          (BitsN.fromNat(BitsN.toNat dst,4),
4576           ([BitsN.B(0xF,8)],(BitsN.B(0xC2,8),Option.SOME(BitsN.B(0x0,8))))))))
4577       @
4578       [BitsN.fromNat(Cast.sse_compareToNat bop,8)]
4579   | CMPSD(bop,(dst,src)) =>
4580     ((BitsN.B(0xF2,8))
4581        ::
4582        (e_gen_rm_reg
4583           (Z32,
4584            (xmm_mem_to_rm src,
4585             (BitsN.fromNat(BitsN.toNat dst,4),
4586              ([BitsN.B(0xF,8)],
4587               (BitsN.B(0xC2,8),Option.SOME(BitsN.B(0x0,8)))))))))
4588       @
4589       [BitsN.fromNat(Cast.sse_compareToNat bop,8)]
4590   | CMPSS(bop,(dst,src)) =>
4591     ((BitsN.B(0xF3,8))
4592        ::
4593        (e_gen_rm_reg
4594           (Z32,
4595            (xmm_mem_to_rm src,
4596             (BitsN.fromNat(BitsN.toNat dst,4),
4597              ([BitsN.B(0xF,8)],
4598               (BitsN.B(0xC2,8),Option.SOME(BitsN.B(0x0,8)))))))))
4599       @
4600       [BitsN.fromNat(Cast.sse_compareToNat bop,8)]
4601   | COMISD(src1,src2) =>
4602     (BitsN.B(0x66,8))
4603       ::
4604       (e_gen_rm_reg
4605          (Z32,
4606           (xmm_mem_to_rm src2,
4607            (BitsN.fromNat(BitsN.toNat src1,4),
4608             ([BitsN.B(0xF,8)],
4609              (BitsN.B(0x2F,8),Option.SOME(BitsN.B(0x0,8))))))))
4610   | COMISS(src1,src2) =>
4611     e_gen_rm_reg
4612       (Z32,
4613        (xmm_mem_to_rm src2,
4614         (BitsN.fromNat(BitsN.toNat src1,4),
4615          ([BitsN.B(0xF,8)],(BitsN.B(0x2F,8),NONE)))))
4616   | CVTDQ2PD(dst,src) =>
4617     (BitsN.B(0xF3,8))
4618       ::
4619       (e_gen_rm_reg
4620          (Z32,
4621           (xmm_mem_to_rm src,
4622            (BitsN.fromNat(BitsN.toNat dst,4),
4623             ([BitsN.B(0xF,8)],
4624              (BitsN.B(0xE6,8),Option.SOME(BitsN.B(0x0,8))))))))
4625   | CVTDQ2PS(dst,src) =>
4626     e_gen_rm_reg
4627       (Z32,
4628        (xmm_mem_to_rm src,
4629         (BitsN.fromNat(BitsN.toNat dst,4),
4630          ([BitsN.B(0xF,8)],(BitsN.B(0x5B,8),Option.SOME(BitsN.B(0x0,8)))))))
4631   | CVTPD2DQ(truncate,(dst,src)) =>
4632     (if truncate then BitsN.B(0x66,8) else BitsN.B(0xF2,8))
4633       ::
4634       (e_gen_rm_reg
4635          (Z32,
4636           (xmm_mem_to_rm src,
4637            (BitsN.fromNat(BitsN.toNat dst,4),
4638             ([BitsN.B(0xF,8)],
4639              (BitsN.B(0xE6,8),Option.SOME(BitsN.B(0x0,8))))))))
4640   | CVTPD2PS(dst,src) =>
4641     (BitsN.B(0x66,8))
4642       ::
4643       (e_gen_rm_reg
4644          (Z32,
4645           (xmm_mem_to_rm src,
4646            (BitsN.fromNat(BitsN.toNat dst,4),
4647             ([BitsN.B(0xF,8)],
4648              (BitsN.B(0x5A,8),Option.SOME(BitsN.B(0x0,8))))))))
4649   | CVTPS2DQ(truncate,(dst,src)) =>
4650     (if truncate then BitsN.B(0xF3,8) else BitsN.B(0x66,8))
4651       ::
4652       (e_gen_rm_reg
4653          (Z32,
4654           (xmm_mem_to_rm src,
4655            (BitsN.fromNat(BitsN.toNat dst,4),
4656             ([BitsN.B(0xF,8)],
4657              (BitsN.B(0x5B,8),Option.SOME(BitsN.B(0x0,8))))))))
4658   | CVTPS2PD(dst,src) =>
4659     e_gen_rm_reg
4660       (Z32,
4661        (xmm_mem_to_rm src,
4662         (BitsN.fromNat(BitsN.toNat dst,4),
4663          ([BitsN.B(0xF,8)],(BitsN.B(0x5A,8),Option.SOME(BitsN.B(0x0,8)))))))
4664   | CVTSD2SI(truncate,(quad,(dst,src))) =>
4665     (BitsN.B(0xF2,8))
4666       ::
4667       (e_gen_rm_reg
4668          (if quad then Z64 else Z32,
4669           (xmm_mem_to_rm src,
4670            (BitsN.fromNat(Cast.ZregToNat dst,4),
4671             ([BitsN.B(0xF,8)],
4672              (BitsN.B(0x2C,8),
4673               Option.SOME(BitsN.fromBool 8 (not truncate))))))))
4674   | CVTSD2SS(dst,src) =>
4675     (BitsN.B(0xF2,8))
4676       ::
4677       (e_gen_rm_reg
4678          (Z32,
4679           (xmm_mem_to_rm src,
4680            (BitsN.fromNat(BitsN.toNat dst,4),
4681             ([BitsN.B(0xF,8)],
4682              (BitsN.B(0x5A,8),Option.SOME(BitsN.B(0x0,8))))))))
4683   | CVTSI2SD(quad,(reg,src)) =>
4684     (BitsN.B(0xF2,8))
4685       ::
4686       (e_gen_rm_reg
4687          (if quad then Z64 else Z32,
4688           (src,
4689            (BitsN.fromNat(BitsN.toNat reg,4),
4690             ([BitsN.B(0xF,8)],
4691              (BitsN.B(0x2A,8),Option.SOME(BitsN.B(0x0,8))))))))
4692   | CVTSI2SS(quad,(reg,src)) =>
4693     (BitsN.B(0xF3,8))
4694       ::
4695       (e_gen_rm_reg
4696          (if quad then Z64 else Z32,
4697           (src,
4698            (BitsN.fromNat(BitsN.toNat reg,4),
4699             ([BitsN.B(0xF,8)],
4700              (BitsN.B(0x2A,8),Option.SOME(BitsN.B(0x0,8))))))))
4701   | CVTSS2SD(dst,src) =>
4702     (BitsN.B(0xF3,8))
4703       ::
4704       (e_gen_rm_reg
4705          (Z32,
4706           (xmm_mem_to_rm src,
4707            (BitsN.fromNat(BitsN.toNat dst,4),
4708             ([BitsN.B(0xF,8)],
4709              (BitsN.B(0x5A,8),Option.SOME(BitsN.B(0x0,8))))))))
4710   | CVTSS2SI(truncate,(quad,(dst,src))) =>
4711     (BitsN.B(0xF3,8))
4712       ::
4713       (e_gen_rm_reg
4714          (if quad then Z64 else Z32,
4715           (xmm_mem_to_rm src,
4716            (BitsN.fromNat(Cast.ZregToNat dst,4),
4717             ([BitsN.B(0xF,8)],
4718              (BitsN.B(0x2C,8),
4719               Option.SOME(BitsN.fromBool 8 (not truncate))))))))
4720   | MOVAP_D_S(double,(xmm_reg dst,src)) =>
4721     (if double then [BitsN.B(0x66,8)] else [])
4722       @
4723       (e_gen_rm_reg
4724          (Z32,
4725           (xmm_mem_to_rm src,
4726            (BitsN.fromNat(BitsN.toNat dst,4),
4727             ([BitsN.B(0xF,8)],
4728              (BitsN.B(0x28,8),Option.SOME(BitsN.B(0x0,8))))))))
4729   | MOVAP_D_S(double,(dst,xmm_reg src)) =>
4730     (if double then [BitsN.B(0x66,8)] else [])
4731       @
4732       (e_gen_rm_reg
4733          (Z32,
4734           (xmm_mem_to_rm dst,
4735            (BitsN.fromNat(BitsN.toNat src,4),
4736             ([BitsN.B(0xF,8)],
4737              (BitsN.B(0x29,8),Option.SOME(BitsN.B(0x0,8))))))))
4738   | MOVAP_D_S _ => []
4739   | MOVUP_D_S(double,(xmm_reg dst,src)) =>
4740     (if double then [BitsN.B(0x66,8)] else [])
4741       @
4742       (e_gen_rm_reg
4743          (Z32,
4744           (xmm_mem_to_rm src,
4745            (BitsN.fromNat(BitsN.toNat dst,4),
4746             ([BitsN.B(0xF,8)],
4747              (BitsN.B(0x10,8),Option.SOME(BitsN.B(0x0,8))))))))
4748   | MOVUP_D_S(double,(dst,xmm_reg src)) =>
4749     (if double then [BitsN.B(0x66,8)] else [])
4750       @
4751       (e_gen_rm_reg
4752          (Z32,
4753           (xmm_mem_to_rm dst,
4754            (BitsN.fromNat(BitsN.toNat src,4),
4755             ([BitsN.B(0xF,8)],
4756              (BitsN.B(0x11,8),Option.SOME(BitsN.B(0x0,8))))))))
4757   | MOVUP_D_S _ => []
4758   | MOVSD(xmm_reg dst,src) =>
4759     (BitsN.B(0xF2,8))
4760       ::
4761       (e_gen_rm_reg
4762          (Z32,
4763           (xmm_mem_to_rm src,
4764            (BitsN.fromNat(BitsN.toNat dst,4),
4765             ([BitsN.B(0xF,8)],
4766              (BitsN.B(0x10,8),Option.SOME(BitsN.B(0x0,8))))))))
4767   | MOVSD(dst,xmm_reg src) =>
4768     (BitsN.B(0xF2,8))
4769       ::
4770       (e_gen_rm_reg
4771          (Z32,
4772           (xmm_mem_to_rm dst,
4773            (BitsN.fromNat(BitsN.toNat src,4),
4774             ([BitsN.B(0xF,8)],
4775              (BitsN.B(0x11,8),Option.SOME(BitsN.B(0x0,8))))))))
4776   | MOVSD _ => []
4777   | MOVSS(xmm_reg dst,src) =>
4778     (BitsN.B(0xF3,8))
4779       ::
4780       (e_gen_rm_reg
4781          (Z32,
4782           (xmm_mem_to_rm src,
4783            (BitsN.fromNat(BitsN.toNat dst,4),
4784             ([BitsN.B(0xF,8)],
4785              (BitsN.B(0x10,8),Option.SOME(BitsN.B(0x0,8))))))))
4786   | MOVSS(dst,xmm_reg src) =>
4787     (BitsN.B(0xF3,8))
4788       ::
4789       (e_gen_rm_reg
4790          (Z32,
4791           (xmm_mem_to_rm dst,
4792            (BitsN.fromNat(BitsN.toNat src,4),
4793             ([BitsN.B(0xF,8)],
4794              (BitsN.B(0x11,8),Option.SOME(BitsN.B(0x0,8))))))))
4795   | MOVSS _ => []
4796   | MOV_D_Q(to_xmm,(quad,(xmm,rm))) =>
4797     (BitsN.B(0x66,8))
4798       ::
4799       (e_gen_rm_reg
4800          (if quad then Z64 else Z32,
4801           (rm,
4802            (BitsN.fromNat(BitsN.toNat xmm,4),
4803             ([BitsN.B(0xF,8)],
4804              (BitsN.concat
4805                 [BitsN.B(0x3,3),BitsN.fromBit(not to_xmm),BitsN.B(0xE,4)],
4806               Option.SOME(BitsN.B(0x0,8))))))))
4807   | MOVQ(xmm_mem m,xmm_reg src) =>
4808     (BitsN.B(0x66,8))
4809       ::
4810       (e_gen_rm_reg
4811          (Z8 false,
4812           (Zm m,
4813            (BitsN.fromNat(BitsN.toNat src,4),
4814             ([BitsN.B(0xF,8)],(BitsN.B(0xD6,8),NONE))))))
4815   | MOVQ(xmm_reg r,src) =>
4816     (BitsN.B(0xF3,8))
4817       ::
4818       (e_gen_rm_reg
4819          (Z8 false,
4820           (xmm_mem_to_rm src,
4821            (BitsN.fromNat(BitsN.toNat r,4),
4822             ([BitsN.B(0xF,8)],(BitsN.B(0x7E,8),NONE))))))
4823   | MOVQ _ => []
4824   | PCMPEQQ(dst,src) =>
4825     (BitsN.B(0x66,8))
4826       ::
4827       (e_gen_rm_reg
4828          (Z32,
4829           (xmm_mem_to_rm src,
4830            (BitsN.fromNat(BitsN.toNat dst,4),
4831             ([BitsN.B(0xF,8),BitsN.B(0x38,8)],
4832              (BitsN.B(0x29,8),Option.SOME(BitsN.B(0x0,8))))))))
4833   | PSRLW_imm(dst,imm) =>
4834     ((BitsN.B(0x66,8))
4835        ::
4836        (e_gen_rm_reg
4837           (Z32,
4838            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4839             (BitsN.B(0x2,4),
4840              ([BitsN.B(0xF,8)],
4841               (BitsN.B(0x71,8),Option.SOME(BitsN.B(0x0,8)))))))))
4842       @
4843       [imm]
4844   | PSRAW_imm(dst,imm) =>
4845     ((BitsN.B(0x66,8))
4846        ::
4847        (e_gen_rm_reg
4848           (Z32,
4849            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4850             (BitsN.B(0x4,4),
4851              ([BitsN.B(0xF,8)],
4852               (BitsN.B(0x71,8),Option.SOME(BitsN.B(0x0,8)))))))))
4853       @
4854       [imm]
4855   | PSLLW_imm(dst,imm) =>
4856     ((BitsN.B(0x66,8))
4857        ::
4858        (e_gen_rm_reg
4859           (Z32,
4860            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4861             (BitsN.B(0x6,4),
4862              ([BitsN.B(0xF,8)],
4863               (BitsN.B(0x71,8),Option.SOME(BitsN.B(0x0,8)))))))))
4864       @
4865       [imm]
4866   | PSRLD_imm(dst,imm) =>
4867     ((BitsN.B(0x66,8))
4868        ::
4869        (e_gen_rm_reg
4870           (Z32,
4871            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4872             (BitsN.B(0x2,4),
4873              ([BitsN.B(0xF,8)],
4874               (BitsN.B(0x72,8),Option.SOME(BitsN.B(0x0,8)))))))))
4875       @
4876       [imm]
4877   | PSRAD_imm(dst,imm) =>
4878     ((BitsN.B(0x66,8))
4879        ::
4880        (e_gen_rm_reg
4881           (Z32,
4882            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4883             (BitsN.B(0x4,4),
4884              ([BitsN.B(0xF,8)],
4885               (BitsN.B(0x72,8),Option.SOME(BitsN.B(0x0,8)))))))))
4886       @
4887       [imm]
4888   | PSLLD_imm(dst,imm) =>
4889     ((BitsN.B(0x66,8))
4890        ::
4891        (e_gen_rm_reg
4892           (Z32,
4893            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4894             (BitsN.B(0x6,4),
4895              ([BitsN.B(0xF,8)],
4896               (BitsN.B(0x72,8),Option.SOME(BitsN.B(0x0,8)))))))))
4897       @
4898       [imm]
4899   | PSRLQ_imm(dst,imm) =>
4900     ((BitsN.B(0x66,8))
4901        ::
4902        (e_gen_rm_reg
4903           (Z32,
4904            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4905             (BitsN.B(0x2,4),
4906              ([BitsN.B(0xF,8)],
4907               (BitsN.B(0x73,8),Option.SOME(BitsN.B(0x0,8)))))))))
4908       @
4909       [imm]
4910   | PSRLDQ(dst,imm) =>
4911     ((BitsN.B(0x66,8))
4912        ::
4913        (e_gen_rm_reg
4914           (Z32,
4915            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4916             (BitsN.B(0x3,4),
4917              ([BitsN.B(0xF,8)],
4918               (BitsN.B(0x73,8),Option.SOME(BitsN.B(0x0,8)))))))))
4919       @
4920       [imm]
4921   | PSLLQ_imm(dst,imm) =>
4922     ((BitsN.B(0x66,8))
4923        ::
4924        (e_gen_rm_reg
4925           (Z32,
4926            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4927             (BitsN.B(0x6,4),
4928              ([BitsN.B(0xF,8)],
4929               (BitsN.B(0x73,8),Option.SOME(BitsN.B(0x0,8)))))))))
4930       @
4931       [imm]
4932   | PSLLDQ(dst,imm) =>
4933     ((BitsN.B(0x66,8))
4934        ::
4935        (e_gen_rm_reg
4936           (Z32,
4937            (Zr((Cast.natToZreg o BitsN.toNat) dst),
4938             (BitsN.B(0x7,4),
4939              ([BitsN.B(0xF,8)],
4940               (BitsN.B(0x73,8),Option.SOME(BitsN.B(0x0,8)))))))))
4941       @
4942       [imm]
4943   | SQRTPD(dst,src) =>
4944     (BitsN.B(0x66,8))
4945       ::
4946       (e_gen_rm_reg
4947          (Z32,
4948           (xmm_mem_to_rm src,
4949            (BitsN.fromNat(BitsN.toNat dst,4),
4950             ([BitsN.B(0xF,8)],
4951              (BitsN.B(0x51,8),Option.SOME(BitsN.B(0x0,8))))))))
4952   | SQRTSD(dst,src) =>
4953     (BitsN.B(0xF2,8))
4954       ::
4955       (e_gen_rm_reg
4956          (Z32,
4957           (xmm_mem_to_rm src,
4958            (BitsN.fromNat(BitsN.toNat dst,4),
4959             ([BitsN.B(0xF,8)],
4960              (BitsN.B(0x51,8),Option.SOME(BitsN.B(0x0,8))))))))
4961   | SQRTPS(dst,src) =>
4962     e_gen_rm_reg
4963       (Z32,
4964        (xmm_mem_to_rm src,
4965         (BitsN.fromNat(BitsN.toNat dst,4),
4966          ([BitsN.B(0xF,8)],(BitsN.B(0x51,8),Option.SOME(BitsN.B(0x0,8)))))))
4967   | SQRTSS(dst,src) =>
4968     (BitsN.B(0xF3,8))
4969       ::
4970       (e_gen_rm_reg
4971          (Z32,
4972           (xmm_mem_to_rm src,
4973            (BitsN.fromNat(BitsN.toNat dst,4),
4974             ([BitsN.B(0xF,8)],
4975              (BitsN.B(0x51,8),Option.SOME(BitsN.B(0x0,8))))))));
4976
4977fun encode i =
4978  case i of
4979     SSE j => encode_sse j
4980   | Zbinop(bop,(sz,Zrm_i(rm,imm))) =>
4981     (if bop = Ztest
4982        then if is_rax rm
4983               then e_rax_imm(sz,(imm,BitsN.B(0xA8,8)))
4984             else e_rm_imm(sz,(rm,(imm,(BitsN.B(0x0,4),BitsN.B(0xF6,8)))))
4985      else let
4986             val opc = BitsN.fromNat(Cast.Zbinop_nameToNat bop,4)
4987           in
4988             if BitsN.bit(opc,3)
4989               then if imm = (BitsN.B(0x1,64))
4990                      then e_gen_rm_reg
4991                             (sz,
4992                              (rm,
4993                               (BitsN.&&(opc,BitsN.B(0x7,4)),
4994                                ([],(BitsN.B(0xD0,8),NONE)))))
4995                    else e_rm_imm8
4996                           (sz,
4997                            (rm,
4998                             (imm,
4999                              (BitsN.&&(opc,BitsN.B(0x7,4)),
5000                               BitsN.B(0xC0,8)))))
5001             else if (not_byte sz) andalso (not((e_imm8 imm) = []))
5002               then e_rm_imm8(sz,(rm,(imm,(opc,BitsN.B(0x83,8)))))
5003             else if is_rax rm
5004               then e_rax_imm
5005                      (sz,
5006                       (imm,
5007                        BitsN.concat
5008                          [BitsN.B(0x0,2),BitsN.bits(2,0) opc,
5009                           BitsN.B(0x4,3)]))
5010             else e_rm_imm(sz,(rm,(imm,(opc,BitsN.B(0x80,8)))))
5011           end)
5012   | Zbinop(bop,(sz,Zrm_r(rm,r))) =>
5013     (if bop = Ztest
5014        then e_gen_rm_reg
5015               (sz,
5016                (rm,
5017                 (BitsN.fromNat(Cast.ZregToNat r,4),
5018                  ([],(BitsN.B(0x84,8),NONE)))))
5019      else let
5020             val opc = BitsN.fromNat(Cast.Zbinop_nameToNat bop,4)
5021           in
5022             if BitsN.bit(opc,3)
5023               then if r = RCX
5024                      then e_gen_rm_reg
5025                             (sz,
5026                              (rm,
5027                               (BitsN.&&(opc,BitsN.B(0x7,4)),
5028                                ([],(BitsN.B(0xD2,8),NONE)))))
5029                    else []
5030             else e_gen_rm_reg
5031                    (sz,
5032                     (rm,
5033                      (BitsN.fromNat(Cast.ZregToNat r,4),
5034                       ([],
5035                        (BitsN.concat
5036                           [BitsN.B(0x0,2),BitsN.bits(2,0) opc,
5037                            BitsN.B(0x0,3)],NONE)))))
5038           end)
5039   | Zbinop(bop,(sz,Zr_rm(r,rm))) =>
5040     let
5041       val opc = BitsN.fromNat(Cast.Zbinop_nameToNat bop,4)
5042     in
5043       if BitsN.bit(opc,3)
5044         then []
5045       else e_gen_rm_reg
5046              (sz,
5047               (rm,
5048                (BitsN.fromNat(Cast.ZregToNat r,4),
5049                 ([],
5050                  (BitsN.concat
5051                     [BitsN.B(0x0,2),BitsN.bits(2,0) opc,BitsN.B(0x2,3)],
5052                   NONE)))))
5053     end
5054   | Zbit_test(bt,(sz,Zrm_r(rm,r))) =>
5055     e_gen_rm_reg
5056       (sz,
5057        (rm,
5058         (BitsN.fromNat(Cast.ZregToNat r,4),
5059          ([BitsN.B(0xF,8)],
5060           (BitsN.concat
5061              [BitsN.B(0x5,3),
5062               BitsN.fromNat(Cast.Zbit_test_nameToNat bt,2),BitsN.B(0x3,3)],
5063            NONE)))))
5064   | Zbit_test(bt,(sz,Zrm_i(rm,i))) =>
5065     e_rm_imm8b
5066       (sz,
5067        (rm,
5068         (i,
5069          (BitsN.fromNat(Cast.Zbit_test_nameToNat bt,4),
5070           [BitsN.B(0xF,8),BitsN.B(0xBA,8)]))))
5071   | Zbit_test _ => []
5072   | Zcall(Zrm rm) => e_opc(BitsN.B(0xFF,8),(BitsN.B(0x2,3),rm))
5073   | Zcall(Zimm imm) =>
5074     (case e_imm32 imm of [] => [] | l => (BitsN.B(0xE8,8)) :: l)
5075   | Zcmc => [BitsN.B(0xF5,8)]
5076   | Zclc => [BitsN.B(0xF8,8)]
5077   | Zstc => [BitsN.B(0xF9,8)]
5078   | Zcmpxchg(sz,(rm,r)) =>
5079     e_gen_rm_reg
5080       (sz,
5081        (rm,
5082         (BitsN.fromNat(Cast.ZregToNat r,4),
5083          ([BitsN.B(0xF,8)],(BitsN.B(0xB0,8),NONE)))))
5084   | Zdiv(sz,rm) =>
5085     e_gen_rm_reg(sz,(rm,(BitsN.B(0x6,4),([],(BitsN.B(0xF6,8),NONE)))))
5086   | Zidiv(sz,rm) =>
5087     e_gen_rm_reg(sz,(rm,(BitsN.B(0x7,4),([],(BitsN.B(0xF6,8),NONE)))))
5088   | Zjcc(cond,imm) =>
5089     let
5090       val (s,l) = e_imm_8_32 imm
5091     in
5092       if cond = Z_ALWAYS
5093         then if s = 1
5094                then (BitsN.B(0xEB,8)) :: l
5095              else if s = 4 then (BitsN.B(0xE9,8)) :: l else []
5096       else if s = 1
5097         then (BitsN.@@
5098                 (BitsN.B(0x7,4),BitsN.fromNat(Cast.ZcondToNat cond,4)))
5099                ::
5100                l
5101       else if s = 4
5102         then [BitsN.B(0xF,8),
5103               BitsN.@@
5104                 (BitsN.B(0x8,4),BitsN.fromNat(Cast.ZcondToNat cond,4))]
5105                @
5106                l
5107       else []
5108     end
5109   | Zjmp rm => e_opc(BitsN.B(0xFF,8),(BitsN.B(0x4,3),rm))
5110   | Zlea(Z8 _,_) => []
5111   | Zlea(sz,Zr_rm(r,Zm m)) =>
5112     e_gen_rm_reg
5113       (sz,
5114        (Zm m,
5115         (BitsN.fromNat(Cast.ZregToNat r,4),([],(BitsN.B(0x8D,8),NONE)))))
5116   | Zlea _ => []
5117   | Zleave => [BitsN.B(0xC9,8)]
5118   | Zloop(cond,imm) =>
5119     (case (cond,e_imm8 imm) of
5120         (_,[]) => []
5121       | (Z_NE,l) => (BitsN.B(0xE0,8)) :: l
5122       | (Z_E,l) => (BitsN.B(0xE1,8)) :: l
5123       | (Z_ALWAYS,l) => (BitsN.B(0xE2,8)) :: l
5124       | _ => [])
5125   | Zmonop(Zinc,(Z8 _,rm)) => e_opc(BitsN.B(0xFE,8),(BitsN.B(0x0,3),rm))
5126   | Zmonop(Zdec,(Z8 _,rm)) => e_opc(BitsN.B(0xFE,8),(BitsN.B(0x1,3),rm))
5127   | Zmonop(Zinc,(sz,rm)) =>
5128     e_gen_rm_reg(sz,(rm,(BitsN.B(0x0,4),([],(BitsN.B(0xFF,8),NONE)))))
5129   | Zmonop(Zdec,(sz,rm)) =>
5130     e_gen_rm_reg(sz,(rm,(BitsN.B(0x1,4),([],(BitsN.B(0xFF,8),NONE)))))
5131   | Zmonop(Znot,(sz,rm)) =>
5132     e_gen_rm_reg(sz,(rm,(BitsN.B(0x2,4),([],(BitsN.B(0xF6,8),NONE)))))
5133   | Zmonop(Zneg,(sz,rm)) =>
5134     e_gen_rm_reg(sz,(rm,(BitsN.B(0x3,4),([],(BitsN.B(0xF6,8),NONE)))))
5135   | Zmov(Z_ALWAYS,(sz,Zrm_r(rm,r))) =>
5136     e_gen_rm_reg
5137       (sz,
5138        (rm,
5139         (BitsN.fromNat(Cast.ZregToNat r,4),([],(BitsN.B(0x88,8),NONE)))))
5140   | Zmov(Z_ALWAYS,(sz,Zr_rm(r,rm))) =>
5141     e_gen_rm_reg
5142       (sz,
5143        (rm,
5144         (BitsN.fromNat(Cast.ZregToNat r,4),([],(BitsN.B(0x8A,8),NONE)))))
5145   | Zmov(Z_ALWAYS,(sz,Zrm_i(Zr reg,imm))) =>
5146     let
5147       val r = BitsN.fromNat(Cast.ZregToNat reg,4)
5148       val rex = if BitsN.bit(r,3) then BitsN.B(0x1,4) else BitsN.B(0x0,4)
5149     in
5150       case e_opsize_imm(sz,(rex,(imm,false))) of
5151          Option.SOME(prefixes,(v,l)) =>
5152            List.concat
5153              [prefixes,
5154               [BitsN.concat
5155                  [BitsN.B(0xB,4),BitsN.fromNat(BitsN.toNat v,1),
5156                   BitsN.bits(2,0) r]],l]
5157        | NONE => []
5158     end
5159   | Zmov(Z_ALWAYS,(sz,Zrm_i(rm,imm))) =>
5160     e_rm_imm(sz,(rm,(imm,(BitsN.B(0x0,4),BitsN.B(0xC6,8)))))
5161   | Zmov(cond,(Z8 _,_)) => []
5162   | Zmov(cond,(sz,Zr_rm(r,rm))) =>
5163     e_gen_rm_reg
5164       (sz,
5165        (rm,
5166         (BitsN.fromNat(Cast.ZregToNat r,4),
5167          ([BitsN.B(0xF,8)],
5168           (BitsN.B(0x40,8),
5169            Option.SOME(BitsN.fromNat(Cast.ZcondToNat cond,8)))))))
5170   | Zmov _ => []
5171   | Zmovsx(Z32,(Zr_rm(r,rm),Z64)) =>
5172     e_gen_rm_reg
5173       (Z64,
5174        (rm,
5175         (BitsN.fromNat(Cast.ZregToNat r,4),([],(BitsN.B(0x63,8),NONE)))))
5176   | Zmovsx(sz1,(Zr_rm(r,rm),sz2)) =>
5177     (if Nat.<(Zsize_width sz1,Zsize_width sz2)
5178        then let
5179               val v =
5180                 if sz1 = Z16 then BitsN.B(0x1,8) else BitsN.B(0x0,8)
5181             in
5182               e_gen_rm_reg
5183                 (sz2,
5184                  (rm,
5185                   (BitsN.fromNat(Cast.ZregToNat r,4),
5186                    ([BitsN.B(0xF,8)],(BitsN.B(0xBE,8),Option.SOME v)))))
5187             end
5188      else [])
5189   | Zmovsx _ => []
5190   | Zmovzx(sz1,(Zr_rm(r,rm),sz2)) =>
5191     (if (Nat.<(Zsize_width sz1,Zsize_width sz2)) andalso (not(sz1 = Z32))
5192        then let
5193               val v =
5194                 if sz1 = Z16 then BitsN.B(0x1,8) else BitsN.B(0x0,8)
5195             in
5196               e_gen_rm_reg
5197                 (sz2,
5198                  (rm,
5199                   (BitsN.fromNat(Cast.ZregToNat r,4),
5200                    ([BitsN.B(0xF,8)],(BitsN.B(0xB6,8),Option.SOME v)))))
5201             end
5202      else [])
5203   | Zmovzx _ => []
5204   | Zmul(sz,rm) =>
5205     e_gen_rm_reg(sz,(rm,(BitsN.B(0x4,4),([],(BitsN.B(0xF6,8),NONE)))))
5206   | Zimul(sz,rm) =>
5207     e_gen_rm_reg(sz,(rm,(BitsN.B(0x5,4),([],(BitsN.B(0xF6,8),NONE)))))
5208   | Zimul2(Z8 _,_) => []
5209   | Zimul2(sz,(r,rm)) =>
5210     e_gen_rm_reg
5211       (sz,
5212        (rm,
5213         (BitsN.fromNat(Cast.ZregToNat r,4),
5214          ([BitsN.B(0xF,8)],(BitsN.B(0xAF,8),NONE)))))
5215   | Zimul3(Z8 _,_) => []
5216   | Zimul3(sz,(r,(rm,imm))) =>
5217     let
5218       val (s,l) = e_imm_8_32 imm
5219     in
5220       if s = 1
5221         then (e_gen_rm_reg
5222                 (sz,
5223                  (rm,
5224                   (BitsN.fromNat(Cast.ZregToNat r,4),
5225                    ([],(BitsN.B(0x6B,8),NONE))))))
5226                @
5227                l
5228       else if s = 4
5229         then (e_gen_rm_reg
5230                 (sz,
5231                  (rm,
5232                   (BitsN.fromNat(Cast.ZregToNat r,4),
5233                    ([],(BitsN.B(0x69,8),NONE))))))
5234                @
5235                l
5236       else []
5237     end
5238   | Znop 1 => [BitsN.B(0x90,8)]
5239   | Znop 2 => [BitsN.B(0x66,8),BitsN.B(0x90,8)]
5240   | Znop 3 => [BitsN.B(0xF,8),BitsN.B(0x1F,8),BitsN.B(0x0,8)]
5241   | Znop 4 =>
5242     [BitsN.B(0xF,8),BitsN.B(0x1F,8),BitsN.B(0x40,8),BitsN.B(0x0,8)]
5243   | Znop 5 =>
5244     [BitsN.B(0xF,8),BitsN.B(0x1F,8),BitsN.B(0x44,8),BitsN.B(0x0,8),
5245      BitsN.B(0x0,8)]
5246   | Znop 6 =>
5247     [BitsN.B(0x66,8),BitsN.B(0xF,8),BitsN.B(0x1F,8),BitsN.B(0x44,8),
5248      BitsN.B(0x0,8),BitsN.B(0x0,8)]
5249   | Znop 7 =>
5250     [BitsN.B(0xF,8),BitsN.B(0x1F,8),BitsN.B(0x80,8),BitsN.B(0x0,8),
5251      BitsN.B(0x0,8),BitsN.B(0x0,8),BitsN.B(0x0,8)]
5252   | Znop 8 =>
5253     [BitsN.B(0xF,8),BitsN.B(0x1F,8),BitsN.B(0x84,8),BitsN.B(0x0,8),
5254      BitsN.B(0x0,8),BitsN.B(0x0,8),BitsN.B(0x0,8),BitsN.B(0x0,8)]
5255   | Znop 9 =>
5256     [BitsN.B(0x66,8),BitsN.B(0xF,8),BitsN.B(0x1F,8),BitsN.B(0x84,8),
5257      BitsN.B(0x0,8),BitsN.B(0x0,8),BitsN.B(0x0,8),BitsN.B(0x0,8),
5258      BitsN.B(0x0,8)]
5259   | Znop _ => []
5260   | Zpop(Zr reg) =>
5261     let
5262       val r = BitsN.fromNat(Cast.ZregToNat reg,4)
5263     in
5264       (if BitsN.bit(r,3) then [BitsN.B(0x49,8)] else [])
5265         @
5266         [BitsN.@@(BitsN.B(0xB,5),BitsN.bits(2,0) r)]
5267     end
5268   | Zpop rm => e_opc(BitsN.B(0x8F,8),(BitsN.B(0x0,3),rm))
5269   | Zpush(Zrm(Zr reg)) =>
5270     let
5271       val r = BitsN.fromNat(Cast.ZregToNat reg,4)
5272     in
5273       (if BitsN.bit(r,3) then [BitsN.B(0x49,8)] else [])
5274         @
5275         [BitsN.@@(BitsN.B(0xA,5),BitsN.bits(2,0) r)]
5276     end
5277   | Zpush(Zrm rm) => e_opc(BitsN.B(0xFF,8),(BitsN.B(0x6,3),rm))
5278   | Zpush(Zimm imm) =>
5279     let
5280       val (s,l) = e_imm_8_32 imm
5281     in
5282       if s = 1
5283         then (BitsN.B(0x6A,8)) :: l
5284       else if s = 4 then (BitsN.B(0x68,8)) :: l else []
5285     end
5286   | Zret(BitsN.B(0x0,64)) => [BitsN.B(0xC3,8)]
5287   | Zret imm =>
5288     (case e_imm16 imm of [] => [] | l => (BitsN.B(0xC2,8)) :: l)
5289   | Zset(Z_ALWAYS,(have_rex,rm)) => []
5290   | Zset(cond,(have_rex,rm)) =>
5291     e_gen_rm_reg
5292       (Z8 have_rex,
5293        (rm,
5294         (BitsN.B(0x0,4),
5295          ([BitsN.B(0xF,8)],
5296           (BitsN.@@(BitsN.B(0x9,4),BitsN.fromNat(Cast.ZcondToNat cond,4)),
5297            NONE)))))
5298   | Zxadd(sz,(rm,r)) =>
5299     e_gen_rm_reg
5300       (sz,
5301        (rm,
5302         (BitsN.fromNat(Cast.ZregToNat r,4),
5303          ([BitsN.B(0xF,8)],(BitsN.B(0xC0,8),NONE)))))
5304   | Zxchg(sz,(rm,reg)) =>
5305     (if (not_byte sz) andalso ((reg = RAX) orelse (is_rax rm))
5306        then let
5307               val r = BitsN.fromNat(Cast.ZregToNat reg,4)
5308               val rex =
5309                 if BitsN.bit(r,3) then BitsN.B(0x1,4) else BitsN.B(0x0,4)
5310               val (prefixes,v) = e_opsize(sz,rex)
5311             in
5312               if v = (BitsN.B(0x1,8))
5313                 then prefixes
5314                        @
5315                        [BitsN.@@(BitsN.B(0x12,5),BitsN.bits(2,0) r)]
5316               else []
5317             end
5318      else e_gen_rm_reg
5319             (sz,
5320              (rm,
5321               (BitsN.fromNat(Cast.ZregToNat reg,4),
5322                ([],(BitsN.B(0x86,8),NONE))))));
5323
5324fun stripLeftSpaces s = L3.snd(L3.splitl(fn c => Char.isSpace c,s));
5325
5326fun stripSpaces s =
5327  L3.fst(L3.splitr(fn c => Char.isSpace c,stripLeftSpaces s));
5328
5329fun p_number s =
5330  case String.explode(stripSpaces s) of
5331     #"0" :: (#"b" :: t) => Nat.fromBinString(String.implode t)
5332   | #"0" :: (#"x" :: t) => Nat.fromHexString(String.implode t)
5333   | _ => Nat.fromString s;
5334
5335fun p_bin_or_hex_number s =
5336  case String.explode(stripSpaces s) of
5337     #"0" :: (#"b" :: t) => Nat.fromBinString(String.implode t)
5338   | #"0" :: (#"x" :: t) => Nat.fromHexString(String.implode t)
5339   | t => Nat.fromHexString(String.implode t);
5340
5341fun p_signed_number s =
5342  case String.explode(stripSpaces s) of
5343     #"-" :: t =>
5344       (case p_number(String.implode t) of
5345           Option.SOME n => Option.SOME(IntInf.~(Nat.toInt n))
5346         | NONE => NONE)
5347   | #"+" :: t =>
5348     (case p_number(String.implode t) of
5349         Option.SOME n => Option.SOME(Nat.toInt n)
5350       | NONE => NONE)
5351   | t =>
5352     (case p_number(String.implode t) of
5353         Option.SOME n => Option.SOME(Nat.toInt n)
5354       | NONE => NONE);
5355
5356fun p_imm8 s =
5357  case p_signed_number s of
5358     Option.SOME n =>
5359       (if (IntInf.<=(IntInf.~ 128,n)) andalso (IntInf.<=(n,255))
5360          then Option.SOME(BitsN.signExtend 64 (BitsN.fromInt(n,8)))
5361        else NONE)
5362   | NONE => NONE;
5363
5364fun p_imm16 s =
5365  case p_signed_number s of
5366     Option.SOME n =>
5367       (if (IntInf.<=(IntInf.~ 32768,n)) andalso (IntInf.<=(n,65535))
5368          then Option.SOME(BitsN.signExtend 64 (BitsN.fromInt(n,16)))
5369        else NONE)
5370   | NONE => NONE;
5371
5372fun p_imm32 s =
5373  case p_signed_number s of
5374     Option.SOME n =>
5375       (if (IntInf.<=(IntInf.~ 2147483648,n)) andalso
5376           (IntInf.<=(n,4294967295))
5377          then Option.SOME(BitsN.signExtend 64 (BitsN.fromInt(n,32)))
5378        else NONE)
5379   | NONE => NONE;
5380
5381fun p_imm64 s =
5382  case p_signed_number s of
5383     Option.SOME n =>
5384       (if (IntInf.<=(IntInf.~ 9223372036854775808,n)) andalso
5385           (IntInf.<=(n,18446744073709551615))
5386          then Option.SOME(BitsN.fromInt(n,64))
5387        else NONE)
5388   | NONE => NONE;
5389
5390fun p_imm_of_size (sz,s) =
5391  case sz of
5392     Z8 _ => p_imm8 s
5393   | Z16 => p_imm16 s
5394   | Z32 => p_imm32 s
5395   | Z64 => p_imm64 s;
5396
5397fun readBytes (acc,l) =
5398  case l of
5399     [] => Option.SOME(List.rev acc)
5400   | h :: t =>
5401     (case p_bin_or_hex_number h of
5402         Option.SOME n =>
5403           (if Nat.<=(n,255)
5404              then readBytes((BitsN.fromNat(n,8)) :: acc,t)
5405            else NONE)
5406       | NONE => NONE);
5407
5408fun p_bytes s =
5409  readBytes([],L3.uncurry String.tokens (fn c => Char.isSpace c,s));
5410
5411fun p_label s =
5412  case L3.uncurry String.tokens (fn c => Char.isSpace c,s) of
5413     [t] =>
5414       let
5415         val (l,r) =
5416           L3.splitl
5417             (fn c => (Char.isAlphaNum c) orelse (Set.mem(c,[#"_",#"."])),
5418              t)
5419       in
5420         if (r = "") andalso
5421            ((not(l = "")) andalso (not(Char.isDigit(L3.strHd l))))
5422           then Option.SOME l
5423         else NONE
5424       end
5425   | _ => NONE;
5426
5427fun p_register s =
5428  case stripSpaces s of
5429     "al" => Option.SOME(Z8 false,RAX)
5430   | "cl" => Option.SOME(Z8 false,RCX)
5431   | "dl" => Option.SOME(Z8 false,RDX)
5432   | "bl" => Option.SOME(Z8 false,RBX)
5433   | "ah" => Option.SOME(Z8 false,RSP)
5434   | "ch" => Option.SOME(Z8 false,RBP)
5435   | "dh" => Option.SOME(Z8 false,RSI)
5436   | "bh" => Option.SOME(Z8 false,RDI)
5437   | "spl" => Option.SOME(Z8 true,RSP)
5438   | "bpl" => Option.SOME(Z8 true,RBP)
5439   | "sil" => Option.SOME(Z8 true,RSI)
5440   | "dil" => Option.SOME(Z8 true,RDI)
5441   | "r8b" => Option.SOME(Z8 false,zR8)
5442   | "r9b" => Option.SOME(Z8 false,zR9)
5443   | "r10b" => Option.SOME(Z8 false,zR10)
5444   | "r11b" => Option.SOME(Z8 false,zR11)
5445   | "r12b" => Option.SOME(Z8 false,zR12)
5446   | "r13b" => Option.SOME(Z8 false,zR13)
5447   | "r14b" => Option.SOME(Z8 false,zR14)
5448   | "r15b" => Option.SOME(Z8 false,zR15)
5449   | "ax" => Option.SOME(Z16,RAX)
5450   | "cx" => Option.SOME(Z16,RCX)
5451   | "dx" => Option.SOME(Z16,RDX)
5452   | "bx" => Option.SOME(Z16,RBX)
5453   | "sp" => Option.SOME(Z16,RSP)
5454   | "bp" => Option.SOME(Z16,RBP)
5455   | "si" => Option.SOME(Z16,RSI)
5456   | "di" => Option.SOME(Z16,RDI)
5457   | "r8w" => Option.SOME(Z16,zR8)
5458   | "r9w" => Option.SOME(Z16,zR9)
5459   | "r10w" => Option.SOME(Z16,zR10)
5460   | "r11w" => Option.SOME(Z16,zR11)
5461   | "r12w" => Option.SOME(Z16,zR12)
5462   | "r13w" => Option.SOME(Z16,zR13)
5463   | "r14w" => Option.SOME(Z16,zR14)
5464   | "r15w" => Option.SOME(Z16,zR15)
5465   | "eax" => Option.SOME(Z32,RAX)
5466   | "ecx" => Option.SOME(Z32,RCX)
5467   | "edx" => Option.SOME(Z32,RDX)
5468   | "ebx" => Option.SOME(Z32,RBX)
5469   | "esp" => Option.SOME(Z32,RSP)
5470   | "ebp" => Option.SOME(Z32,RBP)
5471   | "esi" => Option.SOME(Z32,RSI)
5472   | "edi" => Option.SOME(Z32,RDI)
5473   | "r8d" => Option.SOME(Z32,zR8)
5474   | "r9d" => Option.SOME(Z32,zR9)
5475   | "r10d" => Option.SOME(Z32,zR10)
5476   | "r11d" => Option.SOME(Z32,zR11)
5477   | "r12d" => Option.SOME(Z32,zR12)
5478   | "r13d" => Option.SOME(Z32,zR13)
5479   | "r14d" => Option.SOME(Z32,zR14)
5480   | "r15d" => Option.SOME(Z32,zR15)
5481   | "rax" => Option.SOME(Z64,RAX)
5482   | "rcx" => Option.SOME(Z64,RCX)
5483   | "rdx" => Option.SOME(Z64,RDX)
5484   | "rbx" => Option.SOME(Z64,RBX)
5485   | "rsp" => Option.SOME(Z64,RSP)
5486   | "rbp" => Option.SOME(Z64,RBP)
5487   | "rsi" => Option.SOME(Z64,RSI)
5488   | "rdi" => Option.SOME(Z64,RDI)
5489   | "r8" => Option.SOME(Z64,zR8)
5490   | "r9" => Option.SOME(Z64,zR9)
5491   | "r10" => Option.SOME(Z64,zR10)
5492   | "r11" => Option.SOME(Z64,zR11)
5493   | "r12" => Option.SOME(Z64,zR12)
5494   | "r13" => Option.SOME(Z64,zR13)
5495   | "r14" => Option.SOME(Z64,zR14)
5496   | "r15" => Option.SOME(Z64,zR15)
5497   | _ => NONE;
5498
5499fun p_xreg s =
5500  case String.explode(stripSpaces s) of
5501     #"x" :: (#"m" :: (#"m" :: n)) =>
5502       (case Nat.fromString(String.implode n) of
5503           Option.SOME n =>
5504             (if Nat.<(n,8) then Option.SOME(BitsN.fromNat(n,3)) else NONE)
5505         | _ => NONE)
5506   | _ => NONE;
5507
5508fun p_scale s =
5509  case stripSpaces s of
5510     "1" => Option.SOME(BitsN.B(0x0,2))
5511   | "2" => Option.SOME(BitsN.B(0x1,2))
5512   | "4" => Option.SOME(BitsN.B(0x2,2))
5513   | "8" => Option.SOME(BitsN.B(0x3,2))
5514   | _ => NONE;
5515
5516fun p_scale_index s =
5517  case L3.uncurry String.fields (fn c => c = #"*",stripSpaces s) of
5518     [v1,v2] =>
5519       (case p_register v1 of
5520           Option.SOME(sz,r) =>
5521             (case p_scale v2 of
5522                 Option.SOME n =>
5523                   (if sz = Z64 then Option.SOME(n,r) else NONE)
5524               | NONE => NONE)
5525         | NONE =>
5526           (case p_scale v1 of
5527               Option.SOME n =>
5528                 (case p_register v2 of
5529                     Option.SOME(sz,r) =>
5530                       (if sz = Z64 then Option.SOME(n,r) else NONE)
5531                   | NONE => NONE)
5532             | NONE => NONE))
5533   | _ => NONE;
5534
5535fun p_disp (b,s) =
5536  case p_imm32 s of Option.SOME imm => Option.SOME(b,imm) | NONE => NONE;
5537
5538fun p_rip_disp s =
5539  let
5540    val (l,r) =
5541      L3.splitl(fn c => not(Set.mem(c,[#"+",#"-"])),stripLeftSpaces s)
5542  in
5543    if l = ""
5544      then if r = ""
5545             then NONE
5546           else case p_rip_disp(L3.strTl r) of
5547                   Option.SOME(ripfirst,imm) =>
5548                     (if (L3.strHd r) = #"+"
5549                        then Option.SOME(ripfirst,imm)
5550                      else if ripfirst
5551                        then NONE
5552                      else Option.SOME(false,BitsN.neg imm))
5553                 | NONE => NONE
5554    else if r = ""
5555      then if (stripSpaces l) = "rip"
5556             then Option.SOME(true,BitsN.B(0x0,64))
5557           else NONE
5558    else if (stripSpaces l) = "rip"
5559      then p_disp(true,r)
5560    else if (stripSpaces(L3.strTl r)) = "rip"
5561      then p_disp(false,l)
5562    else NONE
5563  end;
5564
5565fun p_parts (m,s) =
5566  let
5567    val (v'0,(v'1,(v'2,v'3))) = (stripLeftSpaces s,m)
5568  in
5569    case (String.explode v'0,(v'1,(v'2,v'3))) of
5570       ([],_) => m
5571     | (#"-" :: t,(si,(b,disp))) =>
5572       let
5573         val (l,r) =
5574           L3.splitl(fn c => not(Set.mem(c,[#"+",#"-"])),String.implode t)
5575       in
5576         case (p_imm32("-" ^ l),disp) of
5577            (NONE,_) => (NONE,(NONE,NONE))
5578          | (Option.SOME imm2,Option.SOME imm1) =>
5579            p_parts((si,(b,Option.SOME(BitsN.+(imm2,imm1)))),r)
5580          | (Option.SOME imm,NONE) => p_parts((si,(b,Option.SOME imm)),r)
5581       end
5582     | (#"+" :: t,(si,(b,disp))) =>
5583       let
5584         val (l,r) =
5585           L3.splitl(fn c => not(Set.mem(c,[#"+",#"-"])),String.implode t)
5586       in
5587         case (p_imm32 l,disp) of
5588            (NONE,_) =>
5589              (case (p_register l,(si,b)) of
5590                  (Option.SOME(sz,rg),(_,NONE)) =>
5591                    (if sz = Z64
5592                       then p_parts((si,(Option.SOME rg,disp)),r)
5593                     else (NONE,(NONE,NONE)))
5594                | (Option.SOME(sz,rg),(NONE,Option.SOME _)) =>
5595                  (if sz = Z64
5596                     then p_parts
5597                            ((Option.SOME(BitsN.B(0x0,2),rg),(b,disp)),r)
5598                   else (NONE,(NONE,NONE)))
5599                | (NONE,(NONE,_)) =>
5600                  (case p_scale_index l of
5601                      NONE => (NONE,(NONE,NONE))
5602                    | si => p_parts((si,(b,disp)),r))
5603                | (NONE,(Option.SOME(n1,r1),NONE)) =>
5604                  (case p_scale_index l of
5605                      NONE => (NONE,(NONE,NONE))
5606                    | Option.SOME(n2,r2) =>
5607                      (if n1 = (BitsN.B(0x0,2))
5608                         then p_parts
5609                                ((Option.SOME(n2,r2),(Option.SOME r1,disp)),
5610                                 r)
5611                       else if n2 = (BitsN.B(0x0,2))
5612                         then p_parts
5613                                ((Option.SOME(n1,r1),(Option.SOME r2,disp)),
5614                                 r)
5615                       else (NONE,(NONE,NONE))))
5616                | _ => (NONE,(NONE,NONE)))
5617          | (Option.SOME imm2,Option.SOME imm1) =>
5618            p_parts((si,(b,Option.SOME(BitsN.+(imm2,imm1)))),r)
5619          | (Option.SOME imm,NONE) => p_parts((si,(b,Option.SOME imm)),r)
5620       end
5621     | (t,(si,(b,disp))) =>
5622       let
5623         val (l,r) =
5624           L3.splitl(fn c => not(Set.mem(c,[#"+",#"-"])),String.implode t)
5625       in
5626         case (p_imm32 l,disp) of
5627            (NONE,_) =>
5628              (case (p_register l,(si,b)) of
5629                  (Option.SOME(sz,rg),(_,NONE)) =>
5630                    (if sz = Z64
5631                       then p_parts((si,(Option.SOME rg,disp)),r)
5632                     else (NONE,(NONE,NONE)))
5633                | (Option.SOME(sz,rg),(NONE,Option.SOME _)) =>
5634                  (if sz = Z64
5635                     then p_parts
5636                            ((Option.SOME(BitsN.B(0x0,2),rg),(b,disp)),r)
5637                   else (NONE,(NONE,NONE)))
5638                | (NONE,(NONE,_)) =>
5639                  (case p_scale_index l of
5640                      NONE => (NONE,(NONE,NONE))
5641                    | si => p_parts((si,(b,disp)),r))
5642                | (NONE,(Option.SOME(n1,r1),NONE)) =>
5643                  (case p_scale_index l of
5644                      NONE => (NONE,(NONE,NONE))
5645                    | Option.SOME(n2,r2) =>
5646                      (if n1 = (BitsN.B(0x0,2))
5647                         then p_parts
5648                                ((Option.SOME(n2,r2),(Option.SOME r1,disp)),
5649                                 r)
5650                       else if n2 = (BitsN.B(0x0,2))
5651                         then p_parts
5652                                ((Option.SOME(n1,r1),(Option.SOME r2,disp)),
5653                                 r)
5654                       else (NONE,(NONE,NONE))))
5655                | _ => (NONE,(NONE,NONE)))
5656          | (Option.SOME imm2,Option.SOME imm1) =>
5657            p_parts((si,(b,Option.SOME(BitsN.+(imm2,imm1)))),r)
5658          | (Option.SOME imm,NONE) => p_parts((si,(b,Option.SOME imm)),r)
5659       end
5660  end;
5661
5662fun p_mem_aux s =
5663  let
5664    val (si,(b,disp)) =
5665      case p_parts((NONE,(NONE,NONE)),s) of
5666         (Option.SOME(BitsN.B(0x0,_),r),(NONE,disp)) =>
5667           (NONE,(Option.SOME r,disp))
5668       | (Option.SOME(BitsN.B(0x0,_),RSP),(Option.SOME r,disp)) =>
5669         (Option.SOME(BitsN.B(0x0,2),r),(Option.SOME RSP,disp))
5670       | x => x
5671  in
5672    if (si = NONE) andalso ((b = NONE) andalso (disp = NONE))
5673      then NONE
5674    else Option.SOME
5675           (si,
5676            (case b of Option.SOME r => ZregBase r | NONE => ZnoBase,
5677             case disp of Option.SOME imm => imm | NONE => BitsN.B(0x0,64)))
5678  end;
5679
5680fun p_mem s =
5681  let
5682    val (l,r) = L3.splitr(fn c => c = #"]",stripSpaces s)
5683  in
5684    if r = "]"
5685      then case p_mem_aux l of
5686              Option.SOME v => Option.SOME v
5687            | NONE =>
5688              (case p_rip_disp l of
5689                  Option.SOME(_,imm) => Option.SOME(NONE,(ZripBase,imm))
5690                | NONE => NONE)
5691    else NONE
5692  end;
5693
5694fun p_rm s =
5695  case String.explode(stripSpaces s) of
5696     #"[" :: r =>
5697       (case p_mem(String.implode r) of
5698           Option.SOME v => Option.SOME(Z64,Zm v)
5699         | NONE => NONE)
5700   | r =>
5701     (case p_register(String.implode r) of
5702         Option.SOME(sz,r) => Option.SOME(sz,Zr r)
5703       | NONE => NONE);
5704
5705fun p_xmm s =
5706  case String.explode(stripSpaces s) of
5707     #"[" :: r =>
5708       (case p_mem(String.implode r) of
5709           Option.SOME v => Option.SOME(xmm_mem v)
5710         | NONE => NONE)
5711   | r =>
5712     (case p_xreg(String.implode r) of
5713         Option.SOME r => Option.SOME(xmm_reg r)
5714       | NONE => NONE);
5715
5716fun checkSizeDelim (sz,s) =
5717  case String.explode s of
5718     [] => Option.SOME sz
5719   | h :: _ =>
5720     (if (Char.isSpace h) orelse (h = #"[") then Option.SOME sz else NONE);
5721
5722fun p_sz s =
5723  case String.explode(stripLeftSpaces s) of
5724     #"b" :: (#"y" :: (#"t" :: (#"e" :: r))) =>
5725       let val r = String.implode r in (checkSizeDelim(Z8 false,r),r) end
5726   | #"w" :: (#"o" :: (#"r" :: (#"d" :: r))) =>
5727     let val r = String.implode r in (checkSizeDelim(Z16,r),r) end
5728   | #"d" :: (#"w" :: (#"o" :: (#"r" :: (#"d" :: r)))) =>
5729     let val r = String.implode r in (checkSizeDelim(Z32,r),r) end
5730   | #"q" :: (#"w" :: (#"o" :: (#"r" :: (#"d" :: r)))) =>
5731     let val r = String.implode r in (checkSizeDelim(Z64,r),r) end
5732   | r => (NONE,String.implode r);
5733
5734fun s_sz sz =
5735  case sz of
5736     Z8 _ => "byte"
5737   | Z16 => "word"
5738   | Z32 => "dword"
5739   | Z64 => "qword";
5740
5741fun p_sz_rm s =
5742  let
5743    val (sz1,r) = p_sz s
5744  in
5745    case p_rm r of
5746       Option.SOME(sz2,Zr r) =>
5747         (case (sz1,sz2) of
5748             (Option.SOME(Z8 _),Z8 _) => ""
5749           | (NONE,_) => ""
5750           | (Option.SOME s1,_) =>
5751             (if s1 = sz2 then "" else "cannot override register size"),
5752          (sz2,Zr r))
5753     | Option.SOME(sz2,x) =>
5754       (case sz1 of
5755           NONE => ("any size",(sz2,x))
5756         | Option.SOME s1 => ("",(s1,x)))
5757     | NONE => ("syntax error",(Z16,Zr RAX))
5758  end;
5759
5760fun check_sizes (sz1,(arg1,(sz2,arg2))) =
5761  if sz1 = sz2
5762    then ""
5763  else case (sz1,(sz2,(arg1,arg2))) of
5764          (Z8 _,(Z8 _,(Option.SOME r1,Zr r2))) =>
5765            (if (Set.mem(r1,[RSP,RBP,RSI,RDI])) andalso
5766                (Set.mem(r2,[RSP,RBP,RSI,RDI]))
5767               then "cannot mix high and low byte registers"
5768             else "")
5769        | _ =>
5770          String.concat
5771            ["expecting ",s_sz sz1," value but got ",s_sz sz2," value"];
5772
5773fun p_rm_of_size (sz,(arg1,s)) =
5774  case p_sz_rm s of
5775     ("any size",(_,rm)) => ("",rm)
5776   | ("",(sz2,rm)) => (check_sizes(sz,(arg1,(sz2,rm))),rm)
5777   | (err,_) => (err,Zr RAX);
5778
5779fun p_rm32 s = p_rm_of_size(Z32,(NONE,s));
5780
5781fun p_rm64 s = p_rm_of_size(Z64,(NONE,s));
5782
5783fun p_imm_rm s =
5784  case p_imm32 s of
5785     Option.SOME imm => ("",Zimm imm)
5786   | NONE => let val (err,rm) = p_rm64 s in (err,Zrm rm) end;
5787
5788fun p_dest_src (nfull_imm,(a,b)) =
5789  case p_sz_rm a of
5790     ("",(sz,Zr r)) =>
5791       (case p_imm_of_size(sz,b) of
5792           Option.SOME imm =>
5793             (if (sz = Z64) andalso
5794                 (nfull_imm andalso
5795                  (not((BitsN.signExtend 64 (BitsN.bits(31,0) imm)) = imm)))
5796                then ("syntax error: bad immediate",NONE)
5797              else ("",Option.SOME(sz,Zrm_i(Zr r,imm))))
5798         | NONE =>
5799           (case p_rm_of_size(sz,(Option.SOME r,b)) of
5800               ("",Zr r2) => ("",Option.SOME(sz,Zrm_r(Zr r,r2)))
5801             | ("",rm) => ("",Option.SOME(sz,Zr_rm(r,rm)))
5802             | (err,_) => (err,NONE)))
5803   | (message,(sz,Zm m)) =>
5804     (case p_register b of
5805         Option.SOME(sz2,r) =>
5806           (if (message = "any size") orelse (sz = sz2)
5807              then ("",Option.SOME(sz2,Zrm_r(Zm m,r)))
5808            else ("inconsistent sizes",NONE))
5809       | NONE =>
5810         (case p_imm_of_size(sz,b) of
5811             Option.SOME imm =>
5812               (if (sz = Z64) andalso
5813                   (not((BitsN.signExtend 64 (BitsN.bits(31,0) imm)) = imm))
5814                  then ("syntax error: bad immediate",NONE)
5815                else ("",Option.SOME(sz,Zrm_i(Zm m,imm))))
5816           | NONE => ("syntax error",NONE)))
5817   | (err,_) => (err,NONE);
5818
5819fun p_cond s =
5820  case s of
5821     "o" => Option.SOME Z_O
5822   | "b" => Option.SOME Z_B
5823   | "c" => Option.SOME Z_B
5824   | "nae" => Option.SOME Z_B
5825   | "e" => Option.SOME Z_E
5826   | "z" => Option.SOME Z_E
5827   | "a" => Option.SOME Z_A
5828   | "nbe" => Option.SOME Z_A
5829   | "s" => Option.SOME Z_S
5830   | "p" => Option.SOME Z_P
5831   | "pe" => Option.SOME Z_P
5832   | "l" => Option.SOME Z_L
5833   | "nge" => Option.SOME Z_L
5834   | "g" => Option.SOME Z_G
5835   | "nle" => Option.SOME Z_G
5836   | "no" => Option.SOME Z_NO
5837   | "nb" => Option.SOME Z_NB
5838   | "nc" => Option.SOME Z_NB
5839   | "ae" => Option.SOME Z_NB
5840   | "ne" => Option.SOME Z_NE
5841   | "nz" => Option.SOME Z_NE
5842   | "na" => Option.SOME Z_NA
5843   | "be" => Option.SOME Z_NA
5844   | "ns" => Option.SOME Z_NS
5845   | "np" => Option.SOME Z_NP
5846   | "po" => Option.SOME Z_NP
5847   | "nl" => Option.SOME Z_NL
5848   | "ge" => Option.SOME Z_NL
5849   | "ng" => Option.SOME Z_NG
5850   | "le" => Option.SOME Z_NG
5851   | "" => Option.SOME Z_ALWAYS
5852   | _ => NONE;
5853
5854fun p_binop (bop,(a,b)) =
5855  case p_dest_src(true,(a,b)) of
5856     ("",Option.SOME(sz,dst_src)) => OK(Zbinop(bop,(sz,dst_src)))
5857   | ("",NONE) => FAIL "syntax error"
5858   | (err,_) => FAIL err;
5859
5860fun p_monop (opc,s) =
5861  let
5862    val (err,sz_rm) = p_sz_rm s
5863  in
5864    if (err = "") orelse (err = "any size")
5865      then case opc of
5866              0 => OK(Zdiv sz_rm)
5867            | 1 => OK(Zidiv sz_rm)
5868            | 2 => OK(Zmul sz_rm)
5869            | 3 => OK(Zimul sz_rm)
5870            | 4 => OK(Zmonop(Zdec,sz_rm))
5871            | 5 => OK(Zmonop(Zinc,sz_rm))
5872            | 6 => OK(Zmonop(Znot,sz_rm))
5873            | 7 => OK(Zmonop(Zneg,sz_rm))
5874            | _ => FAIL "unsupported monop"
5875    else FAIL err
5876  end;
5877
5878fun p_xop (opc,(a,b)) =
5879  case p_register b of
5880     Option.SOME(sz,r) =>
5881       let
5882         val (err,rm) =
5883           case p_rm a of
5884              Option.SOME(sz2,Zr r2) =>
5885                (check_sizes(sz,(Option.SOME r,(sz2,Zr r2))),Zr r2)
5886            | Option.SOME(Z64,m) => ("",m)
5887            | _ => ("syntax error",Zm(NONE,(ZnoBase,BitsN.B(0x0,64))))
5888       in
5889         if err = ""
5890           then let
5891                  val arg = (sz,(rm,r))
5892                in
5893                  OK(case opc of
5894                        0 => Zxadd arg
5895                      | 1 => Zxchg arg
5896                      | _ => Zcmpxchg arg)
5897                end
5898         else FAIL err
5899       end
5900   | NONE => FAIL "syntax error";
5901
5902fun p_imul3 (c,(sz,(r,rm))) =
5903  case p_imm8 c of
5904     Option.SOME imm8 => OK(Zimul3(sz,(r,(rm,imm8))))
5905   | NONE =>
5906     if sz = Z16
5907       then case p_imm16 c of
5908               Option.SOME imm16 => OK(Zimul3(sz,(r,(rm,imm16))))
5909             | NONE => FAIL "syntax error"
5910     else case p_imm32 c of
5911             Option.SOME imm32 => OK(Zimul3(sz,(r,(rm,imm32))))
5912           | NONE => FAIL "syntax error";
5913
5914fun p_sse (s,(a,b)) =
5915  case (p_xreg a,p_xmm b) of
5916     (Option.SOME dst,Option.SOME src) =>
5917       let
5918         val a = (dst,src)
5919         val i =
5920           case s of
5921              "addpd" => bin_PD(sse_add,a)
5922            | "addps" => bin_PS(sse_add,a)
5923            | "addsd" => bin_SD(sse_add,a)
5924            | "addss" => bin_SS(sse_add,a)
5925            | "divpd" => bin_PD(sse_div,a)
5926            | "divps" => bin_PS(sse_div,a)
5927            | "divsd" => bin_SD(sse_div,a)
5928            | "divss" => bin_SS(sse_div,a)
5929            | "maxpd" => bin_PD(sse_max,a)
5930            | "maxps" => bin_PS(sse_max,a)
5931            | "maxsd" => bin_SD(sse_max,a)
5932            | "maxss" => bin_SS(sse_max,a)
5933            | "minpd" => bin_PD(sse_min,a)
5934            | "minps" => bin_PS(sse_min,a)
5935            | "minsd" => bin_SD(sse_min,a)
5936            | "minss" => bin_SS(sse_min,a)
5937            | "mulpd" => bin_PD(sse_mul,a)
5938            | "mulps" => bin_PS(sse_mul,a)
5939            | "mulsd" => bin_SD(sse_mul,a)
5940            | "mulss" => bin_SS(sse_mul,a)
5941            | "subpd" => bin_PD(sse_sub,a)
5942            | "subps" => bin_PS(sse_sub,a)
5943            | "subsd" => bin_SD(sse_sub,a)
5944            | "subss" => bin_SS(sse_sub,a)
5945            | "andpd" => logic_PD(sse_and,a)
5946            | "andps" => logic_PS(sse_and,a)
5947            | "andnpd" => logic_PD(sse_andn,a)
5948            | "andnps" => logic_PS(sse_andn,a)
5949            | "orpd" => logic_PD(sse_or,a)
5950            | "orps" => logic_PS(sse_or,a)
5951            | "xorpd" => logic_PD(sse_xor,a)
5952            | "xorps" => logic_PS(sse_xor,a)
5953            | "cmpeqpd" => CMPPD(sse_eq_oq,a)
5954            | "cmpeqps" => CMPPS(sse_eq_oq,a)
5955            | "cmpeqsd" => CMPSD(sse_eq_oq,a)
5956            | "cmpeqss" => CMPSS(sse_eq_oq,a)
5957            | "cmpltpd" => CMPPD(sse_lt_os,a)
5958            | "cmpltps" => CMPPS(sse_lt_os,a)
5959            | "cmpltsd" => CMPSD(sse_lt_os,a)
5960            | "cmpltss" => CMPSS(sse_lt_os,a)
5961            | "cmplepd" => CMPPD(sse_le_os,a)
5962            | "cmpleps" => CMPPS(sse_le_os,a)
5963            | "cmplesd" => CMPSD(sse_le_os,a)
5964            | "cmpless" => CMPSS(sse_le_os,a)
5965            | "cmpunordpd" => CMPPD(sse_unord_q,a)
5966            | "cmpunordps" => CMPPS(sse_unord_q,a)
5967            | "cmpunordsd" => CMPSD(sse_unord_q,a)
5968            | "cmpunordss" => CMPSS(sse_unord_q,a)
5969            | "cmpneqpd" => CMPPD(sse_neq_uq,a)
5970            | "cmpneqps" => CMPPS(sse_neq_uq,a)
5971            | "cmpneqsd" => CMPSD(sse_neq_uq,a)
5972            | "cmpneqss" => CMPSS(sse_neq_uq,a)
5973            | "cmpnltpd" => CMPPD(sse_nlt_us,a)
5974            | "cmpnltps" => CMPPS(sse_nlt_us,a)
5975            | "cmpnltsd" => CMPSD(sse_nlt_us,a)
5976            | "cmpnltss" => CMPSS(sse_nlt_us,a)
5977            | "cmpnlepd" => CMPPD(sse_nle_us,a)
5978            | "cmpnleps" => CMPPS(sse_nle_us,a)
5979            | "cmpnlesd" => CMPSD(sse_nle_us,a)
5980            | "cmpnless" => CMPSS(sse_nle_us,a)
5981            | "cmpordpd" => CMPPD(sse_ord_q,a)
5982            | "cmpordps" => CMPPS(sse_ord_q,a)
5983            | "cmpordsd" => CMPSD(sse_ord_q,a)
5984            | "cmpordss" => CMPSS(sse_ord_q,a)
5985            | "comisd" => COMISD a
5986            | "comiss" => COMISS a
5987            | "cvtdq2pd" => CVTDQ2PD a
5988            | "cvtdq2ps" => CVTDQ2PS a
5989            | "cvtpd2dq" => CVTPD2DQ(false,a)
5990            | "cvttpd2dq" => CVTPD2DQ(true,a)
5991            | "cvtpd2ps" => CVTPD2PS a
5992            | "cvtps2dq" => CVTPS2DQ(false,a)
5993            | "cvttps2dq" => CVTPS2DQ(true,a)
5994            | "cvtps2pd" => CVTPS2PD a
5995            | "cvtsd2ss" => CVTSD2SS a
5996            | "cvtss2sd" => CVTSS2SD a
5997            | "pcmpeqq" => PCMPEQQ a
5998            | "sqrtpd" => SQRTPD a
5999            | "sqrtsd" => SQRTPS a
6000            | "sqrtps" => SQRTSD a
6001            | "sqrtss" => SQRTSS a
6002            | _ =>
6003              CMPPD
6004                (sse_eq_oq,
6005                 (BitsN.B(0x0,3),xmm_mem(NONE,(ZnoBase,BitsN.B(0x0,64)))))
6006       in
6007         OK(SSE i)
6008       end
6009   | _ => FAIL("syntax error: SSE " ^ s);
6010
6011fun p_cvt_2si (double,(truncate,(a,b))) =
6012  case (p_register a,p_xmm b) of
6013     (Option.SOME(sz,r),Option.SOME x) =>
6014       (if Set.mem(sz,[Z32,Z64])
6015          then let
6016                 val a = (truncate,(sz = Z64,(r,x)))
6017               in
6018                 OK(SSE(if double then CVTSD2SI a else CVTSS2SI a))
6019               end
6020        else FAIL "expecting 32-bit or 64-bit register destination")
6021   | _ => FAIL "syntax error";
6022
6023fun p_cvtsi2 (double,(a,b)) =
6024  case (p_xreg a,p_rm b) of
6025     (Option.SOME r,Option.SOME(sz,rm)) =>
6026       (if Set.mem(sz,[Z32,Z64])
6027          then let
6028                 val a = (sz = Z64,(r,rm))
6029               in
6030                 OK(SSE(if double then CVTSI2SD a else CVTSI2SS a))
6031               end
6032        else FAIL "expecting 32-bit or 64-bit register source")
6033   | _ => FAIL "syntax error";
6034
6035fun p_movap_movup (align,(double,(a,b))) =
6036  case (p_xmm a,p_xmm b) of
6037     (Option.SOME(xmm_mem _),Option.SOME(xmm_mem _)) =>
6038       FAIL "syntax error"
6039   | (Option.SOME dst,Option.SOME src) =>
6040     let
6041       val a = (double,(dst,src))
6042     in
6043       OK(SSE(if align then MOVAP_D_S a else MOVUP_D_S a))
6044     end
6045   | _ => FAIL "syntax error";
6046
6047fun p_movsd_movss (double,(a,b)) =
6048  case (p_xmm a,p_xmm b) of
6049     (Option.SOME(xmm_mem _),Option.SOME(xmm_mem _)) =>
6050       FAIL "syntax error"
6051   | (Option.SOME dst,Option.SOME src) =>
6052     let
6053       val a = (dst,src)
6054     in
6055       OK(SSE(if double then MOVSD a else MOVSS a))
6056     end
6057   | _ => FAIL "syntax error";
6058
6059fun p_mov_d_q (quad,(a,b)) =
6060  let
6061    val sz = if quad then Z64 else Z32
6062  in
6063    case p_xreg a of
6064       Option.SOME r =>
6065         (case p_rm_of_size(sz,(NONE,b)) of
6066             ("",Zm m) =>
6067               OK(SSE(if quad
6068                        then MOVQ(xmm_reg r,xmm_mem m)
6069                      else MOV_D_Q(true,(quad,(r,Zm m)))))
6070           | ("",rm) => OK(SSE(MOV_D_Q(true,(quad,(r,rm)))))
6071           | (s,_) =>
6072             (case (quad,p_xreg b) of
6073                 (true,Option.SOME t) =>
6074                   OK(SSE(MOVQ(xmm_reg r,xmm_reg t)))
6075               | _ => FAIL s))
6076     | NONE =>
6077       (case (p_rm_of_size(sz,(NONE,a)),(quad,p_xreg b)) of
6078           (("",Zm m),(true,Option.SOME r)) =>
6079             OK(SSE(MOVQ(xmm_mem m,xmm_reg r)))
6080         | (("",rm),(_,Option.SOME r)) =>
6081           OK(SSE(MOV_D_Q(false,(quad,(r,rm)))))
6082         | ((s,_),(_,Option.SOME _)) => FAIL s
6083         | _ => FAIL "syntax error")
6084  end;
6085
6086fun p_pshift (n,(a,b)) =
6087  case (p_xreg a,p_number b) of
6088     (Option.SOME r,Option.SOME i) =>
6089       (if Nat.<(i,256)
6090          then let
6091                 val a = (r,BitsN.fromNat(i,8))
6092               in
6093                 OK(SSE(case n of
6094                           BitsN.B(0x0,_) => PSRLW_imm a
6095                         | BitsN.B(0x1,_) => PSRAW_imm a
6096                         | BitsN.B(0x2,_) => PSLLW_imm a
6097                         | BitsN.B(0x3,_) => PSRLD_imm a
6098                         | BitsN.B(0x4,_) => PSRAD_imm a
6099                         | BitsN.B(0x5,_) => PSLLD_imm a
6100                         | BitsN.B(0x6,_) => PSRLQ_imm a
6101                         | BitsN.B(0x7,_) => PSLLQ_imm a
6102                         | BitsN.B(0x8,_) => PSLLDQ a
6103                         | _ => PSRLDQ a))
6104               end
6105        else FAIL "syntax error: expecting byte immediate")
6106   | _ => FAIL "syntax error";
6107
6108fun p_tokens s =
6109  let
6110    val (l,r) =
6111      L3.splitl
6112        (fn c => not(Char.isSpace c),
6113         L3.lowercase(L3.snd(L3.splitl(fn c => Char.isSpace c,s))))
6114    val r = L3.uncurry String.fields (fn c => c = #",",r)
6115    val r =
6116      if ((L3.length r) = 1) andalso ((stripSpaces(List.hd r)) = "")
6117        then []
6118      else r
6119  in
6120    l :: r
6121  end;
6122
6123fun instructionFromString s =
6124  let
6125    val toks = p_tokens s
6126  in
6127    case toks of
6128       v'0 :: v'1 =>
6129         (case v'1 of
6130             v'4 :: v'5 =>
6131               (case ((String.explode v'4,v'5),String.explode v'0) of
6132                   ((a,[b]),[#"o",#"r"]) =>
6133                     p_binop(Zor,(String.implode a,b))
6134                 | ((a,[b]),[#"a",#"d",#"d"]) =>
6135                   p_binop(Zadd,(String.implode a,b))
6136                 | ((a,[b]),[#"a",#"d",#"c"]) =>
6137                   p_binop(Zadc,(String.implode a,b))
6138                 | ((a,[b]),[#"s",#"b",#"b"]) =>
6139                   p_binop(Zsbb,(String.implode a,b))
6140                 | ((a,[b]),[#"a",#"n",#"d"]) =>
6141                   p_binop(Zand,(String.implode a,b))
6142                 | ((a,[b]),[#"s",#"u",#"b"]) =>
6143                   p_binop(Zsub,(String.implode a,b))
6144                 | ((a,[b]),[#"x",#"o",#"r"]) =>
6145                   p_binop(Zxor,(String.implode a,b))
6146                 | ((a,[b]),[#"c",#"m",#"p"]) =>
6147                   p_binop(Zcmp,(String.implode a,b))
6148                 | ((a,[b]),[#"r",#"o",#"l"]) =>
6149                   p_binop(Zrol,(String.implode a,b))
6150                 | ((a,[b]),[#"r",#"o",#"r"]) =>
6151                   p_binop(Zror,(String.implode a,b))
6152                 | ((a,[b]),[#"r",#"c",#"l"]) =>
6153                   p_binop(Zrcl,(String.implode a,b))
6154                 | ((a,[b]),[#"r",#"c",#"r"]) =>
6155                   p_binop(Zrcr,(String.implode a,b))
6156                 | ((a,[b]),[#"s",#"h",#"l"]) =>
6157                   p_binop(Zshl,(String.implode a,b))
6158                 | ((a,[b]),[#"s",#"h",#"r"]) =>
6159                   p_binop(Zshr,(String.implode a,b))
6160                 | ((a,[b]),[#"s",#"a",#"r"]) =>
6161                   p_binop(Zsar,(String.implode a,b))
6162                 | ((a,[b]),[#"t",#"e",#"s",#"t"]) =>
6163                   p_binop(Ztest,(String.implode a,b))
6164                 | ((a,[]),#"s" :: (#"e" :: (#"t" :: cond))) =>
6165                   let
6166                     val cond = String.implode cond
6167                   in
6168                     case p_cond cond of
6169                        Option.SOME c =>
6170                          (case p_sz_rm(String.implode a) of
6171                              ("",(Z8 have_rex,rm)) =>
6172                                OK(Zset(c,(have_rex,rm)))
6173                            | ("any size",(Z8 have_rex,rm)) =>
6174                              OK(Zset(c,(have_rex,rm)))
6175                            | ("any size",_) =>
6176                              FAIL "SETcc requires byte source"
6177                            | (err,_) => FAIL err)
6178                      | NONE =>
6179                        FAIL
6180                          (String.concat
6181                             ["Unrecognised mnemonic: ","set",cond])
6182                   end
6183                 | ((a,[]),[#"c",#"a",#"l",#"l"]) =>
6184                   let
6185                     val a = String.implode a
6186                   in
6187                     case p_imm_rm a of
6188                        ("",Zimm imm) =>
6189                          OK(Zcall(Zimm(BitsN.-(imm,BitsN.B(0x5,64)))))
6190                      | ("",Zrm rm) => OK(Zcall(Zrm rm))
6191                      | (err,_) =>
6192                        (case p_label a of
6193                            Option.SOME l =>
6194                              PENDING(l,Zcall(Zimm(BitsN.B(0x0,64))))
6195                          | NONE => FAIL err)
6196                   end
6197                 | ((a,[]),[#"p",#"u",#"s",#"h"]) =>
6198                   (case p_imm_rm(String.implode a) of
6199                       ("",x) => OK(Zpush x)
6200                     | (err,_) => FAIL err)
6201                 | ((a,[]),[#"p",#"o",#"p"]) =>
6202                   (case p_rm64(String.implode a) of
6203                       ("",rm) => OK(Zpop rm)
6204                     | (err,_) => FAIL err)
6205                 | ((a,[]),#"j" :: cond) =>
6206                   let
6207                     val a = String.implode a
6208                     val cond = String.implode cond
6209                     val condition =
6210                       if cond = "mp"
6211                         then Option.SOME Z_ALWAYS
6212                       else if cond = "" then NONE else p_cond cond
6213                   in
6214                     case condition of
6215                        Option.SOME c =>
6216                          (case p_imm_rm a of
6217                              ("",Zrm rm) =>
6218                                (if c = Z_ALWAYS
6219                                   then OK(Zjmp rm)
6220                                 else FAIL "syntax error")
6221                            | ("",Zimm imm) =>
6222                              let
6223                                val ast =
6224                                  Zjcc(c,BitsN.-(imm,BitsN.B(0x2,64)))
6225                              in
6226                                if (L3.length(encode ast)) = 2
6227                                  then OK ast
6228                                else OK(Zjcc
6229                                          (c,
6230                                           BitsN.-
6231                                             (imm,
6232                                              if c = Z_ALWAYS
6233                                                then BitsN.B(0x5,64)
6234                                              else BitsN.B(0x6,64))))
6235                              end
6236                            | (err,_) =>
6237                              let
6238                                val (sz,b) = p_sz a
6239                                val far =
6240                                  case sz of
6241                                     Option.SOME Z32 => true
6242                                   | _ => false
6243                              in
6244                                case p_label b of
6245                                   Option.SOME l =>
6246                                     PENDING
6247                                       (l,Zjcc(c,BitsN.fromBool 64 far))
6248                                 | NONE => FAIL err
6249                              end)
6250                      | NONE =>
6251                        FAIL
6252                          (String.concat
6253                             ["Unrecognised mnemonic: ","j",cond])
6254                   end
6255                 | ((a,[]),[#"d",#"i",#"v"]) =>
6256                   p_monop(0,String.implode a)
6257                 | ((a,[]),[#"i",#"d",#"i",#"v"]) =>
6258                   p_monop(1,String.implode a)
6259                 | ((a,[]),[#"m",#"u",#"l"]) =>
6260                   p_monop(2,String.implode a)
6261                 | ((a,[]),[#"i",#"m",#"u",#"l"]) =>
6262                   p_monop(3,String.implode a)
6263                 | ((a,[]),[#"d",#"e",#"c"]) =>
6264                   p_monop(4,String.implode a)
6265                 | ((a,[]),[#"i",#"n",#"c"]) =>
6266                   p_monop(5,String.implode a)
6267                 | ((a,[]),[#"n",#"o",#"t"]) =>
6268                   p_monop(6,String.implode a)
6269                 | ((a,[]),[#"n",#"e",#"g"]) =>
6270                   p_monop(7,String.implode a)
6271                 | ((i,[]),[#"r",#"e",#"t"]) =>
6272                   (case p_imm16(String.implode i) of
6273                       Option.SOME imm => OK(Zret imm)
6274                     | NONE => FAIL "syntax error: bad immediate")
6275                 | ((i,[]),#"l" :: (#"o" :: (#"o" :: (#"p" :: cond)))) =>
6276                   let
6277                     val i = String.implode i
6278                     val cond = String.implode cond
6279                   in
6280                     case p_cond cond of
6281                        Option.SOME c =>
6282                          (if Set.mem(c,[Z_ALWAYS,Z_E,Z_NE])
6283                             then case p_imm8 i of
6284                                     Option.SOME imm =>
6285                                       OK(Zloop
6286                                            (c,
6287                                             BitsN.-(imm,BitsN.B(0x2,64))))
6288                                   | NONE =>
6289                                     (case p_label i of
6290                                         Option.SOME l =>
6291                                           PENDING
6292                                             (l,Zloop(c,BitsN.B(0x0,64)))
6293                                       | NONE =>
6294                                         FAIL
6295                                           "syntax error: bad immediate")
6296                           else FAIL "bad condition")
6297                      | NONE =>
6298                        FAIL
6299                          (String.concat
6300                             ["Unrecognised mnemonic: ","loop",cond])
6301                   end
6302                 | ((a,[b]),#"b" :: (#"t" :: s)) =>
6303                   let
6304                     val s = String.implode s
6305                   in
6306                     if Set.mem(s,["","s","r","c"])
6307                       then let
6308                              val bt =
6309                                Cast.stringToZbit_test_name
6310                                  (String.concat["Z","bt",s])
6311                            in
6312                              case p_dest_src(false,(String.implode a,b)) of
6313                                 ("",Option.SOME(Z8 _,_)) =>
6314                                   FAIL "syntax error"
6315                               | ("",Option.SOME(_,Zr_rm _)) =>
6316                                 FAIL "syntax error"
6317                               | ("",Option.SOME(sz,Zrm_i(rm,i))) =>
6318                                 (if (BitsN.<=
6319                                        (BitsN.neg(BitsN.B(0x80,64)),i)) andalso
6320                                     (BitsN.<=(i,BitsN.B(0xFF,64)))
6321                                    then OK(Zbit_test(bt,(sz,Zrm_i(rm,i))))
6322                                  else FAIL
6323                                         "syntax error: expecting byte immediate")
6324                               | ("",Option.SOME(sz,ds)) =>
6325                                 OK(Zbit_test(bt,(sz,ds)))
6326                               | ("",NONE) => FAIL "syntax error"
6327                               | (err,_) => FAIL err
6328                            end
6329                     else FAIL
6330                            (String.concat
6331                               ["Unrecognised mnemonic: ","bt",s])
6332                   end
6333                 | ((a,[b]),[#"i",#"m",#"u",#"l"]) =>
6334                   (case p_dest_src(false,(String.implode a,b)) of
6335                       ("",Option.SOME(Z8 _,_)) => FAIL "syntax error"
6336                     | ("",Option.SOME(sz,Zrm_r(Zr r1,r2))) =>
6337                       OK(Zimul2(sz,(r1,Zr r2)))
6338                     | ("",Option.SOME(sz,Zr_rm(r,rm))) =>
6339                       OK(Zimul2(sz,(r,rm)))
6340                     | ("",NONE) => FAIL "syntax error"
6341                     | (err,_) => FAIL err)
6342                 | ((a,[b,c]),[#"i",#"m",#"u",#"l"]) =>
6343                   (case p_dest_src(false,(String.implode a,b)) of
6344                       ("",Option.SOME(Z8 _,_)) => FAIL "syntax error"
6345                     | ("",Option.SOME(sz,Zrm_r(Zr r1,r2))) =>
6346                       p_imul3(c,(sz,(r1,Zr r2)))
6347                     | ("",Option.SOME(sz,Zr_rm(r,rm))) =>
6348                       p_imul3(c,(sz,(r,rm)))
6349                     | ("",NONE) => FAIL "syntax error"
6350                     | (err,_) => FAIL err)
6351                 | ((a,[b]),[#"m",#"o",#"v",#"z",#"x"]) =>
6352                   (case p_register(String.implode a) of
6353                       Option.SOME(sz2,r) =>
6354                         let
6355                           val (message,(sz1,rm)) = p_sz_rm b
6356                         in
6357                           if (message = "") orelse (message = "any size")
6358                             then OK(Zmovzx(sz1,(Zr_rm(r,rm),sz2)))
6359                           else FAIL message
6360                         end
6361                     | NONE => FAIL "syntax error")
6362                 | ((a,[b]),[#"m",#"o",#"v",#"s",#"x"]) =>
6363                   (case p_register(String.implode a) of
6364                       Option.SOME(sz2,r) =>
6365                         let
6366                           val (message,(sz1,rm)) = p_sz_rm b
6367                         in
6368                           if (message = "") orelse (message = "any size")
6369                             then if (sz1 = Z32) andalso (sz2 = Z64)
6370                                    then FAIL
6371                                           "movsx used instead of movsxd"
6372                                  else OK(Zmovsx(sz1,(Zr_rm(r,rm),sz2)))
6373                           else FAIL message
6374                         end
6375                     | NONE => FAIL "syntax error")
6376                 | ((a,[b]),[#"m",#"o",#"v",#"s",#"x",#"d"]) =>
6377                   (case p_register(String.implode a) of
6378                       Option.SOME(Z64,r) =>
6379                         (case p_rm32 b of
6380                             ("",rm) => OK(Zmovsx(Z32,(Zr_rm(r,rm),Z64)))
6381                           | (err,_) => FAIL err)
6382                     | Option.SOME _ =>
6383                       FAIL "destination must be a 64-bit register"
6384                     | NONE => FAIL "syntax error")
6385                 | ((a,[b]),[#"m",#"o",#"v"]) =>
6386                   (case p_dest_src(false,(String.implode a,b)) of
6387                       ("",Option.SOME(sz,dst_src)) =>
6388                         OK(Zmov(Z_ALWAYS,(sz,dst_src)))
6389                     | ("",NONE) => FAIL "syntax error"
6390                     | (err,_) => FAIL err)
6391                 | ((a,[b]),#"c" :: (#"m" :: (#"o" :: (#"v" :: cond)))) =>
6392                   let
6393                     val cond = String.implode cond
6394                   in
6395                     case p_cond cond of
6396                        Option.SOME Z_ALWAYS =>
6397                          FAIL
6398                            (String.concat
6399                               ["Unrecognised mnemonic: ","cmov",cond])
6400                      | Option.SOME c =>
6401                        (case p_dest_src(true,(String.implode a,b)) of
6402                            ("",Option.SOME(Z8 _,_)) =>
6403                              FAIL "syntax error"
6404                          | ("",Option.SOME(sz,Zrm_r(Zr r1,r2))) =>
6405                            OK(Zmov(c,(sz,Zr_rm(r1,Zr r2))))
6406                          | ("",Option.SOME(sz,Zr_rm(r,rm))) =>
6407                            OK(Zmov(c,(sz,Zr_rm(r,rm))))
6408                          | ("",_) => FAIL "syntax error"
6409                          | (err,_) => FAIL err)
6410                      | NONE =>
6411                        FAIL
6412                          (String.concat
6413                             ["Unrecognised mnemonic: ","cmov",cond])
6414                   end
6415                 | ((a,[b]),[#"l",#"e",#"a"]) =>
6416                   (case p_register(String.implode a) of
6417                       Option.SOME(Z8 _,r) => FAIL "syntax error"
6418                     | Option.SOME(sz,r) =>
6419                       (case p_rm b of
6420                           Option.SOME(Z64,Zm m) =>
6421                             OK(Zlea(sz,Zr_rm(r,Zm m)))
6422                         | _ => FAIL "syntax error")
6423                     | NONE => FAIL "syntax error")
6424                 | ((a,[b]),[#"x",#"a",#"d",#"d"]) =>
6425                   p_xop(0,(String.implode a,b))
6426                 | ((a,[b]),[#"x",#"c",#"h",#"g"]) =>
6427                   p_xop(1,(String.implode a,b))
6428                 | ((a,[b]),[#"c",#"m",#"p",#"x",#"c",#"h",#"g"]) =>
6429                   p_xop(2,(String.implode a,b))
6430                 | ((a,[b]),[#"a",#"d",#"d",#"p",#"d"]) =>
6431                   p_sse(List.hd toks,(String.implode a,b))
6432                 | ((a,[b]),[#"a",#"d",#"d",#"p",#"s"]) =>
6433                   p_sse(List.hd toks,(String.implode a,b))
6434                 | ((a,[b]),[#"a",#"d",#"d",#"s",#"d"]) =>
6435                   p_sse(List.hd toks,(String.implode a,b))
6436                 | ((a,[b]),[#"a",#"d",#"d",#"s",#"s"]) =>
6437                   p_sse(List.hd toks,(String.implode a,b))
6438                 | ((a,[b]),[#"d",#"i",#"v",#"p",#"d"]) =>
6439                   p_sse(List.hd toks,(String.implode a,b))
6440                 | ((a,[b]),[#"d",#"i",#"v",#"p",#"s"]) =>
6441                   p_sse(List.hd toks,(String.implode a,b))
6442                 | ((a,[b]),[#"d",#"i",#"v",#"s",#"d"]) =>
6443                   p_sse(List.hd toks,(String.implode a,b))
6444                 | ((a,[b]),[#"d",#"i",#"v",#"s",#"s"]) =>
6445                   p_sse(List.hd toks,(String.implode a,b))
6446                 | ((a,[b]),[#"m",#"a",#"x",#"p",#"d"]) =>
6447                   p_sse(List.hd toks,(String.implode a,b))
6448                 | ((a,[b]),[#"m",#"a",#"x",#"p",#"s"]) =>
6449                   p_sse(List.hd toks,(String.implode a,b))
6450                 | ((a,[b]),[#"m",#"a",#"x",#"s",#"d"]) =>
6451                   p_sse(List.hd toks,(String.implode a,b))
6452                 | ((a,[b]),[#"m",#"a",#"x",#"s",#"s"]) =>
6453                   p_sse(List.hd toks,(String.implode a,b))
6454                 | ((a,[b]),[#"m",#"i",#"n",#"p",#"d"]) =>
6455                   p_sse(List.hd toks,(String.implode a,b))
6456                 | ((a,[b]),[#"m",#"i",#"n",#"p",#"s"]) =>
6457                   p_sse(List.hd toks,(String.implode a,b))
6458                 | ((a,[b]),[#"m",#"i",#"n",#"s",#"d"]) =>
6459                   p_sse(List.hd toks,(String.implode a,b))
6460                 | ((a,[b]),[#"m",#"i",#"n",#"s",#"s"]) =>
6461                   p_sse(List.hd toks,(String.implode a,b))
6462                 | ((a,[b]),[#"m",#"u",#"l",#"p",#"d"]) =>
6463                   p_sse(List.hd toks,(String.implode a,b))
6464                 | ((a,[b]),[#"m",#"u",#"l",#"p",#"s"]) =>
6465                   p_sse(List.hd toks,(String.implode a,b))
6466                 | ((a,[b]),[#"m",#"u",#"l",#"s",#"d"]) =>
6467                   p_sse(List.hd toks,(String.implode a,b))
6468                 | ((a,[b]),[#"m",#"u",#"l",#"s",#"s"]) =>
6469                   p_sse(List.hd toks,(String.implode a,b))
6470                 | ((a,[b]),[#"s",#"u",#"b",#"p",#"d"]) =>
6471                   p_sse(List.hd toks,(String.implode a,b))
6472                 | ((a,[b]),[#"s",#"u",#"b",#"p",#"s"]) =>
6473                   p_sse(List.hd toks,(String.implode a,b))
6474                 | ((a,[b]),[#"s",#"u",#"b",#"s",#"d"]) =>
6475                   p_sse(List.hd toks,(String.implode a,b))
6476                 | ((a,[b]),[#"s",#"u",#"b",#"s",#"s"]) =>
6477                   p_sse(List.hd toks,(String.implode a,b))
6478                 | ((a,[b]),[#"a",#"n",#"d",#"p",#"d"]) =>
6479                   p_sse(List.hd toks,(String.implode a,b))
6480                 | ((a,[b]),[#"a",#"n",#"d",#"p",#"s"]) =>
6481                   p_sse(List.hd toks,(String.implode a,b))
6482                 | ((a,[b]),[#"a",#"n",#"d",#"n",#"p",#"d"]) =>
6483                   p_sse(List.hd toks,(String.implode a,b))
6484                 | ((a,[b]),[#"a",#"n",#"d",#"n",#"p",#"s"]) =>
6485                   p_sse(List.hd toks,(String.implode a,b))
6486                 | ((a,[b]),[#"o",#"r",#"p",#"d"]) =>
6487                   p_sse(List.hd toks,(String.implode a,b))
6488                 | ((a,[b]),[#"o",#"r",#"p",#"s"]) =>
6489                   p_sse(List.hd toks,(String.implode a,b))
6490                 | ((a,[b]),[#"x",#"o",#"r",#"p",#"d"]) =>
6491                   p_sse(List.hd toks,(String.implode a,b))
6492                 | ((a,[b]),[#"x",#"o",#"r",#"p",#"s"]) =>
6493                   p_sse(List.hd toks,(String.implode a,b))
6494                 | ((a,[b]),[#"c",#"m",#"p",#"e",#"q",#"p",#"d"]) =>
6495                   p_sse(List.hd toks,(String.implode a,b))
6496                 | ((a,[b]),[#"c",#"m",#"p",#"e",#"q",#"p",#"s"]) =>
6497                   p_sse(List.hd toks,(String.implode a,b))
6498                 | ((a,[b]),[#"c",#"m",#"p",#"e",#"q",#"s",#"d"]) =>
6499                   p_sse(List.hd toks,(String.implode a,b))
6500                 | ((a,[b]),[#"c",#"m",#"p",#"e",#"q",#"s",#"s"]) =>
6501                   p_sse(List.hd toks,(String.implode a,b))
6502                 | ((a,[b]),[#"c",#"m",#"p",#"l",#"t",#"p",#"d"]) =>
6503                   p_sse(List.hd toks,(String.implode a,b))
6504                 | ((a,[b]),[#"c",#"m",#"p",#"l",#"t",#"p",#"s"]) =>
6505                   p_sse(List.hd toks,(String.implode a,b))
6506                 | ((a,[b]),[#"c",#"m",#"p",#"l",#"t",#"s",#"d"]) =>
6507                   p_sse(List.hd toks,(String.implode a,b))
6508                 | ((a,[b]),[#"c",#"m",#"p",#"l",#"t",#"s",#"s"]) =>
6509                   p_sse(List.hd toks,(String.implode a,b))
6510                 | ((a,[b]),[#"c",#"m",#"p",#"l",#"e",#"p",#"d"]) =>
6511                   p_sse(List.hd toks,(String.implode a,b))
6512                 | ((a,[b]),[#"c",#"m",#"p",#"l",#"e",#"p",#"s"]) =>
6513                   p_sse(List.hd toks,(String.implode a,b))
6514                 | ((a,[b]),[#"c",#"m",#"p",#"l",#"e",#"s",#"d"]) =>
6515                   p_sse(List.hd toks,(String.implode a,b))
6516                 | ((a,[b]),[#"c",#"m",#"p",#"l",#"e",#"s",#"s"]) =>
6517                   p_sse(List.hd toks,(String.implode a,b))
6518                 | ((a,[b]),
6519                  [#"c",#"m",#"p",#"u",#"n",#"o",#"r",#"d",#"p",#"d"]) =>
6520                   p_sse(List.hd toks,(String.implode a,b))
6521                 | ((a,[b]),
6522                  [#"c",#"m",#"p",#"u",#"n",#"o",#"r",#"d",#"p",#"s"]) =>
6523                   p_sse(List.hd toks,(String.implode a,b))
6524                 | ((a,[b]),
6525                  [#"c",#"m",#"p",#"u",#"n",#"o",#"r",#"d",#"s",#"d"]) =>
6526                   p_sse(List.hd toks,(String.implode a,b))
6527                 | ((a,[b]),
6528                  [#"c",#"m",#"p",#"u",#"n",#"o",#"r",#"d",#"s",#"s"]) =>
6529                   p_sse(List.hd toks,(String.implode a,b))
6530                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"e",#"q",#"p",#"d"]) =>
6531                   p_sse(List.hd toks,(String.implode a,b))
6532                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"e",#"q",#"p",#"s"]) =>
6533                   p_sse(List.hd toks,(String.implode a,b))
6534                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"e",#"q",#"s",#"d"]) =>
6535                   p_sse(List.hd toks,(String.implode a,b))
6536                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"e",#"q",#"s",#"s"]) =>
6537                   p_sse(List.hd toks,(String.implode a,b))
6538                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"l",#"t",#"p",#"d"]) =>
6539                   p_sse(List.hd toks,(String.implode a,b))
6540                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"l",#"t",#"p",#"s"]) =>
6541                   p_sse(List.hd toks,(String.implode a,b))
6542                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"l",#"t",#"s",#"d"]) =>
6543                   p_sse(List.hd toks,(String.implode a,b))
6544                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"l",#"t",#"s",#"s"]) =>
6545                   p_sse(List.hd toks,(String.implode a,b))
6546                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"l",#"e",#"p",#"d"]) =>
6547                   p_sse(List.hd toks,(String.implode a,b))
6548                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"l",#"e",#"p",#"s"]) =>
6549                   p_sse(List.hd toks,(String.implode a,b))
6550                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"l",#"e",#"s",#"d"]) =>
6551                   p_sse(List.hd toks,(String.implode a,b))
6552                 | ((a,[b]),[#"c",#"m",#"p",#"n",#"l",#"e",#"s",#"s"]) =>
6553                   p_sse(List.hd toks,(String.implode a,b))
6554                 | ((a,[b]),[#"c",#"m",#"p",#"o",#"r",#"d",#"p",#"d"]) =>
6555                   p_sse(List.hd toks,(String.implode a,b))
6556                 | ((a,[b]),[#"c",#"m",#"p",#"o",#"r",#"d",#"p",#"s"]) =>
6557                   p_sse(List.hd toks,(String.implode a,b))
6558                 | ((a,[b]),[#"c",#"m",#"p",#"o",#"r",#"d",#"s",#"d"]) =>
6559                   p_sse(List.hd toks,(String.implode a,b))
6560                 | ((a,[b]),[#"c",#"m",#"p",#"o",#"r",#"d",#"s",#"s"]) =>
6561                   p_sse(List.hd toks,(String.implode a,b))
6562                 | ((a,[b]),[#"c",#"o",#"m",#"i",#"s",#"d"]) =>
6563                   p_sse(List.hd toks,(String.implode a,b))
6564                 | ((a,[b]),[#"c",#"o",#"m",#"i",#"s",#"s"]) =>
6565                   p_sse(List.hd toks,(String.implode a,b))
6566                 | ((a,[b]),[#"c",#"v",#"t",#"d",#"q",#"2",#"p",#"d"]) =>
6567                   p_sse(List.hd toks,(String.implode a,b))
6568                 | ((a,[b]),[#"c",#"v",#"t",#"d",#"q",#"2",#"p",#"s"]) =>
6569                   p_sse(List.hd toks,(String.implode a,b))
6570                 | ((a,[b]),[#"c",#"v",#"t",#"p",#"d",#"2",#"d",#"q"]) =>
6571                   p_sse(List.hd toks,(String.implode a,b))
6572                 | ((a,[b]),[#"c",#"v",#"t",#"t",#"p",#"d",#"2",#"d",#"q"]) =>
6573                   p_sse(List.hd toks,(String.implode a,b))
6574                 | ((a,[b]),[#"c",#"v",#"t",#"p",#"d",#"2",#"p",#"s"]) =>
6575                   p_sse(List.hd toks,(String.implode a,b))
6576                 | ((a,[b]),[#"c",#"v",#"t",#"p",#"s",#"2",#"d",#"q"]) =>
6577                   p_sse(List.hd toks,(String.implode a,b))
6578                 | ((a,[b]),[#"c",#"v",#"t",#"t",#"p",#"s",#"2",#"d",#"q"]) =>
6579                   p_sse(List.hd toks,(String.implode a,b))
6580                 | ((a,[b]),[#"c",#"v",#"t",#"p",#"s",#"2",#"p",#"d"]) =>
6581                   p_sse(List.hd toks,(String.implode a,b))
6582                 | ((a,[b]),[#"c",#"v",#"t",#"s",#"d",#"2",#"s",#"s"]) =>
6583                   p_sse(List.hd toks,(String.implode a,b))
6584                 | ((a,[b]),[#"c",#"v",#"t",#"s",#"s",#"2",#"s",#"d"]) =>
6585                   p_sse(List.hd toks,(String.implode a,b))
6586                 | ((a,[b]),[#"c",#"v",#"t",#"s",#"d",#"2",#"s",#"i"]) =>
6587                   p_cvt_2si(true,(false,(String.implode a,b)))
6588                 | ((a,[b]),[#"c",#"v",#"t",#"t",#"s",#"d",#"2",#"s",#"i"]) =>
6589                   p_cvt_2si(true,(true,(String.implode a,b)))
6590                 | ((a,[b]),[#"c",#"v",#"t",#"s",#"s",#"2",#"s",#"i"]) =>
6591                   p_cvt_2si(false,(false,(String.implode a,b)))
6592                 | ((a,[b]),[#"c",#"v",#"t",#"t",#"s",#"s",#"2",#"s",#"i"]) =>
6593                   p_cvt_2si(false,(true,(String.implode a,b)))
6594                 | ((a,[b]),[#"c",#"v",#"t",#"s",#"i",#"2",#"s",#"d"]) =>
6595                   p_cvtsi2(true,(String.implode a,b))
6596                 | ((a,[b]),[#"c",#"v",#"t",#"s",#"i",#"2",#"s",#"s"]) =>
6597                   p_cvtsi2(false,(String.implode a,b))
6598                 | ((a,[b]),[#"m",#"o",#"v",#"a",#"p",#"d"]) =>
6599                   p_movap_movup(true,(true,(String.implode a,b)))
6600                 | ((a,[b]),[#"m",#"o",#"v",#"a",#"p",#"s"]) =>
6601                   p_movap_movup(true,(false,(String.implode a,b)))
6602                 | ((a,[b]),[#"m",#"o",#"v",#"u",#"p",#"d"]) =>
6603                   p_movap_movup(false,(true,(String.implode a,b)))
6604                 | ((a,[b]),[#"m",#"o",#"v",#"u",#"p",#"s"]) =>
6605                   p_movap_movup(false,(false,(String.implode a,b)))
6606                 | ((a,[b]),[#"m",#"o",#"v",#"s",#"d"]) =>
6607                   p_movsd_movss(true,(String.implode a,b))
6608                 | ((a,[b]),[#"m",#"o",#"v",#"s",#"s"]) =>
6609                   p_movsd_movss(false,(String.implode a,b))
6610                 | ((a,[b]),[#"m",#"o",#"v",#"d"]) =>
6611                   p_mov_d_q(false,(String.implode a,b))
6612                 | ((a,[b]),[#"m",#"o",#"v",#"q"]) =>
6613                   p_mov_d_q(true,(String.implode a,b))
6614                 | ((a,[b]),[#"p",#"c",#"m",#"p",#"e",#"q",#"q"]) =>
6615                   p_sse(List.hd toks,(String.implode a,b))
6616                 | ((a,[b]),[#"p",#"s",#"r",#"l",#"w"]) =>
6617                   p_pshift(BitsN.B(0x0,4),(String.implode a,b))
6618                 | ((a,[b]),[#"p",#"s",#"r",#"a",#"w"]) =>
6619                   p_pshift(BitsN.B(0x1,4),(String.implode a,b))
6620                 | ((a,[b]),[#"p",#"s",#"l",#"l",#"w"]) =>
6621                   p_pshift(BitsN.B(0x2,4),(String.implode a,b))
6622                 | ((a,[b]),[#"p",#"s",#"r",#"l",#"d"]) =>
6623                   p_pshift(BitsN.B(0x3,4),(String.implode a,b))
6624                 | ((a,[b]),[#"p",#"s",#"r",#"a",#"d"]) =>
6625                   p_pshift(BitsN.B(0x4,4),(String.implode a,b))
6626                 | ((a,[b]),[#"p",#"s",#"l",#"l",#"d"]) =>
6627                   p_pshift(BitsN.B(0x5,4),(String.implode a,b))
6628                 | ((a,[b]),[#"p",#"s",#"r",#"l",#"q"]) =>
6629                   p_pshift(BitsN.B(0x6,4),(String.implode a,b))
6630                 | ((a,[b]),[#"p",#"s",#"l",#"l",#"q"]) =>
6631                   p_pshift(BitsN.B(0x7,4),(String.implode a,b))
6632                 | ((a,[b]),[#"p",#"s",#"l",#"l",#"d",#"q"]) =>
6633                   p_pshift(BitsN.B(0x8,4),(String.implode a,b))
6634                 | ((a,[b]),[#"p",#"s",#"r",#"l",#"d",#"q"]) =>
6635                   p_pshift(BitsN.B(0x9,4),(String.implode a,b))
6636                 | ((a,[b]),[#"s",#"q",#"r",#"t",#"p",#"d"]) =>
6637                   p_sse(List.hd toks,(String.implode a,b))
6638                 | ((a,[b]),[#"s",#"q",#"r",#"t",#"p",#"s"]) =>
6639                   p_sse(List.hd toks,(String.implode a,b))
6640                 | ((a,[b]),[#"s",#"q",#"r",#"t",#"s",#"d"]) =>
6641                   p_sse(List.hd toks,(String.implode a,b))
6642                 | ((a,[b]),[#"s",#"q",#"r",#"t",#"s",#"s"]) =>
6643                   p_sse(List.hd toks,(String.implode a,b))
6644                 | ((s :: l,[]),[#"b",#"y",#"t",#"e",#"s"]) =>
6645                   (if Char.isSpace s
6646                      then case p_bytes(String.implode l) of
6647                              Option.SOME bs => STREAM bs
6648                            | NONE => FAIL "bad byte list"
6649                    else FAIL "syntax error")
6650                 | ((v'7,v'8),v'2) =>
6651                   FAIL
6652                     ("Unrecognised mnemonic or wrong number of args: "
6653                        ^
6654                        s))
6655           | [] =>
6656             (case v'0 of
6657                 "cmc" => OK Zcmc
6658               | "clc" => OK Zclc
6659               | "stc" => OK Zstc
6660               | "leave" => OK Zleave
6661               | "nop" => OK(Znop 1)
6662               | "ret" => OK(Zret(BitsN.B(0x0,64)))
6663               | v'2 =>
6664                 FAIL
6665                   ("Unrecognised mnemonic or wrong number of args: " ^ s)))
6666     | _ => FAIL("Unrecognised mnemonic or wrong number of args: " ^ s)
6667  end;
6668
6669fun s_register (sz,r) =
6670  case (sz,r) of
6671     (Z8 _,RAX) => "al"
6672   | (Z8 _,RCX) => "cl"
6673   | (Z8 _,RDX) => "dl"
6674   | (Z8 _,RBX) => "bl"
6675   | (Z8 false,RSP) => "ah"
6676   | (Z8 false,RBP) => "ch"
6677   | (Z8 false,RSI) => "dh"
6678   | (Z8 false,RDI) => "bh"
6679   | (Z8 true,RSP) => "spl"
6680   | (Z8 true,RBP) => "bpl"
6681   | (Z8 true,RSI) => "sil"
6682   | (Z8 true,RDI) => "dil"
6683   | (Z8 _,zR8) => "r8b"
6684   | (Z8 _,zR9) => "r9b"
6685   | (Z8 _,zR10) => "r10b"
6686   | (Z8 _,zR11) => "r11b"
6687   | (Z8 _,zR12) => "r12b"
6688   | (Z8 _,zR13) => "r13b"
6689   | (Z8 _,zR14) => "r14b"
6690   | (Z8 _,zR15) => "r15b"
6691   | (Z16,RAX) => "ax"
6692   | (Z16,RCX) => "cx"
6693   | (Z16,RDX) => "dx"
6694   | (Z16,RBX) => "bx"
6695   | (Z16,RSP) => "sp"
6696   | (Z16,RBP) => "bp"
6697   | (Z16,RSI) => "si"
6698   | (Z16,RDI) => "di"
6699   | (Z16,zR8) => "r8w"
6700   | (Z16,zR9) => "r9w"
6701   | (Z16,zR10) => "r10w"
6702   | (Z16,zR11) => "r11w"
6703   | (Z16,zR12) => "r12w"
6704   | (Z16,zR13) => "r13w"
6705   | (Z16,zR14) => "r14w"
6706   | (Z16,zR15) => "r15w"
6707   | (Z32,RAX) => "eax"
6708   | (Z32,RCX) => "ecx"
6709   | (Z32,RDX) => "edx"
6710   | (Z32,RBX) => "ebx"
6711   | (Z32,RSP) => "esp"
6712   | (Z32,RBP) => "ebp"
6713   | (Z32,RSI) => "esi"
6714   | (Z32,RDI) => "edi"
6715   | (Z32,zR8) => "r8d"
6716   | (Z32,zR9) => "r9d"
6717   | (Z32,zR10) => "r10d"
6718   | (Z32,zR11) => "r11d"
6719   | (Z32,zR12) => "r12d"
6720   | (Z32,zR13) => "r13d"
6721   | (Z32,zR14) => "r14d"
6722   | (Z32,zR15) => "r15d"
6723   | (Z64,RAX) => "rax"
6724   | (Z64,RCX) => "rcx"
6725   | (Z64,RDX) => "rdx"
6726   | (Z64,RBX) => "rbx"
6727   | (Z64,RSP) => "rsp"
6728   | (Z64,RBP) => "rbp"
6729   | (Z64,RSI) => "rsi"
6730   | (Z64,RDI) => "rdi"
6731   | (Z64,zR8) => "r8"
6732   | (Z64,zR9) => "r9"
6733   | (Z64,zR10) => "r10"
6734   | (Z64,zR11) => "r11"
6735   | (Z64,zR12) => "r12"
6736   | (Z64,zR13) => "r13"
6737   | (Z64,zR14) => "r14"
6738   | (Z64,zR15) => "r15";
6739
6740fun s_qword q =
6741  if BitsN.<(q,BitsN.B(0x0,64))
6742    then "-0x" ^ (BitsN.toHexString(BitsN.neg q))
6743  else "0x" ^ (BitsN.toHexString q);
6744
6745fun s_qword0 q =
6746  if q = (BitsN.B(0x0,64))
6747    then ""
6748  else if BitsN.<(q,BitsN.B(0x0,64))
6749    then "-0x" ^ (BitsN.toHexString(BitsN.neg q))
6750  else "+0x" ^ (BitsN.toHexString q);
6751
6752fun s_sib (scale,idx) =
6753  (s_register(Z64,idx))
6754    ^
6755    (if scale = (BitsN.B(0x0,2))
6756       then ""
6757     else "*" ^ (Nat.toString(Nat.pow(2,BitsN.toNat scale))));
6758
6759fun s_mem m =
6760  case m of
6761     (NONE,(ZripBase,imm)) => String.concat["[rip",s_qword0 imm,"]"]
6762   | (NONE,(ZnoBase,imm)) => String.concat["[",s_qword imm,"]"]
6763   | (NONE,(ZregBase base,imm)) =>
6764     String.concat["[",s_register(Z64,base),s_qword0 imm,"]"]
6765   | (Option.SOME(scale,idx),(ZnoBase,imm)) =>
6766     String.concat["[",s_sib(scale,idx),s_qword0 imm,"]"]
6767   | (Option.SOME(scale,idx),(ZregBase base,imm)) =>
6768     String.concat
6769       ["[",s_register(Z64,base),"+",s_sib(scale,idx),s_qword0 imm,"]"]
6770   | _ => "[invalid]";
6771
6772fun s_rm (sz,rm) = case rm of Zr r => s_register(sz,r) | Zm m => s_mem m;
6773
6774fun s_xreg r = "xmm" ^ (Nat.toString(BitsN.toNat r));
6775
6776fun s_xmm_mem x = case x of xmm_reg r => s_xreg r | xmm_mem m => s_mem m;
6777
6778fun s_xmm (r,x) = String.concat[s_xreg r,", ",s_xmm_mem x];
6779
6780fun s_imm_rm x =
6781  case x of Zrm rm => s_rm(Z64,rm) | Zimm imm => s_qword imm;
6782
6783fun s_sz_rm (sz,rm) =
6784  case rm of
6785     Zr _ => s_rm(sz,rm)
6786   | _ =>
6787     (if sz = Z64
6788        then s_rm(sz,rm)
6789      else String.concat[s_sz sz," ",s_rm(Z64,rm)]);
6790
6791fun s_dest_src (sz,ds) =
6792  case ds of
6793     Zrm_r(rm,r) => String.concat[s_rm(sz,rm),", ",s_register(sz,r)]
6794   | Zr_rm(r,rm) => String.concat[s_register(sz,r),", ",s_rm(sz,rm)]
6795   | Zrm_i(rm,i) => String.concat[s_sz_rm(sz,rm),", ",s_qword i];
6796
6797fun s_cond c =
6798  case c of
6799     Z_O => "o"
6800   | Z_B => "b"
6801   | Z_E => "e"
6802   | Z_A => "a"
6803   | Z_S => "s"
6804   | Z_P => "p"
6805   | Z_L => "l"
6806   | Z_G => "g"
6807   | Z_NO => "no"
6808   | Z_NB => "nb"
6809   | Z_NE => "ne"
6810   | Z_NA => "na"
6811   | Z_NS => "ns"
6812   | Z_NP => "np"
6813   | Z_NL => "nl"
6814   | Z_NG => "ng"
6815   | Z_ALWAYS => "";
6816
6817fun s_sse_binop n = L3.dropString(4,Cast.sse_binopToString n);
6818
6819fun s_sse_logic n = L3.dropString(4,Cast.sse_logicToString n);
6820
6821fun s_sse_compare n =
6822  L3.fst
6823    (L3.splitl
6824       (fn c => not(c = #"_"),L3.dropString(4,Cast.sse_compareToString n)));
6825
6826fun sse_instructionToString i =
6827  case i of
6828     bin_PD(bop,(dst,src)) => ((s_sse_binop bop) ^ "pd",s_xmm(dst,src))
6829   | bin_PS(bop,(dst,src)) => ((s_sse_binop bop) ^ "ps",s_xmm(dst,src))
6830   | bin_SD(bop,(dst,src)) => ((s_sse_binop bop) ^ "sd",s_xmm(dst,src))
6831   | bin_SS(bop,(dst,src)) => ((s_sse_binop bop) ^ "ss",s_xmm(dst,src))
6832   | logic_PD(bop,(dst,src)) => ((s_sse_logic bop) ^ "pd",s_xmm(dst,src))
6833   | logic_PS(bop,(dst,src)) => ((s_sse_logic bop) ^ "ps",s_xmm(dst,src))
6834   | CMPPD(bop,(dst,src)) =>
6835     (String.concat["cmp",s_sse_compare bop,"pd"],s_xmm(dst,src))
6836   | CMPPS(bop,(dst,src)) =>
6837     (String.concat["cmp",s_sse_compare bop,"ps"],s_xmm(dst,src))
6838   | CMPSD(bop,(dst,src)) =>
6839     (String.concat["cmp",s_sse_compare bop,"sd"],s_xmm(dst,src))
6840   | CMPSS(bop,(dst,src)) =>
6841     (String.concat["cmp",s_sse_compare bop,"ss"],s_xmm(dst,src))
6842   | COMISD(src1,src2) => ("comisd",s_xmm(src1,src2))
6843   | COMISS(src1,src2) => ("comiss",s_xmm(src1,src2))
6844   | CVTDQ2PD(dst,src) => ("cvtdq2pd",s_xmm(dst,src))
6845   | CVTDQ2PS(dst,src) => ("cvtdq2ps",s_xmm(dst,src))
6846   | CVTPD2DQ(false,(dst,src)) => ("cvtpd2dq",s_xmm(dst,src))
6847   | CVTPD2DQ(true,(dst,src)) => ("cvttpd2dq",s_xmm(dst,src))
6848   | CVTPD2PS(dst,src) => ("cvtpd2ps",s_xmm(dst,src))
6849   | CVTPS2DQ(false,(dst,src)) => ("cvtps2dq",s_xmm(dst,src))
6850   | CVTPS2DQ(true,(dst,src)) => ("cvttps2dq",s_xmm(dst,src))
6851   | CVTPS2PD(dst,src) => ("cvtps2pd",s_xmm(dst,src))
6852   | CVTSD2SI(truncate,(quad,(dst,src))) =>
6853     (String.concat["cvt",if truncate then "t" else "","sd2si"],
6854      String.concat
6855        [s_register(if quad then Z64 else Z32,dst),", ",s_xmm_mem src])
6856   | CVTSD2SS(dst,src) => ("cvtsd2ss",s_xmm(dst,src))
6857   | CVTSI2SD(quad,(dst,src)) =>
6858     ("cvtsi2sd",
6859      String.concat[s_xreg dst,", ",s_rm(if quad then Z64 else Z32,src)])
6860   | CVTSI2SS(quad,(dst,src)) =>
6861     ("cvtsi2ss",
6862      String.concat[s_xreg dst,", ",s_rm(if quad then Z64 else Z32,src)])
6863   | CVTSS2SD(dst,src) => ("cvtss2sd",s_xmm(dst,src))
6864   | CVTSS2SI(truncate,(quad,(dst,src))) =>
6865     (String.concat["cvt",if truncate then "t" else "","ss2si"],
6866      String.concat
6867        [s_register(if quad then Z64 else Z32,dst),", ",s_xmm_mem src])
6868   | MOVAP_D_S(double,(dst,src)) =>
6869     ("movap" ^ (if double then "d" else "s"),
6870      String.concat[s_xmm_mem dst,", ",s_xmm_mem src])
6871   | MOVUP_D_S(double,(dst,src)) =>
6872     ("movup" ^ (if double then "d" else "s"),
6873      String.concat[s_xmm_mem dst,", ",s_xmm_mem src])
6874   | MOV_D_Q(to_xmm,(quad,(xmm,rm))) =>
6875     ("mov" ^ (if quad then "q" else "d"),
6876      if to_xmm
6877        then String.concat
6878               [s_xreg xmm,", ",s_rm(if quad then Z64 else Z32,rm)]
6879      else String.concat
6880             [s_rm(if quad then Z64 else Z32,rm),", ",s_xreg xmm])
6881   | MOVSD(dst,src) =>
6882     ("movsd",String.concat[s_xmm_mem dst,", ",s_xmm_mem src])
6883   | MOVSS(dst,src) =>
6884     ("movss",String.concat[s_xmm_mem dst,", ",s_xmm_mem src])
6885   | MOVQ(dst,src) =>
6886     ("movq",String.concat[s_xmm_mem dst,", ",s_xmm_mem src])
6887   | PCMPEQQ(dst,src) => ("pcmpeqq",s_xmm(dst,src))
6888   | PSRLW_imm(dst,i) =>
6889     ("psrlw",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6890   | PSRAW_imm(dst,i) =>
6891     ("psraw",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6892   | PSLLW_imm(dst,i) =>
6893     ("psllw",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6894   | PSRLD_imm(dst,i) =>
6895     ("psrld",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6896   | PSRAD_imm(dst,i) =>
6897     ("psrad",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6898   | PSLLD_imm(dst,i) =>
6899     ("pslld",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6900   | PSRLQ_imm(dst,i) =>
6901     ("psrlq",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6902   | PSLLQ_imm(dst,i) =>
6903     ("psllq",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6904   | PSRLDQ(dst,i) =>
6905     ("psrldq",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6906   | PSLLDQ(dst,i) =>
6907     ("pslldq",String.concat[s_xreg dst,", ",BitsN.toHexString i])
6908   | SQRTPD(dst,src) => ("sqrtpd",s_xmm(dst,src))
6909   | SQRTPS(dst,src) => ("sqrtps",s_xmm(dst,src))
6910   | SQRTSD(dst,src) => ("sqrtsd",s_xmm(dst,src))
6911   | SQRTSS(dst,src) => ("sqrtss",s_xmm(dst,src));
6912
6913fun instructionToString (i,width) =
6914  case i of
6915     SSE j => sse_instructionToString j
6916   | Zbinop(bop,(sz,dst_src)) =>
6917     (L3.strTl(Cast.Zbinop_nameToString bop),s_dest_src(sz,dst_src))
6918   | Zbit_test(bt,(sz,dst_src)) =>
6919     (L3.strTl(Cast.Zbit_test_nameToString bt),s_dest_src(sz,dst_src))
6920   | Zcall(Zimm imm) =>
6921     ("call",s_qword(BitsN.+(imm,BitsN.fromNat(width,64))))
6922   | Zcall(Zrm rm) => ("call",s_rm(Z64,rm))
6923   | Zcmc => ("cmc","")
6924   | Zclc => ("clc","")
6925   | Zstc => ("stc","")
6926   | Zcmpxchg(sz,(rm,r)) =>
6927     ("cmpxchg",String.concat[s_rm(sz,rm),", ",s_register(sz,r)])
6928   | Zdiv sz_rm => ("div",s_sz_rm sz_rm)
6929   | Zidiv sz_rm => ("idiv",s_sz_rm sz_rm)
6930   | Zjcc(Z_ALWAYS,imm) =>
6931     ("jmp",s_qword(BitsN.+(imm,BitsN.fromNat(width,64))))
6932   | Zjcc(cond,imm) =>
6933     ("j" ^ (s_cond cond),s_qword(BitsN.+(imm,BitsN.fromNat(width,64))))
6934   | Zjmp rm => ("jmp",s_rm(Z64,rm))
6935   | Zlea(Z8 _,_) => ("?lea?","")
6936   | Zlea(sz,Zr_rm(r,Zm m)) =>
6937     ("lea",String.concat[s_register(sz,r),", ",s_rm(Z64,Zm m)])
6938   | Zlea _ => ("?lea?","")
6939   | Zleave => ("leave","")
6940   | Zloop(Z_NE,imm) =>
6941     ("loopne",s_qword(BitsN.+(imm,BitsN.fromNat(width,64))))
6942   | Zloop(Z_E,imm) =>
6943     ("loope",s_qword(BitsN.+(imm,BitsN.fromNat(width,64))))
6944   | Zloop(Z_ALWAYS,imm) =>
6945     ("loop",s_qword(BitsN.+(imm,BitsN.fromNat(width,64))))
6946   | Zloop _ => ("?loop?","")
6947   | Zmonop(mop,sz_rm) =>
6948     (L3.strTl(Cast.Zmonop_nameToString mop),s_sz_rm sz_rm)
6949   | Zmov(Z_ALWAYS,(sz,dst_src)) => ("mov",s_dest_src(sz,dst_src))
6950   | Zmov(cond,(sz,Zr_rm(r,rm))) =>
6951     ("cmov" ^ (s_cond cond),
6952      String.concat[s_register(sz,r),", ",s_rm(sz,rm)])
6953   | Zmov _ => ("?mov?","")
6954   | Zmovsx(Z32,(Zr_rm(r,rm),Z64)) =>
6955     ("movsxd",String.concat[s_register(Z64,r),", ",s_sz_rm(Z32,rm)])
6956   | Zmovsx(Z32,_) => ("?movsx?","")
6957   | Zmovsx(Z64,_) => ("?movsx?","")
6958   | Zmovsx(sz1,(Zr_rm(r,rm),sz2)) =>
6959     ("movsx",String.concat[s_register(sz2,r),", ",s_sz_rm(sz1,rm)])
6960   | Zmovsx _ => ("?movsx?","")
6961   | Zmovzx(Z32,_) => ("?movzx?","")
6962   | Zmovzx(Z64,_) => ("?movzx?","")
6963   | Zmovzx(sz1,(Zr_rm(r,rm),sz2)) =>
6964     ("movzx",String.concat[s_register(sz2,r),", ",s_sz_rm(sz1,rm)])
6965   | Zmovzx _ => ("?movzx?","")
6966   | Zmul sz_rm => ("mul",s_sz_rm sz_rm)
6967   | Zimul sz_rm => ("imul",s_sz_rm sz_rm)
6968   | Zimul2(sz,(r,rm)) =>
6969     ("imul",String.concat[s_register(sz,r),", ",s_rm(sz,rm)])
6970   | Zimul3(sz,(r,(rm,imm))) =>
6971     ("imul",
6972      String.concat[s_register(sz,r),", ",s_rm(sz,rm),", ",s_qword imm])
6973   | Znop 1 => ("nop","")
6974   | Znop i => ("nop",Nat.toString i)
6975   | Zpop rm => ("pop",s_rm(Z64,rm))
6976   | Zpush x => ("push",s_imm_rm x)
6977   | Zret imm => ("ret",s_qword0 imm)
6978   | Zset(cond,(have_rex,rm)) =>
6979     ("set" ^ (s_cond cond),s_rm(Z8 have_rex,rm))
6980   | Zxadd(sz,(rm,r)) =>
6981     ("xadd",String.concat[s_rm(sz,rm),", ",s_register(sz,r)])
6982   | Zxchg(sz,(rm,r)) =>
6983     ("xchg",String.concat[s_rm(sz,rm),", ",s_register(sz,r)]);
6984
6985fun s_byte b = L3.padLeftString(#"0",(2,BitsN.toHexString b));
6986
6987fun writeBytesAux (acc,l) =
6988  case l of
6989     h :: t => writeBytesAux(String.concat[acc," ",s_byte h],t)
6990   | _ => acc;
6991
6992fun writeBytes l = L3.strTl(writeBytesAux("",l));
6993
6994fun joinString (s1,s2) = if s2 = "" then s1 else String.concat[s1," ",s2];
6995
6996end