AMD64Assembler.java revision 12651:6ef01bd40ce2
/*
 * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package org.graalvm.compiler.asm.amd64;

import static org.graalvm.compiler.asm.NumUtil.isByte;
import static org.graalvm.compiler.asm.NumUtil.isInt;
import static org.graalvm.compiler.asm.NumUtil.isShiftCount;
import static org.graalvm.compiler.asm.NumUtil.isUByte;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.amd64.AMD64.r12;
import static jdk.vm.ci.amd64.AMD64.r13;
import static jdk.vm.ci.amd64.AMD64.rbp;
import static jdk.vm.ci.amd64.AMD64.rip;
import static jdk.vm.ci.amd64.AMD64.rsp;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;

import org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.NumUtil;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends Assembler {

    private static final int MinEncodingNeedsRex = 8;

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }

    /**
     * Constants for X86 prefix bytes.
     */
    private static class Prefix {
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;
        private static final int VEX_3BYTES = 0xC4;
        private static final int VEX_2BYTES = 0xC5;
    }
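
    // Note added for clarity (not in the original source): the REX prefix constants above follow
    // the bit layout 0100WRXB, so for example Prefix.REXWRB (0x4D) has W, R and B set and X clear.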

    private static class VexPrefix {
        private static final int VEX_R = 0x80;
        private static final int VEX_W = 0x80;
    }

    private static class AvxVectorLen {
        private static final int AVX_128bit = 0x0;
        private static final int AVX_256bit = 0x1;
    }

    private static class VexSimdPrefix {
        private static final int VEX_SIMD_NONE = 0x0;
        private static final int VEX_SIMD_66 = 0x1;
        private static final int VEX_SIMD_F3 = 0x2;
        private static final int VEX_SIMD_F2 = 0x3;
    }

    private static class VexOpcode {
        private static final int VEX_OPCODE_NONE = 0x0;
        private static final int VEX_OPCODE_0F = 0x1;
        private static final int VEX_OPCODE_0F_38 = 0x2;
        private static final int VEX_OPCODE_0F_3A = 0x3;
    }

    private AMD64InstructionAttr curAttributes;

    AMD64InstructionAttr getCurAttributes() {
        return curAttributes;
    }

    void setCurAttributes(AMD64InstructionAttr attributes) {
        curAttributes = attributes;
    }

    /**
     * The x86 operand sizes.
     */
    public enum OperandSize {
        BYTE(1) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        WORD(2, 0x66) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        DWORD(4) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        QWORD(8) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        SS(4, 0xF3, true),

        SD(8, 0xF2, true),

        PS(16, true),

        PD(16, 0x66, true);

        private final int sizePrefix;

        private final int bytes;
        private final boolean xmm;

        OperandSize(int bytes) {
            this(bytes, 0);
        }

        OperandSize(int bytes, int sizePrefix) {
            this(bytes, sizePrefix, false);
        }

        OperandSize(int bytes, boolean xmm) {
            this(bytes, 0, xmm);
        }

        OperandSize(int bytes, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.xmm = xmm;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm
         * @param imm
         */
        protected void emitImmediate(AMD64Assembler asm, int imm) {
            throw new UnsupportedOperationException();
        }

        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        No16BitAssertion(CPU, CPU, DWORD, QWORD),
        No32BitAssertion(CPU, CPU, WORD, QWORD),
        QwordOnlyAssertion(CPU, CPU, QWORD),
        FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatingAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
        FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }

    public abstract static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }

    /**
     * Annotation that stores additional information about the displacement of a
     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
     */
    public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Annotation that stores additional information about the immediate operand, e.g., of a call
     * instruction, that needs patching.
     */
    public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }

    public boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    private static int encode(Register r) {
        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }

    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }
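
    // Illustrative example (added, not part of the original source): for reg = r9 (encoding 9)
    // and rm = rax (encoding 0), the high bit of r9 moves into R, giving RXB = 0b100, while the
    // low three bits (001) end up in ModRM.reg via encode().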

    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        if (!rm.getIndex().equals(Register.None)) {
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!rm.getBase().equals(Register.None)) {
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }
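
    // Illustrative example (added, not part of the original source): for reg = xmm0 and the
    // address [rax + r10*4], the index register r10 (encoding 10) contributes its high bit to X,
    // so RXB = 0b010; the low bits of index and base go into the SIB byte.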

    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(int reg, Register rm) {
        assert (reg & 0x07) == reg;
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }

    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }

    protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
    }

    protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     */
    protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert (reg & 0x07) == reg;
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        AMD64Address.Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            int baseenc = base.isValid() ? encode(base) : 0;
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + indexscale + imm8]
                    // [01 reg 100][ss index base] imm8
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x44 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + indexscale + disp32]
                    // [10 reg 100][ss index base] disp32
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x84 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitInt(disp);
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // [rsp + disp]
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else if (isByte(disp) && !force4Byte) {
                    // [rsp + imm8]
                    // [01 reg 100][00 100 100] disp8
                    emitByte(0x44 | regenc);
                    emitByte(0x24);
                    emitByte(disp & 0xFF);
                } else {
                    // [rsp + imm32]
                    // [10 reg 100][00 100 100] disp32
                    emitByte(0x84 | regenc);
                    emitByte(0x24);
                    emitInt(disp);
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + disp8]
                    // [01 reg base] disp8
                    emitByte(0x40 | regenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + disp32]
                    // [10 reg base] disp32
                    emitByte(0x80 | regenc | baseenc);
                    emitInt(disp);
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
        setCurAttributes(null);
    }
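
    // Worked example (added for illustration, not part of the original source): with reg == 0 and
    // the address [rbx + rcx*4 + 0x10], the [base + index*scale + disp8] branch above emits
    // ModRM 0x44 and SIB 0x8B (scale = 4, index = rcx, base = rbx), followed by the displacement
    // byte 0x10.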

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        protected static final int P_0F = 0x0F;
        protected static final int P_0F38 = 0x380F;
        protected static final int P_0F3A = 0x3A0F;

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.sizePrefix != 0) {
                asm.emitByte(size.sizePrefix);
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }
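
        // Illustrative note (added, not in the original source): for a QWORD operation REX.W is
        // set, so a 64-bit register-register op on low registers starts with the prefix 0x48
        // before the opcode byte; the R/X/B bits passed in via rxb are OR-ed into that prefix
        // when registers with encodings >= 8 are involved.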

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.bytes;
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of either RM or MR for 3 address forms.
     */
    public abstract static class AMD64RRROp extends AMD64Op {

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",         P_0F, 0xAF);
        public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",          P_0F, 0xBC);
        public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",          P_0F, 0xBD);
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB",       P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
        public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",        P_0F, 0xB7, OpAssertion.No16BitAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB",       P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
        public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",        P_0F, 0xBF, OpAssertion.No16BitAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD",             0x63, OpAssertion.QwordOnlyAssertion);
        public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",               0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",                0x8B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);

        // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
        public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",               0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",               0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x5A:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x10:
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x10:
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
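
    // Usage sketch (added for illustration, not part of the original source): for plain CPU
    // registers the RM form emits the opcode followed by a ModRM byte, e.g.
    // AMD64RMOp.MOV.emit(asm, DWORD, rax, rbx) produces 0x8B 0xC3, i.e. "mov eax, ebx";
    // the SIMD opcodes listed above take the SSE/VEX prefix path instead.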

    /**
     * Opcode with operand order of RRM (a three-operand form).
     */
    public static class AMD64RRMOp extends AMD64RRROp {
        protected AMD64RRMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
            assert verify(asm, size, dst, src);
            int pre;
            int opc;
            boolean rexVexW = size == QWORD;
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    opc = VexOpcode.VEX_OPCODE_NONE;
                    break;
            }
            int encode;
            encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitByte(0xC0 | encode);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
            assert verify(asm, size, dst, null);
            int pre;
            int opc;
            boolean rexVexW = size == QWORD;
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    opc = VexOpcode.VEX_OPCODE_NONE;
                    break;
            }
            asm.simdPrefix(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB   = new AMD64MROp("MOVB",               0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV    = new AMD64MROp("MOV",                0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefix
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
        public static final AMD64MROp MOVD   = new AMD64MROp("MOVD",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ   = new AMD64MROp("MOVQ",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64MROp MOVSS  = new AMD64MROp("MOVSS",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD  = new AMD64MROp("MOVSD",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.IntegerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x7E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x11:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, null, src);
            boolean isSimd = false;

            switch (op) {
                case 0x7E:
                case 0x11:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
                asm.emitByte(op);
                asm.emitOperandHelper(src, dst, 0);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
                asm.emitOperandHelper(src, dst, 0);
            }
        }
    }

    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
        public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
        public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
        public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.No32BitAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
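
    // Usage sketch (added for illustration, not part of the original source): the single-operand
    // forms place the opcode extension in ModRM.reg, e.g. AMD64MOp.NEG.emit(asm, DWORD, rcx)
    // emits 0xF7 0xD9 ("neg ecx"), where 0xD9 = 11 011 001 encodes ext = 3 and rm = rcx.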
1299
1300    /**
1301     * Opcodes with operand order of MI.
1302     */
1303    public static class AMD64MIOp extends AMD64ImmOp {
1304        // @formatter:off
1305        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
1306        public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
1307        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
1308        // @formatter:on
1309
1310        private final int ext;
1311
1312        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
1313            this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
1314        }
1315
1316        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
1317            this(opcode, immIsByte, 0, op, ext, assertion);
1318        }
1319
1320        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
1321            super(opcode, immIsByte, prefix, op, assertion);
1322            this.ext = ext;
1323        }
1324
1325        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
1326            assert verify(asm, size, dst, null);
1327            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1328            asm.emitModRM(ext, dst);
1329            emitImmediate(asm, size, imm);
1330        }
1331
1332        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
1333            assert verify(asm, size, null, null);
1334            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1335            asm.emitOperandHelper(ext, dst, immediateSize(size));
1336            emitImmediate(asm, size, imm);
1337        }
1338    }
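
    // Example usage (illustrative; 'asm' and 'reg' are placeholder names for an AMD64Assembler and a CPU register):
    //   AMD64MIOp.MOV.emit(asm, OperandSize.DWORD, reg, 0x1234);  // movl reg, 0x1234  (C7 /0 id)
    //   AMD64MIOp.TEST.emit(asm, OperandSize.DWORD, reg, 0xFF);   // testl reg, 0xFF   (F7 /0 id)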
1339
1340    /**
1341     * Opcodes with operand order of RMI.
1342     *
1343     * Only one form of the round instructions is provided: the operation always takes a single
1344     * scalar input, so extending it to a three-address form would be redundant.
1345     */
1346    public static class AMD64RMIOp extends AMD64ImmOp {
1347        // @formatter:off
1348        public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
1349        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
1350        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
1351        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
1352        // @formatter:on
1353
1354        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
1355            this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
1356        }
1357
1358        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
1359            super(opcode, immIsByte, prefix, op, assertion);
1360        }
1361
1362        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
1363            assert verify(asm, size, dst, src);
1364            boolean isSimd = false;
1365            boolean noNds = false;
1366
1367            switch (op) {
1368                case 0x0A:
1369                case 0x0B:
1370                    isSimd = true;
1371                    noNds = true;
1372                    break;
1373            }
1374
1375            if (isSimd) {
1376                int pre;
1377                int opc;
1378                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1379                int curPrefix = size.sizePrefix | prefix1;
1380                switch (curPrefix) {
1381                    case 0x66:
1382                        pre = VexSimdPrefix.VEX_SIMD_66;
1383                        break;
1384                    case 0xF2:
1385                        pre = VexSimdPrefix.VEX_SIMD_F2;
1386                        break;
1387                    case 0xF3:
1388                        pre = VexSimdPrefix.VEX_SIMD_F3;
1389                        break;
1390                    default:
1391                        pre = VexSimdPrefix.VEX_SIMD_NONE;
1392                        break;
1393                }
1394                switch (prefix2) {
1395                    case P_0F:
1396                        opc = VexOpcode.VEX_OPCODE_0F;
1397                        break;
1398                    case P_0F38:
1399                        opc = VexOpcode.VEX_OPCODE_0F_38;
1400                        break;
1401                    case P_0F3A:
1402                        opc = VexOpcode.VEX_OPCODE_0F_3A;
1403                        break;
1404                    default:
1405                        opc = VexOpcode.VEX_OPCODE_NONE;
1406                        break;
1407                }
1408                int encode;
1409                if (noNds) {
1410                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
1411                } else {
1412                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
1413                }
1414                asm.emitByte(op);
1415                asm.emitByte(0xC0 | encode);
1416                emitImmediate(asm, size, imm);
1417            } else {
1418                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
1419                asm.emitModRM(dst, src);
1420                emitImmediate(asm, size, imm);
1421            }
1422        }
1423
1424        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
1425            assert verify(asm, size, dst, null);
1426
1427            boolean isSimd = false;
1428            boolean noNds = false;
1429
1430            switch (op) {
1431                case 0x0A:
1432                case 0x0B:
1433                    isSimd = true;
1434                    noNds = true;
1435                    break;
1436            }
1437
1438            if (isSimd) {
1439                int pre;
1440                int opc;
1441                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1442                int curPrefix = size.sizePrefix | prefix1;
1443                switch (curPrefix) {
1444                    case 0x66:
1445                        pre = VexSimdPrefix.VEX_SIMD_66;
1446                        break;
1447                    case 0xF2:
1448                        pre = VexSimdPrefix.VEX_SIMD_F2;
1449                        break;
1450                    case 0xF3:
1451                        pre = VexSimdPrefix.VEX_SIMD_F3;
1452                        break;
1453                    default:
1454                        pre = VexSimdPrefix.VEX_SIMD_NONE;
1455                        break;
1456                }
1457                switch (prefix2) {
1458                    case P_0F:
1459                        opc = VexOpcode.VEX_OPCODE_0F;
1460                        break;
1461                    case P_0F38:
1462                        opc = VexOpcode.VEX_OPCODE_0F_38;
1463                        break;
1464                    case P_0F3A:
1465                        opc = VexOpcode.VEX_OPCODE_0F_3A;
1466                        break;
1467                    default:
1468                        opc = VexOpcode.VEX_OPCODE_NONE;
1469                        break;
1470                }
1471
1472                if (noNds) {
1473                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
1474                } else {
1475                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
1476                }
1477                asm.emitByte(op);
1478                asm.emitOperandHelper(dst, src, immediateSize(size));
1479                emitImmediate(asm, size, imm);
1480            } else {
1481                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
1482                asm.emitOperandHelper(dst, src, immediateSize(size));
1483                emitImmediate(asm, size, imm);
1484            }
1485        }
1486    }
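
    // Example usage (illustrative; 'asm', 'dst' and 'src' are placeholder names for an AMD64Assembler and CPU registers):
    //   AMD64RMIOp.IMUL_SX.emit(asm, OperandSize.DWORD, dst, src, 3);    // imull dst, src, 3    (6B /r ib), as in imull() below
    //   AMD64RMIOp.IMUL.emit(asm, OperandSize.DWORD, dst, src, 1000);    // imull dst, src, 1000 (69 /r id)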
1487
1488    public static class SSEOp extends AMD64RMOp {
1489        // @formatter:off
1490        public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1491        public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SD",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1492        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1493        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1494        public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",          P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
1495        public static final SSEOp SQRT      = new SSEOp("SQRT",            P_0F, 0x51);
1496        public static final SSEOp AND       = new SSEOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1497        public static final SSEOp ANDN      = new SSEOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1498        public static final SSEOp OR        = new SSEOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1499        public static final SSEOp XOR       = new SSEOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1500        public static final SSEOp ADD       = new SSEOp("ADD",             P_0F, 0x58);
1501        public static final SSEOp MUL       = new SSEOp("MUL",             P_0F, 0x59);
1502        public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",        P_0F, 0x5A, OpAssertion.SingleAssertion);
1503        public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",        P_0F, 0x5A, OpAssertion.DoubleAssertion);
1504        public static final SSEOp SUB       = new SSEOp("SUB",             P_0F, 0x5C);
1505        public static final SSEOp MIN       = new SSEOp("MIN",             P_0F, 0x5D);
1506        public static final SSEOp DIV       = new SSEOp("DIV",             P_0F, 0x5E);
1507        public static final SSEOp MAX       = new SSEOp("MAX",             P_0F, 0x5F);
1508        // @formatter:on
1509
1510        protected SSEOp(String opcode, int prefix, int op) {
1511            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1512        }
1513
1514        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
1515            this(opcode, 0, prefix, op, assertion);
1516        }
1517
1518        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1519            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
1520        }
1521    }
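
    // Example usage (illustrative; 'asm', 'dst' and 'src' are placeholder names for an AMD64Assembler and XMM registers):
    //   SSEOp.ADD.emit(asm, OperandSize.SD, dst, src);   // addsd dst, src   (F2 0F 58 /r)
    //   SSEOp.SQRT.emit(asm, OperandSize.SS, dst, src);  // sqrtss dst, src  (F3 0F 51 /r)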
1522
1523    public static class AVXOp extends AMD64RRMOp {
1524        // @formatter:off
1525        public static final AVXOp AND       = new AVXOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1526        public static final AVXOp ANDN      = new AVXOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1527        public static final AVXOp OR        = new AVXOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1528        public static final AVXOp XOR       = new AVXOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1529        public static final AVXOp ADD       = new AVXOp("ADD",             P_0F, 0x58);
1530        public static final AVXOp MUL       = new AVXOp("MUL",             P_0F, 0x59);
1531        public static final AVXOp SUB       = new AVXOp("SUB",             P_0F, 0x5C);
1532        public static final AVXOp MIN       = new AVXOp("MIN",             P_0F, 0x5D);
1533        public static final AVXOp DIV       = new AVXOp("DIV",             P_0F, 0x5E);
1534        public static final AVXOp MAX       = new AVXOp("MAX",             P_0F, 0x5F);
1535        // @formatter:on
1536
1537        protected AVXOp(String opcode, int prefix, int op) {
1538            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1539        }
1540
1541        protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
1542            this(opcode, 0, prefix, op, assertion);
1543        }
1544
1545        protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1546            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
1547        }
1548    }
1549
1550    /**
1551     * Arithmetic operation with operand order of RM, MR or MI.
1552     */
1553    public static final class AMD64BinaryArithmetic {
1554        // @formatter:off
1555        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
1556        public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
1557        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
1558        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
1559        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
1560        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
1561        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
1562        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
1563        // @formatter:on
1564
1565        private final AMD64MIOp byteImmOp;
1566        private final AMD64MROp byteMrOp;
1567        private final AMD64RMOp byteRmOp;
1568
1569        private final AMD64MIOp immOp;
1570        private final AMD64MIOp immSxOp;
1571        private final AMD64MROp mrOp;
1572        private final AMD64RMOp rmOp;
1573
1574        private AMD64BinaryArithmetic(String opcode, int code) {
1575            int baseOp = code << 3;
1576
1577            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
1578            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
1579            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
1580
1581            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
1582            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
1583            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
1584            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
1585        }
1586
1587        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
1588            if (size == BYTE) {
1589                return byteImmOp;
1590            } else if (sx) {
1591                return immSxOp;
1592            } else {
1593                return immOp;
1594            }
1595        }
1596
1597        public AMD64MROp getMROpcode(OperandSize size) {
1598            if (size == BYTE) {
1599                return byteMrOp;
1600            } else {
1601                return mrOp;
1602            }
1603        }
1604
1605        public AMD64RMOp getRMOpcode(OperandSize size) {
1606            if (size == BYTE) {
1607                return byteRmOp;
1608            } else {
1609                return rmOp;
1610            }
1611        }
1612    }
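
    // Example usage (illustrative; 'asm', 'reg', 'other' and 'imm' are placeholder names):
    //   ADD.getMIOpcode(OperandSize.DWORD, isByte(imm)).emit(asm, OperandSize.DWORD, reg, imm);  // as in addl() below
    //   CMP.getRMOpcode(OperandSize.DWORD).emit(asm, OperandSize.DWORD, reg, other);             // cmpl reg, other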
1613
1614    /**
1615     * Shift operation with operand order of M1, MC or MI.
1616     */
1617    public static final class AMD64Shift {
1618        // @formatter:off
1619        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
1620        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
1621        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
1622        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
1623        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
1624        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
1625        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
1626        // @formatter:on
1627
1628        public final AMD64MOp m1Op;
1629        public final AMD64MOp mcOp;
1630        public final AMD64MIOp miOp;
1631
1632        private AMD64Shift(String opcode, int code) {
1633            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
1634            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
1635            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
1636        }
1637    }
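
    // Example usage (illustrative; 'asm' and 'reg' are placeholder names for an AMD64Assembler and a CPU register):
    //   AMD64Shift.SHL.miOp.emit(asm, OperandSize.DWORD, reg, 3);  // shll reg, 3   (C1 /4 ib)
    //   AMD64Shift.SAR.mcOp.emit(asm, OperandSize.DWORD, reg);     // sarl reg, cl  (D3 /7)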
1638
1639    public final void addl(AMD64Address dst, int imm32) {
1640        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1641    }
1642
1643    public final void addl(Register dst, int imm32) {
1644        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1645    }
1646
1647    public final void addl(Register dst, Register src) {
1648        ADD.rmOp.emit(this, DWORD, dst, src);
1649    }
1650
1651    public final void addpd(Register dst, Register src) {
1652        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1653        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1654        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1655        emitByte(0x58);
1656        emitByte(0xC0 | encode);
1657    }
1658
1659    public final void addpd(Register dst, AMD64Address src) {
1660        assert dst.getRegisterCategory().equals(AMD64.XMM);
1661        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1662        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1663        emitByte(0x58);
1664        emitOperandHelper(dst, src, 0);
1665    }
1666
1667    public final void addsd(Register dst, Register src) {
1668        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1669        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1670        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1671        emitByte(0x58);
1672        emitByte(0xC0 | encode);
1673    }
1674
1675    public final void addsd(Register dst, AMD64Address src) {
1676        assert dst.getRegisterCategory().equals(AMD64.XMM);
1677        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1678        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1679        emitByte(0x58);
1680        emitOperandHelper(dst, src, 0);
1681    }
1682
1683    private void addrNop4() {
1684        // 4 bytes: NOP DWORD PTR [EAX+0]
1685        emitByte(0x0F);
1686        emitByte(0x1F);
1687        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
1688        emitByte(0); // 8-bit offset (1 byte)
1689    }
1690
1691    private void addrNop5() {
1692        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset
1693        emitByte(0x0F);
1694        emitByte(0x1F);
1695        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
1696        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1697        emitByte(0); // 8-bit offset (1 byte)
1698    }
1699
1700    private void addrNop7() {
1701        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset
1702        emitByte(0x0F);
1703        emitByte(0x1F);
1704        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
1705        emitInt(0); // 32-bit offset (4 bytes)
1706    }
1707
1708    private void addrNop8() {
1709        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
1710        emitByte(0x0F);
1711        emitByte(0x1F);
1712        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1713        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1714        emitInt(0); // 32-bit offset (4 bytes)
1715    }
1716
1717    public final void andl(Register dst, int imm32) {
1718        AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1719    }
1720
1721    public final void andl(Register dst, Register src) {
1722        AND.rmOp.emit(this, DWORD, dst, src);
1723    }
1724
1725    public final void andpd(Register dst, Register src) {
1726        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1727        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1728        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1729        emitByte(0x54);
1730        emitByte(0xC0 | encode);
1731    }
1732
1733    public final void andpd(Register dst, AMD64Address src) {
1734        assert dst.getRegisterCategory().equals(AMD64.XMM);
1735        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1736        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1737        emitByte(0x54);
1738        emitOperandHelper(dst, src, 0);
1739    }
1740
1741    public final void bsrl(Register dst, Register src) {
1742        int encode = prefixAndEncode(dst.encoding(), src.encoding());
1743        emitByte(0x0F);
1744        emitByte(0xBD);
1745        emitByte(0xC0 | encode);
1746    }
1747
1748    public final void bswapl(Register reg) {
1749        int encode = prefixAndEncode(reg.encoding);
1750        emitByte(0x0F);
1751        emitByte(0xC8 | encode);
1752    }
1753
1754    public final void cdql() {
1755        emitByte(0x99);
1756    }
1757
1758    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1759        int encode = prefixAndEncode(dst.encoding, src.encoding);
1760        emitByte(0x0F);
1761        emitByte(0x40 | cc.getValue());
1762        emitByte(0xC0 | encode);
1763    }
1764
1765    public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1766        prefix(src, dst);
1767        emitByte(0x0F);
1768        emitByte(0x40 | cc.getValue());
1769        emitOperandHelper(dst, src, 0);
1770    }
1771
1772    public final void cmpl(Register dst, int imm32) {
1773        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1774    }
1775
1776    public final void cmpl(Register dst, Register src) {
1777        CMP.rmOp.emit(this, DWORD, dst, src);
1778    }
1779
1780    public final void cmpl(Register dst, AMD64Address src) {
1781        CMP.rmOp.emit(this, DWORD, dst, src);
1782    }
1783
1784    public final void cmpl(AMD64Address dst, int imm32) {
1785        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1786    }
1787
1788    // The 32-bit cmpxchg compares the value at adr with the contents of rax;
1789    // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into rax.
1790    // The ZF flag is set if the compared values were equal, and cleared otherwise.
1791    public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1792        prefix(adr, reg);
1793        emitByte(0x0F);
1794        emitByte(0xB1);
1795        emitOperandHelper(reg, adr, 0);
1796    }
1797
1798    public final void cvtsi2sdl(Register dst, Register src) {
1799        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
1800        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1801        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1802        emitByte(0x2A);
1803        emitByte(0xC0 | encode);
1804    }
1805
1806    public final void cvttsd2sil(Register dst, Register src) {
1807        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
1808        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1809        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1810        emitByte(0x2C);
1811        emitByte(0xC0 | encode);
1812    }
1813
1814    protected final void decl(AMD64Address dst) {
1815        prefix(dst);
1816        emitByte(0xFF);
1817        emitOperandHelper(1, dst, 0);
1818    }
1819
1820    public final void divsd(Register dst, Register src) {
1821        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1822        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1823        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1824        emitByte(0x5E);
1825        emitByte(0xC0 | encode);
1826    }
1827
1828    public final void hlt() {
1829        emitByte(0xF4);
1830    }
1831
1832    public final void imull(Register dst, Register src, int value) {
1833        if (isByte(value)) {
1834            AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1835        } else {
1836            AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1837        }
1838    }
1839
1840    protected final void incl(AMD64Address dst) {
1841        prefix(dst);
1842        emitByte(0xFF);
1843        emitOperandHelper(0, dst, 0);
1844    }
1845
1846    public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1847        int shortSize = 2;
1848        int longSize = 6;
1849        long disp = jumpTarget - position();
1850        if (!forceDisp32 && isByte(disp - shortSize)) {
1851            // 0111 tttn #8-bit disp
1852            emitByte(0x70 | cc.getValue());
1853            emitByte((int) ((disp - shortSize) & 0xFF));
1854        } else {
1855            // 0000 1111 1000 tttn #32-bit disp
1856            assert isInt(disp - longSize) : "must be 32bit offset (call4)";
1857            emitByte(0x0F);
1858            emitByte(0x80 | cc.getValue());
1859            emitInt((int) (disp - longSize));
1860        }
1861    }
1862
1863    public final void jcc(ConditionFlag cc, Label l) {
1864        assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1865        if (l.isBound()) {
1866            jcc(cc, l.position(), false);
1867        } else {
1868            // Note: conditional jumps to this jump could be eliminated if the condition
1869            // is the same; however, that seems to be a rather unlikely case.
1870            // Note: use jccb() if the label to be bound is very close, to get
1871            // an 8-bit displacement.
1872            l.addPatchAt(position());
1873            emitByte(0x0F);
1874            emitByte(0x80 | cc.getValue());
1875            emitInt(0);
1876        }
1877
1878    }
1879
1880    public final void jccb(ConditionFlag cc, Label l) {
1881        if (l.isBound()) {
1882            int shortSize = 2;
1883            int entry = l.position();
1884            assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1885            long disp = entry - position();
1886            // 0111 tttn #8-bit disp
1887            emitByte(0x70 | cc.getValue());
1888            emitByte((int) ((disp - shortSize) & 0xFF));
1889        } else {
1890            l.addPatchAt(position());
1891            emitByte(0x70 | cc.getValue());
1892            emitByte(0);
1893        }
1894    }
1895
1896    public final void jmp(int jumpTarget, boolean forceDisp32) {
1897        int shortSize = 2;
1898        int longSize = 5;
1899        long disp = jumpTarget - position();
1900        if (!forceDisp32 && isByte(disp - shortSize)) {
1901            emitByte(0xEB);
1902            emitByte((int) ((disp - shortSize) & 0xFF));
1903        } else {
1904            emitByte(0xE9);
1905            emitInt((int) (disp - longSize));
1906        }
1907    }
1908
1909    @Override
1910    public final void jmp(Label l) {
1911        if (l.isBound()) {
1912            jmp(l.position(), false);
1913        } else {
1914            // By default, forward jumps are always 32-bit displacements, since
1915            // we can't yet know where the label will be bound. If you're sure that
1916            // the forward jump stays within the +127 byte reach of an 8-bit
1917            // displacement, use jmpb to force one.
1918
1919            l.addPatchAt(position());
1920            emitByte(0xE9);
1921            emitInt(0);
1922        }
1923    }
1924
1925    public final void jmp(Register entry) {
1926        int encode = prefixAndEncode(entry.encoding);
1927        emitByte(0xFF);
1928        emitByte(0xE0 | encode);
1929    }
1930
1931    public final void jmp(AMD64Address adr) {
1932        prefix(adr);
1933        emitByte(0xFF);
1934        emitOperandHelper(rsp, adr, 0);
1935    }
1936
1937    public final void jmpb(Label l) {
1938        if (l.isBound()) {
1939            int shortSize = 2;
1940            int entry = l.position();
1941            assert isByte((entry - position()) + shortSize) : "Displacement too large for a short jmp";
1942            long offs = entry - position();
1943            emitByte(0xEB);
1944            emitByte((int) ((offs - shortSize) & 0xFF));
1945        } else {
1946
1947            l.addPatchAt(position());
1948            emitByte(0xEB);
1949            emitByte(0);
1950        }
1951    }
1952
1953    public final void leaq(Register dst, AMD64Address src) {
1954        prefixq(src, dst);
1955        emitByte(0x8D);
1956        emitOperandHelper(dst, src, 0);
1957    }
1958
1959    public final void leave() {
1960        emitByte(0xC9);
1961    }
1962
1963    public final void lock() {
1964        emitByte(0xF0);
1965    }
1966
1967    public final void movapd(Register dst, Register src) {
1968        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1969        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1970        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1971        emitByte(0x28);
1972        emitByte(0xC0 | encode);
1973    }
1974
1975    public final void movaps(Register dst, Register src) {
1976        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1977        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1978        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
1979        emitByte(0x28);
1980        emitByte(0xC0 | encode);
1981    }
1982
1983    public final void movb(AMD64Address dst, int imm8) {
1984        prefix(dst);
1985        emitByte(0xC6);
1986        emitOperandHelper(0, dst, 1);
1987        emitByte(imm8);
1988    }
1989
1990    public final void movb(AMD64Address dst, Register src) {
1991        assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
1992        prefix(dst, src, true);
1993        emitByte(0x88);
1994        emitOperandHelper(src, dst, 0);
1995    }
1996
1997    public final void movl(Register dst, int imm32) {
1998        int encode = prefixAndEncode(dst.encoding);
1999        emitByte(0xB8 | encode);
2000        emitInt(imm32);
2001    }
2002
2003    public final void movl(Register dst, Register src) {
2004        int encode = prefixAndEncode(dst.encoding, src.encoding);
2005        emitByte(0x8B);
2006        emitByte(0xC0 | encode);
2007    }
2008
2009    public final void movl(Register dst, AMD64Address src) {
2010        prefix(src, dst);
2011        emitByte(0x8B);
2012        emitOperandHelper(dst, src, 0);
2013    }
2014
2015    public final void movl(AMD64Address dst, int imm32) {
2016        prefix(dst);
2017        emitByte(0xC7);
2018        emitOperandHelper(0, dst, 4);
2019        emitInt(imm32);
2020    }
2021
2022    public final void movl(AMD64Address dst, Register src) {
2023        prefix(dst, src);
2024        emitByte(0x89);
2025        emitOperandHelper(src, dst, 0);
2026    }
2027
2028    /**
2029     * Newer CPUs require the use of movsd and movss to avoid a partial register stall when
2030     * loading from memory. On older Opterons, however, movlpd is used instead of movsd. The selection is done in
2031     * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2032     * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2033     */
2034    public final void movlpd(Register dst, AMD64Address src) {
2035        assert dst.getRegisterCategory().equals(AMD64.XMM);
2036        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2037        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2038        emitByte(0x12);
2039        emitOperandHelper(dst, src, 0);
2040    }
2041
2042    public final void movlhps(Register dst, Register src) {
2043        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2044        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2045        int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2046        emitByte(0x16);
2047        emitByte(0xC0 | encode);
2048    }
2049
2050    public final void movq(Register dst, AMD64Address src) {
2051        movq(dst, src, false);
2052    }
2053
2054    public final void movq(Register dst, AMD64Address src, boolean wide) {
2055        if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2056            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2057            simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2058            emitByte(0x7E);
2059            emitOperandHelper(dst, src, wide, 0);
2060        } else {
2061            // gpr version of movq
2062            prefixq(src, dst);
2063            emitByte(0x8B);
2064            emitOperandHelper(dst, src, wide, 0);
2065        }
2066    }
2067
2068    public final void movq(Register dst, Register src) {
2069        int encode = prefixqAndEncode(dst.encoding, src.encoding);
2070        emitByte(0x8B);
2071        emitByte(0xC0 | encode);
2072    }
2073
2074    public final void movq(AMD64Address dst, Register src) {
2075        if (src.getRegisterCategory().equals(AMD64.XMM)) {
2076            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2077            simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2078            emitByte(0xD6);
2079            emitOperandHelper(src, dst, 0);
2080        } else {
2081            // gpr version of movq
2082            prefixq(dst, src);
2083            emitByte(0x89);
2084            emitOperandHelper(src, dst, 0);
2085        }
2086    }
2087
2088    public final void movsbl(Register dst, AMD64Address src) {
2089        prefix(src, dst);
2090        emitByte(0x0F);
2091        emitByte(0xBE);
2092        emitOperandHelper(dst, src, 0);
2093    }
2094
2095    public final void movsbl(Register dst, Register src) {
2096        int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
2097        emitByte(0x0F);
2098        emitByte(0xBE);
2099        emitByte(0xC0 | encode);
2100    }
2101
2102    public final void movsbq(Register dst, AMD64Address src) {
2103        prefixq(src, dst);
2104        emitByte(0x0F);
2105        emitByte(0xBE);
2106        emitOperandHelper(dst, src, 0);
2107    }
2108
2109    public final void movsbq(Register dst, Register src) {
2110        int encode = prefixqAndEncode(dst.encoding, src.encoding);
2111        emitByte(0x0F);
2112        emitByte(0xBE);
2113        emitByte(0xC0 | encode);
2114    }
2115
2116    public final void movsd(Register dst, Register src) {
2117        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2118        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2119        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2120        emitByte(0x10);
2121        emitByte(0xC0 | encode);
2122    }
2123
2124    public final void movsd(Register dst, AMD64Address src) {
2125        assert dst.getRegisterCategory().equals(AMD64.XMM);
2126        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2127        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2128        emitByte(0x10);
2129        emitOperandHelper(dst, src, 0);
2130    }
2131
2132    public final void movsd(AMD64Address dst, Register src) {
2133        assert src.getRegisterCategory().equals(AMD64.XMM);
2134        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2135        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2136        emitByte(0x11);
2137        emitOperandHelper(src, dst, 0);
2138    }
2139
2140    public final void movss(Register dst, Register src) {
2141        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2142        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2143        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2144        emitByte(0x10);
2145        emitByte(0xC0 | encode);
2146    }
2147
2148    public final void movss(Register dst, AMD64Address src) {
2149        assert dst.getRegisterCategory().equals(AMD64.XMM);
2150        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2151        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2152        emitByte(0x10);
2153        emitOperandHelper(dst, src, 0);
2154    }
2155
2156    public final void movss(AMD64Address dst, Register src) {
2157        assert src.getRegisterCategory().equals(AMD64.XMM);
2158        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2159        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2160        emitByte(0x11);
2161        emitOperandHelper(src, dst, 0);
2162    }
2163
2164    public final void mulpd(Register dst, Register src) {
2165        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2166        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2167        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2168        emitByte(0x59);
2169        emitByte(0xC0 | encode);
2170    }
2171
2172    public final void mulpd(Register dst, AMD64Address src) {
2173        assert dst.getRegisterCategory().equals(AMD64.XMM);
2174        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2175        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2176        emitByte(0x59);
2177        emitOperandHelper(dst, src, 0);
2178    }
2179
2180    public final void mulsd(Register dst, Register src) {
2181        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2182        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2183        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2184        emitByte(0x59);
2185        emitByte(0xC0 | encode);
2186    }
2187
2188    public final void mulsd(Register dst, AMD64Address src) {
2189        assert dst.getRegisterCategory().equals(AMD64.XMM);
2190        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2191        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2192        emitByte(0x59);
2193        emitOperandHelper(dst, src, 0);
2194    }
2195
2196    public final void mulss(Register dst, Register src) {
2197        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2198        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2199        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2200        emitByte(0x59);
2201        emitByte(0xC0 | encode);
2202    }
2203
2204    public final void movswl(Register dst, AMD64Address src) {
2205        prefix(src, dst);
2206        emitByte(0x0F);
2207        emitByte(0xBF);
2208        emitOperandHelper(dst, src, 0);
2209    }
2210
2211    public final void movw(AMD64Address dst, int imm16) {
2212        emitByte(0x66); // switch to 16-bit mode
2213        prefix(dst);
2214        emitByte(0xC7);
2215        emitOperandHelper(0, dst, 2);
2216        emitShort(imm16);
2217    }
2218
2219    public final void movw(AMD64Address dst, Register src) {
2220        emitByte(0x66);
2221        prefix(dst, src);
2222        emitByte(0x89);
2223        emitOperandHelper(src, dst, 0);
2224    }
2225
2226    public final void movzbl(Register dst, AMD64Address src) {
2227        prefix(src, dst);
2228        emitByte(0x0F);
2229        emitByte(0xB6);
2230        emitOperandHelper(dst, src, 0);
2231    }
2232
2233    public final void movzwl(Register dst, AMD64Address src) {
2234        prefix(src, dst);
2235        emitByte(0x0F);
2236        emitByte(0xB7);
2237        emitOperandHelper(dst, src, 0);
2238    }
2239
2240    public final void negl(Register dst) {
2241        NEG.emit(this, DWORD, dst);
2242    }
2243
2244    public final void notl(Register dst) {
2245        NOT.emit(this, DWORD, dst);
2246    }
2247
2248    @Override
2249    public final void ensureUniquePC() {
2250        nop();
2251    }
2252
2253    public final void nop() {
2254        nop(1);
2255    }
2256
2257    public void nop(int count) {
2258        int i = count;
2259        if (UseNormalNop) {
2260            assert i > 0 : " ";
2261            // The fancy nops aren't currently recognized by debuggers, making it a
2262            // pain to disassemble code while debugging. If asserts are on, speed is
2263            // clearly not an issue, so simply use the single-byte traditional nop
2264            // to do alignment.
2265
2266            for (; i > 0; i--) {
2267                emitByte(0x90);
2268            }
2269            return;
2270        }
2271
2272        if (UseAddressNop) {
2273            //
2274            // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
2275            // 1: 0x90
2276            // 2: 0x66 0x90
2277            // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2278            // 4: 0x0F 0x1F 0x40 0x00
2279            // 5: 0x0F 0x1F 0x44 0x00 0x00
2280            // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2281            // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2282            // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2283            // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2284            // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2285            // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2286
2287            // The rest of the encoding is AMD-specific - use consecutive address nops
2288
2289            // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2290            // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2291            // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2292            // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2293            // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2294            // Size prefixes (0x66) are added for larger sizes
2295
2296            while (i >= 22) {
2297                i -= 11;
2298                emitByte(0x66); // size prefix
2299                emitByte(0x66); // size prefix
2300                emitByte(0x66); // size prefix
2301                addrNop8();
2302            }
2303            // Generate first nop for size between 21-12
2304            switch (i) {
2305                case 21:
2306                    i -= 11;
2307                    emitByte(0x66); // size prefix
2308                    emitByte(0x66); // size prefix
2309                    emitByte(0x66); // size prefix
2310                    addrNop8();
2311                    break;
2312                case 20:
2313                case 19:
2314                    i -= 10;
2315                    emitByte(0x66); // size prefix
2316                    emitByte(0x66); // size prefix
2317                    addrNop8();
2318                    break;
2319                case 18:
2320                case 17:
2321                    i -= 9;
2322                    emitByte(0x66); // size prefix
2323                    addrNop8();
2324                    break;
2325                case 16:
2326                case 15:
2327                    i -= 8;
2328                    addrNop8();
2329                    break;
2330                case 14:
2331                case 13:
2332                    i -= 7;
2333                    addrNop7();
2334                    break;
2335                case 12:
2336                    i -= 6;
2337                    emitByte(0x66); // size prefix
2338                    addrNop5();
2339                    break;
2340                default:
2341                    assert i < 12;
2342            }
2343
2344            // Generate second nop for size between 11-1
2345            switch (i) {
2346                case 11:
2347                    emitByte(0x66); // size prefix
2348                    emitByte(0x66); // size prefix
2349                    emitByte(0x66); // size prefix
2350                    addrNop8();
2351                    break;
2352                case 10:
2353                    emitByte(0x66); // size prefix
2354                    emitByte(0x66); // size prefix
2355                    addrNop8();
2356                    break;
2357                case 9:
2358                    emitByte(0x66); // size prefix
2359                    addrNop8();
2360                    break;
2361                case 8:
2362                    addrNop8();
2363                    break;
2364                case 7:
2365                    addrNop7();
2366                    break;
2367                case 6:
2368                    emitByte(0x66); // size prefix
2369                    addrNop5();
2370                    break;
2371                case 5:
2372                    addrNop5();
2373                    break;
2374                case 4:
2375                    addrNop4();
2376                    break;
2377                case 3:
2378                    // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2379                    emitByte(0x66); // size prefix
2380                    emitByte(0x66); // size prefix
2381                    emitByte(0x90); // nop
2382                    break;
2383                case 2:
2384                    emitByte(0x66); // size prefix
2385                    emitByte(0x90); // nop
2386                    break;
2387                case 1:
2388                    emitByte(0x90); // nop
2389                    break;
2390                default:
2391                    assert i == 0;
2392            }
2393            return;
2394        }
2395
2396        // Using nops with size prefixes "0x66 0x90".
2397        // From AMD Optimization Guide:
2398        // 1: 0x90
2399        // 2: 0x66 0x90
2400        // 3: 0x66 0x66 0x90
2401        // 4: 0x66 0x66 0x66 0x90
2402        // 5: 0x66 0x66 0x90 0x66 0x90
2403        // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2404        // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2405        // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2406        // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2407        // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2408        //
2409        while (i > 12) {
2410            i -= 4;
2411            emitByte(0x66); // size prefix
2412            emitByte(0x66);
2413            emitByte(0x66);
2414            emitByte(0x90); // nop
2415        }
2416        // 1 - 12 nops
2417        if (i > 8) {
2418            if (i > 9) {
2419                i -= 1;
2420                emitByte(0x66);
2421            }
2422            i -= 3;
2423            emitByte(0x66);
2424            emitByte(0x66);
2425            emitByte(0x90);
2426        }
2427        // 1 - 8 nops
2428        if (i > 4) {
2429            if (i > 6) {
2430                i -= 1;
2431                emitByte(0x66);
2432            }
2433            i -= 3;
2434            emitByte(0x66);
2435            emitByte(0x66);
2436            emitByte(0x90);
2437        }
2438        switch (i) {
2439            case 4:
2440                emitByte(0x66);
2441                emitByte(0x66);
2442                emitByte(0x66);
2443                emitByte(0x90);
2444                break;
2445            case 3:
2446                emitByte(0x66);
2447                emitByte(0x66);
2448                emitByte(0x90);
2449                break;
2450            case 2:
2451                emitByte(0x66);
2452                emitByte(0x90);
2453                break;
2454            case 1:
2455                emitByte(0x90);
2456                break;
2457            default:
2458                assert i == 0;
2459        }
2460    }
2461
2462    public final void orl(Register dst, Register src) {
2463        OR.rmOp.emit(this, DWORD, dst, src);
2464    }
2465
2466    public final void orl(Register dst, int imm32) {
2467        OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2468    }
2469
2470    public final void pop(Register dst) {
2471        int encode = prefixAndEncode(dst.encoding);
2472        emitByte(0x58 | encode);
2473    }
2474
2475    public void popfq() {
2476        emitByte(0x9D);
2477    }
2478
2479    public final void ptest(Register dst, Register src) {
2480        assert supports(CPUFeature.SSE4_1);
2481        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2482        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2483        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2484        emitByte(0x17);
2485        emitByte(0xC0 | encode);
2486    }
2487
2488    public final void vptest(Register dst, Register src) {
2489        assert supports(CPUFeature.AVX);
2490        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2491        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2492        int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2493        emitByte(0x17);
2494        emitByte(0xC0 | encode);
2495    }
2496
2497    public final void push(Register src) {
2498        int encode = prefixAndEncode(src.encoding);
2499        emitByte(0x50 | encode);
2500    }
2501
2502    public void pushfq() {
2503        emitByte(0x9c);
2504    }
2505
2506    public final void paddd(Register dst, Register src) {
2507        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2508        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2509        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2510        emitByte(0xFE);
2511        emitByte(0xC0 | encode);
2512    }
2513
2514    public final void paddq(Register dst, Register src) {
2515        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2516        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2517        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2518        emitByte(0xD4);
2519        emitByte(0xC0 | encode);
2520    }
2521
2522    public final void pextrw(Register dst, Register src, int imm8) {
2523        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
2524        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2525        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2526        emitByte(0xC5);
2527        emitByte(0xC0 | encode);
2528        emitByte(imm8);
2529    }
2530
2531    public final void pinsrw(Register dst, Register src, int imm8) {
2532        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
2533        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2534        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2535        emitByte(0xC4);
2536        emitByte(0xC0 | encode);
2537        emitByte(imm8);
2538    }
2539
2540    public final void por(Register dst, Register src) {
2541        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2542        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2543        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2544        emitByte(0xEB);
2545        emitByte(0xC0 | encode);
2546    }
2547
2548    public final void pand(Register dst, Register src) {
2549        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2550        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2551        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2552        emitByte(0xDB);
2553        emitByte(0xC0 | encode);
2554    }
2555
2556    public final void pxor(Register dst, Register src) {
2557        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2558        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2559        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2560        emitByte(0xEF);
2561        emitByte(0xC0 | encode);
2562    }
2563
2564    public final void vpxor(Register dst, Register nds, Register src) {
2565        assert supports(CPUFeature.AVX);
2566        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2567        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2568        int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2569        emitByte(0xEF);
2570        emitByte(0xC0 | encode);
2571    }
2572
2573    public final void pslld(Register dst, int imm8) {
2574        assert isUByte(imm8) : "invalid value";
2575        assert dst.getRegisterCategory().equals(AMD64.XMM);
2576        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2577        // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2578        int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2579        emitByte(0x72);
2580        emitByte(0xC0 | encode);
2581        emitByte(imm8 & 0xFF);
2582    }
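    // Illustrative sketch (not part of the original source): the call above re-uses the SIMD
    // register-register encoder by passing xmm6 as the "register" operand, so its encoding (6) lands
    // in the reg field of the ModRM byte as the /6 opcode extension. Assuming an assembler 'asm' on a
    // target without AVX, a call such as
    //     asm.pslld(AMD64.xmm1, 5);
    // would emit 66 0F 72 F1 05, i.e. "pslld xmm1, 5" (ModRM 0xF1 = mod 11, reg 110 = /6, rm 001 = xmm1).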
2583
2584    public final void psllq(Register dst, Register shift) {
2585        assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM);
2586        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2587        int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2588        emitByte(0xF3);
2589        emitByte(0xC0 | encode);
2590    }
2591
2592    public final void psllq(Register dst, int imm8) {
2593        assert isUByte(imm8) : "invalid value";
2594        assert dst.getRegisterCategory().equals(AMD64.XMM);
2595        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2596        // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2597        int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2598        emitByte(0x73);
2599        emitByte(0xC0 | encode);
2600        emitByte(imm8);
2601    }
2602
2603    public final void psrad(Register dst, int imm8) {
2604        assert isUByte(imm8) : "invalid value";
2605        assert dst.getRegisterCategory().equals(AMD64.XMM);
2606        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2607        // XMM4 is for /4 encoding: 66 0F 72 /4 ib
2608        int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2609        emitByte(0x72);
2610        emitByte(0xC0 | encode);
2611        emitByte(imm8);
2612    }
2613
2614    public final void psrld(Register dst, int imm8) {
2615        assert isUByte(imm8) : "invalid value";
2616        assert dst.getRegisterCategory().equals(AMD64.XMM);
2617        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2618        // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2619        int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2620        emitByte(0x72);
2621        emitByte(0xC0 | encode);
2622        emitByte(imm8);
2623    }
2624
2625    public final void psrlq(Register dst, int imm8) {
2626        assert isUByte(imm8) : "invalid value";
2627        assert dst.getRegisterCategory().equals(AMD64.XMM);
2628        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2629        // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2630        int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2631        emitByte(0x73);
2632        emitByte(0xC0 | encode);
2633        emitByte(imm8);
2634    }
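    // Illustrative sketch (not part of the original source): the right-shift group uses the same
    // opcode-extension idiom, with xmm2 supplying the /2 extension. On a non-AVX target,
    //     asm.psrlq(AMD64.xmm2, 3);
    // would emit 66 0F 73 D2 03, i.e. "psrlq xmm2, 3" (ModRM 0xD2 = mod 11, reg 010 = /2, rm 010 = xmm2).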
2635
2636    public final void pshufd(Register dst, Register src, int imm8) {
2637        assert isUByte(imm8) : "invalid value";
2638        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2639        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2640        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2641        emitByte(0x70);
2642        emitByte(0xC0 | encode);
2643        emitByte(imm8);
2644    }
2645
2646    public final void psubd(Register dst, Register src) {
2647        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2648        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2649        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2650        emitByte(0xFA);
2651        emitByte(0xC0 | encode);
2652    }
2653
2654    public final void rcpps(Register dst, Register src) {
2655        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2656        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
2657        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2658        emitByte(0x53);
2659        emitByte(0xC0 | encode);
2660    }
2661
2662    public final void ret(int imm16) {
2663        if (imm16 == 0) {
2664            emitByte(0xC3);
2665        } else {
2666            emitByte(0xC2);
2667            emitShort(imm16);
2668        }
2669    }
2670
2671    public final void sarl(Register dst, int imm8) {
2672        int encode = prefixAndEncode(dst.encoding);
2673        assert isShiftCount(imm8 >> 1) : "illegal shift count";
2674        if (imm8 == 1) {
2675            emitByte(0xD1);
2676            emitByte(0xF8 | encode);
2677        } else {
2678            emitByte(0xC1);
2679            emitByte(0xF8 | encode);
2680            emitByte(imm8);
2681        }
2682    }
2683
2684    public final void shll(Register dst, int imm8) {
2685        assert isShiftCount(imm8 >> 1) : "illegal shift count";
2686        int encode = prefixAndEncode(dst.encoding);
2687        if (imm8 == 1) {
2688            emitByte(0xD1);
2689            emitByte(0xE0 | encode);
2690        } else {
2691            emitByte(0xC1);
2692            emitByte(0xE0 | encode);
2693            emitByte(imm8);
2694        }
2695    }
2696
2697    public final void shll(Register dst) {
2698        int encode = prefixAndEncode(dst.encoding);
2699        emitByte(0xD3);
2700        emitByte(0xE0 | encode);
2701    }
2702
2703    public final void shrl(Register dst, int imm8) {
2704        assert isShiftCount(imm8 >> 1) : "illegal shift count";
2705        int encode = prefixAndEncode(dst.encoding);
2706        emitByte(0xC1);
2707        emitByte(0xE8 | encode);
2708        emitByte(imm8);
2709    }
2710
2711    public final void shrl(Register dst) {
2712        int encode = prefixAndEncode(dst.encoding);
2713        emitByte(0xD3);
2714        emitByte(0xE8 | encode);
2715    }
2716
2717    public final void subl(AMD64Address dst, int imm32) {
2718        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2719    }
2720
2721    public final void subl(Register dst, int imm32) {
2722        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2723    }
2724
2725    public final void subl(Register dst, Register src) {
2726        SUB.rmOp.emit(this, DWORD, dst, src);
2727    }
2728
2729    public final void subpd(Register dst, Register src) {
2730        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2731        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2732        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2733        emitByte(0x5C);
2734        emitByte(0xC0 | encode);
2735    }
2736
2737    public final void subsd(Register dst, Register src) {
2738        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2739        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2740        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2741        emitByte(0x5C);
2742        emitByte(0xC0 | encode);
2743    }
2744
2745    public final void subsd(Register dst, AMD64Address src) {
2746        assert dst.getRegisterCategory().equals(AMD64.XMM);
2747        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2748        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2749        emitByte(0x5C);
2750        emitOperandHelper(dst, src, 0);
2751    }
2752
2753    public final void testl(Register dst, int imm32) {
2754        // not using emitArith because TEST has no sign-extending 8-bit immediate form
2757        int encode = dst.encoding;
2758        if (encode == 0) {
2759            emitByte(0xA9);
2760        } else {
2761            encode = prefixAndEncode(encode);
2762            emitByte(0xF7);
2763            emitByte(0xC0 | encode);
2764        }
2765        emitInt(imm32);
2766    }
2767
2768    public final void testl(Register dst, Register src) {
2769        int encode = prefixAndEncode(dst.encoding, src.encoding);
2770        emitByte(0x85);
2771        emitByte(0xC0 | encode);
2772    }
2773
2774    public final void testl(Register dst, AMD64Address src) {
2775        prefix(src, dst);
2776        emitByte(0x85);
2777        emitOperandHelper(dst, src, 0);
2778    }
2779
2780    public final void unpckhpd(Register dst, Register src) {
2781        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2782        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2783        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2784        emitByte(0x15);
2785        emitByte(0xC0 | encode);
2786    }
2787
2788    public final void unpcklpd(Register dst, Register src) {
2789        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2790        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2791        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2792        emitByte(0x14);
2793        emitByte(0xC0 | encode);
2794    }
2795
2796    public final void xorl(Register dst, Register src) {
2797        XOR.rmOp.emit(this, DWORD, dst, src);
2798    }
2799
2800    public final void xorpd(Register dst, Register src) {
2801        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2802        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2803        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2804        emitByte(0x57);
2805        emitByte(0xC0 | encode);
2806    }
2807
2808    public final void xorps(Register dst, Register src) {
2809        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2810        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2811        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2812        emitByte(0x57);
2813        emitByte(0xC0 | encode);
2814    }
2815
2816    protected final void decl(Register dst) {
2817        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2818        int encode = prefixAndEncode(dst.encoding);
2819        emitByte(0xFF);
2820        emitByte(0xC8 | encode);
2821    }
2822
2823    protected final void incl(Register dst) {
2824        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2825        int encode = prefixAndEncode(dst.encoding);
2826        emitByte(0xFF);
2827        emitByte(0xC0 | encode);
2828    }
2829
2830    private int prefixAndEncode(int regEnc) {
2831        return prefixAndEncode(regEnc, false);
2832    }
2833
2834    private int prefixAndEncode(int regEnc, boolean byteinst) {
2835        if (regEnc >= 8) {
2836            emitByte(Prefix.REXB);
2837            return regEnc - 8;
2838        } else if (byteinst && regEnc >= 4) {
2839            emitByte(Prefix.REX);
2840        }
2841        return regEnc;
2842    }
2843
2844    private int prefixqAndEncode(int regEnc) {
2845        if (regEnc < 8) {
2846            emitByte(Prefix.REXW);
2847            return regEnc;
2848        } else {
2849            emitByte(Prefix.REXWB);
2850            return regEnc - 8;
2851        }
2852    }
2853
2854    private int prefixAndEncode(int dstEnc, int srcEnc) {
2855        return prefixAndEncode(dstEnc, false, srcEnc, false);
2856    }
2857
2858    private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
2859        int srcEnc = srcEncoding;
2860        int dstEnc = dstEncoding;
2861        if (dstEnc < 8) {
2862            if (srcEnc >= 8) {
2863                emitByte(Prefix.REXB);
2864                srcEnc -= 8;
2865            } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
2866                emitByte(Prefix.REX);
2867            }
2868        } else {
2869            if (srcEnc < 8) {
2870                emitByte(Prefix.REXR);
2871            } else {
2872                emitByte(Prefix.REXRB);
2873                srcEnc -= 8;
2874            }
2875            dstEnc -= 8;
2876        }
2877        return dstEnc << 3 | srcEnc;
2878    }
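    // Worked example (illustrative, not part of the original source): for testl(rbx, r9) the call
    // prefixAndEncode(3, 9) sees srcEnc >= 8, emits REX.B (0x41), reduces srcEnc to 1 and returns
    // 3 << 3 | 1 = 0x19. The caller then emits 0x85 and 0xC0 | 0x19 = 0xD9, producing 41 85 D9.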
2879
2880    /**
2881     * Creates the REX prefix and the encoding of the lower 6 bits of the ModRM byte. It always emits
2882     * a REX.W prefix. If a register encoding exceeds 3 bits, the fourth bit is encoded in the prefix.
2883     *
2884     * @param regEncoding the encoding of the register part of the ModRM-Byte
2885     * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
2886     * @return the lower 6 bits of the ModRM-Byte that should be emitted
2887     */
2888    private int prefixqAndEncode(int regEncoding, int rmEncoding) {
2889        int rmEnc = rmEncoding;
2890        int regEnc = regEncoding;
2891        if (regEnc < 8) {
2892            if (rmEnc < 8) {
2893                emitByte(Prefix.REXW);
2894            } else {
2895                emitByte(Prefix.REXWB);
2896                rmEnc -= 8;
2897            }
2898        } else {
2899            if (rmEnc < 8) {
2900                emitByte(Prefix.REXWR);
2901            } else {
2902                emitByte(Prefix.REXWRB);
2903                rmEnc -= 8;
2904            }
2905            regEnc -= 8;
2906        }
2907        return regEnc << 3 | rmEnc;
2908    }
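    // Worked example (illustrative, not part of the original source): for imulq(r9, rcx) the call
    // prefixqAndEncode(9, 1) emits REX.WR (0x4C), reduces regEnc to 1 and returns 1 << 3 | 1 = 0x09;
    // the caller then emits 0F AF and 0xC0 | 0x09 = 0xC9, producing 4C 0F AF C9, i.e. "imul r9, rcx".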
2909
2910    private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) {
2911        int vectorLen = attributes.getVectorLen();
2912        boolean vexW = attributes.isRexVexW();
2913        boolean isXorB = ((rxb & 0x3) > 0);
2914        if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) {
2915            emitByte(Prefix.VEX_3BYTES);
2916
2917            int byte1 = (rxb << 5);
2918            byte1 = ((~byte1) & 0xE0) | opc;
2919            emitByte(byte1);
2920
2921            int byte2 = ((~ndsEncoding) & 0xf) << 3;
2922            byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre;
2923            emitByte(byte2);
2924        } else {
2925            emitByte(Prefix.VEX_2BYTES);
2926
2927            int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0;
2928            byte1 = (~byte1) & 0x80;
2929            byte1 |= ((~ndsEncoding) & 0xf) << 3;
2930            byte1 |= ((vectorLen > 0) ? 4 : 0) | pre;
2931            emitByte(byte1);
2932        }
2933    }
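    // Illustrative sketch (not part of the original source): when none of the X/B bits, VEX.W or the
    // 0F 38 / 0F 3A opcode maps are required, the short two-byte C5 form is chosen. For example, the
    // vpxor(Register, Register, Register) method below, applied to three low registers, produces
    //     C5 FD EF C0   // vpxor ymm0, ymm0, ymm0 (0xFD: inverted vvvv = 0000, L = 1, pp = 01 for 0x66)
    // anything else falls through to the three-byte C4 form.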
2934
2935    private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2936        int rxb = getRXB(src, adr);
2937        int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2938        vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2939        setCurAttributes(attributes);
2940    }
2941
2942    private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2943        int rxb = getRXB(dst, src);
2944        int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2945        vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2946        // return modrm byte components for operands
2947        return (((dst.encoding & 7) << 3) | (src.encoding & 7));
2948    }
2949
2950    private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) {
2951        if (supports(CPUFeature.AVX)) {
2952            vexPrefix(adr, nds, xreg, pre, opc, attributes);
2953        } else {
2954            switch (pre) {
2955                case VexSimdPrefix.VEX_SIMD_66:
2956                    emitByte(0x66);
2957                    break;
2958                case VexSimdPrefix.VEX_SIMD_F2:
2959                    emitByte(0xF2);
2960                    break;
2961                case VexSimdPrefix.VEX_SIMD_F3:
2962                    emitByte(0xF3);
2963                    break;
2964            }
2965            if (attributes.isRexVexW()) {
2966                prefixq(adr, xreg);
2967            } else {
2968                prefix(adr, xreg);
2969            }
2970            switch (opc) {
2971                case VexOpcode.VEX_OPCODE_0F:
2972                    emitByte(0x0F);
2973                    break;
2974                case VexOpcode.VEX_OPCODE_0F_38:
2975                    emitByte(0x0F);
2976                    emitByte(0x38);
2977                    break;
2978                case VexOpcode.VEX_OPCODE_0F_3A:
2979                    emitByte(0x0F);
2980                    emitByte(0x3A);
2981                    break;
2982            }
2983        }
2984    }
2985
2986    private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2987        if (supports(CPUFeature.AVX)) {
2988            return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes);
2989        } else {
2990            switch (pre) {
2991                case VexSimdPrefix.VEX_SIMD_66:
2992                    emitByte(0x66);
2993                    break;
2994                case VexSimdPrefix.VEX_SIMD_F2:
2995                    emitByte(0xF2);
2996                    break;
2997                case VexSimdPrefix.VEX_SIMD_F3:
2998                    emitByte(0xF3);
2999                    break;
3000            }
3001            int encode;
3002            int dstEncoding = dst.encoding;
3003            int srcEncoding = src.encoding;
3004            if (attributes.isRexVexW()) {
3005                encode = prefixqAndEncode(dstEncoding, srcEncoding);
3006            } else {
3007                encode = prefixAndEncode(dstEncoding, srcEncoding);
3008            }
3009            switch (opc) {
3010                case VexOpcode.VEX_OPCODE_0F:
3011                    emitByte(0x0F);
3012                    break;
3013                case VexOpcode.VEX_OPCODE_0F_38:
3014                    emitByte(0x0F);
3015                    emitByte(0x38);
3016                    break;
3017                case VexOpcode.VEX_OPCODE_0F_3A:
3018                    emitByte(0x0F);
3019                    emitByte(0x3A);
3020                    break;
3021            }
3022            return encode;
3023        }
3024    }
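    // Illustrative sketch (not part of the original source): on a pre-AVX target the legacy SSE
    // encoding path above is taken, so subsd(xmm0, xmm1) emits F2 0F 5C C1 ("subsd xmm0, xmm1");
    // with AVX available the same call goes through vexPrefixAndEncode and emits the VEX form
    // C5 FB 5C C1 ("vsubsd xmm0, xmm0, xmm1") instead.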
3025
3026    private static boolean needsRex(Register reg) {
3027        return reg.encoding >= MinEncodingNeedsRex;
3028    }
3029
3030    private void prefix(AMD64Address adr) {
3031        if (needsRex(adr.getBase())) {
3032            if (needsRex(adr.getIndex())) {
3033                emitByte(Prefix.REXXB);
3034            } else {
3035                emitByte(Prefix.REXB);
3036            }
3037        } else {
3038            if (needsRex(adr.getIndex())) {
3039                emitByte(Prefix.REXX);
3040            }
3041        }
3042    }
3043
3044    private void prefixq(AMD64Address adr) {
3045        if (needsRex(adr.getBase())) {
3046            if (needsRex(adr.getIndex())) {
3047                emitByte(Prefix.REXWXB);
3048            } else {
3049                emitByte(Prefix.REXWB);
3050            }
3051        } else {
3052            if (needsRex(adr.getIndex())) {
3053                emitByte(Prefix.REXWX);
3054            } else {
3055                emitByte(Prefix.REXW);
3056            }
3057        }
3058    }
3059
3060    private void prefix(AMD64Address adr, Register reg) {
3061        prefix(adr, reg, false);
3062    }
3063
3064    private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
3065        if (reg.encoding < 8) {
3066            if (needsRex(adr.getBase())) {
3067                if (needsRex(adr.getIndex())) {
3068                    emitByte(Prefix.REXXB);
3069                } else {
3070                    emitByte(Prefix.REXB);
3071                }
3072            } else {
3073                if (needsRex(adr.getIndex())) {
3074                    emitByte(Prefix.REXX);
3075                } else if (byteinst && reg.encoding >= 4) {
3076                    emitByte(Prefix.REX);
3077                }
3078            }
3079        } else {
3080            if (needsRex(adr.getBase())) {
3081                if (needsRex(adr.getIndex())) {
3082                    emitByte(Prefix.REXRXB);
3083                } else {
3084                    emitByte(Prefix.REXRB);
3085                }
3086            } else {
3087                if (needsRex(adr.getIndex())) {
3088                    emitByte(Prefix.REXRX);
3089                } else {
3090                    emitByte(Prefix.REXR);
3091                }
3092            }
3093        }
3094    }
3095
3096    private void prefixq(AMD64Address adr, Register src) {
3097        if (src.encoding < 8) {
3098            if (needsRex(adr.getBase())) {
3099                if (needsRex(adr.getIndex())) {
3100                    emitByte(Prefix.REXWXB);
3101                } else {
3102                    emitByte(Prefix.REXWB);
3103                }
3104            } else {
3105                if (needsRex(adr.getIndex())) {
3106                    emitByte(Prefix.REXWX);
3107                } else {
3108                    emitByte(Prefix.REXW);
3109                }
3110            }
3111        } else {
3112            if (needsRex(adr.getBase())) {
3113                if (needsRex(adr.getIndex())) {
3114                    emitByte(Prefix.REXWRXB);
3115                } else {
3116                    emitByte(Prefix.REXWRB);
3117                }
3118            } else {
3119                if (needsRex(adr.getIndex())) {
3120                    emitByte(Prefix.REXWRX);
3121                } else {
3122                    emitByte(Prefix.REXWR);
3123                }
3124            }
3125        }
3126    }
3127
3128    public final void addq(Register dst, int imm32) {
3129        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3130    }
3131
3132    public final void addq(AMD64Address dst, int imm32) {
3133        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3134    }
3135
3136    public final void addq(Register dst, Register src) {
3137        ADD.rmOp.emit(this, QWORD, dst, src);
3138    }
3139
3140    public final void addq(AMD64Address dst, Register src) {
3141        ADD.mrOp.emit(this, QWORD, dst, src);
3142    }
3143
3144    public final void andq(Register dst, int imm32) {
3145        AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3146    }
3147
3148    public final void bsrq(Register dst, Register src) {
3149        int encode = prefixqAndEncode(dst.encoding(), src.encoding());
3150        emitByte(0x0F);
3151        emitByte(0xBD);
3152        emitByte(0xC0 | encode);
3153    }
3154
3155    public final void bswapq(Register reg) {
3156        int encode = prefixqAndEncode(reg.encoding);
3157        emitByte(0x0F);
3158        emitByte(0xC8 | encode);
3159    }
3160
3161    public final void cdqq() {
3162        emitByte(Prefix.REXW);
3163        emitByte(0x99);
3164    }
3165
3166    public final void cmovq(ConditionFlag cc, Register dst, Register src) {
3167        int encode = prefixqAndEncode(dst.encoding, src.encoding);
3168        emitByte(0x0F);
3169        emitByte(0x40 | cc.getValue());
3170        emitByte(0xC0 | encode);
3171    }
3172
3173    public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
3174        prefixq(src, dst);
3175        emitByte(0x0F);
3176        emitByte(0x40 | cc.getValue());
3177        emitOperandHelper(dst, src, 0);
3178    }
3179
3180    public final void cmpq(Register dst, int imm32) {
3181        CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3182    }
3183
3184    public final void cmpq(Register dst, Register src) {
3185        CMP.rmOp.emit(this, QWORD, dst, src);
3186    }
3187
3188    public final void cmpq(Register dst, AMD64Address src) {
3189        CMP.rmOp.emit(this, QWORD, dst, src);
3190    }
3191
3192    public final void cmpxchgq(Register reg, AMD64Address adr) {
3193        prefixq(adr, reg);
3194        emitByte(0x0F);
3195        emitByte(0xB1);
3196        emitOperandHelper(reg, adr, 0);
3197    }
3198
3199    public final void cvtdq2pd(Register dst, Register src) {
3200        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3201        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3202        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3203        emitByte(0xE6);
3204        emitByte(0xC0 | encode);
3205    }
3206
3207    public final void cvtsi2sdq(Register dst, Register src) {
3208        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
3209        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3210        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3211        emitByte(0x2A);
3212        emitByte(0xC0 | encode);
3213    }
3214
3215    public final void cvttsd2siq(Register dst, Register src) {
3216        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
3217        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3218        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3219        emitByte(0x2C);
3220        emitByte(0xC0 | encode);
3221    }
3222
3223    public final void cvttpd2dq(Register dst, Register src) {
3224        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3225        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3226        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3227        emitByte(0xE6);
3228        emitByte(0xC0 | encode);
3229    }
3230
3231    protected final void decq(Register dst) {
3232        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3233        int encode = prefixqAndEncode(dst.encoding);
3234        emitByte(0xFF);
3235        emitByte(0xC8 | encode);
3236    }
3237
3238    public final void decq(AMD64Address dst) {
3239        DEC.emit(this, QWORD, dst);
3240    }
3241
3242    public final void imulq(Register dst, Register src) {
3243        int encode = prefixqAndEncode(dst.encoding, src.encoding);
3244        emitByte(0x0F);
3245        emitByte(0xAF);
3246        emitByte(0xC0 | encode);
3247    }
3248
3249    public final void incq(Register dst) {
3250        // Don't use it directly. Use the macro assembler's incrementq() instead.
3251        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3252        int encode = prefixqAndEncode(dst.encoding);
3253        emitByte(0xFF);
3254        emitByte(0xC0 | encode);
3255    }
3256
3257    public final void incq(AMD64Address dst) {
3258        INC.emit(this, QWORD, dst);
3259    }
3260
3261    public final void movq(Register dst, long imm64) {
3262        int encode = prefixqAndEncode(dst.encoding);
3263        emitByte(0xB8 | encode);
3264        emitLong(imm64);
3265    }
3266
3267    public final void movslq(Register dst, int imm32) {
3268        int encode = prefixqAndEncode(dst.encoding);
3269        emitByte(0xC7);
3270        emitByte(0xC0 | encode);
3271        emitInt(imm32);
3272    }
3273
3274    public final void movdq(Register dst, AMD64Address src) {
3275        assert dst.getRegisterCategory().equals(AMD64.XMM);
3276        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3277        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3278        emitByte(0x6E);
3279        emitOperandHelper(dst, src, 0);
3280    }
3281
3282    public final void movdq(AMD64Address dst, Register src) {
3283        assert src.getRegisterCategory().equals(AMD64.XMM);
3284        // swap src/dst to get correct prefix
3285        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3286        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3287        emitByte(0x7E);
3288        emitOperandHelper(src, dst, 0);
3289    }
3290
3291    public final void movdq(Register dst, Register src) {
3292        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3293            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3294            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3295            emitByte(0x6E);
3296            emitByte(0xC0 | encode);
3297        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3298            // swap src/dst to get correct prefix
3299            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3300            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3301            emitByte(0x7E);
3302            emitByte(0xC0 | encode);
3303        } else {
3304            throw new InternalError("should not reach here");
3305        }
3306    }
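    // Illustrative sketch (not part of the original source): because rexVexW is set, this moves a
    // full 64-bit value between a general-purpose and an XMM register. On a non-AVX target,
    //     asm.movdq(AMD64.xmm0, AMD64.rax);   // emits 66 48 0F 6E C0, i.e. "movq xmm0, rax"
    //     asm.movdq(AMD64.rax, AMD64.xmm0);   // emits 66 48 0F 7E C0, i.e. "movq rax, xmm0"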
3307
3308    public final void movdl(Register dst, Register src) {
3309        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3310            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3311            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3312            emitByte(0x6E);
3313            emitByte(0xC0 | encode);
3314        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3315            // swap src/dst to get correct prefix
3316            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3317            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3318            emitByte(0x7E);
3319            emitByte(0xC0 | encode);
3320        } else {
3321            throw new InternalError("should not reach here");
3322        }
3323    }
3324
3325    public final void movddup(Register dst, Register src) {
3326        assert supports(CPUFeature.SSE3);
3327        assert dst.getRegisterCategory().equals(AMD64.XMM);
3328        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3329        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3330        emitByte(0x12);
3331        emitByte(0xC0 | encode);
3332    }
3333
3334    public final void movdqu(Register dst, AMD64Address src) {
3335        assert dst.getRegisterCategory().equals(AMD64.XMM);
3336        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3337        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3338        emitByte(0x6F);
3339        emitOperandHelper(dst, src, 0);
3340    }
3341
3342    public final void movdqu(Register dst, Register src) {
3343        assert dst.getRegisterCategory().equals(AMD64.XMM);
3344        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3345        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3346        emitByte(0x6F);
3347        emitByte(0xC0 | encode);
3348    }
3349
3350    public final void vmovdqu(Register dst, AMD64Address src) {
3351        assert supports(CPUFeature.AVX);
3352        assert dst.getRegisterCategory().equals(AMD64.XMM);
3353        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3354        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3355        emitByte(0x6F);
3356        emitOperandHelper(dst, src, 0);
3357    }
3358
3359    public final void vzeroupper() {
3360        assert supports(CPUFeature.AVX);
3361        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3362        vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
3363        emitByte(0x77);
3364    }
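    // Note (illustrative, not part of the original source): the call above always takes the
    // two-byte VEX form, so vzeroupper() emits C5 F8 77. It is typically emitted before calling
    // SSE-only code to avoid the AVX-to-SSE transition penalty.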
3365
3366    public final void movslq(AMD64Address dst, int imm32) {
3367        prefixq(dst);
3368        emitByte(0xC7);
3369        emitOperandHelper(0, dst, 4);
3370        emitInt(imm32);
3371    }
3372
3373    public final void movslq(Register dst, AMD64Address src) {
3374        prefixq(src, dst);
3375        emitByte(0x63);
3376        emitOperandHelper(dst, src, 0);
3377    }
3378
3379    public final void movslq(Register dst, Register src) {
3380        int encode = prefixqAndEncode(dst.encoding, src.encoding);
3381        emitByte(0x63);
3382        emitByte(0xC0 | encode);
3383    }
3384
3385    public final void negq(Register dst) {
3386        int encode = prefixqAndEncode(dst.encoding);
3387        emitByte(0xF7);
3388        emitByte(0xD8 | encode);
3389    }
3390
3391    public final void orq(Register dst, Register src) {
3392        OR.rmOp.emit(this, QWORD, dst, src);
3393    }
3394
3395    public final void shlq(Register dst, int imm8) {
3396        assert isShiftCount(imm8 >> 1) : "illegal shift count";
3397        int encode = prefixqAndEncode(dst.encoding);
3398        if (imm8 == 1) {
3399            emitByte(0xD1);
3400            emitByte(0xE0 | encode);
3401        } else {
3402            emitByte(0xC1);
3403            emitByte(0xE0 | encode);
3404            emitByte(imm8);
3405        }
3406    }
3407
3408    public final void shlq(Register dst) {
3409        int encode = prefixqAndEncode(dst.encoding);
3410        emitByte(0xD3);
3411        emitByte(0xE0 | encode);
3412    }
3413
3414    public final void shrq(Register dst, int imm8) {
3415        assert isShiftCount(imm8 >> 1) : "illegal shift count";
3416        int encode = prefixqAndEncode(dst.encoding);
3417        if (imm8 == 1) {
3418            emitByte(0xD1);
3419            emitByte(0xE8 | encode);
3420        } else {
3421            emitByte(0xC1);
3422            emitByte(0xE8 | encode);
3423            emitByte(imm8);
3424        }
3425    }
3426
3427    public final void shrq(Register dst) {
3428        int encode = prefixqAndEncode(dst.encoding);
3429        emitByte(0xD3);
3430        emitByte(0xE8 | encode);
3431    }
3432
3433    public final void sbbq(Register dst, Register src) {
3434        SBB.rmOp.emit(this, QWORD, dst, src);
3435    }
3436
3437    public final void subq(Register dst, int imm32) {
3438        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3439    }
3440
3441    public final void subq(AMD64Address dst, int imm32) {
3442        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3443    }
3444
3445    public final void subqWide(Register dst, int imm32) {
3446        // don't use the sign-extending version, forcing a 32-bit immediate
3447        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
3448    }
3449
3450    public final void subq(Register dst, Register src) {
3451        SUB.rmOp.emit(this, QWORD, dst, src);
3452    }
3453
3454    public final void testq(Register dst, Register src) {
3455        int encode = prefixqAndEncode(dst.encoding, src.encoding);
3456        emitByte(0x85);
3457        emitByte(0xC0 | encode);
3458    }
3459
3460    public final void xaddl(AMD64Address dst, Register src) {
3461        prefix(dst, src);
3462        emitByte(0x0F);
3463        emitByte(0xC1);
3464        emitOperandHelper(src, dst, 0);
3465    }
3466
3467    public final void xaddq(AMD64Address dst, Register src) {
3468        prefixq(dst, src);
3469        emitByte(0x0F);
3470        emitByte(0xC1);
3471        emitOperandHelper(src, dst, 0);
3472    }
3473
3474    public final void xchgl(Register dst, AMD64Address src) {
3475        prefix(src, dst);
3476        emitByte(0x87);
3477        emitOperandHelper(dst, src, 0);
3478    }
3479
3480    public final void xchgq(Register dst, AMD64Address src) {
3481        prefixq(src, dst);
3482        emitByte(0x87);
3483        emitOperandHelper(dst, src, 0);
3484    }
3485
3486    public final void membar(int barriers) {
3487        if (target.isMP) {
3488            // We only have to handle StoreLoad
3489            if ((barriers & STORE_LOAD) != 0) {
3490                // All usable chips support "locked" instructions which suffice
3491                // as barriers, and are much faster than the alternative of
3492                // using the cpuid instruction. Here we use a locked add of 0 to [rsp], which is
3493                // conveniently otherwise a no-op except for clobbering the flags.
3495                // Any change to this code may need to revisit other places in
3496                // the code where this idiom is used, in particular the
3497                // orderAccess code.
3498                lock();
3499                addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
3500            }
3501        }
3502    }
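    // Usage sketch (illustrative, not part of the original source): on a multiprocessor target,
    //     membar(STORE_LOAD);
    // emits "lock add dword ptr [rsp], 0", which orders prior stores before subsequent loads; the
    // other barrier kinds need no fence on AMD64's TSO memory model, so nothing is emitted for them.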
3503
3504    @Override
3505    protected final void patchJumpTarget(int branch, int branchTarget) {
3506        int op = getByte(branch);
3507        assert op == 0xE8 // call
3508                        ||
3509                        op == 0x00 // jump table entry
3510                        || op == 0xE9 // jmp
3511                        || op == 0xEB // short jmp
3512                        || (op & 0xF0) == 0x70 // short jcc
3513                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
3514        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
3515
3516        if (op == 0x00) {
3517            int offsetToJumpTableBase = getShort(branch + 1);
3518            int jumpTableBase = branch - offsetToJumpTableBase;
3519            int imm32 = branchTarget - jumpTableBase;
3520            emitInt(imm32, branch);
3521        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
3522
3523            // short offset operators (jmp and jcc)
3524            final int imm8 = branchTarget - (branch + 2);
3525            /*
3526             * Since a wrongly patched short branch can lead to code that appears to work but behaves
3527             * badly, we always fail with an exception here instead of relying on an assert.
3528             */
3529            if (!NumUtil.isByte(imm8)) {
3530                throw new InternalError("branch displacement out of range: " + imm8);
3531            }
3532            emitByte(imm8, branch + 1);
3533
3534        } else {
3535
3536            int off = 1;
3537            if (op == 0x0F) {
3538                off = 2;
3539            }
3540
3541            int imm32 = branchTarget - (branch + 4 + off);
3542            emitInt(imm32, branch + off);
3543        }
3544    }
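    // Worked example (illustrative, not part of the original source): a short jcc occupies two bytes
    // (opcode + disp8), so its displacement is branchTarget - (branch + 2); a near jcc (0F 8x) occupies
    // six bytes, so the 32-bit displacement written at branch + 2 is branchTarget - (branch + 6). Both
    // follow from the displacement being relative to the end of the patched instruction.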
3545
3546    public void nullCheck(AMD64Address address) {
3547        testl(AMD64.rax, address);
3548    }
3549
3550    @Override
3551    public void align(int modulus) {
3552        if (position() % modulus != 0) {
3553            nop(modulus - (position() % modulus));
3554        }
3555    }
3556
3557    /**
3558     * Emits a direct call instruction. Note that the actual call target is not specified, because
3559     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
3560     * responsible for adding the call address to the appropriate patching tables.
3561     */
3562    public final void call() {
3563        if (codePatchingAnnotationConsumer != null) {
3564            int pos = position();
3565            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
3566        }
3567        emitByte(0xE8);
3568        emitInt(0);
3569    }
3570
3571    public final void call(Register src) {
3572        int encode = prefixAndEncode(src.encoding);
3573        emitByte(0xFF);
3574        emitByte(0xD0 | encode);
3575    }
3576
3577    public final void int3() {
3578        emitByte(0xCC);
3579    }
3580
3581    public final void pause() {
3582        emitByte(0xF3);
3583        emitByte(0x90);
3584    }
3585
3586    private void emitx87(int b1, int b2, int i) {
3587        assert 0 <= i && i < 8 : "illegal stack offset";
3588        emitByte(b1);
3589        emitByte(b2 + i);
3590    }
3591
3592    public final void fldd(AMD64Address src) {
3593        emitByte(0xDD);
3594        emitOperandHelper(0, src, 0);
3595    }
3596
3597    public final void flds(AMD64Address src) {
3598        emitByte(0xD9);
3599        emitOperandHelper(0, src, 0);
3600    }
3601
3602    public final void fldln2() {
3603        emitByte(0xD9);
3604        emitByte(0xED);
3605    }
3606
3607    public final void fldlg2() {
3608        emitByte(0xD9);
3609        emitByte(0xEC);
3610    }
3611
3612    public final void fyl2x() {
3613        emitByte(0xD9);
3614        emitByte(0xF1);
3615    }
3616
3617    public final void fstps(AMD64Address src) {
3618        emitByte(0xD9);
3619        emitOperandHelper(3, src, 0);
3620    }
3621
3622    public final void fstpd(AMD64Address src) {
3623        emitByte(0xDD);
3624        emitOperandHelper(3, src, 0);
3625    }
3626
3627    private void emitFPUArith(int b1, int b2, int i) {
3628        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
3629        emitByte(b1);
3630        emitByte(b2 + i);
3631    }
3632
3633    public void ffree(int i) {
3634        emitFPUArith(0xDD, 0xC0, i);
3635    }
3636
3637    public void fincstp() {
3638        emitByte(0xD9);
3639        emitByte(0xF7);
3640    }
3641
3642    public void fxch(int i) {
3643        emitFPUArith(0xD9, 0xC8, i);
3644    }
3645
3646    public void fnstswAX() {
3647        emitByte(0xDF);
3648        emitByte(0xE0);
3649    }
3650
3651    public void fwait() {
3652        emitByte(0x9B);
3653    }
3654
3655    public void fprem() {
3656        emitByte(0xD9);
3657        emitByte(0xF8);
3658    }
3659
3660    public final void fsin() {
3661        emitByte(0xD9);
3662        emitByte(0xFE);
3663    }
3664
3665    public final void fcos() {
3666        emitByte(0xD9);
3667        emitByte(0xFF);
3668    }
3669
3670    public final void fptan() {
3671        emitByte(0xD9);
3672        emitByte(0xF2);
3673    }
3674
3675    public final void fstp(int i) {
3676        emitx87(0xDD, 0xD8, i);
3677    }
3678
3679    @Override
3680    public AMD64Address makeAddress(Register base, int displacement) {
3681        return new AMD64Address(base, displacement);
3682    }
3683
3684    @Override
3685    public AMD64Address getPlaceholder(int instructionStartPosition) {
3686        return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
3687    }
3688
3689    private void prefetchPrefix(AMD64Address src) {
3690        prefix(src);
3691        emitByte(0x0F);
3692    }
3693
3694    public void prefetchnta(AMD64Address src) {
3695        prefetchPrefix(src);
3696        emitByte(0x18);
3697        emitOperandHelper(0, src, 0);
3698    }
3699
3700    void prefetchr(AMD64Address src) {
3701        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3702        prefetchPrefix(src);
3703        emitByte(0x0D);
3704        emitOperandHelper(0, src, 0);
3705    }
3706
3707    public void prefetcht0(AMD64Address src) {
3708        assert supports(CPUFeature.SSE);
3709        prefetchPrefix(src);
3710        emitByte(0x18);
3711        emitOperandHelper(1, src, 0);
3712    }
3713
3714    public void prefetcht1(AMD64Address src) {
3715        assert supports(CPUFeature.SSE);
3716        prefetchPrefix(src);
3717        emitByte(0x18);
3718        emitOperandHelper(2, src, 0);
3719    }
3720
3721    public void prefetcht2(AMD64Address src) {
3722        assert supports(CPUFeature.SSE);
3723        prefix(src);
3724        emitByte(0x0f);
3725        emitByte(0x18);
3726        emitOperandHelper(3, src, 0);
3727    }
3728
3729    public void prefetchw(AMD64Address src) {
3730        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3731        prefix(src);
3732        emitByte(0x0f);
3733        emitByte(0x0D);
3734        emitOperandHelper(1, src, 0);
3735    }
3736
3737    public void rdtsc() {
3738        emitByte(0x0F);
3739        emitByte(0x31);
3740    }
3741
3742    /**
3743     * Emits an undefined instruction (UD2, 0x0F 0x0B) so that the processor raises an invalid-opcode
3744     * fault. This is used when we deliberately want to crash the program (e.g., for debugging).
3745     */
3746    public void illegal() {
3747        emitByte(0x0f);
3748        emitByte(0x0b);
3749    }
3750}