AMD64Assembler.java revision 12968:4d8a004e5c6d
1/*
2 * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23package org.graalvm.compiler.asm.amd64;
24
25import static org.graalvm.compiler.core.common.NumUtil.isByte;
26import static org.graalvm.compiler.core.common.NumUtil.isInt;
27import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
28import static org.graalvm.compiler.core.common.NumUtil.isUByte;
29import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
30import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
31import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
32import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
33import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
34import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
35import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
36import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
37import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
38import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
39import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
40import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
41import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
42import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
43import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
44import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
45import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
46import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
47import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
48import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
49import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
50import static jdk.vm.ci.amd64.AMD64.CPU;
51import static jdk.vm.ci.amd64.AMD64.XMM;
52import static jdk.vm.ci.amd64.AMD64.r12;
53import static jdk.vm.ci.amd64.AMD64.r13;
54import static jdk.vm.ci.amd64.AMD64.rbp;
55import static jdk.vm.ci.amd64.AMD64.rip;
56import static jdk.vm.ci.amd64.AMD64.rsp;
57import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
58
59import org.graalvm.compiler.asm.Assembler;
60import org.graalvm.compiler.asm.Label;
61import org.graalvm.compiler.core.common.NumUtil;
62import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
63
64import jdk.vm.ci.amd64.AMD64;
65import jdk.vm.ci.amd64.AMD64.CPUFeature;
66import jdk.vm.ci.code.Register;
67import jdk.vm.ci.code.Register.RegisterCategory;
68import jdk.vm.ci.code.TargetDescription;
69
70/**
71 * This class implements an assembler that can encode most X86 instructions.
72 */
73public class AMD64Assembler extends Assembler {
74
75    private static final int MinEncodingNeedsRex = 8;
76
77    /**
78     * The x86 condition codes used for conditional jumps/moves.
79     */
80    public enum ConditionFlag {
81        Zero(0x4, "|zero|"),
82        NotZero(0x5, "|nzero|"),
83        Equal(0x4, "="),
84        NotEqual(0x5, "!="),
85        Less(0xc, "<"),
86        LessEqual(0xe, "<="),
87        Greater(0xf, ">"),
88        GreaterEqual(0xd, ">="),
89        Below(0x2, "|<|"),
90        BelowEqual(0x6, "|<=|"),
91        Above(0x7, "|>|"),
92        AboveEqual(0x3, "|>=|"),
93        Overflow(0x0, "|of|"),
94        NoOverflow(0x1, "|nof|"),
95        CarrySet(0x2, "|carry|"),
96        CarryClear(0x3, "|ncarry|"),
97        Negative(0x8, "|neg|"),
98        Positive(0x9, "|pos|"),
99        Parity(0xa, "|par|"),
100        NoParity(0xb, "|npar|");
101
102        private final int value;
103        private final String operator;
104
105        ConditionFlag(int value, String operator) {
106            this.value = value;
107            this.operator = operator;
108        }
109
110        public ConditionFlag negate() {
111            switch (this) {
112                case Zero:
113                    return NotZero;
114                case NotZero:
115                    return Zero;
116                case Equal:
117                    return NotEqual;
118                case NotEqual:
119                    return Equal;
120                case Less:
121                    return GreaterEqual;
122                case LessEqual:
123                    return Greater;
124                case Greater:
125                    return LessEqual;
126                case GreaterEqual:
127                    return Less;
128                case Below:
129                    return AboveEqual;
130                case BelowEqual:
131                    return Above;
132                case Above:
133                    return BelowEqual;
134                case AboveEqual:
135                    return Below;
136                case Overflow:
137                    return NoOverflow;
138                case NoOverflow:
139                    return Overflow;
140                case CarrySet:
141                    return CarryClear;
142                case CarryClear:
143                    return CarrySet;
144                case Negative:
145                    return Positive;
146                case Positive:
147                    return Negative;
148                case Parity:
149                    return NoParity;
150                case NoParity:
151                    return Parity;
152            }
153            throw new IllegalArgumentException();
154        }
155
156        public int getValue() {
157            return value;
158        }
159
160        @Override
161        public String toString() {
162            return operator;
163        }
164    }
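    // Illustrative note (not part of the original source): the numeric values above are the
    // standard x86 condition-code nibbles, so a conditional jump is encoded by adding the value
    // to the Jcc opcode base. For example, Equal (0x4) yields 0x74 for the rel8 form and
    // 0x0F 0x84 for the rel32 form. Note that negate() always pairs a value with value ^ 1.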
165
166    /**
167     * Constants for X86 prefix bytes.
168     */
169    private static class Prefix {
170        private static final int REX = 0x40;
171        private static final int REXB = 0x41;
172        private static final int REXX = 0x42;
173        private static final int REXXB = 0x43;
174        private static final int REXR = 0x44;
175        private static final int REXRB = 0x45;
176        private static final int REXRX = 0x46;
177        private static final int REXRXB = 0x47;
178        private static final int REXW = 0x48;
179        private static final int REXWB = 0x49;
180        private static final int REXWX = 0x4A;
181        private static final int REXWXB = 0x4B;
182        private static final int REXWR = 0x4C;
183        private static final int REXWRB = 0x4D;
184        private static final int REXWRX = 0x4E;
185        private static final int REXWRXB = 0x4F;
186        private static final int VEX_3BYTES = 0xC4;
187        private static final int VEX_2BYTES = 0xC5;
188    }
189
190    private static class VexPrefix {
191        private static final int VEX_R = 0x80;
192        private static final int VEX_W = 0x80;
193    }
194
195    private static class AvxVectorLen {
196        private static final int AVX_128bit = 0x0;
197        private static final int AVX_256bit = 0x1;
198    }
199
200    private static class VexSimdPrefix {
201        private static final int VEX_SIMD_NONE = 0x0;
202        private static final int VEX_SIMD_66 = 0x1;
203        private static final int VEX_SIMD_F3 = 0x2;
204        private static final int VEX_SIMD_F2 = 0x3;
205    }
206
207    private static class VexOpcode {
208        private static final int VEX_OPCODE_NONE = 0x0;
209        private static final int VEX_OPCODE_0F = 0x1;
210        private static final int VEX_OPCODE_0F_38 = 0x2;
211        private static final int VEX_OPCODE_0F_3A = 0x3;
212    }
213
214    private AMD64InstructionAttr curAttributes;
215
216    AMD64InstructionAttr getCurAttributes() {
217        return curAttributes;
218    }
219
220    void setCurAttributes(AMD64InstructionAttr attributes) {
221        curAttributes = attributes;
222    }
223
224    /**
225     * The x86 operand sizes.
226     */
227    public enum OperandSize {
228        BYTE(1) {
229            @Override
230            protected void emitImmediate(AMD64Assembler asm, int imm) {
231                assert imm == (byte) imm;
232                asm.emitByte(imm);
233            }
234
235            @Override
236            protected int immediateSize() {
237                return 1;
238            }
239        },
240
241        WORD(2, 0x66) {
242            @Override
243            protected void emitImmediate(AMD64Assembler asm, int imm) {
244                assert imm == (short) imm;
245                asm.emitShort(imm);
246            }
247
248            @Override
249            protected int immediateSize() {
250                return 2;
251            }
252        },
253
254        DWORD(4) {
255            @Override
256            protected void emitImmediate(AMD64Assembler asm, int imm) {
257                asm.emitInt(imm);
258            }
259
260            @Override
261            protected int immediateSize() {
262                return 4;
263            }
264        },
265
266        QWORD(8) {
267            @Override
268            protected void emitImmediate(AMD64Assembler asm, int imm) {
269                asm.emitInt(imm);
270            }
271
272            @Override
273            protected int immediateSize() {
274                return 4;
275            }
276        },
277
278        SS(4, 0xF3, true),
279
280        SD(8, 0xF2, true),
281
282        PS(16, true),
283
284        PD(16, 0x66, true);
285
286        private final int sizePrefix;
287
288        private final int bytes;
289        private final boolean xmm;
290
291        OperandSize(int bytes) {
292            this(bytes, 0);
293        }
294
295        OperandSize(int bytes, int sizePrefix) {
296            this(bytes, sizePrefix, false);
297        }
298
299        OperandSize(int bytes, boolean xmm) {
300            this(bytes, 0, xmm);
301        }
302
303        OperandSize(int bytes, int sizePrefix, boolean xmm) {
304            this.sizePrefix = sizePrefix;
305            this.bytes = bytes;
306            this.xmm = xmm;
307        }
308
309        public int getBytes() {
310            return bytes;
311        }
312
313        public boolean isXmmType() {
314            return xmm;
315        }
316
317        /**
318         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
319         * as sign-extended 32-bit values.
320         *
321         * @param asm
322         * @param imm
323         */
324        protected void emitImmediate(AMD64Assembler asm, int imm) {
325            throw new UnsupportedOperationException();
326        }
327
328        protected int immediateSize() {
329            throw new UnsupportedOperationException();
330        }
331    }
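    // Illustrative note (not part of the original source): QWORD immediates are emitted as four
    // bytes and sign-extended by the CPU, so only values in the signed 32-bit range can be
    // encoded. For example, emitting -1 as a QWORD immediate writes FF FF FF FF and the
    // processor expands it to 0xFFFFFFFFFFFFFFFF; a constant like 0x1_0000_0000 cannot be
    // encoded this way.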
332
333    /**
334     * Operand size and register type constraints.
335     */
336    private enum OpAssertion {
337        ByteAssertion(CPU, CPU, BYTE),
338        IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
339        No16BitAssertion(CPU, CPU, DWORD, QWORD),
340        No32BitAssertion(CPU, CPU, WORD, QWORD),
341        QwordOnlyAssertion(CPU, CPU, QWORD),
342        FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
343        PackedFloatingAssertion(XMM, XMM, PS, PD),
344        SingleAssertion(XMM, XMM, SS),
345        DoubleAssertion(XMM, XMM, SD),
346        PackedDoubleAssertion(XMM, XMM, PD),
347        IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
348        FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);
349
350        private final RegisterCategory resultCategory;
351        private final RegisterCategory inputCategory;
352        private final OperandSize[] allowedSizes;
353
354        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
355            this.resultCategory = resultCategory;
356            this.inputCategory = inputCategory;
357            this.allowedSizes = allowedSizes;
358        }
359
360        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
361            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
362            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;
363
364            for (OperandSize s : allowedSizes) {
365                if (size == s) {
366                    return true;
367                }
368            }
369
370            assert false : "invalid operand size " + size + " used in " + op;
371            return false;
372        }
373    }
374
375    public abstract static class OperandDataAnnotation extends CodeAnnotation {
376        /**
377         * The position (bytes from the beginning of the method) of the operand.
378         */
379        public final int operandPosition;
380        /**
381         * The size of the operand, in bytes.
382         */
383        public final int operandSize;
384        /**
385         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
386         * RIP-relative operands are relative to this position.
387         */
388        public final int nextInstructionPosition;
389
390        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
391            super(instructionPosition);
392
393            this.operandPosition = operandPosition;
394            this.operandSize = operandSize;
395            this.nextInstructionPosition = nextInstructionPosition;
396        }
397
398        @Override
399        public String toString() {
400            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
401        }
402    }
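    // Illustrative example (not part of the original source): for a RIP-relative
    // "mov rax, [rip + disp32]" (48 8B 05 xx xx xx xx) starting at instructionPosition p,
    // the annotation would record operandPosition = p + 3, operandSize = 4 and
    // nextInstructionPosition = p + 7, which is the base the displacement is relative to.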
403
404    /**
405     * Annotation that stores additional information about the displacement of a
406     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
407     */
408    public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
409        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
410            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
411        }
412    }
413
414    /**
415     * Annotation that stores additional information about the immediate operand, e.g., of a call
416     * instruction, that needs patching.
417     */
418    public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
419        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
420            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
421        }
422    }
423
424    /**
425     * Constructs an assembler for the AMD64 architecture.
426     */
427    public AMD64Assembler(TargetDescription target) {
428        super(target);
429    }
430
431    public boolean supports(CPUFeature feature) {
432        return ((AMD64) target.arch).getFeatures().contains(feature);
433    }
434
435    private static int encode(Register r) {
436        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
437        return r.encoding & 0x7;
438    }
439
440    /**
441     * Get the RXB bits for a register-register instruction. In that encoding, ModRM.rm contains a
442     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
443     * field. The X bit must be 0.
444     */
445    protected static int getRXB(Register reg, Register rm) {
446        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
447        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
448        return rxb;
449    }
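    // Illustrative example (not part of the original source): getRXB(r9, rax) sets only the R bit,
    // because r9.encoding == 9 has bit 3 set while rax.encoding == 0 does not, so the result is
    // 0b100. Combined with a QWORD operand size this produces the REX prefix 0x4C (Prefix.REXWR).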
450
451    /**
452     * Get the RXB bits for a register-memory instruction. The R bit extends the ModRM.reg field. There
453     * are two cases for the memory operand:<br>
454     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
455     * <br>
456     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
457     */
458    protected static int getRXB(Register reg, AMD64Address rm) {
459        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
460        if (!rm.getIndex().equals(Register.None)) {
461            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
462        }
463        if (!rm.getBase().equals(Register.None)) {
464            rxb |= (rm.getBase().encoding & 0x08) >> 3;
465        }
466        return rxb;
467    }
468
469    /**
470     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
471     * <p>
472     * Format: [ 11 reg r/m ]
473     */
474    protected void emitModRM(int reg, Register rm) {
475        assert (reg & 0x07) == reg;
476        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
477    }
478
479    /**
480     * Emit the ModR/M byte for two register operands.
481     * <p>
482     * Format: [ 11 reg r/m ]
483     */
484    protected void emitModRM(Register reg, Register rm) {
485        emitModRM(reg.encoding & 0x07, rm);
486    }
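    // Illustrative example (not part of the original source): emitModRM(rbx, rcx) emits
    // 0xC0 | (3 << 3) | 1 = 0xD9; preceded by opcode 0x8B this encodes "mov ebx, ecx".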
487
488    protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
489        assert !reg.equals(Register.None);
490        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
491    }
492
493    /**
494     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
495     *
496     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
497     */
498    protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
499        assert !reg.equals(Register.None);
500        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
501    }
502
503    protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
504        emitOperandHelper(reg, addr, false, additionalInstructionSize);
505    }
506
507    /**
508     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
509     * extension in the R field.
510     *
511     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
512     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
513     *            so that the start position of the next instruction can be computed even though
514     *            this instruction has not been completely emitted yet.
515     */
516    protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
517        assert (reg & 0x07) == reg;
518        int regenc = reg << 3;
519
520        Register base = addr.getBase();
521        Register index = addr.getIndex();
522
523        AMD64Address.Scale scale = addr.getScale();
524        int disp = addr.getDisplacement();
525
526        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
527            // [00 000 101] disp32
528            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
529            emitByte(0x05 | regenc);
530            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
531                codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
532            }
533            emitInt(disp);
534        } else if (base.isValid()) {
535            int baseenc = base.isValid() ? encode(base) : 0;
536            if (index.isValid()) {
537                int indexenc = encode(index) << 3;
538                // [base + indexscale + disp]
539                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
540                    // [base + indexscale]
541                    // [00 reg 100][ss index base]
542                    assert !index.equals(rsp) : "illegal addressing mode";
543                    emitByte(0x04 | regenc);
544                    emitByte(scale.log2 << 6 | indexenc | baseenc);
545                } else if (isByte(disp) && !force4Byte) {
546                    // [base + indexscale + imm8]
547                    // [01 reg 100][ss index base] imm8
548                    assert !index.equals(rsp) : "illegal addressing mode";
549                    emitByte(0x44 | regenc);
550                    emitByte(scale.log2 << 6 | indexenc | baseenc);
551                    emitByte(disp & 0xFF);
552                } else {
553                    // [base + indexscale + disp32]
554                    // [10 reg 100][ss index base] disp32
555                    assert !index.equals(rsp) : "illegal addressing mode";
556                    emitByte(0x84 | regenc);
557                    emitByte(scale.log2 << 6 | indexenc | baseenc);
558                    emitInt(disp);
559                }
560            } else if (base.equals(rsp) || base.equals(r12)) {
561                // [rsp + disp]
562                if (disp == 0) {
563                    // [rsp]
564                    // [00 reg 100][00 100 100]
565                    emitByte(0x04 | regenc);
566                    emitByte(0x24);
567                } else if (isByte(disp) && !force4Byte) {
568                    // [rsp + imm8]
569                    // [01 reg 100][00 100 100] disp8
570                    emitByte(0x44 | regenc);
571                    emitByte(0x24);
572                    emitByte(disp & 0xFF);
573                } else {
574                    // [rsp + imm32]
575                    // [10 reg 100][00 100 100] disp32
576                    emitByte(0x84 | regenc);
577                    emitByte(0x24);
578                    emitInt(disp);
579                }
580            } else {
581                // [base + disp]
582                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
583                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
584                    // [base]
585                    // [00 reg base]
586                    emitByte(0x00 | regenc | baseenc);
587                } else if (isByte(disp) && !force4Byte) {
588                    // [base + disp8]
589                    // [01 reg base] disp8
590                    emitByte(0x40 | regenc | baseenc);
591                    emitByte(disp & 0xFF);
592                } else {
593                    // [base + disp32]
594                    // [10 reg base] disp32
595                    emitByte(0x80 | regenc | baseenc);
596                    emitInt(disp);
597                }
598            }
599        } else {
600            if (index.isValid()) {
601                int indexenc = encode(index) << 3;
602                // [indexscale + disp]
603                // [00 reg 100][ss index 101] disp32
604                assert !index.equals(rsp) : "illegal addressing mode";
605                emitByte(0x04 | regenc);
606                emitByte(scale.log2 << 6 | indexenc | 0x05);
607                emitInt(disp);
608            } else {
609                // [disp] ABSOLUTE
610                // [00 reg 100][00 100 101] disp32
611                emitByte(0x04 | regenc);
612                emitByte(0x25);
613                emitInt(disp);
614            }
615        }
616        setCurAttributes(null);
617    }
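    // Illustrative example (not part of the original source), assuming this is called from a
    // subclass with reg = 0 for the address [rbx + rcx*8 + 16]:
    //   emitOperandHelper(0, new AMD64Address(AMD64.rbx, AMD64.rcx, Scale.Times8, 16), 0);
    // emits 0x44 (mod=01, reg=000, rm=100 -> SIB follows), 0xCB (scale=3, index=rcx, base=rbx)
    // and the 8-bit displacement 0x10.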
618
619    /**
620     * Base class for AMD64 opcodes.
621     */
622    public static class AMD64Op {
623
624        protected static final int P_0F = 0x0F;
625        protected static final int P_0F38 = 0x380F;
626        protected static final int P_0F3A = 0x3A0F;
627
628        private final String opcode;
629
630        protected final int prefix1;
631        protected final int prefix2;
632        protected final int op;
633
634        private final boolean dstIsByte;
635        private final boolean srcIsByte;
636
637        private final OpAssertion assertion;
638        private final CPUFeature feature;
639
640        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
641            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
642        }
643
644        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
645            this.opcode = opcode;
646            this.prefix1 = prefix1;
647            this.prefix2 = prefix2;
648            this.op = op;
649
650            this.dstIsByte = dstIsByte;
651            this.srcIsByte = srcIsByte;
652
653            this.assertion = assertion;
654            this.feature = feature;
655        }
656
657        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
658            if (prefix1 != 0) {
659                asm.emitByte(prefix1);
660            }
661            if (size.sizePrefix != 0) {
662                asm.emitByte(size.sizePrefix);
663            }
664            int rexPrefix = 0x40 | rxb;
665            if (size == QWORD) {
666                rexPrefix |= 0x08;
667            }
668            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
669                asm.emitByte(rexPrefix);
670            }
671            if (prefix2 > 0xFF) {
672                asm.emitShort(prefix2);
673            } else if (prefix2 > 0) {
674                asm.emitByte(prefix2);
675            }
676            asm.emitByte(op);
677        }
678
679        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
680            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
681            assert assertion.checkOperands(this, size, resultReg, inputReg);
682            return true;
683        }
684
685        @Override
686        public String toString() {
687            return opcode;
688        }
689    }
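    // Illustrative note (not part of the original source): in emitOpcode above, a QWORD operand
    // size ORs 0x08 into the REX prefix, so with rxb == 0 the byte 0x48 (Prefix.REXW) is emitted.
    // The extra (dstIsByte && dstEnc >= 4) check forces an otherwise empty 0x40 prefix so that
    // the byte registers spl, bpl, sil and dil can be addressed instead of ah, ch, dh and bh.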
690
691    /**
692     * Base class for AMD64 opcodes with immediate operands.
693     */
694    public static class AMD64ImmOp extends AMD64Op {
695
696        private final boolean immIsByte;
697
698        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
699            super(opcode, 0, prefix, op, assertion, null);
700            this.immIsByte = immIsByte;
701        }
702
703        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
704            if (immIsByte) {
705                assert imm == (byte) imm;
706                asm.emitByte(imm);
707            } else {
708                size.emitImmediate(asm, imm);
709            }
710        }
711
712        protected final int immediateSize(OperandSize size) {
713            if (immIsByte) {
714                return 1;
715            } else {
716                return size.bytes;
717            }
718        }
719    }
720
721    /**
722     * Opcode with operand order of either RM or MR for 2 address forms.
723     */
724    public abstract static class AMD64RROp extends AMD64Op {
725
726        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
727            super(opcode, prefix1, prefix2, op, assertion, feature);
728        }
729
730        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
731            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
732        }
733
734        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
735    }
736
737    /**
738     * Opcode with operand order of either RM or MR for 3 address forms.
739     */
740    public abstract static class AMD64RRROp extends AMD64Op {
741
742        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
743            super(opcode, prefix1, prefix2, op, assertion, feature);
744        }
745
746        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
747            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
748        }
749
750        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
751    }
752
753    /**
754     * Opcode with operand order of RM.
755     */
756    public static class AMD64RMOp extends AMD64RROp {
757        // @formatter:off
758        public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",         P_0F, 0xAF);
759        public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",          P_0F, 0xBC);
760        public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",          P_0F, 0xBD);
761        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
762        public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, CPUFeature.BMI1);
763        public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
764        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB",       P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
765        public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",        P_0F, 0xB7, OpAssertion.No16BitAssertion);
766        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB",       P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
767        public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",        P_0F, 0xBF, OpAssertion.No16BitAssertion);
768        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD",             0x63, OpAssertion.QwordOnlyAssertion);
769        public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",               0x8A, OpAssertion.ByteAssertion);
770        public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",                0x8B);
771
772        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
773        public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
774        public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
775        public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
776        public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
777
778        // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
779        public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",               0x84, OpAssertion.ByteAssertion);
780        public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",               0x85);
781        // @formatter:on
782
783        protected AMD64RMOp(String opcode, int op) {
784            this(opcode, 0, op);
785        }
786
787        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
788            this(opcode, 0, op, assertion);
789        }
790
791        protected AMD64RMOp(String opcode, int prefix, int op) {
792            this(opcode, 0, prefix, op, null);
793        }
794
795        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
796            this(opcode, 0, prefix, op, assertion, null);
797        }
798
799        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
800            this(opcode, 0, prefix, op, assertion, feature);
801        }
802
803        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
804            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
805        }
806
807        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
808            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
809        }
810
811        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
812            super(opcode, prefix1, prefix2, op, assertion, feature);
813        }
814
815        @Override
816        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
817            assert verify(asm, size, dst, src);
818            boolean isSimd = false;
819            boolean noNds = false;
820
821            switch (op) {
822                case 0x2A:
823                case 0x2C:
824                case 0x2E:
825                case 0x5A:
826                case 0x6E:
827                    isSimd = true;
828                    noNds = true;
829                    break;
830                case 0x10:
831                case 0x51:
832                case 0x54:
833                case 0x55:
834                case 0x56:
835                case 0x57:
836                case 0x58:
837                case 0x59:
838                case 0x5C:
839                case 0x5D:
840                case 0x5E:
841                case 0x5F:
842                    isSimd = true;
843                    break;
844            }
845
846            if (isSimd) {
847                int pre;
848                int opc;
849                boolean rexVexW = (size == QWORD) ? true : false;
850                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
851                int curPrefix = size.sizePrefix | prefix1;
852                switch (curPrefix) {
853                    case 0x66:
854                        pre = VexSimdPrefix.VEX_SIMD_66;
855                        break;
856                    case 0xF2:
857                        pre = VexSimdPrefix.VEX_SIMD_F2;
858                        break;
859                    case 0xF3:
860                        pre = VexSimdPrefix.VEX_SIMD_F3;
861                        break;
862                    default:
863                        pre = VexSimdPrefix.VEX_SIMD_NONE;
864                        break;
865                }
866                switch (prefix2) {
867                    case P_0F:
868                        opc = VexOpcode.VEX_OPCODE_0F;
869                        break;
870                    case P_0F38:
871                        opc = VexOpcode.VEX_OPCODE_0F_38;
872                        break;
873                    case P_0F3A:
874                        opc = VexOpcode.VEX_OPCODE_0F_3A;
875                        break;
876                    default:
877                        opc = VexOpcode.VEX_OPCODE_NONE;
878                        break;
879                }
880                int encode;
881                if (noNds) {
882                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
883                } else {
884                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
885                }
886                asm.emitByte(op);
887                asm.emitByte(0xC0 | encode);
888            } else {
889                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
890                asm.emitModRM(dst, src);
891            }
892        }
893
894        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
895            assert verify(asm, size, dst, null);
896            boolean isSimd = false;
897            boolean noNds = false;
898
899            switch (op) {
900                case 0x10:
901                case 0x2A:
902                case 0x2C:
903                case 0x2E:
904                case 0x6E:
905                    isSimd = true;
906                    noNds = true;
907                    break;
908                case 0x51:
909                case 0x54:
910                case 0x55:
911                case 0x56:
912                case 0x57:
913                case 0x58:
914                case 0x59:
915                case 0x5C:
916                case 0x5D:
917                case 0x5E:
918                case 0x5F:
919                    isSimd = true;
920                    break;
921            }
922
923            if (isSimd) {
924                int pre;
925                int opc;
926                boolean rexVexW = (size == QWORD) ? true : false;
927                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
928                int curPrefix = size.sizePrefix | prefix1;
929                switch (curPrefix) {
930                    case 0x66:
931                        pre = VexSimdPrefix.VEX_SIMD_66;
932                        break;
933                    case 0xF2:
934                        pre = VexSimdPrefix.VEX_SIMD_F2;
935                        break;
936                    case 0xF3:
937                        pre = VexSimdPrefix.VEX_SIMD_F3;
938                        break;
939                    default:
940                        pre = VexSimdPrefix.VEX_SIMD_NONE;
941                        break;
942                }
943                switch (prefix2) {
944                    case P_0F:
945                        opc = VexOpcode.VEX_OPCODE_0F;
946                        break;
947                    case P_0F38:
948                        opc = VexOpcode.VEX_OPCODE_0F_38;
949                        break;
950                    case P_0F3A:
951                        opc = VexOpcode.VEX_OPCODE_0F_3A;
952                        break;
953                    default:
954                        opc = VexOpcode.VEX_OPCODE_NONE;
955                        break;
956                }
957                if (noNds) {
958                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
959                } else {
960                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
961                }
962                asm.emitByte(op);
963                asm.emitOperandHelper(dst, src, 0);
964            } else {
965                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
966                asm.emitOperandHelper(dst, src, 0);
967            }
968        }
969    }
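    // Minimal usage sketch (not part of the original source), assuming an AMD64Assembler "asm":
    //   AMD64RMOp.MOV.emit(asm, OperandSize.DWORD, AMD64.rax, AMD64.rbx);  // 8B C3, mov eax, ebx
    //   AMD64RMOp.MOV.emit(asm, OperandSize.QWORD, AMD64.rax, AMD64.rbx);  // 48 8B C3, mov rax, rbx
    // In the RM form the destination is the ModRM.reg operand and the source is the ModRM.rm operand.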
970
971    /**
972     * Opcode with operand order of RRM, used for the 3 address forms.
973     */
974    public static class AMD64RRMOp extends AMD64RRROp {
975        protected AMD64RRMOp(String opcode, int op) {
976            this(opcode, 0, op);
977        }
978
979        protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
980            this(opcode, 0, op, assertion);
981        }
982
983        protected AMD64RRMOp(String opcode, int prefix, int op) {
984            this(opcode, 0, prefix, op, null);
985        }
986
987        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
988            this(opcode, 0, prefix, op, assertion, null);
989        }
990
991        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
992            this(opcode, 0, prefix, op, assertion, feature);
993        }
994
995        protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
996            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
997        }
998
999        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
1000            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
1001        }
1002
1003        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1004            super(opcode, prefix1, prefix2, op, assertion, feature);
1005        }
1006
1007        @Override
1008        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
1009            assert verify(asm, size, dst, src);
1010            int pre;
1011            int opc;
1012            boolean rexVexW = (size == QWORD) ? true : false;
1013            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1014            int curPrefix = size.sizePrefix | prefix1;
1015            switch (curPrefix) {
1016                case 0x66:
1017                    pre = VexSimdPrefix.VEX_SIMD_66;
1018                    break;
1019                case 0xF2:
1020                    pre = VexSimdPrefix.VEX_SIMD_F2;
1021                    break;
1022                case 0xF3:
1023                    pre = VexSimdPrefix.VEX_SIMD_F3;
1024                    break;
1025                default:
1026                    pre = VexSimdPrefix.VEX_SIMD_NONE;
1027                    break;
1028            }
1029            switch (prefix2) {
1030                case P_0F:
1031                    opc = VexOpcode.VEX_OPCODE_0F;
1032                    break;
1033                case P_0F38:
1034                    opc = VexOpcode.VEX_OPCODE_0F_38;
1035                    break;
1036                case P_0F3A:
1037                    opc = VexOpcode.VEX_OPCODE_0F_3A;
1038                    break;
1039                default:
1040                    opc = VexOpcode.VEX_OPCODE_NONE;
1041                    break;
1042            }
1043            int encode;
1044            encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
1045            asm.emitByte(op);
1046            asm.emitByte(0xC0 | encode);
1047        }
1048
1049        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
1050            assert verify(asm, size, dst, null);
1051            int pre;
1052            int opc;
1053            boolean rexVexW = (size == QWORD) ? true : false;
1054            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1055            int curPrefix = size.sizePrefix | prefix1;
1056            switch (curPrefix) {
1057                case 0x66:
1058                    pre = VexSimdPrefix.VEX_SIMD_66;
1059                    break;
1060                case 0xF2:
1061                    pre = VexSimdPrefix.VEX_SIMD_F2;
1062                    break;
1063                case 0xF3:
1064                    pre = VexSimdPrefix.VEX_SIMD_F3;
1065                    break;
1066                default:
1067                    pre = VexSimdPrefix.VEX_SIMD_NONE;
1068                    break;
1069            }
1070            switch (prefix2) {
1071                case P_0F:
1072                    opc = VexOpcode.VEX_OPCODE_0F;
1073                    break;
1074                case P_0F38:
1075                    opc = VexOpcode.VEX_OPCODE_0F_38;
1076                    break;
1077                case P_0F3A:
1078                    opc = VexOpcode.VEX_OPCODE_0F_3A;
1079                    break;
1080                default:
1081                    opc = VexOpcode.VEX_OPCODE_NONE;
1082                    break;
1083            }
1084            asm.simdPrefix(dst, nds, src, pre, opc, attributes);
1085            asm.emitByte(op);
1086            asm.emitOperandHelper(dst, src, 0);
1087        }
1088    }
1089
1090    /**
1091     * Opcode with operand order of MR.
1092     */
1093    public static class AMD64MROp extends AMD64RROp {
1094        // @formatter:off
1095        public static final AMD64MROp MOVB   = new AMD64MROp("MOVB",               0x88, OpAssertion.ByteAssertion);
1096        public static final AMD64MROp MOV    = new AMD64MROp("MOV",                0x89);
1097
1098        // MOVD and MOVQ are the same opcode, just with different operand size prefix
1099        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
1100        public static final AMD64MROp MOVD   = new AMD64MROp("MOVD",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
1101        public static final AMD64MROp MOVQ   = new AMD64MROp("MOVQ",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
1102
1103        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
1104        public static final AMD64MROp MOVSS  = new AMD64MROp("MOVSS",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
1105        public static final AMD64MROp MOVSD  = new AMD64MROp("MOVSD",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
1106        // @formatter:on
1107
1108        protected AMD64MROp(String opcode, int op) {
1109            this(opcode, 0, op);
1110        }
1111
1112        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
1113            this(opcode, 0, op, assertion);
1114        }
1115
1116        protected AMD64MROp(String opcode, int prefix, int op) {
1117            this(opcode, prefix, op, OpAssertion.IntegerAssertion);
1118        }
1119
1120        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
1121            this(opcode, prefix, op, assertion, null);
1122        }
1123
1124        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
1125            this(opcode, 0, prefix, op, assertion, feature);
1126        }
1127
1128        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1129            super(opcode, prefix1, prefix2, op, assertion, feature);
1130        }
1131
1132        @Override
1133        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
1134            assert verify(asm, size, src, dst);
1135            boolean isSimd = false;
1136            boolean noNds = false;
1137
1138            switch (op) {
1139                case 0x7E:
1140                    isSimd = true;
1141                    noNds = true;
1142                    break;
1143                case 0x11:
1144                    isSimd = true;
1145                    break;
1146            }
1147
1148            if (isSimd) {
1149                int pre;
1150                int opc;
1151                boolean rexVexW = (size == QWORD) ? true : false;
1152                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1153                int curPrefix = size.sizePrefix | prefix1;
1154                switch (curPrefix) {
1155                    case 0x66:
1156                        pre = VexSimdPrefix.VEX_SIMD_66;
1157                        break;
1158                    case 0xF2:
1159                        pre = VexSimdPrefix.VEX_SIMD_F2;
1160                        break;
1161                    case 0xF3:
1162                        pre = VexSimdPrefix.VEX_SIMD_F3;
1163                        break;
1164                    default:
1165                        pre = VexSimdPrefix.VEX_SIMD_NONE;
1166                        break;
1167                }
1168                switch (prefix2) {
1169                    case P_0F:
1170                        opc = VexOpcode.VEX_OPCODE_0F;
1171                        break;
1172                    case P_0F38:
1173                        opc = VexOpcode.VEX_OPCODE_0F_38;
1174                        break;
1175                    case P_0F3A:
1176                        opc = VexOpcode.VEX_OPCODE_0F_3A;
1177                        break;
1178                    default:
1179                        opc = VexOpcode.VEX_OPCODE_NONE;
1180                        break;
1181                }
1182                int encode;
1183                if (noNds) {
1184                    encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
1185                } else {
1186                    encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
1187                }
1188                asm.emitByte(op);
1189                asm.emitByte(0xC0 | encode);
1190            } else {
1191                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
1192                asm.emitModRM(src, dst);
1193            }
1194        }
1195
1196        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
1197            assert verify(asm, size, null, src);
1198            boolean isSimd = false;
1199
1200            switch (op) {
1201                case 0x7E:
1202                case 0x11:
1203                    isSimd = true;
1204                    break;
1205            }
1206
1207            if (isSimd) {
1208                int pre;
1209                int opc;
1210                boolean rexVexW = (size == QWORD) ? true : false;
1211                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1212                int curPrefix = size.sizePrefix | prefix1;
1213                switch (curPrefix) {
1214                    case 0x66:
1215                        pre = VexSimdPrefix.VEX_SIMD_66;
1216                        break;
1217                    case 0xF2:
1218                        pre = VexSimdPrefix.VEX_SIMD_F2;
1219                        break;
1220                    case 0xF3:
1221                        pre = VexSimdPrefix.VEX_SIMD_F3;
1222                        break;
1223                    default:
1224                        pre = VexSimdPrefix.VEX_SIMD_NONE;
1225                        break;
1226                }
1227                switch (prefix2) {
1228                    case P_0F:
1229                        opc = VexOpcode.VEX_OPCODE_0F;
1230                        break;
1231                    case P_0F38:
1232                        opc = VexOpcode.VEX_OPCODE_0F_38;
1233                        break;
1234                    case P_0F3A:
1235                        opc = VexOpcode.VEX_OPCODE_0F_3A;
1236                        break;
1237                    default:
1238                        opc = VexOpcode.VEX_OPCODE_NONE;
1239                        break;
1240                }
1241                asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
1242                asm.emitByte(op);
1243                asm.emitOperandHelper(src, dst, 0);
1244            } else {
1245                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
1246                asm.emitOperandHelper(src, dst, 0);
1247            }
1248        }
1249    }
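    // Minimal usage sketch (not part of the original source), assuming an AMD64Assembler "asm":
    //   AMD64MROp.MOV.emit(asm, OperandSize.DWORD, new AMD64Address(AMD64.rbx), AMD64.rax);
    // emits 89 03, i.e. "mov [rbx], eax"; in the MR form the register operand is the source.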
1250
1251    /**
1252     * Opcodes with operand order of M.
1253     */
1254    public static class AMD64MOp extends AMD64Op {
1255        // @formatter:off
1256        public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
1257        public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
1258        public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
1259        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
1260        public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
1261        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
1262        public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
1263        public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
1264        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
1265        public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.No32BitAssertion);
1266        // @formatter:on
1267
1268        private final int ext;
1269
1270        protected AMD64MOp(String opcode, int op, int ext) {
1271            this(opcode, 0, op, ext);
1272        }
1273
1274        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
1275            this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
1276        }
1277
1278        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
1279            this(opcode, 0, op, ext, assertion);
1280        }
1281
1282        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
1283            super(opcode, 0, prefix, op, assertion, null);
1284            this.ext = ext;
1285        }
1286
1287        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
1288            assert verify(asm, size, dst, null);
1289            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1290            asm.emitModRM(ext, dst);
1291        }
1292
1293        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
1294            assert verify(asm, size, null, null);
1295            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1296            asm.emitOperandHelper(ext, dst, 0);
1297        }
1298    }
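    // Minimal usage sketch (not part of the original source), assuming an AMD64Assembler "asm":
    //   AMD64MOp.NEG.emit(asm, OperandSize.DWORD, AMD64.rcx);  // F7 D9, neg ecx
    //   AMD64MOp.INC.emit(asm, OperandSize.QWORD, AMD64.rax);  // 48 FF C0, inc rax
    // The "ext" value selects the operation via the reg field of the ModRM byte.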
1299
1300    /**
1301     * Opcodes with operand order of MI.
1302     */
1303    public static class AMD64MIOp extends AMD64ImmOp {
1304        // @formatter:off
1305        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
1306        public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
1307        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
1308        // @formatter:on
1309
1310        private final int ext;
1311
1312        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
1313            this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
1314        }
1315
1316        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
1317            this(opcode, immIsByte, 0, op, ext, assertion);
1318        }
1319
1320        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
1321            super(opcode, immIsByte, prefix, op, assertion);
1322            this.ext = ext;
1323        }
1324
1325        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
1326            assert verify(asm, size, dst, null);
1327            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1328            asm.emitModRM(ext, dst);
1329            emitImmediate(asm, size, imm);
1330        }
1331
1332        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
1333            assert verify(asm, size, null, null);
1334            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1335            asm.emitOperandHelper(ext, dst, immediateSize(size));
1336            emitImmediate(asm, size, imm);
1337        }
1338    }
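    // Usage sketch (editorial): the MI forms take the destination operand plus an immediate.
    // Assuming an assembler `asm` and a Register `dst`:
    //   AMD64MIOp.MOV.emit(asm, OperandSize.DWORD, dst, 42);
    // emits the 0xC7 opcode, a ModRM byte carrying the /0 extension, and a 32-bit immediate. The
    // address overload passes immediateSize(size) to emitOperandHelper so that a RIP-relative
    // displacement can account for the immediate emitted after the operand.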
1339
1340    /**
1341     * Opcodes with operand order of RMI.
1342     *
1343     * We only have one form of round, as the operation is always treated with a single variant input,
1344     * making its extension to a 3-address form redundant.
1345     */
1346    public static class AMD64RMIOp extends AMD64ImmOp {
1347        // @formatter:off
1348        public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
1349        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
1350        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
1351        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
1352        // @formatter:on
1353
1354        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
1355            this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
1356        }
1357
1358        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
1359            super(opcode, immIsByte, prefix, op, assertion);
1360        }
1361
1362        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
1363            assert verify(asm, size, dst, src);
1364            boolean isSimd = false;
1365            boolean noNds = false;
1366
1367            switch (op) {
1368                case 0x0A:
1369                case 0x0B:
1370                    isSimd = true;
1371                    noNds = true;
1372                    break;
1373            }
1374
1375            if (isSimd) {
1376                int pre;
1377                int opc;
1378                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1379                int curPrefix = size.sizePrefix | prefix1;
1380                switch (curPrefix) {
1381                    case 0x66:
1382                        pre = VexSimdPrefix.VEX_SIMD_66;
1383                        break;
1384                    case 0xF2:
1385                        pre = VexSimdPrefix.VEX_SIMD_F2;
1386                        break;
1387                    case 0xF3:
1388                        pre = VexSimdPrefix.VEX_SIMD_F3;
1389                        break;
1390                    default:
1391                        pre = VexSimdPrefix.VEX_SIMD_NONE;
1392                        break;
1393                }
1394                switch (prefix2) {
1395                    case P_0F:
1396                        opc = VexOpcode.VEX_OPCODE_0F;
1397                        break;
1398                    case P_0F38:
1399                        opc = VexOpcode.VEX_OPCODE_0F_38;
1400                        break;
1401                    case P_0F3A:
1402                        opc = VexOpcode.VEX_OPCODE_0F_3A;
1403                        break;
1404                    default:
1405                        opc = VexOpcode.VEX_OPCODE_NONE;
1406                        break;
1407                }
1408                int encode;
1409                if (noNds) {
1410                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
1411                } else {
1412                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
1413                }
1414                asm.emitByte(op);
1415                asm.emitByte(0xC0 | encode);
1416                emitImmediate(asm, size, imm);
1417            } else {
1418                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
1419                asm.emitModRM(dst, src);
1420                emitImmediate(asm, size, imm);
1421            }
1422        }
1423
1424        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
1425            assert verify(asm, size, dst, null);
1426
1427            boolean isSimd = false;
1428            boolean noNds = false;
1429
1430            switch (op) {
1431                case 0x0A:
1432                case 0x0B:
1433                    isSimd = true;
1434                    noNds = true;
1435                    break;
1436            }
1437
1438            if (isSimd) {
1439                int pre;
1440                int opc;
1441                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1442                int curPrefix = size.sizePrefix | prefix1;
1443                switch (curPrefix) {
1444                    case 0x66:
1445                        pre = VexSimdPrefix.VEX_SIMD_66;
1446                        break;
1447                    case 0xF2:
1448                        pre = VexSimdPrefix.VEX_SIMD_F2;
1449                        break;
1450                    case 0xF3:
1451                        pre = VexSimdPrefix.VEX_SIMD_F3;
1452                        break;
1453                    default:
1454                        pre = VexSimdPrefix.VEX_SIMD_NONE;
1455                        break;
1456                }
1457                switch (prefix2) {
1458                    case P_0F:
1459                        opc = VexOpcode.VEX_OPCODE_0F;
1460                        break;
1461                    case P_0F38:
1462                        opc = VexOpcode.VEX_OPCODE_0F_38;
1463                        break;
1464                    case P_0F3A:
1465                        opc = VexOpcode.VEX_OPCODE_0F_3A;
1466                        break;
1467                    default:
1468                        opc = VexOpcode.VEX_OPCODE_NONE;
1469                        break;
1470                }
1471
1472                if (noNds) {
1473                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
1474                } else {
1475                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
1476                }
1477                asm.emitByte(op);
1478                asm.emitOperandHelper(dst, src, immediateSize(size));
1479                emitImmediate(asm, size, imm);
1480            } else {
1481                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
1482                asm.emitOperandHelper(dst, src, immediateSize(size));
1483                emitImmediate(asm, size, imm);
1484            }
1485        }
1486    }
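    // Usage sketch (editorial): the integer RMI forms combine a register, a reg/mem operand and an
    // immediate, e.g. AMD64RMIOp.IMUL_SX.emit(asm, OperandSize.DWORD, dst, src, 5) emits 0x6B /r
    // followed by a sign-extended 8-bit immediate. The ROUNDSS/ROUNDSD entries (ops 0x0A/0x0B)
    // instead take the SIMD branch above and are emitted with a SIMD prefix.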
1487
1488    public static class SSEOp extends AMD64RMOp {
1489        // @formatter:off
1490        public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1491        public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SD",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1492        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1493        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1494        public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",          P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
1495        public static final SSEOp SQRT      = new SSEOp("SQRT",            P_0F, 0x51);
1496        public static final SSEOp AND       = new SSEOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1497        public static final SSEOp ANDN      = new SSEOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1498        public static final SSEOp OR        = new SSEOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1499        public static final SSEOp XOR       = new SSEOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1500        public static final SSEOp ADD       = new SSEOp("ADD",             P_0F, 0x58);
1501        public static final SSEOp MUL       = new SSEOp("MUL",             P_0F, 0x59);
1502        public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",        P_0F, 0x5A, OpAssertion.SingleAssertion);
1503        public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",        P_0F, 0x5A, OpAssertion.DoubleAssertion);
1504        public static final SSEOp SUB       = new SSEOp("SUB",             P_0F, 0x5C);
1505        public static final SSEOp MIN       = new SSEOp("MIN",             P_0F, 0x5D);
1506        public static final SSEOp DIV       = new SSEOp("DIV",             P_0F, 0x5E);
1507        public static final SSEOp MAX       = new SSEOp("MAX",             P_0F, 0x5F);
1508        // @formatter:on
1509
1510        protected SSEOp(String opcode, int prefix, int op) {
1511            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1512        }
1513
1514        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
1515            this(opcode, 0, prefix, op, assertion);
1516        }
1517
1518        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1519            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
1520        }
1521    }
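    // Usage sketch (editorial): SSEOp reuses the inherited RM emit methods, with the OperandSize
    // selecting the scalar or packed encoding. For example, SSEOp.ADD.emit(asm, OperandSize.SD,
    // dstXmm, srcXmm) yields a scalar double-precision add (the F2 0F 58 encoding), comparable to
    // the addsd helper further down in this file.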
1522
1523    public static class AVXOp extends AMD64RRMOp {
1524        // @formatter:off
1525        public static final AVXOp AND       = new AVXOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1526        public static final AVXOp ANDN      = new AVXOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1527        public static final AVXOp OR        = new AVXOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1528        public static final AVXOp XOR       = new AVXOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1529        public static final AVXOp ADD       = new AVXOp("ADD",             P_0F, 0x58);
1530        public static final AVXOp MUL       = new AVXOp("MUL",             P_0F, 0x59);
1531        public static final AVXOp SUB       = new AVXOp("SUB",             P_0F, 0x5C);
1532        public static final AVXOp MIN       = new AVXOp("MIN",             P_0F, 0x5D);
1533        public static final AVXOp DIV       = new AVXOp("DIV",             P_0F, 0x5E);
1534        public static final AVXOp MAX       = new AVXOp("MAX",             P_0F, 0x5F);
1535        // @formatter:on
1536
1537        protected AVXOp(String opcode, int prefix, int op) {
1538            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1539        }
1540
1541        protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
1542            this(opcode, 0, prefix, op, assertion);
1543        }
1544
1545        protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1546            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
1547        }
1548    }
1549
1550    /**
1551     * Arithmetic operation with operand order of RM, MR or MI.
1552     */
1553    public static final class AMD64BinaryArithmetic {
1554        // @formatter:off
1555        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
1556        public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
1557        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
1558        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
1559        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
1560        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
1561        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
1562        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
1563        // @formatter:on
1564
1565        private final AMD64MIOp byteImmOp;
1566        private final AMD64MROp byteMrOp;
1567        private final AMD64RMOp byteRmOp;
1568
1569        private final AMD64MIOp immOp;
1570        private final AMD64MIOp immSxOp;
1571        private final AMD64MROp mrOp;
1572        private final AMD64RMOp rmOp;
1573
1574        private AMD64BinaryArithmetic(String opcode, int code) {
1575            int baseOp = code << 3;
1576
1577            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
1578            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
1579            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
1580
1581            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
1582            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
1583            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
1584            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
1585        }
1586
1587        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
1588            if (size == BYTE) {
1589                return byteImmOp;
1590            } else if (sx) {
1591                return immSxOp;
1592            } else {
1593                return immOp;
1594            }
1595        }
1596
1597        public AMD64MROp getMROpcode(OperandSize size) {
1598            if (size == BYTE) {
1599                return byteMrOp;
1600            } else {
1601                return mrOp;
1602            }
1603        }
1604
1605        public AMD64RMOp getRMOpcode(OperandSize size) {
1606            if (size == BYTE) {
1607                return byteRmOp;
1608            } else {
1609                return rmOp;
1610            }
1611        }
1612    }
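    // Selection sketch (editorial): callers pick the concrete opcode from the operand size and the
    // immediate width, mirroring the addl/andl/cmpl helpers below:
    //   ADD.getMIOpcode(OperandSize.DWORD, isByte(imm)).emit(asm, OperandSize.DWORD, dst, imm);
    // This chooses the sign-extending 0x83 form when the immediate fits in a byte and the plain
    // 0x81 form otherwise.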
1613
1614    /**
1615     * Shift operation with operand order of M1, MC or MI.
1616     */
1617    public static final class AMD64Shift {
1618        // @formatter:off
1619        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
1620        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
1621        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
1622        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
1623        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
1624        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
1625        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
1626        // @formatter:on
1627
1628        public final AMD64MOp m1Op;
1629        public final AMD64MOp mcOp;
1630        public final AMD64MIOp miOp;
1631
1632        private AMD64Shift(String opcode, int code) {
1633            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
1634            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
1635            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
1636        }
1637    }
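    // Encoding sketch (editorial): each shift exposes three forms; for SHL (extension /4) these are
    //   SHL.m1Op -> 0xD1 /4 (shift by one), SHL.mcOp -> 0xD3 /4 (shift by CL),
    //   SHL.miOp -> 0xC1 /4 ib (shift by an 8-bit immediate).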
1638
1639    public final void addl(AMD64Address dst, int imm32) {
1640        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1641    }
1642
1643    public final void addl(Register dst, int imm32) {
1644        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1645    }
1646
1647    public final void addl(Register dst, Register src) {
1648        ADD.rmOp.emit(this, DWORD, dst, src);
1649    }
1650
1651    public final void addpd(Register dst, Register src) {
1652        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1653        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1654        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1655        emitByte(0x58);
1656        emitByte(0xC0 | encode);
1657    }
1658
1659    public final void addpd(Register dst, AMD64Address src) {
1660        assert dst.getRegisterCategory().equals(AMD64.XMM);
1661        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1662        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1663        emitByte(0x58);
1664        emitOperandHelper(dst, src, 0);
1665    }
1666
1667    public final void addsd(Register dst, Register src) {
1668        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1669        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1670        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1671        emitByte(0x58);
1672        emitByte(0xC0 | encode);
1673    }
1674
1675    public final void addsd(Register dst, AMD64Address src) {
1676        assert dst.getRegisterCategory().equals(AMD64.XMM);
1677        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1678        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1679        emitByte(0x58);
1680        emitOperandHelper(dst, src, 0);
1681    }
1682
1683    private void addrNop4() {
1684        // 4 bytes: NOP DWORD PTR [EAX+0]
1685        emitByte(0x0F);
1686        emitByte(0x1F);
1687        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
1688        emitByte(0); // 8-bit offset (1 byte)
1689    }
1690
1691    private void addrNop5() {
1692        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset
1693        emitByte(0x0F);
1694        emitByte(0x1F);
1695        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
1696        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1697        emitByte(0); // 8-bit offset (1 byte)
1698    }
1699
1700    private void addrNop7() {
1701        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset
1702        emitByte(0x0F);
1703        emitByte(0x1F);
1704        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
1705        emitInt(0); // 32-bit offset (4 bytes)
1706    }
1707
1708    private void addrNop8() {
1709        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
1710        emitByte(0x0F);
1711        emitByte(0x1F);
1712        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1713        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1714        emitInt(0); // 32-bit offset (4 bytes)
1715    }
1716
1717    public final void andl(Register dst, int imm32) {
1718        AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1719    }
1720
1721    public final void andl(Register dst, Register src) {
1722        AND.rmOp.emit(this, DWORD, dst, src);
1723    }
1724
1725    public final void andpd(Register dst, Register src) {
1726        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1727        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1728        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1729        emitByte(0x54);
1730        emitByte(0xC0 | encode);
1731    }
1732
1733    public final void andpd(Register dst, AMD64Address src) {
1734        assert dst.getRegisterCategory().equals(AMD64.XMM);
1735        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1736        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1737        emitByte(0x54);
1738        emitOperandHelper(dst, src, 0);
1739    }
1740
1741    public final void bsrl(Register dst, Register src) {
1742        int encode = prefixAndEncode(dst.encoding(), src.encoding());
1743        emitByte(0x0F);
1744        emitByte(0xBD);
1745        emitByte(0xC0 | encode);
1746    }
1747
1748    public final void bswapl(Register reg) {
1749        int encode = prefixAndEncode(reg.encoding);
1750        emitByte(0x0F);
1751        emitByte(0xC8 | encode);
1752    }
1753
1754    public final void cdql() {
1755        emitByte(0x99);
1756    }
1757
1758    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1759        int encode = prefixAndEncode(dst.encoding, src.encoding);
1760        emitByte(0x0F);
1761        emitByte(0x40 | cc.getValue());
1762        emitByte(0xC0 | encode);
1763    }
1764
1765    public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1766        prefix(src, dst);
1767        emitByte(0x0F);
1768        emitByte(0x40 | cc.getValue());
1769        emitOperandHelper(dst, src, 0);
1770    }
1771
1772    public final void cmpl(Register dst, int imm32) {
1773        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1774    }
1775
1776    public final void cmpl(Register dst, Register src) {
1777        CMP.rmOp.emit(this, DWORD, dst, src);
1778    }
1779
1780    public final void cmpl(Register dst, AMD64Address src) {
1781        CMP.rmOp.emit(this, DWORD, dst, src);
1782    }
1783
1784    public final void cmpl(AMD64Address dst, int imm32) {
1785        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1786    }
1787
1788    // The 32-bit cmpxchg compares the value at adr with the contents of rax:
1789    // if they are equal, reg is stored into adr; otherwise, the value at adr is loaded into rax.
1790    // The ZF flag is set if the compared values were equal, and cleared otherwise.
1791    public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1792        prefix(adr, reg);
1793        emitByte(0x0F);
1794        emitByte(0xB1);
1795        emitOperandHelper(reg, adr, 0);
1796    }
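    // Sketch (editorial): an atomic compare-and-swap is typically formed by prefixing the
    // instruction with LOCK, e.g. asm.lock(); asm.cmpxchgl(newValue, address); with rax holding
    // the value to compare against, as described in the comment above.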
1797
1798    public final void cvtsi2sdl(Register dst, Register src) {
1799        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
1800        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1801        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1802        emitByte(0x2A);
1803        emitByte(0xC0 | encode);
1804    }
1805
1806    public final void cvttsd2sil(Register dst, Register src) {
1807        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
1808        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1809        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1810        emitByte(0x2C);
1811        emitByte(0xC0 | encode);
1812    }
1813
1814    protected final void decl(AMD64Address dst) {
1815        prefix(dst);
1816        emitByte(0xFF);
1817        emitOperandHelper(1, dst, 0);
1818    }
1819
1820    public final void divsd(Register dst, Register src) {
1821        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1822        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1823        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1824        emitByte(0x5E);
1825        emitByte(0xC0 | encode);
1826    }
1827
1828    public final void hlt() {
1829        emitByte(0xF4);
1830    }
1831
1832    public final void imull(Register dst, Register src, int value) {
1833        if (isByte(value)) {
1834            AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1835        } else {
1836            AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1837        }
1838    }
1839
1840    protected final void incl(AMD64Address dst) {
1841        prefix(dst);
1842        emitByte(0xFF);
1843        emitOperandHelper(0, dst, 0);
1844    }
1845
1846    public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1847        int shortSize = 2;
1848        int longSize = 6;
1849        long disp = jumpTarget - position();
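        // The encoded displacement is relative to the end of the jump instruction, hence the
        // subtraction of the instruction length (shortSize or longSize) below.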
1850        if (!forceDisp32 && isByte(disp - shortSize)) {
1851            // 0111 tttn #8-bit disp
1852            emitByte(0x70 | cc.getValue());
1853            emitByte((int) ((disp - shortSize) & 0xFF));
1854        } else {
1855            // 0000 1111 1000 tttn #32-bit disp
1856            assert isInt(disp - longSize) : "must be 32bit offset (call4)";
1857            emitByte(0x0F);
1858            emitByte(0x80 | cc.getValue());
1859            emitInt((int) (disp - longSize));
1860        }
1861    }
1862
1863    public final void jcc(ConditionFlag cc, Label l) {
1864        assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1865        if (l.isBound()) {
1866            jcc(cc, l.position(), false);
1867        } else {
1868            // Note: we could eliminate conditional jumps to this jump if the condition
1869            // is the same; however, that seems to be a rather unlikely case.
1870            // Note: use jccb() if the label to be bound is very close, to get
1871            // an 8-bit displacement.
1872            l.addPatchAt(position());
1873            emitByte(0x0F);
1874            emitByte(0x80 | cc.getValue());
1875            emitInt(0);
1876        }
1877
1878    }
1879
1880    public final void jccb(ConditionFlag cc, Label l) {
1881        if (l.isBound()) {
1882            int shortSize = 2;
1883            int entry = l.position();
1884            assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1885            long disp = entry - position();
1886            // 0111 tttn #8-bit disp
1887            emitByte(0x70 | cc.getValue());
1888            emitByte((int) ((disp - shortSize) & 0xFF));
1889        } else {
1890            l.addPatchAt(position());
1891            emitByte(0x70 | cc.getValue());
1892            emitByte(0);
1893        }
1894    }
1895
1896    public final void jmp(int jumpTarget, boolean forceDisp32) {
1897        int shortSize = 2;
1898        int longSize = 5;
1899        long disp = jumpTarget - position();
1900        if (!forceDisp32 && isByte(disp - shortSize)) {
1901            emitByte(0xEB);
1902            emitByte((int) ((disp - shortSize) & 0xFF));
1903        } else {
1904            emitByte(0xE9);
1905            emitInt((int) (disp - longSize));
1906        }
1907    }
1908
1909    @Override
1910    public final void jmp(Label l) {
1911        if (l.isBound()) {
1912            jmp(l.position(), false);
1913        } else {
1914            // By default, forward jumps are always 32-bit displacements, since
1915            // we can't yet know where the label will be bound. If you're sure that
1916            // the forward jump fits in a signed 8-bit displacement (-128 to +127 bytes),
1917            // use jmpb to force the short form.
1918
1919            l.addPatchAt(position());
1920            emitByte(0xE9);
1921            emitInt(0);
1922        }
1923    }
1924
1925    public final void jmp(Register entry) {
1926        int encode = prefixAndEncode(entry.encoding);
1927        emitByte(0xFF);
1928        emitByte(0xE0 | encode);
1929    }
1930
1931    public final void jmp(AMD64Address adr) {
1932        prefix(adr);
1933        emitByte(0xFF);
1934        emitOperandHelper(rsp, adr, 0);
1935    }
1936
1937    public final void jmpb(Label l) {
1938        if (l.isBound()) {
1939            int shortSize = 2;
1940            int entry = l.position();
1941            assert isByte((entry - position()) + shortSize) : "Displacement too large for a short jmp";
1942            long offs = entry - position();
1943            emitByte(0xEB);
1944            emitByte((int) ((offs - shortSize) & 0xFF));
1945        } else {
1946
1947            l.addPatchAt(position());
1948            emitByte(0xEB);
1949            emitByte(0);
1950        }
1951    }
1952
1953    public final void leaq(Register dst, AMD64Address src) {
1954        prefixq(src, dst);
1955        emitByte(0x8D);
1956        emitOperandHelper(dst, src, 0);
1957    }
1958
1959    public final void leave() {
1960        emitByte(0xC9);
1961    }
1962
1963    public final void lock() {
1964        emitByte(0xF0);
1965    }
1966
1967    public final void movapd(Register dst, Register src) {
1968        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1969        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1970        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1971        emitByte(0x28);
1972        emitByte(0xC0 | encode);
1973    }
1974
1975    public final void movaps(Register dst, Register src) {
1976        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1977        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1978        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
1979        emitByte(0x28);
1980        emitByte(0xC0 | encode);
1981    }
1982
1983    public final void movb(AMD64Address dst, int imm8) {
1984        prefix(dst);
1985        emitByte(0xC6);
1986        emitOperandHelper(0, dst, 1);
1987        emitByte(imm8);
1988    }
1989
1990    public final void movb(AMD64Address dst, Register src) {
1991        assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
1992        prefix(dst, src, true);
1993        emitByte(0x88);
1994        emitOperandHelper(src, dst, 0);
1995    }
1996
1997    public final void movl(Register dst, int imm32) {
1998        int encode = prefixAndEncode(dst.encoding);
1999        emitByte(0xB8 | encode);
2000        emitInt(imm32);
2001    }
2002
2003    public final void movl(Register dst, Register src) {
2004        int encode = prefixAndEncode(dst.encoding, src.encoding);
2005        emitByte(0x8B);
2006        emitByte(0xC0 | encode);
2007    }
2008
2009    public final void movl(Register dst, AMD64Address src) {
2010        prefix(src, dst);
2011        emitByte(0x8B);
2012        emitOperandHelper(dst, src, 0);
2013    }
2014
2015    public final void movl(AMD64Address dst, int imm32) {
2016        prefix(dst);
2017        emitByte(0xC7);
2018        emitOperandHelper(0, dst, 4);
2019        emitInt(imm32);
2020    }
2021
2022    public final void movl(AMD64Address dst, Register src) {
2023        prefix(dst, src);
2024        emitByte(0x89);
2025        emitOperandHelper(src, dst, 0);
2026    }
2027
2028    /**
2029     * Newer CPUs require the use of movsd and movss to avoid a partial register stall when loading
2030     * from memory. But on old Opteron, movlpd is used instead of movsd. The selection is done in
2031     * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2032     * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2033     */
2034    public final void movlpd(Register dst, AMD64Address src) {
2035        assert dst.getRegisterCategory().equals(AMD64.XMM);
2036        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2037        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2038        emitByte(0x12);
2039        emitOperandHelper(dst, src, 0);
2040    }
2041
2042    public final void movlhps(Register dst, Register src) {
2043        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2044        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2045        int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2046        emitByte(0x16);
2047        emitByte(0xC0 | encode);
2048    }
2049
2050    public final void movq(Register dst, AMD64Address src) {
2051        movq(dst, src, false);
2052    }
2053
2054    public final void movq(Register dst, AMD64Address src, boolean wide) {
2055        if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2056            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2057            simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2058            emitByte(0x7E);
2059            emitOperandHelper(dst, src, wide, 0);
2060        } else {
2061            // gpr version of movq
2062            prefixq(src, dst);
2063            emitByte(0x8B);
2064            emitOperandHelper(dst, src, wide, 0);
2065        }
2066    }
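    // Note (editorial): the XMM branch above encodes MOVQ as F3 0F 7E (loading 64 bits into an XMM
    // register), while the general-purpose branch falls back to the REX.W-prefixed 0x8B move.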
2067
2068    public final void movq(Register dst, Register src) {
2069        int encode = prefixqAndEncode(dst.encoding, src.encoding);
2070        emitByte(0x8B);
2071        emitByte(0xC0 | encode);
2072    }
2073
2074    public final void movq(AMD64Address dst, Register src) {
2075        if (src.getRegisterCategory().equals(AMD64.XMM)) {
2076            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2077            simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2078            emitByte(0xD6);
2079            emitOperandHelper(src, dst, 0);
2080        } else {
2081            // gpr version of movq
2082            prefixq(dst, src);
2083            emitByte(0x89);
2084            emitOperandHelper(src, dst, 0);
2085        }
2086    }
2087
2088    public final void movsbl(Register dst, AMD64Address src) {
2089        prefix(src, dst);
2090        emitByte(0x0F);
2091        emitByte(0xBE);
2092        emitOperandHelper(dst, src, 0);
2093    }
2094
2095    public final void movsbl(Register dst, Register src) {
2096        int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
2097        emitByte(0x0F);
2098        emitByte(0xBE);
2099        emitByte(0xC0 | encode);
2100    }
2101
2102    public final void movsbq(Register dst, AMD64Address src) {
2103        prefixq(src, dst);
2104        emitByte(0x0F);
2105        emitByte(0xBE);
2106        emitOperandHelper(dst, src, 0);
2107    }
2108
2109    public final void movsbq(Register dst, Register src) {
2110        int encode = prefixqAndEncode(dst.encoding, src.encoding);
2111        emitByte(0x0F);
2112        emitByte(0xBE);
2113        emitByte(0xC0 | encode);
2114    }
2115
2116    public final void movsd(Register dst, Register src) {
2117        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2118        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2119        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2120        emitByte(0x10);
2121        emitByte(0xC0 | encode);
2122    }
2123
2124    public final void movsd(Register dst, AMD64Address src) {
2125        assert dst.getRegisterCategory().equals(AMD64.XMM);
2126        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2127        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2128        emitByte(0x10);
2129        emitOperandHelper(dst, src, 0);
2130    }
2131
2132    public final void movsd(AMD64Address dst, Register src) {
2133        assert src.getRegisterCategory().equals(AMD64.XMM);
2134        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2135        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2136        emitByte(0x11);
2137        emitOperandHelper(src, dst, 0);
2138    }
2139
2140    public final void movss(Register dst, Register src) {
2141        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2142        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2143        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2144        emitByte(0x10);
2145        emitByte(0xC0 | encode);
2146    }
2147
2148    public final void movss(Register dst, AMD64Address src) {
2149        assert dst.getRegisterCategory().equals(AMD64.XMM);
2150        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2151        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2152        emitByte(0x10);
2153        emitOperandHelper(dst, src, 0);
2154    }
2155
2156    public final void movss(AMD64Address dst, Register src) {
2157        assert src.getRegisterCategory().equals(AMD64.XMM);
2158        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2159        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2160        emitByte(0x11);
2161        emitOperandHelper(src, dst, 0);
2162    }
2163
2164    public final void mulpd(Register dst, Register src) {
2165        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2166        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2167        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2168        emitByte(0x59);
2169        emitByte(0xC0 | encode);
2170    }
2171
2172    public final void mulpd(Register dst, AMD64Address src) {
2173        assert dst.getRegisterCategory().equals(AMD64.XMM);
2174        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2175        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2176        emitByte(0x59);
2177        emitOperandHelper(dst, src, 0);
2178    }
2179
2180    public final void mulsd(Register dst, Register src) {
2181        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2182        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2183        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2184        emitByte(0x59);
2185        emitByte(0xC0 | encode);
2186    }
2187
2188    public final void mulsd(Register dst, AMD64Address src) {
2189        assert dst.getRegisterCategory().equals(AMD64.XMM);
2190        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2191        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2192        emitByte(0x59);
2193        emitOperandHelper(dst, src, 0);
2194    }
2195
2196    public final void mulss(Register dst, Register src) {
2197        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2198        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2199        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2200        emitByte(0x59);
2201        emitByte(0xC0 | encode);
2202    }
2203
2204    public final void movswl(Register dst, AMD64Address src) {
2205        prefix(src, dst);
2206        emitByte(0x0F);
2207        emitByte(0xBF);
2208        emitOperandHelper(dst, src, 0);
2209    }
2210
2211    public final void movw(AMD64Address dst, int imm16) {
2212        emitByte(0x66); // operand-size override prefix (16-bit operand)
2213        prefix(dst);
2214        emitByte(0xC7);
2215        emitOperandHelper(0, dst, 2);
2216        emitShort(imm16);
2217    }
2218
2219    public final void movw(AMD64Address dst, Register src) {
2220        emitByte(0x66);
2221        prefix(dst, src);
2222        emitByte(0x89);
2223        emitOperandHelper(src, dst, 0);
2224    }
2225
2226    public final void movzbl(Register dst, AMD64Address src) {
2227        prefix(src, dst);
2228        emitByte(0x0F);
2229        emitByte(0xB6);
2230        emitOperandHelper(dst, src, 0);
2231    }
2232
2233    public final void movzwl(Register dst, AMD64Address src) {
2234        prefix(src, dst);
2235        emitByte(0x0F);
2236        emitByte(0xB7);
2237        emitOperandHelper(dst, src, 0);
2238    }
2239
2240    public final void negl(Register dst) {
2241        NEG.emit(this, DWORD, dst);
2242    }
2243
2244    public final void notl(Register dst) {
2245        NOT.emit(this, DWORD, dst);
2246    }
2247
2248    @Override
2249    public final void ensureUniquePC() {
2250        nop();
2251    }
2252
2253    public final void nop() {
2254        nop(1);
2255    }
2256
2257    public void nop(int count) {
2258        int i = count;
2259        if (UseNormalNop) {
2260            assert i > 0 : " ";
2261            // The fancy nops aren't currently recognized by debuggers, making it a
2262            // pain to disassemble code while debugging. If asserts are on, speed is
2263            // clearly not an issue, so simply use the traditional single-byte nop
2264            // for alignment.
2265
2266            for (; i > 0; i--) {
2267                emitByte(0x90);
2268            }
2269            return;
2270        }
2271
2272        if (UseAddressNop) {
2273            //
2274            // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
2275            // 1: 0x90
2276            // 2: 0x66 0x90
2277            // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2278            // 4: 0x0F 0x1F 0x40 0x00
2279            // 5: 0x0F 0x1F 0x44 0x00 0x00
2280            // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2281            // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2282            // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2283            // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2284            // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2285            // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2286
2287            // The remaining encoding is AMD-specific - use consecutive address nops
2288
2289            // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2290            // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2291            // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2292            // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2293            // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2294            // Size prefixes (0x66) are added for larger sizes
2295
2296            while (i >= 22) {
2297                i -= 11;
2298                emitByte(0x66); // size prefix
2299                emitByte(0x66); // size prefix
2300                emitByte(0x66); // size prefix
2301                addrNop8();
2302            }
2303            // Generate the first nop for sizes 12 to 21
2304            switch (i) {
2305                case 21:
2306                    i -= 11;
2307                    emitByte(0x66); // size prefix
2308                    emitByte(0x66); // size prefix
2309                    emitByte(0x66); // size prefix
2310                    addrNop8();
2311                    break;
2312                case 20:
2313                case 19:
2314                    i -= 10;
2315                    emitByte(0x66); // size prefix
2316                    emitByte(0x66); // size prefix
2317                    addrNop8();
2318                    break;
2319                case 18:
2320                case 17:
2321                    i -= 9;
2322                    emitByte(0x66); // size prefix
2323                    addrNop8();
2324                    break;
2325                case 16:
2326                case 15:
2327                    i -= 8;
2328                    addrNop8();
2329                    break;
2330                case 14:
2331                case 13:
2332                    i -= 7;
2333                    addrNop7();
2334                    break;
2335                case 12:
2336                    i -= 6;
2337                    emitByte(0x66); // size prefix
2338                    addrNop5();
2339                    break;
2340                default:
2341                    assert i < 12;
2342            }
2343
2344            // Generate the second nop for sizes 1 to 11
2345            switch (i) {
2346                case 11:
2347                    emitByte(0x66); // size prefix
2348                    emitByte(0x66); // size prefix
2349                    emitByte(0x66); // size prefix
2350                    addrNop8();
2351                    break;
2352                case 10:
2353                    emitByte(0x66); // size prefix
2354                    emitByte(0x66); // size prefix
2355                    addrNop8();
2356                    break;
2357                case 9:
2358                    emitByte(0x66); // size prefix
2359                    addrNop8();
2360                    break;
2361                case 8:
2362                    addrNop8();
2363                    break;
2364                case 7:
2365                    addrNop7();
2366                    break;
2367                case 6:
2368                    emitByte(0x66); // size prefix
2369                    addrNop5();
2370                    break;
2371                case 5:
2372                    addrNop5();
2373                    break;
2374                case 4:
2375                    addrNop4();
2376                    break;
2377                case 3:
2378                    // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2379                    emitByte(0x66); // size prefix
2380                    emitByte(0x66); // size prefix
2381                    emitByte(0x90); // nop
2382                    break;
2383                case 2:
2384                    emitByte(0x66); // size prefix
2385                    emitByte(0x90); // nop
2386                    break;
2387                case 1:
2388                    emitByte(0x90); // nop
2389                    break;
2390                default:
2391                    assert i == 0;
2392            }
2393            return;
2394        }
2395
2396        // Using nops with size prefixes "0x66 0x90".
2397        // From AMD Optimization Guide:
2398        // 1: 0x90
2399        // 2: 0x66 0x90
2400        // 3: 0x66 0x66 0x90
2401        // 4: 0x66 0x66 0x66 0x90
2402        // 5: 0x66 0x66 0x90 0x66 0x90
2403        // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2404        // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2405        // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2406        // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2407        // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2408        //
2409        while (i > 12) {
2410            i -= 4;
2411            emitByte(0x66); // size prefix
2412            emitByte(0x66);
2413            emitByte(0x66);
2414            emitByte(0x90); // nop
2415        }
2416        // 1 - 12 nops
2417        if (i > 8) {
2418            if (i > 9) {
2419                i -= 1;
2420                emitByte(0x66);
2421            }
2422            i -= 3;
2423            emitByte(0x66);
2424            emitByte(0x66);
2425            emitByte(0x90);
2426        }
2427        // 1 - 8 nops
2428        if (i > 4) {
2429            if (i > 6) {
2430                i -= 1;
2431                emitByte(0x66);
2432            }
2433            i -= 3;
2434            emitByte(0x66);
2435            emitByte(0x66);
2436            emitByte(0x90);
2437        }
2438        switch (i) {
2439            case 4:
2440                emitByte(0x66);
2441                emitByte(0x66);
2442                emitByte(0x66);
2443                emitByte(0x90);
2444                break;
2445            case 3:
2446                emitByte(0x66);
2447                emitByte(0x66);
2448                emitByte(0x90);
2449                break;
2450            case 2:
2451                emitByte(0x66);
2452                emitByte(0x90);
2453                break;
2454            case 1:
2455                emitByte(0x90);
2456                break;
2457            default:
2458                assert i == 0;
2459        }
2460    }
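    // Example (editorial): with UseAddressNop enabled, nop(5) emits the single 5-byte sequence
    // 0x0F 0x1F 0x44 0x00 0x00 via addrNop5() rather than five individual 0x90 bytes.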
2461
2462    public final void orl(Register dst, Register src) {
2463        OR.rmOp.emit(this, DWORD, dst, src);
2464    }
2465
2466    public final void orl(Register dst, int imm32) {
2467        OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2468    }
2469
2470    public final void pop(Register dst) {
2471        int encode = prefixAndEncode(dst.encoding);
2472        emitByte(0x58 | encode);
2473    }
2474
2475    public void popfq() {
2476        emitByte(0x9D);
2477    }
2478
2479    public final void ptest(Register dst, Register src) {
2480        assert supports(CPUFeature.SSE4_1);
2481        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2482        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2483        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2484        emitByte(0x17);
2485        emitByte(0xC0 | encode);
2486    }
2487
2488    public final void vptest(Register dst, Register src) {
2489        assert supports(CPUFeature.AVX);
2490        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2491        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2492        int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2493        emitByte(0x17);
2494        emitByte(0xC0 | encode);
2495    }
2496
2497    void pcmpestri(Register dst, AMD64Address src, int imm8) {
2498        assert supports(CPUFeature.SSE4_2);
2499        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2500        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
2501        emitByte(0x61);
2502        emitOperandHelper(dst, src, 0);
2503        emitByte(imm8);
2504    }
2505
2506    void pcmpestri(Register dst, Register src, int imm8) {
2507        assert supports(CPUFeature.SSE4_2);
2508        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2509        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
2510        emitByte(0x61);
2511        emitByte(0xC0 | encode);
2512        emitByte(imm8);
2513    }
2514
2515    public final void push(Register src) {
2516        int encode = prefixAndEncode(src.encoding);
2517        emitByte(0x50 | encode);
2518    }
2519
2520    public void pushfq() {
2521        emitByte(0x9C);
2522    }
2523
2524    public final void paddd(Register dst, Register src) {
2525        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2526        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2527        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2528        emitByte(0xFE);
2529        emitByte(0xC0 | encode);
2530    }
2531
2532    public final void paddq(Register dst, Register src) {
2533        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2534        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2535        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2536        emitByte(0xD4);
2537        emitByte(0xC0 | encode);
2538    }
2539
2540    public final void pextrw(Register dst, Register src, int imm8) {
2541        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
2542        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2543        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2544        emitByte(0xC5);
2545        emitByte(0xC0 | encode);
2546        emitByte(imm8);
2547    }
2548
2549    public final void pinsrw(Register dst, Register src, int imm8) {
2550        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
2551        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2552        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2553        emitByte(0xC4);
2554        emitByte(0xC0 | encode);
2555        emitByte(imm8);
2556    }
2557
2558    public final void por(Register dst, Register src) {
2559        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2560        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2561        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2562        emitByte(0xEB);
2563        emitByte(0xC0 | encode);
2564    }
2565
2566    public final void pand(Register dst, Register src) {
2567        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2568        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2569        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2570        emitByte(0xDB);
2571        emitByte(0xC0 | encode);
2572    }
2573
2574    public final void pxor(Register dst, Register src) {
2575        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2576        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2577        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2578        emitByte(0xEF);
2579        emitByte(0xC0 | encode);
2580    }
2581
2582    public final void vpxor(Register dst, Register nds, Register src) {
2583        assert supports(CPUFeature.AVX);
2584        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2585        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2586        int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2587        emitByte(0xEF);
2588        emitByte(0xC0 | encode);
2589    }
2590
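    // The SSE shift-by-immediate forms below (pslld, psllq, psrad, psrld, psrlq, psrldq) carry no
    // register operand in the ModRM reg field; that field holds a sub-opcode (/2, /3, /4, /6)
    // instead. The fixed xmm2/xmm3/xmm4/xmm6 arguments passed to simdPrefixAndEncode exist only
    // to produce that sub-opcode in the encoding.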
2591    public final void pslld(Register dst, int imm8) {
2592        assert isUByte(imm8) : "invalid value";
2593        assert dst.getRegisterCategory().equals(AMD64.XMM);
2594        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2595        // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2596        int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2597        emitByte(0x72);
2598        emitByte(0xC0 | encode);
2599        emitByte(imm8 & 0xFF);
2600    }
2601
2602    public final void psllq(Register dst, Register shift) {
2603        assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM);
2604        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2605        int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2606        emitByte(0xF3);
2607        emitByte(0xC0 | encode);
2608    }
2609
2610    public final void psllq(Register dst, int imm8) {
2611        assert isUByte(imm8) : "invalid value";
2612        assert dst.getRegisterCategory().equals(AMD64.XMM);
2613        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2614        // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2615        int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2616        emitByte(0x73);
2617        emitByte(0xC0 | encode);
2618        emitByte(imm8);
2619    }
2620
2621    public final void psrad(Register dst, int imm8) {
2622        assert isUByte(imm8) : "invalid value";
2623        assert dst.getRegisterCategory().equals(AMD64.XMM);
2624        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2625        // XMM4 is for /4 encoding: 66 0F 72 /4 ib
2626        int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2627        emitByte(0x72);
2628        emitByte(0xC0 | encode);
2629        emitByte(imm8);
2630    }
2631
2632    public final void psrld(Register dst, int imm8) {
2633        assert isUByte(imm8) : "invalid value";
2634        assert dst.getRegisterCategory().equals(AMD64.XMM);
2635        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2636        // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2637        int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2638        emitByte(0x72);
2639        emitByte(0xC0 | encode);
2640        emitByte(imm8);
2641    }
2642
2643    public final void psrlq(Register dst, int imm8) {
2644        assert isUByte(imm8) : "invalid value";
2645        assert dst.getRegisterCategory().equals(AMD64.XMM);
2646        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2647        // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2648        int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2649        emitByte(0x73);
2650        emitByte(0xC0 | encode);
2651        emitByte(imm8);
2652    }
2653
2654    public final void psrldq(Register dst, int imm8) {
2655        assert isUByte(imm8) : "invalid value";
2656        assert dst.getRegisterCategory().equals(AMD64.XMM);
2657        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
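        // XMM3 is for /3 encoding: 66 0F 73 /3 ib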
2658        int encode = simdPrefixAndEncode(AMD64.xmm3, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2659        emitByte(0x73);
2660        emitByte(0xC0 | encode);
2661        emitByte(imm8);
2662    }
2663
2664    public final void pshufd(Register dst, Register src, int imm8) {
2665        assert isUByte(imm8) : "invalid value";
2666        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2667        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2668        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2669        emitByte(0x70);
2670        emitByte(0xC0 | encode);
2671        emitByte(imm8);
2672    }
2673
2674    public final void psubd(Register dst, Register src) {
2675        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2676        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2677        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2678        emitByte(0xFA);
2679        emitByte(0xC0 | encode);
2680    }
2681
2682    public final void rcpps(Register dst, Register src) {
2683        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2684        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
2685        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2686        emitByte(0x53);
2687        emitByte(0xC0 | encode);
2688    }
2689
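    // RET with a non-zero imm16 (opcode C2) pops imm16 additional bytes off the stack after
    // popping the return address; the plain C3 form is used when there is nothing to pop.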
2690    public final void ret(int imm16) {
2691        if (imm16 == 0) {
2692            emitByte(0xC3);
2693        } else {
2694            emitByte(0xC2);
2695            emitShort(imm16);
2696        }
2697    }
2698
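    // Several of the shift helpers below use the short D1 opcode (implicit count of 1) when
    // imm8 == 1 and otherwise the C1 opcode followed by an 8-bit immediate; the D3 forms shift
    // by the count in CL.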
2699    public final void sarl(Register dst, int imm8) {
2700        int encode = prefixAndEncode(dst.encoding);
2701        assert isShiftCount(imm8 >> 1) : "illegal shift count";
2702        if (imm8 == 1) {
2703            emitByte(0xD1);
2704            emitByte(0xF8 | encode);
2705        } else {
2706            emitByte(0xC1);
2707            emitByte(0xF8 | encode);
2708            emitByte(imm8);
2709        }
2710    }
2711
2712    public final void shll(Register dst, int imm8) {
2713        assert isShiftCount(imm8 >> 1) : "illegal shift count";
2714        int encode = prefixAndEncode(dst.encoding);
2715        if (imm8 == 1) {
2716            emitByte(0xD1);
2717            emitByte(0xE0 | encode);
2718        } else {
2719            emitByte(0xC1);
2720            emitByte(0xE0 | encode);
2721            emitByte(imm8);
2722        }
2723    }
2724
2725    public final void shll(Register dst) {
2726        int encode = prefixAndEncode(dst.encoding);
2727        emitByte(0xD3);
2728        emitByte(0xE0 | encode);
2729    }
2730
2731    public final void shrl(Register dst, int imm8) {
2732        assert isShiftCount(imm8 >> 1) : "illegal shift count";
2733        int encode = prefixAndEncode(dst.encoding);
2734        emitByte(0xC1);
2735        emitByte(0xE8 | encode);
2736        emitByte(imm8);
2737    }
2738
2739    public final void shrl(Register dst) {
2740        int encode = prefixAndEncode(dst.encoding);
2741        emitByte(0xD3);
2742        emitByte(0xE8 | encode);
2743    }
2744
2745    public final void subl(AMD64Address dst, int imm32) {
2746        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2747    }
2748
2749    public final void subl(Register dst, int imm32) {
2750        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2751    }
2752
2753    public final void subl(Register dst, Register src) {
2754        SUB.rmOp.emit(this, DWORD, dst, src);
2755    }
2756
2757    public final void subpd(Register dst, Register src) {
2758        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2759        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2760        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2761        emitByte(0x5C);
2762        emitByte(0xC0 | encode);
2763    }
2764
2765    public final void subsd(Register dst, Register src) {
2766        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2767        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2768        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2769        emitByte(0x5C);
2770        emitByte(0xC0 | encode);
2771    }
2772
2773    public final void subsd(Register dst, AMD64Address src) {
2774        assert dst.getRegisterCategory().equals(AMD64.XMM);
2775        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2776        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2777        emitByte(0x5C);
2778        emitOperandHelper(dst, src, 0);
2779    }
2780
2781    public final void testl(Register dst, int imm32) {
2782        // Not using emitArith because test does not support sign-extension of 8-bit operands
2785        int encode = dst.encoding;
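        // TEST EAX, imm32 has a dedicated one-byte opcode (A9) with no ModRM byte, so rax
        // (encoding 0) gets the short form; all other registers use F7 /0.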
2786        if (encode == 0) {
2787            emitByte(0xA9);
2788        } else {
2789            encode = prefixAndEncode(encode);
2790            emitByte(0xF7);
2791            emitByte(0xC0 | encode);
2792        }
2793        emitInt(imm32);
2794    }
2795
2796    public final void testl(Register dst, Register src) {
2797        int encode = prefixAndEncode(dst.encoding, src.encoding);
2798        emitByte(0x85);
2799        emitByte(0xC0 | encode);
2800    }
2801
2802    public final void testl(Register dst, AMD64Address src) {
2803        prefix(src, dst);
2804        emitByte(0x85);
2805        emitOperandHelper(dst, src, 0);
2806    }
2807
2808    public final void unpckhpd(Register dst, Register src) {
2809        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2810        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2811        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2812        emitByte(0x15);
2813        emitByte(0xC0 | encode);
2814    }
2815
2816    public final void unpcklpd(Register dst, Register src) {
2817        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2818        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2819        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2820        emitByte(0x14);
2821        emitByte(0xC0 | encode);
2822    }
2823
2824    public final void xorl(Register dst, Register src) {
2825        XOR.rmOp.emit(this, DWORD, dst, src);
2826    }
2827
2828    public final void xorpd(Register dst, Register src) {
2829        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2830        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2831        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2832        emitByte(0x57);
2833        emitByte(0xC0 | encode);
2834    }
2835
2836    public final void xorps(Register dst, Register src) {
2837        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2838        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2839        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2840        emitByte(0x57);
2841        emitByte(0xC0 | encode);
2842    }
2843
2844    protected final void decl(Register dst) {
2845        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2846        int encode = prefixAndEncode(dst.encoding);
2847        emitByte(0xFF);
2848        emitByte(0xC8 | encode);
2849    }
2850
2851    protected final void incl(Register dst) {
2852        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2853        int encode = prefixAndEncode(dst.encoding);
2854        emitByte(0xFF);
2855        emitByte(0xC0 | encode);
2856    }
2857
2858    private int prefixAndEncode(int regEnc) {
2859        return prefixAndEncode(regEnc, false);
2860    }
2861
2862    private int prefixAndEncode(int regEnc, boolean byteinst) {
2863        if (regEnc >= 8) {
2864            emitByte(Prefix.REXB);
2865            return regEnc - 8;
2866        } else if (byteinst && regEnc >= 4) {
2867            emitByte(Prefix.REX);
2868        }
2869        return regEnc;
2870    }
2871
2872    private int prefixqAndEncode(int regEnc) {
2873        if (regEnc < 8) {
2874            emitByte(Prefix.REXW);
2875            return regEnc;
2876        } else {
2877            emitByte(Prefix.REXWB);
2878            return regEnc - 8;
2879        }
2880    }
2881
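    // REX.B extends the r/m field and REX.R extends the reg field of the ModRM byte; the helpers
    // below emit the required REX prefix (if any) and return reg << 3 | rm, i.e. the low 6 bits
    // of the ModRM byte. For example (a sketch, not tied to any particular caller): encoding
    // r9d as reg and eax as r/m emits REX.R and returns (1 << 3) | 0.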
2882    private int prefixAndEncode(int dstEnc, int srcEnc) {
2883        return prefixAndEncode(dstEnc, false, srcEnc, false);
2884    }
2885
2886    private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
2887        int srcEnc = srcEncoding;
2888        int dstEnc = dstEncoding;
2889        if (dstEnc < 8) {
2890            if (srcEnc >= 8) {
2891                emitByte(Prefix.REXB);
2892                srcEnc -= 8;
2893            } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
2894                emitByte(Prefix.REX);
2895            }
2896        } else {
2897            if (srcEnc < 8) {
2898                emitByte(Prefix.REXR);
2899            } else {
2900                emitByte(Prefix.REXRB);
2901                srcEnc -= 8;
2902            }
2903            dstEnc -= 8;
2904        }
2905        return dstEnc << 3 | srcEnc;
2906    }
2907
2908    /**
2909     * Creates a REX.W prefix and the encoding of the lower 6 bits of the ModRM byte. If an
2910     * operand encoding exceeds 3 bits, its 4th bit is encoded in the REX prefix.
2911     *
2912     * @param regEncoding the encoding of the register part of the ModRM-Byte
2913     * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
2914     * @return the lower 6 bits of the ModRM-Byte that should be emitted
2915     */
2916    private int prefixqAndEncode(int regEncoding, int rmEncoding) {
2917        int rmEnc = rmEncoding;
2918        int regEnc = regEncoding;
2919        if (regEnc < 8) {
2920            if (rmEnc < 8) {
2921                emitByte(Prefix.REXW);
2922            } else {
2923                emitByte(Prefix.REXWB);
2924                rmEnc -= 8;
2925            }
2926        } else {
2927            if (rmEnc < 8) {
2928                emitByte(Prefix.REXWR);
2929            } else {
2930                emitByte(Prefix.REXWRB);
2931                rmEnc -= 8;
2932            }
2933            regEnc -= 8;
2934        }
2935        return regEnc << 3 | rmEnc;
2936    }
2937
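    // VEX prefix emission: the three-byte form (C4) is required whenever the X or B extension
    // bits are needed, VEX.W is set, or the opcode lives in the 0F 38 / 0F 3A maps; otherwise
    // the two-byte form (C5) suffices. The R/X/B bits and the vvvv (nds) register field are
    // stored inverted in the prefix, hence the bit-complement operations below.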
2938    private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) {
2939        int vectorLen = attributes.getVectorLen();
2940        boolean vexW = attributes.isRexVexW();
2941        boolean isXorB = ((rxb & 0x3) > 0);
2942        if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) {
2943            emitByte(Prefix.VEX_3BYTES);
2944
2945            int byte1 = (rxb << 5);
2946            byte1 = ((~byte1) & 0xE0) | opc;
2947            emitByte(byte1);
2948
2949            int byte2 = ((~ndsEncoding) & 0xf) << 3;
2950            byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre;
2951            emitByte(byte2);
2952        } else {
2953            emitByte(Prefix.VEX_2BYTES);
2954
2955            int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0;
2956            byte1 = (~byte1) & 0x80;
2957            byte1 |= ((~ndsEncoding) & 0xf) << 3;
2958            byte1 |= ((vectorLen > 0) ? 4 : 0) | pre;
2959            emitByte(byte1);
2960        }
2961    }
2962
2963    private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2964        int rxb = getRXB(src, adr);
2965        int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2966        vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2967        setCurAttributes(attributes);
2968    }
2969
2970    private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2971        int rxb = getRXB(dst, src);
2972        int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2973        vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2974        // return modrm byte components for operands
2975        return (((dst.encoding & 7) << 3) | (src.encoding & 7));
2976    }
2977
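    // The simdPrefix* helpers pick the encoding based on the target: with AVX a VEX prefix is
    // emitted, otherwise the legacy SSE encoding is used (an optional 66/F2/F3 mandatory prefix,
    // a REX prefix if needed, then the 0F / 0F 38 / 0F 3A escape bytes).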
2978    private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) {
2979        if (supports(CPUFeature.AVX)) {
2980            vexPrefix(adr, nds, xreg, pre, opc, attributes);
2981        } else {
2982            switch (pre) {
2983                case VexSimdPrefix.VEX_SIMD_66:
2984                    emitByte(0x66);
2985                    break;
2986                case VexSimdPrefix.VEX_SIMD_F2:
2987                    emitByte(0xF2);
2988                    break;
2989                case VexSimdPrefix.VEX_SIMD_F3:
2990                    emitByte(0xF3);
2991                    break;
2992            }
2993            if (attributes.isRexVexW()) {
2994                prefixq(adr, xreg);
2995            } else {
2996                prefix(adr, xreg);
2997            }
2998            switch (opc) {
2999                case VexOpcode.VEX_OPCODE_0F:
3000                    emitByte(0x0F);
3001                    break;
3002                case VexOpcode.VEX_OPCODE_0F_38:
3003                    emitByte(0x0F);
3004                    emitByte(0x38);
3005                    break;
3006                case VexOpcode.VEX_OPCODE_0F_3A:
3007                    emitByte(0x0F);
3008                    emitByte(0x3A);
3009                    break;
3010            }
3011        }
3012    }
3013
3014    private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
3015        if (supports(CPUFeature.AVX)) {
3016            return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes);
3017        } else {
3018            switch (pre) {
3019                case VexSimdPrefix.VEX_SIMD_66:
3020                    emitByte(0x66);
3021                    break;
3022                case VexSimdPrefix.VEX_SIMD_F2:
3023                    emitByte(0xF2);
3024                    break;
3025                case VexSimdPrefix.VEX_SIMD_F3:
3026                    emitByte(0xF3);
3027                    break;
3028            }
3029            int encode;
3030            int dstEncoding = dst.encoding;
3031            int srcEncoding = src.encoding;
3032            if (attributes.isRexVexW()) {
3033                encode = prefixqAndEncode(dstEncoding, srcEncoding);
3034            } else {
3035                encode = prefixAndEncode(dstEncoding, srcEncoding);
3036            }
3037            switch (opc) {
3038                case VexOpcode.VEX_OPCODE_0F:
3039                    emitByte(0x0F);
3040                    break;
3041                case VexOpcode.VEX_OPCODE_0F_38:
3042                    emitByte(0x0F);
3043                    emitByte(0x38);
3044                    break;
3045                case VexOpcode.VEX_OPCODE_0F_3A:
3046                    emitByte(0x0F);
3047                    emitByte(0x3A);
3048                    break;
3049            }
3050            return encode;
3051        }
3052    }
3053
3054    private static boolean needsRex(Register reg) {
3055        return reg.encoding >= MinEncodingNeedsRex;
3056    }
3057
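    // Address-form REX helpers: REX.B extends the base register, REX.X the index register and
    // REX.R the register operand; the REX.W variants additionally select a 64-bit operand size.
    // A register needs an extension bit when its encoding does not fit in 3 bits (r8-r15,
    // xmm8-xmm15).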
3058    private void prefix(AMD64Address adr) {
3059        if (needsRex(adr.getBase())) {
3060            if (needsRex(adr.getIndex())) {
3061                emitByte(Prefix.REXXB);
3062            } else {
3063                emitByte(Prefix.REXB);
3064            }
3065        } else {
3066            if (needsRex(adr.getIndex())) {
3067                emitByte(Prefix.REXX);
3068            }
3069        }
3070    }
3071
3072    private void prefixq(AMD64Address adr) {
3073        if (needsRex(adr.getBase())) {
3074            if (needsRex(adr.getIndex())) {
3075                emitByte(Prefix.REXWXB);
3076            } else {
3077                emitByte(Prefix.REXWB);
3078            }
3079        } else {
3080            if (needsRex(adr.getIndex())) {
3081                emitByte(Prefix.REXWX);
3082            } else {
3083                emitByte(Prefix.REXW);
3084            }
3085        }
3086    }
3087
3088    private void prefix(AMD64Address adr, Register reg) {
3089        prefix(adr, reg, false);
3090    }
3091
3092    private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
3093        if (reg.encoding < 8) {
3094            if (needsRex(adr.getBase())) {
3095                if (needsRex(adr.getIndex())) {
3096                    emitByte(Prefix.REXXB);
3097                } else {
3098                    emitByte(Prefix.REXB);
3099                }
3100            } else {
3101                if (needsRex(adr.getIndex())) {
3102                    emitByte(Prefix.REXX);
3103                } else if (byteinst && reg.encoding >= 4) {
3104                    emitByte(Prefix.REX);
3105                }
3106            }
3107        } else {
3108            if (needsRex(adr.getBase())) {
3109                if (needsRex(adr.getIndex())) {
3110                    emitByte(Prefix.REXRXB);
3111                } else {
3112                    emitByte(Prefix.REXRB);
3113                }
3114            } else {
3115                if (needsRex(adr.getIndex())) {
3116                    emitByte(Prefix.REXRX);
3117                } else {
3118                    emitByte(Prefix.REXR);
3119                }
3120            }
3121        }
3122    }
3123
3124    private void prefixq(AMD64Address adr, Register src) {
3125        if (src.encoding < 8) {
3126            if (needsRex(adr.getBase())) {
3127                if (needsRex(adr.getIndex())) {
3128                    emitByte(Prefix.REXWXB);
3129                } else {
3130                    emitByte(Prefix.REXWB);
3131                }
3132            } else {
3133                if (needsRex(adr.getIndex())) {
3134                    emitByte(Prefix.REXWX);
3135                } else {
3136                    emitByte(Prefix.REXW);
3137                }
3138            }
3139        } else {
3140            if (needsRex(adr.getBase())) {
3141                if (needsRex(adr.getIndex())) {
3142                    emitByte(Prefix.REXWRXB);
3143                } else {
3144                    emitByte(Prefix.REXWRB);
3145                }
3146            } else {
3147                if (needsRex(adr.getIndex())) {
3148                    emitByte(Prefix.REXWRX);
3149                } else {
3150                    emitByte(Prefix.REXWR);
3151                }
3152            }
3153        }
3154    }
3155
3156    public final void addq(Register dst, int imm32) {
3157        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3158    }
3159
3160    public final void addq(AMD64Address dst, int imm32) {
3161        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3162    }
3163
3164    public final void addq(Register dst, Register src) {
3165        ADD.rmOp.emit(this, QWORD, dst, src);
3166    }
3167
3168    public final void addq(AMD64Address dst, Register src) {
3169        ADD.mrOp.emit(this, QWORD, dst, src);
3170    }
3171
3172    public final void andq(Register dst, int imm32) {
3173        AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3174    }
3175
3176    public final void bsrq(Register dst, Register src) {
3177        int encode = prefixqAndEncode(dst.encoding(), src.encoding());
3178        emitByte(0x0F);
3179        emitByte(0xBD);
3180        emitByte(0xC0 | encode);
3181    }
3182
3183    public final void bswapq(Register reg) {
3184        int encode = prefixqAndEncode(reg.encoding);
3185        emitByte(0x0F);
3186        emitByte(0xC8 | encode);
3187    }
3188
3189    public final void cdqq() {
3190        emitByte(Prefix.REXW);
3191        emitByte(0x99);
3192    }
3193
3194    public final void cmovq(ConditionFlag cc, Register dst, Register src) {
3195        int encode = prefixqAndEncode(dst.encoding, src.encoding);
3196        emitByte(0x0F);
3197        emitByte(0x40 | cc.getValue());
3198        emitByte(0xC0 | encode);
3199    }
3200
3201    public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
3202        prefixq(src, dst);
3203        emitByte(0x0F);
3204        emitByte(0x40 | cc.getValue());
3205        emitOperandHelper(dst, src, 0);
3206    }
3207
3208    public final void cmpq(Register dst, int imm32) {
3209        CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3210    }
3211
3212    public final void cmpq(Register dst, Register src) {
3213        CMP.rmOp.emit(this, QWORD, dst, src);
3214    }
3215
3216    public final void cmpq(Register dst, AMD64Address src) {
3217        CMP.rmOp.emit(this, QWORD, dst, src);
3218    }
3219
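    // CMPXCHG compares RAX with the memory operand; if they are equal the source register is
    // stored to memory, otherwise the memory value is loaded into RAX. Combined with a LOCK
    // prefix this forms the usual compare-and-swap primitive.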
3220    public final void cmpxchgq(Register reg, AMD64Address adr) {
3221        prefixq(adr, reg);
3222        emitByte(0x0F);
3223        emitByte(0xB1);
3224        emitOperandHelper(reg, adr, 0);
3225    }
3226
3227    public final void cvtdq2pd(Register dst, Register src) {
3228        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3229        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3230        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3231        emitByte(0xE6);
3232        emitByte(0xC0 | encode);
3233    }
3234
3235    public final void cvtsi2sdq(Register dst, Register src) {
3236        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
3237        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3238        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3239        emitByte(0x2A);
3240        emitByte(0xC0 | encode);
3241    }
3242
3243    public final void cvttsd2siq(Register dst, Register src) {
3244        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
3245        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3246        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3247        emitByte(0x2C);
3248        emitByte(0xC0 | encode);
3249    }
3250
3251    public final void cvttpd2dq(Register dst, Register src) {
3252        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3253        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3254        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3255        emitByte(0xE6);
3256        emitByte(0xC0 | encode);
3257    }
3258
3259    protected final void decq(Register dst) {
3260        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3261        int encode = prefixqAndEncode(dst.encoding);
3262        emitByte(0xFF);
3263        emitByte(0xC8 | encode);
3264    }
3265
3266    public final void decq(AMD64Address dst) {
3267        DEC.emit(this, QWORD, dst);
3268    }
3269
3270    public final void imulq(Register dst, Register src) {
3271        int encode = prefixqAndEncode(dst.encoding, src.encoding);
3272        emitByte(0x0F);
3273        emitByte(0xAF);
3274        emitByte(0xC0 | encode);
3275    }
3276
3277    public final void incq(Register dst) {
3278        // Don't use this directly; use the macro assembler's incrementq() instead.
3279        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3280        int encode = prefixqAndEncode(dst.encoding);
3281        emitByte(0xFF);
3282        emitByte(0xC0 | encode);
3283    }
3284
3285    public final void incq(AMD64Address dst) {
3286        INC.emit(this, QWORD, dst);
3287    }
3288
3289    public final void movq(Register dst, long imm64) {
3290        int encode = prefixqAndEncode(dst.encoding);
3291        emitByte(0xB8 | encode);
3292        emitLong(imm64);
3293    }
3294
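    // Note: despite the movslq name, this emits REX.W + C7 /0, i.e. it moves the sign-extended
    // 32-bit immediate into the 64-bit register.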
3295    public final void movslq(Register dst, int imm32) {
3296        int encode = prefixqAndEncode(dst.encoding);
3297        emitByte(0xC7);
3298        emitByte(0xC0 | encode);
3299        emitInt(imm32);
3300    }
3301
3302    public final void movdq(Register dst, AMD64Address src) {
3303        assert dst.getRegisterCategory().equals(AMD64.XMM);
3304        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3305        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3306        emitByte(0x6E);
3307        emitOperandHelper(dst, src, 0);
3308    }
3309
3310    public final void movdq(AMD64Address dst, Register src) {
3311        assert src.getRegisterCategory().equals(AMD64.XMM);
3312        // swap src/dst to get correct prefix
3313        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3314        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3315        emitByte(0x7E);
3316        emitOperandHelper(src, dst, 0);
3317    }
3318
3319    public final void movdq(Register dst, Register src) {
3320        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3321            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3322            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3323            emitByte(0x6E);
3324            emitByte(0xC0 | encode);
3325        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3326            // swap src/dst to get correct prefix
3327            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3328            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3329            emitByte(0x7E);
3330            emitByte(0xC0 | encode);
3331        } else {
3332            throw new InternalError("should not reach here");
3333        }
3334    }
3335
3336    public final void movdl(Register dst, Register src) {
3337        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3338            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3339            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3340            emitByte(0x6E);
3341            emitByte(0xC0 | encode);
3342        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3343            // swap src/dst to get correct prefix
3344            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3345            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3346            emitByte(0x7E);
3347            emitByte(0xC0 | encode);
3348        } else {
3349            throw new InternalError("should not reach here");
3350        }
3351    }
3352
3353    public final void movdl(Register dst, AMD64Address src) {
3354        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3355        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3356        emitByte(0x6E);
3357        emitOperandHelper(dst, src, 0);
3358    }
3359
3360    public final void movddup(Register dst, Register src) {
3361        assert supports(CPUFeature.SSE3);
3362        assert dst.getRegisterCategory().equals(AMD64.XMM);
3363        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3364        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3365        emitByte(0x12);
3366        emitByte(0xC0 | encode);
3367    }
3368
3369    public final void movdqu(Register dst, AMD64Address src) {
3370        assert dst.getRegisterCategory().equals(AMD64.XMM);
3371        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3372        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3373        emitByte(0x6F);
3374        emitOperandHelper(dst, src, 0);
3375    }
3376
3377    public final void movdqu(Register dst, Register src) {
3378        assert dst.getRegisterCategory().equals(AMD64.XMM);
3379        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3380        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3381        emitByte(0x6F);
3382        emitByte(0xC0 | encode);
3383    }
3384
3385    public final void vmovdqu(Register dst, AMD64Address src) {
3386        assert supports(CPUFeature.AVX);
3387        assert dst.getRegisterCategory().equals(AMD64.XMM);
3388        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3389        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3390        emitByte(0x6F);
3391        emitOperandHelper(dst, src, 0);
3392    }
3393
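    // vzeroupper clears the upper 128 bits of all YMM registers. It is typically emitted before
    // executing legacy SSE code to avoid AVX-SSE transition penalties (the usual motivation for
    // this instruction; callers decide where it is actually needed).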
3394    public final void vzeroupper() {
3395        assert supports(CPUFeature.AVX);
3396        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3397        vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
3398        emitByte(0x77);
3399    }
3400
3401    public final void movslq(AMD64Address dst, int imm32) {
3402        prefixq(dst);
3403        emitByte(0xC7);
3404        emitOperandHelper(0, dst, 4);
3405        emitInt(imm32);
3406    }
3407
3408    public final void movslq(Register dst, AMD64Address src) {
3409        prefixq(src, dst);
3410        emitByte(0x63);
3411        emitOperandHelper(dst, src, 0);
3412    }
3413
3414    public final void movslq(Register dst, Register src) {
3415        int encode = prefixqAndEncode(dst.encoding, src.encoding);
3416        emitByte(0x63);
3417        emitByte(0xC0 | encode);
3418    }
3419
3420    public final void negq(Register dst) {
3421        int encode = prefixqAndEncode(dst.encoding);
3422        emitByte(0xF7);
3423        emitByte(0xD8 | encode);
3424    }
3425
3426    public final void orq(Register dst, Register src) {
3427        OR.rmOp.emit(this, QWORD, dst, src);
3428    }
3429
3430    public final void shlq(Register dst, int imm8) {
3431        assert isShiftCount(imm8 >> 1) : "illegal shift count";
3432        int encode = prefixqAndEncode(dst.encoding);
3433        if (imm8 == 1) {
3434            emitByte(0xD1);
3435            emitByte(0xE0 | encode);
3436        } else {
3437            emitByte(0xC1);
3438            emitByte(0xE0 | encode);
3439            emitByte(imm8);
3440        }
3441    }
3442
3443    public final void shlq(Register dst) {
3444        int encode = prefixqAndEncode(dst.encoding);
3445        emitByte(0xD3);
3446        emitByte(0xE0 | encode);
3447    }
3448
3449    public final void shrq(Register dst, int imm8) {
3450        assert isShiftCount(imm8 >> 1) : "illegal shift count";
3451        int encode = prefixqAndEncode(dst.encoding);
3452        if (imm8 == 1) {
3453            emitByte(0xD1);
3454            emitByte(0xE8 | encode);
3455        } else {
3456            emitByte(0xC1);
3457            emitByte(0xE8 | encode);
3458            emitByte(imm8);
3459        }
3460    }
3461
3462    public final void shrq(Register dst) {
3463        int encode = prefixqAndEncode(dst.encoding);
3464        emitByte(0xD3);
3465        emitByte(0xE8 | encode);
3466    }
3467
3468    public final void sbbq(Register dst, Register src) {
3469        SBB.rmOp.emit(this, QWORD, dst, src);
3470    }
3471
3472    public final void subq(Register dst, int imm32) {
3473        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3474    }
3475
3476    public final void subq(AMD64Address dst, int imm32) {
3477        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3478    }
3479
3480    public final void subqWide(Register dst, int imm32) {
3481        // Don't use the sign-extending byte-immediate version; force a full 32-bit immediate
3482        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
3483    }
3484
3485    public final void subq(Register dst, Register src) {
3486        SUB.rmOp.emit(this, QWORD, dst, src);
3487    }
3488
3489    public final void testq(Register dst, Register src) {
3490        int encode = prefixqAndEncode(dst.encoding, src.encoding);
3491        emitByte(0x85);
3492        emitByte(0xC0 | encode);
3493    }
3494
3495    public final void xaddl(AMD64Address dst, Register src) {
3496        prefix(dst, src);
3497        emitByte(0x0F);
3498        emitByte(0xC1);
3499        emitOperandHelper(src, dst, 0);
3500    }
3501
3502    public final void xaddq(AMD64Address dst, Register src) {
3503        prefixq(dst, src);
3504        emitByte(0x0F);
3505        emitByte(0xC1);
3506        emitOperandHelper(src, dst, 0);
3507    }
3508
3509    public final void xchgl(Register dst, AMD64Address src) {
3510        prefix(src, dst);
3511        emitByte(0x87);
3512        emitOperandHelper(dst, src, 0);
3513    }
3514
3515    public final void xchgq(Register dst, AMD64Address src) {
3516        prefixq(src, dst);
3517        emitByte(0x87);
3518        emitOperandHelper(dst, src, 0);
3519    }
3520
3521    public final void membar(int barriers) {
3522        if (target.isMP) {
3523            // We only have to handle StoreLoad
3524            if ((barriers & STORE_LOAD) != 0) {
3525                // All usable chips support "locked" instructions which suffice
3526                // as barriers, and are much faster than the alternative of
3527                // using the cpuid instruction. Here we use a locked add [rsp], 0,
3528                // which is conveniently a no-op otherwise, except for clobbering the
3529                // flags.
3530                // Any change to this code may need to revisit other places in
3531                // the code where this idiom is used, in particular the
3532                // orderAccess code.
3533                lock();
3534                addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
3535            }
3536        }
3537    }
3538
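    // Branch patching works backwards from the end of the instruction: short jmp/jcc forms are
    // 2 bytes long, so the 8-bit displacement is branchTarget - (branch + 2); call (E8) and
    // jmp rel32 (E9) are 5 bytes and jcc rel32 (0F 8x) is 6 bytes, giving the imm32 computations
    // below. Jump table entries instead store a 32-bit offset from the jump table base.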
3539    @Override
3540    protected final void patchJumpTarget(int branch, int branchTarget) {
3541        int op = getByte(branch);
3542        assert op == 0xE8 // call
3543                        ||
3544                        op == 0x00 // jump table entry
3545                        || op == 0xE9 // jmp
3546                        || op == 0xEB // short jmp
3547                        || (op & 0xF0) == 0x70 // short jcc
3548                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
3549        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
3550
3551        if (op == 0x00) {
3552            int offsetToJumpTableBase = getShort(branch + 1);
3553            int jumpTableBase = branch - offsetToJumpTableBase;
3554            int imm32 = branchTarget - jumpTableBase;
3555            emitInt(imm32, branch);
3556        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
3557
3558            // short offset operators (jmp and jcc)
3559            final int imm8 = branchTarget - (branch + 2);
3560            /*
3561             * Since a wrongly patched short branch can lead to code that appears to work but behaves
3562             * badly, we always fail with an exception here instead of relying on an assert.
3563             */
3564            if (!NumUtil.isByte(imm8)) {
3565                throw new InternalError("branch displacement out of range: " + imm8);
3566            }
3567            emitByte(imm8, branch + 1);
3568
3569        } else {
3570
3571            int off = 1;
3572            if (op == 0x0F) {
3573                off = 2;
3574            }
3575
3576            int imm32 = branchTarget - (branch + 4 + off);
3577            emitInt(imm32, branch + off);
3578        }
3579    }
3580
3581    public void nullCheck(AMD64Address address) {
3582        testl(AMD64.rax, address);
3583    }
3584
3585    @Override
3586    public void align(int modulus) {
3587        if (position() % modulus != 0) {
3588            nop(modulus - (position() % modulus));
3589        }
3590    }
3591
3592    /**
3593     * Emits a direct call instruction. Note that the actual call target is not specified, because
3594     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
3595     * responsible for adding the call address to the appropriate patching tables.
3596     */
3597    public final void call() {
3598        if (codePatchingAnnotationConsumer != null) {
3599            int pos = position();
3600            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
3601        }
3602        emitByte(0xE8);
3603        emitInt(0);
3604    }
3605
3606    public final void call(Register src) {
3607        int encode = prefixAndEncode(src.encoding);
3608        emitByte(0xFF);
3609        emitByte(0xD0 | encode);
3610    }
3611
3612    public final void int3() {
3613        emitByte(0xCC);
3614    }
3615
3616    public final void pause() {
3617        emitByte(0xF3);
3618        emitByte(0x90);
3619    }
3620
3621    private void emitx87(int b1, int b2, int i) {
3622        assert 0 <= i && i < 8 : "illegal stack offset";
3623        emitByte(b1);
3624        emitByte(b2 + i);
3625    }
3626
3627    public final void fldd(AMD64Address src) {
3628        emitByte(0xDD);
3629        emitOperandHelper(0, src, 0);
3630    }
3631
3632    public final void flds(AMD64Address src) {
3633        emitByte(0xD9);
3634        emitOperandHelper(0, src, 0);
3635    }
3636
3637    public final void fldln2() {
3638        emitByte(0xD9);
3639        emitByte(0xED);
3640    }
3641
3642    public final void fldlg2() {
3643        emitByte(0xD9);
3644        emitByte(0xEC);
3645    }
3646
3647    public final void fyl2x() {
3648        emitByte(0xD9);
3649        emitByte(0xF1);
3650    }
3651
3652    public final void fstps(AMD64Address src) {
3653        emitByte(0xD9);
3654        emitOperandHelper(3, src, 0);
3655    }
3656
3657    public final void fstpd(AMD64Address src) {
3658        emitByte(0xDD);
3659        emitOperandHelper(3, src, 0);
3660    }
3661
3662    private void emitFPUArith(int b1, int b2, int i) {
3663        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
3664        emitByte(b1);
3665        emitByte(b2 + i);
3666    }
3667
3668    public void ffree(int i) {
3669        emitFPUArith(0xDD, 0xC0, i);
3670    }
3671
3672    public void fincstp() {
3673        emitByte(0xD9);
3674        emitByte(0xF7);
3675    }
3676
3677    public void fxch(int i) {
3678        emitFPUArith(0xD9, 0xC8, i);
3679    }
3680
3681    public void fnstswAX() {
3682        emitByte(0xDF);
3683        emitByte(0xE0);
3684    }
3685
3686    public void fwait() {
3687        emitByte(0x9B);
3688    }
3689
3690    public void fprem() {
3691        emitByte(0xD9);
3692        emitByte(0xF8);
3693    }
3694
3695    public final void fsin() {
3696        emitByte(0xD9);
3697        emitByte(0xFE);
3698    }
3699
3700    public final void fcos() {
3701        emitByte(0xD9);
3702        emitByte(0xFF);
3703    }
3704
3705    public final void fptan() {
3706        emitByte(0xD9);
3707        emitByte(0xF2);
3708    }
3709
3710    public final void fstp(int i) {
3711        emitx87(0xDD, 0xD8, i);
3712    }
3713
3714    @Override
3715    public AMD64Address makeAddress(Register base, int displacement) {
3716        return new AMD64Address(base, displacement);
3717    }
3718
3719    @Override
3720    public AMD64Address getPlaceholder(int instructionStartPosition) {
3721        return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
3722    }
3723
3724    private void prefetchPrefix(AMD64Address src) {
3725        prefix(src);
3726        emitByte(0x0F);
3727    }
3728
3729    public void prefetchnta(AMD64Address src) {
3730        prefetchPrefix(src);
3731        emitByte(0x18);
3732        emitOperandHelper(0, src, 0);
3733    }
3734
3735    void prefetchr(AMD64Address src) {
3736        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3737        prefetchPrefix(src);
3738        emitByte(0x0D);
3739        emitOperandHelper(0, src, 0);
3740    }
3741
3742    public void prefetcht0(AMD64Address src) {
3743        assert supports(CPUFeature.SSE);
3744        prefetchPrefix(src);
3745        emitByte(0x18);
3746        emitOperandHelper(1, src, 0);
3747    }
3748
3749    public void prefetcht1(AMD64Address src) {
3750        assert supports(CPUFeature.SSE);
3751        prefetchPrefix(src);
3752        emitByte(0x18);
3753        emitOperandHelper(2, src, 0);
3754    }
3755
3756    public void prefetcht2(AMD64Address src) {
3757        assert supports(CPUFeature.SSE);
3758        prefix(src);
3759        emitByte(0x0f);
3760        emitByte(0x18);
3761        emitOperandHelper(3, src, 0);
3762    }
3763
3764    public void prefetchw(AMD64Address src) {
3765        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3766        prefix(src);
3767        emitByte(0x0f);
3768        emitByte(0x0D);
3769        emitOperandHelper(1, src, 0);
3770    }
3771
3772    public void rdtsc() {
3773        emitByte(0x0F);
3774        emitByte(0x31);
3775    }
3776
3777    /**
3778     * Emits an instruction which is considered to be illegal. This is used when we deliberately
3779     * want to crash the program, e.g. for debugging.
3780     */
3781    public void illegal() {
3782        emitByte(0x0f);
3783        emitByte(0x0b);
3784    }
3785}
3786