1/*
2 * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23package org.graalvm.compiler.asm.aarch64;
24
25import static jdk.vm.ci.aarch64.AArch64.cpuRegisters;
26import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADD;
27import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADDS;
28import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADR;
29import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.AND;
30import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ANDS;
31import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ASRV;
32import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BFM;
33import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BIC;
34import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BICS;
35import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BLR;
36import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BR;
37import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.BRK;
38import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLREX;
39import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLS;
40import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLZ;
41import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSEL;
42import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSINC;
43import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSNEG;
44import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.DMB;
45import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EON;
46import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EOR;
47import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.EXTR;
48import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FABS;
49import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FADD;
50import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCCMP;
51import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCMP;
52import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCMPZERO;
53import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCSEL;
54import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTDS;
55import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTSD;
56import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FCVTZS;
57import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FDIV;
58import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMADD;
59import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMOV;
60import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMSUB;
61import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FMUL;
62import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FNEG;
63import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FRINTZ;
64import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FSQRT;
65import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.FSUB;
66import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.HINT;
67import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.HLT;
68import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDAR;
69import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDAXR;
70import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDP;
71import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDR;
72import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDRS;
73import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LDXR;
74import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LSLV;
75import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.LSRV;
76import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MADD;
77import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVK;
78import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVN;
79import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MOVZ;
80import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.MSUB;
81import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ORN;
82import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ORR;
83import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RBIT;
84import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RET;
85import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.REVW;
86import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.REVX;
87import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.RORV;
88import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SBFM;
89import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SCVTF;
90import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SDIV;
91import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STLR;
92import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STLXR;
93import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STP;
94import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STR;
95import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.STXR;
96import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUB;
97import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUBS;
98import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBZ;
99import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBNZ;
100import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UBFM;
101import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UDIV;
102import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP32;
103import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP64;
104import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General32;
105import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General64;
106import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.floatFromSize;
107import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.generalFromSize;
108import static jdk.vm.ci.aarch64.AArch64.CPU;
109import static jdk.vm.ci.aarch64.AArch64.SIMD;
110import static jdk.vm.ci.aarch64.AArch64.r0;
111import static jdk.vm.ci.aarch64.AArch64.sp;
112import static jdk.vm.ci.aarch64.AArch64.zr;
113
114import java.util.Arrays;
115
116import org.graalvm.compiler.asm.Assembler;
117import org.graalvm.compiler.core.common.NumUtil;
118import org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode;
119import org.graalvm.compiler.debug.GraalError;
120
121import jdk.vm.ci.code.Register;
122import jdk.vm.ci.code.TargetDescription;
123
124public abstract class AArch64Assembler extends Assembler {
125
126    public static class LogicalImmediateTable {
127
128        private static final Immediate[] IMMEDIATE_TABLE = buildImmediateTable();
129
130        private static final int ImmediateOffset = 10;
131        private static final int ImmediateRotateOffset = 16;
132        private static final int ImmediateSizeOffset = 22;
133
134        /**
135         * Specifies whether immediate can be represented in all cases (YES), as a 64bit instruction
136         * (SIXTY_FOUR_BIT_ONLY) or not at all (NO).
137         */
138        enum Representable {
139            YES,
140            SIXTY_FOUR_BIT_ONLY,
141            NO
142        }
143
144        /**
145         * Tests whether an immediate can be encoded for logical instructions.
146         *
147         * @param is64bit if true immediate is considered a 64-bit pattern. If false we may use a
148         *            64-bit instruction to load the 32-bit pattern into a register.
149         * @return enum specifying whether immediate can be used for 32- and 64-bit logical
150         *         instructions ({@code #Representable.YES}), for 64-bit instructions only (
151         *         {@link Representable#SIXTY_FOUR_BIT_ONLY}) or not at all (
152         *         {@link Representable#NO}).
153         */
154        public static Representable isRepresentable(boolean is64bit, long immediate) {
155            int pos = getLogicalImmTablePos(is64bit, immediate);
156            if (pos < 0) {
157                // if 32bit instruction we can try again as 64bit immediate which may succeed.
158                // i.e. 0xffffffff fails as a 32bit immediate but works as 64bit one.
159                if (!is64bit) {
160                    assert NumUtil.isUnsignedNbit(32, immediate);
161                    pos = getLogicalImmTablePos(true, immediate);
162                    return pos >= 0 ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.NO;
163                }
164                return Representable.NO;
165            }
166            Immediate imm = IMMEDIATE_TABLE[pos];
167            return imm.only64bit() ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.YES;
168        }
169
170        public static Representable isRepresentable(int immediate) {
171            return isRepresentable(false, immediate & 0xFFFF_FFFFL);
172        }
173
174        public static int getLogicalImmEncoding(boolean is64bit, long value) {
175            int pos = getLogicalImmTablePos(is64bit, value);
176            assert pos >= 0 : "Value cannot be represented as logical immediate: " + value + ", is64bit=" + is64bit;
177            Immediate imm = IMMEDIATE_TABLE[pos];
178            assert is64bit || !imm.only64bit() : "Immediate can only be represented for 64bit, but 32bit instruction specified";
179            return IMMEDIATE_TABLE[pos].encoding;
180        }
181
182        /**
183         * @param is64bit if true also allow 64-bit only encodings to be returned.
184         * @return If positive the return value is the position into the IMMEDIATE_TABLE for the
185         *         given immediate, if negative the immediate cannot be encoded.
186         */
187        private static int getLogicalImmTablePos(boolean is64bit, long value) {
188            Immediate imm;
189            if (!is64bit) {
190                // 32bit instructions can only have 32bit immediates.
191                if (!NumUtil.isUnsignedNbit(32, value)) {
192                    return -1;
193                }
194                // If we have a 32bit instruction (and therefore immediate) we have to duplicate it
195                // across 64bit to find it in the table.
196                imm = new Immediate(value << 32 | value);
197            } else {
198                imm = new Immediate(value);
199            }
200            int pos = Arrays.binarySearch(IMMEDIATE_TABLE, imm);
201            if (pos < 0) {
202                return -1;
203            }
204            if (!is64bit && IMMEDIATE_TABLE[pos].only64bit()) {
205                return -1;
206            }
207            return pos;
208        }
209
210        /**
211         * To quote 5.4.2: [..] an immediate is a 32 or 64 bit pattern viewed as a vector of
212         * identical elements of size e = 2, 4, 8, 16, 32 or (in the case of bimm64) 64 bits. Each
213         * element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by
214         * 0 to e-1 bits. It is encoded in the following: 10-16: rotation amount (6bit) starting
215         * from 1s in the LSB (i.e. 0111->1011->1101->1110) 16-22: This stores a combination of the
216         * number of set bits and the pattern size. The pattern size is encoded as follows (x is
217         * used to store the number of 1 bits - 1) e pattern 2 1111xx 4 1110xx 8 110xxx 16 10xxxx 32
218         * 0xxxxx 64 xxxxxx 22: if set we have an instruction with 64bit pattern?
219         */
220        private static final class Immediate implements Comparable<Immediate> {
221            public final long imm;
222            public final int encoding;
223
224            Immediate(long imm, boolean is64, int s, int r) {
225                this.imm = imm;
226                this.encoding = computeEncoding(is64, s, r);
227            }
228
229            // Used to be able to binary search for an immediate in the table.
230            Immediate(long imm) {
231                this(imm, false, 0, 0);
232            }
233
234            /**
235             * Returns true if this pattern is only representable as 64bit.
236             */
237            public boolean only64bit() {
238                return (encoding & (1 << ImmediateSizeOffset)) != 0;
239            }
240
241            private static int computeEncoding(boolean is64, int s, int r) {
242                int sf = is64 ? 1 : 0;
243                return sf << ImmediateSizeOffset | r << ImmediateRotateOffset | s << ImmediateOffset;
244            }
245
246            @Override
247            public int compareTo(Immediate o) {
248                return Long.compare(imm, o.imm);
249            }
250        }
251
252        private static Immediate[] buildImmediateTable() {
253            final int nrImmediates = 5334;
254            final Immediate[] table = new Immediate[nrImmediates];
255            int nrImms = 0;
256            for (int logE = 1; logE <= 6; logE++) {
257                int e = 1 << logE;
258                long mask = NumUtil.getNbitNumberLong(e);
259                for (int nrOnes = 1; nrOnes < e; nrOnes++) {
260                    long val = (1L << nrOnes) - 1;
261                    // r specifies how much we rotate the value
262                    for (int r = 0; r < e; r++) {
263                        long immediate = (val >>> r | val << (e - r)) & mask;
264                        // Duplicate pattern to fill whole 64bit range.
265                        switch (logE) {
266                            case 1:
267                                immediate |= immediate << 2;
268                                immediate |= immediate << 4;
269                                immediate |= immediate << 8;
270                                immediate |= immediate << 16;
271                                immediate |= immediate << 32;
272                                break;
273                            case 2:
274                                immediate |= immediate << 4;
275                                immediate |= immediate << 8;
276                                immediate |= immediate << 16;
277                                immediate |= immediate << 32;
278                                break;
279                            case 3:
280                                immediate |= immediate << 8;
281                                immediate |= immediate << 16;
282                                immediate |= immediate << 32;
283                                break;
284                            case 4:
285                                immediate |= immediate << 16;
286                                immediate |= immediate << 32;
287                                break;
288                            case 5:
289                                immediate |= immediate << 32;
290                                break;
291                        }
292                        // 5 - logE can underflow to -1, but we shift this bogus result
293                        // out of the masked area.
294                        int sizeEncoding = (1 << (5 - logE)) - 1;
295                        int s = ((sizeEncoding << (logE + 1)) & 0x3f) | (nrOnes - 1);
296                        table[nrImms++] = new Immediate(immediate, /* is64bit */e == 64, s, r);
297                    }
298                }
299            }
300            Arrays.sort(table);
301            assert nrImms == nrImmediates : nrImms + " instead of " + nrImmediates + " in table.";
302            assert checkDuplicates(table) : "Duplicate values in table.";
303            return table;
304        }
305
306        private static boolean checkDuplicates(Immediate[] table) {
307            for (int i = 0; i < table.length - 1; i++) {
308                if (table[i].imm >= table[i + 1].imm) {
309                    return false;
310                }
311            }
312            return true;
313        }
314    }
315
    // Left-shift amounts applied to a register's encoding by the rd/rs1/rs2/rs3/rt/rt2/rn helpers
    // below. Several names share one bit position: Rd and Rt are at bit 0, Rs1 and Rn at bit 5,
    // Rs3 and Rt2 at bit 10.
    private static final int RdOffset = 0;
    private static final int Rs1Offset = 5;
    private static final int Rs2Offset = 16;
    private static final int Rs3Offset = 10;
    private static final int RtOffset = 0;
    private static final int RnOffset = 5;
    private static final int Rt2Offset = 10;
323
324    /* Helper functions */
325    private static int rd(Register reg) {
326        return reg.encoding << RdOffset;
327    }
328
329    private static int rs1(Register reg) {
330        return reg.encoding << Rs1Offset;
331    }
332
333    private static int rs2(Register reg) {
334        return reg.encoding << Rs2Offset;
335    }
336
337    private static int rs3(Register reg) {
338        return reg.encoding << Rs3Offset;
339    }
340
341    private static int rt(Register reg) {
342        return reg.encoding << RtOffset;
343    }
344
345    private static int rt2(Register reg) {
346        return reg.encoding << Rt2Offset;
347    }
348
349    private static int rn(Register reg) {
350        return reg.encoding << RnOffset;
351    }
352
353    private static int maskField(int sizeInBits, int n) {
354        assert NumUtil.isSignedNbit(sizeInBits, n);
355        return n & NumUtil.getNbitNumberInt(sizeInBits);
356    }
357
358    /**
359     * Enumeration of all different instruction kinds: General32/64 are the general instructions
360     * (integer, branch, etc.), for 32-, respectively 64-bit operands. FP32/64 is the encoding for
361     * the 32/64bit float operations
362     */
363    protected enum InstructionType {
364        General32(0b00 << 30, 32, true),
365        General64(0b10 << 30, 64, true),
366        FP32(0x00000000, 32, false),
367        FP64(0x00400000, 64, false);
368
369        public final int encoding;
370        public final int width;
371        public final boolean isGeneral;
372
373        InstructionType(int encoding, int width, boolean isGeneral) {
374            this.encoding = encoding;
375            this.width = width;
376            this.isGeneral = isGeneral;
377        }
378
379        public static InstructionType generalFromSize(int size) {
380            assert size == 32 || size == 64;
381            return size == 32 ? General32 : General64;
382        }
383
384        public static InstructionType floatFromSize(int size) {
385            assert size == 32 || size == 64;
386            return size == 32 ? FP32 : FP64;
387        }
388
389    }
390
    // Base opcode patterns ("...Op") and bit positions ("...Offset") used when assembling
    // instruction words. NOTE(review): most use sites lie outside this part of the file, so the
    // grouping below follows the constant names only.

    // Same values as the identically named constants in LogicalImmediateTable.
    private static final int ImmediateOffset = 10;
    private static final int ImmediateRotateOffset = 16;
    private static final int ImmediateSizeOffset = 22;
    private static final int ExtendTypeOffset = 13;

    private static final int AddSubImmOp = 0x11000000;
    // == 0x00400000
    private static final int AddSubShift12 = 0b01 << 22;
    // ORed into ADD/SUB to form Instruction.ADDS/SUBS.
    private static final int AddSubSetFlag = 0x20000000;

    private static final int LogicalImmOp = 0x12000000;

    private static final int MoveWideImmOp = 0x12800000;
    private static final int MoveWideImmOffset = 5;
    private static final int MoveWideShiftOffset = 21;

    private static final int BitfieldImmOp = 0x13000000;

    private static final int AddSubShiftedOp = 0x0B000000;
    private static final int ShiftTypeOffset = 22;

    private static final int AddSubExtendedOp = 0x0B200000;

    private static final int MulOp = 0x1B000000;
    private static final int DataProcessing1SourceOp = 0x5AC00000;
    private static final int DataProcessing2SourceOp = 0x1AC00000;

    private static final int Fp1SourceOp = 0x1E204000;
    private static final int Fp2SourceOp = 0x1E200800;
    private static final int Fp3SourceOp = 0x1F000000;

    private static final int FpConvertOp = 0x1E200000;
    private static final int FpImmOp = 0x1E201000;
    private static final int FpImmOffset = 13;

    private static final int FpCmpOp = 0x1E202000;

    private static final int PcRelImmHiOffset = 5;
    private static final int PcRelImmLoOffset = 29;

    private static final int PcRelImmOp = 0x10000000;

    private static final int UnconditionalBranchImmOp = 0x14000000;
    private static final int UnconditionalBranchRegOp = 0xD6000000;
    private static final int CompareBranchOp = 0x34000000;

    private static final int ConditionalBranchImmOffset = 5;

    private static final int ConditionalSelectOp = 0x1A800000;
    private static final int ConditionalConditionOffset = 12;

    // == 0x39000000 and 0x38000000 respectively
    private static final int LoadStoreScaledOp = 0b111_0_01_00 << 22;
    private static final int LoadStoreUnscaledOp = 0b111_0_00_00 << 22;

    // == 0x38200800 (shifts bind tighter than '|')
    private static final int LoadStoreRegisterOp = 0b111_0_00_00_1 << 21 | 0b10 << 10;

    private static final int LoadLiteralOp = 0x18000000;

    // == 0x38000400 and 0x38000C00 respectively
    private static final int LoadStorePostIndexedOp = 0b111_0_00_00_0 << 21 | 0b01 << 10;
    private static final int LoadStorePreIndexedOp = 0b111_0_00_00_0 << 21 | 0b11 << 10;

    private static final int LoadStoreUnscaledImmOffset = 12;
    private static final int LoadStoreScaledImmOffset = 10;
    private static final int LoadStoreScaledRegOffset = 12;
    private static final int LoadStoreIndexedImmOffset = 12;
    private static final int LoadStoreTransferSizeOffset = 30;
    private static final int LoadStoreFpFlagOffset = 26;
    private static final int LoadLiteralImmeOffset = 5;

    // == 0x28000000; the two unused variants below are 0x28800000 and 0x29800000
    private static final int LoadStorePairOp = 0b101_0 << 26;
    @SuppressWarnings("unused") private static final int LoadStorePairPostIndexOp = 0b101_0_001 << 23;
    @SuppressWarnings("unused") private static final int LoadStorePairPreIndexOp = 0b101_0_011 << 23;
    private static final int LoadStorePairImm7Offset = 15;

    private static final int LogicalShiftOp = 0x0A000000;

    private static final int ExceptionOp = 0xD4000000;
    private static final int SystemImmediateOffset = 5;

    @SuppressWarnings("unused") private static final int SimdImmediateOffset = 16;

    private static final int BarrierOp = 0xD503301F;
    private static final int BarrierKindOffset = 8;
473
    /**
     * Encoding for all instructions.
     *
     * Each constant holds the instruction-specific bits only; emitters OR them together with
     * operand fields (see the conditional branch emitter {@code b}, which combines
     * {@code BCOND.encoding} with the branch offset and the condition). Several constants share
     * the same numeric value (for example CBZ, B, LDR, STR, ADR and ADD are all 0), so an
     * encoding by itself does not identify an instruction.
     */
    public enum Instruction {
        BCOND(0x54000000),
        CBNZ(0x01000000),
        CBZ(0x00000000),
        TBZ(0x36000000),
        TBNZ(0x37000000),

        B(0x00000000),
        BL(0x80000000),
        BR(0x001F0000),
        BLR(0x003F0000),
        RET(0x005F0000),

        LDR(0x00000000),
        LDRS(0x00800000),
        LDXR(0x081f7c00),
        LDAR(0x8dffc00),
        LDAXR(0x85ffc00),

        STR(0x00000000),
        STXR(0x08007c00),
        STLR(0x089ffc00),
        STLXR(0x0800fc00),

        LDP(0b1 << 22),
        STP(0b0 << 22),

        ADR(0x00000000),
        ADRP(0x80000000),

        // The flag-setting variants reuse the plain ADD/SUB bits plus AddSubSetFlag.
        ADD(0x00000000),
        ADDS(ADD.encoding | AddSubSetFlag),
        SUB(0x40000000),
        SUBS(SUB.encoding | AddSubSetFlag),

        // NOT contributes a single bit (0x00200000); BIC, ORN, EON and BICS are the respective
        // base operation with that bit set.
        NOT(0x00200000),
        AND(0x00000000),
        BIC(AND.encoding | NOT.encoding),
        ORR(0x20000000),
        ORN(ORR.encoding | NOT.encoding),
        EOR(0x40000000),
        EON(EOR.encoding | NOT.encoding),
        ANDS(0x60000000),
        BICS(ANDS.encoding | NOT.encoding),

        ASRV(0x00002800),
        RORV(0x00002C00),
        LSRV(0x00002400),
        LSLV(0x00002000),

        CLS(0x00001400),
        CLZ(0x00001000),
        RBIT(0x00000000),
        REVX(0x00000C00),
        REVW(0x00000800),

        MOVN(0x00000000),
        MOVZ(0x40000000),
        MOVK(0x60000000),

        CSEL(0x00000000),
        CSNEG(0x40000400),
        CSINC(0x00000400),

        BFM(0x20000000),
        SBFM(0x00000000),
        UBFM(0x40000000),
        EXTR(0x13800000),

        MADD(0x00000000),
        MSUB(0x00008000),
        SDIV(0x00000C00),
        UDIV(0x00000800),

        FMOV(0x00000000),
        FMOVCPU2FPU(0x00070000),
        FMOVFPU2CPU(0x00060000),

        FCVTDS(0x00028000),
        FCVTSD(0x00020000),

        FCVTZS(0x00180000),
        SCVTF(0x00020000),

        FABS(0x00008000),
        FSQRT(0x00018000),
        FNEG(0x00010000),

        FRINTZ(0x00058000),

        FADD(0x00002000),
        FSUB(0x00003000),
        FMUL(0x00000000),
        FDIV(0x00001000),
        FMAX(0x00004000),
        FMIN(0x00005000),

        FMADD(0x00000000),
        FMSUB(0x00008000),

        FCMP(0x00000000),
        FCMPZERO(0x00000008),
        FCCMP(0x1E200400),
        FCSEL(0x1E200C00),

        INS(0x4e081c00),
        UMOV(0x4e083c00),

        CNT(0xe205800),
        USRA(0x6f001400),

        HLT(0x00400000),
        BRK(0x00200000),

        CLREX(0xd5033f5f),
        HINT(0xD503201F),
        DMB(0x000000A0),

        BLR_NATIVE(0xc0000000);

        /** Instruction-specific bits of this instruction. */
        public final int encoding;

        Instruction(int encoding) {
            this.encoding = encoding;
        }

    }
604
605    public enum ShiftType {
606        LSL(0),
607        LSR(1),
608        ASR(2),
609        ROR(3);
610
611        public final int encoding;
612
613        ShiftType(int encoding) {
614            this.encoding = encoding;
615        }
616    }
617
618    public enum ExtendType {
619        UXTB(0),
620        UXTH(1),
621        UXTW(2),
622        UXTX(3),
623        SXTB(4),
624        SXTH(5),
625        SXTW(6),
626        SXTX(7);
627
628        public final int encoding;
629
630        ExtendType(int encoding) {
631            this.encoding = encoding;
632        }
633    }
634
635    /**
636     * Condition Flags for branches. See 4.3
637     */
638    public enum ConditionFlag {
639        // Integer | Floating-point meanings
640        /** Equal | Equal. */
641        EQ(0x0),
642
643        /** Not Equal | Not equal or unordered. */
644        NE(0x1),
645
646        /** Unsigned Higher or Same | Greater than, equal or unordered. */
647        HS(0x2),
648
649        /** Unsigned lower | less than. */
650        LO(0x3),
651
652        /** Minus (negative) | less than. */
653        MI(0x4),
654
655        /** Plus (positive or zero) | greater than, equal or unordered. */
656        PL(0x5),
657
658        /** Overflow set | unordered. */
659        VS(0x6),
660
661        /** Overflow clear | ordered. */
662        VC(0x7),
663
664        /** Unsigned higher | greater than or unordered. */
665        HI(0x8),
666
667        /** Unsigned lower or same | less than or equal. */
668        LS(0x9),
669
670        /** Signed greater than or equal | greater than or equal. */
671        GE(0xA),
672
673        /** Signed less than | less than or unordered. */
674        LT(0xB),
675
676        /** Signed greater than | greater than. */
677        GT(0xC),
678
679        /** Signed less than or equal | less than, equal or unordered. */
680        LE(0xD),
681
682        /** Always | always. */
683        AL(0xE),
684
685        /** Always | always (identical to AL, just to have valid 0b1111 encoding). */
686        NV(0xF);
687
688        public final int encoding;
689
690        ConditionFlag(int encoding) {
691            this.encoding = encoding;
692        }
693
694        /**
695         * @return ConditionFlag specified by decoding.
696         */
697        public static ConditionFlag fromEncoding(int encoding) {
698            return values()[encoding];
699        }
700
701        public ConditionFlag negate() {
702            switch (this) {
703                case EQ:
704                    return NE;
705                case NE:
706                    return EQ;
707                case HS:
708                    return LO;
709                case LO:
710                    return HS;
711                case MI:
712                    return PL;
713                case PL:
714                    return MI;
715                case VS:
716                    return VC;
717                case VC:
718                    return VS;
719                case HI:
720                    return LS;
721                case LS:
722                    return HI;
723                case GE:
724                    return LT;
725                case LT:
726                    return GE;
727                case GT:
728                    return LE;
729                case LE:
730                    return GT;
731                case AL:
732                case NV:
733                default:
734                    throw GraalError.shouldNotReachHere();
735            }
736        }
737    }
738
    /**
     * Creates an assembler for the given target by forwarding it to the {@link Assembler}
     * superclass constructor.
     *
     * @param target description of the target the code is assembled for.
     */
    public AArch64Assembler(TargetDescription target) {
        super(target);
    }
742
743    /* Conditional Branch (5.2.1) */
744
745    /**
746     * Branch conditionally.
747     *
748     * @param condition may not be null.
749     * @param imm21 Signed 21-bit offset, has to be word aligned.
750     */
751    protected void b(ConditionFlag condition, int imm21) {
752        b(condition, imm21, -1);
753    }
754
755    /**
756     * Branch conditionally. Inserts instruction into code buffer at pos.
757     *
758     * @param condition may not be null.
759     * @param imm21 Signed 21-bit offset, has to be word aligned.
760     * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
761     */
762    protected void b(ConditionFlag condition, int imm21, int pos) {
763        if (pos == -1) {
764            emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding);
765        } else {
766            emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding, pos);
767        }
768    }
769
770    /**
771     * Compare register and branch if non-zero.
772     *
773     * @param reg general purpose register. May not be null, zero-register or stackpointer.
774     * @param size Instruction size in bits. Should be either 32 or 64.
775     * @param imm21 Signed 21-bit offset, has to be word aligned.
776     */
777    protected void cbnz(int size, Register reg, int imm21) {
778        conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, -1);
779    }
780
781    /**
782     * Compare register and branch if non-zero.
783     *
784     * @param reg general purpose register. May not be null, zero-register or stackpointer.
785     * @param size Instruction size in bits. Should be either 32 or 64.
786     * @param imm21 Signed 21-bit offset, has to be word aligned.
787     * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
788     */
789    protected void cbnz(int size, Register reg, int imm21, int pos) {
790        conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, pos);
791    }
792
793    /**
794     * Compare and branch if zero.
795     *
796     * @param reg general purpose register. May not be null, zero-register or stackpointer.
797     * @param size Instruction size in bits. Should be either 32 or 64.
798     * @param imm21 Signed 21-bit offset, has to be word aligned.
799     */
800    protected void cbz(int size, Register reg, int imm21) {
801        conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, -1);
802    }
803
804    /**
805     * Compare register and branch if zero.
806     *
807     * @param reg general purpose register. May not be null, zero-register or stackpointer.
808     * @param size Instruction size in bits. Should be either 32 or 64.
809     * @param imm21 Signed 21-bit offset, has to be word aligned.
810     * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
811     */
812    protected void cbz(int size, Register reg, int imm21, int pos) {
813        conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, pos);
814    }
815
816    /**
817     * Test a single bit and branch if the bit is nonzero.
818     *
819     * @param reg general purpose register. May not be null, zero-register or stackpointer.
820     * @param uimm6 Unsigned 6-bit bit index.
821     * @param imm16 signed 16 bit offset
822     */
823    protected void tbnz(Register reg, int uimm6, int imm16) {
824        tbnz(reg, uimm6, imm16, -1);
825    }
826
827    /**
828     * Test a single bit and branch if the bit is zero.
829     *
830     * @param reg general purpose register. May not be null, zero-register or stackpointer.
831     * @param uimm6 Unsigned 6-bit bit index.
832     * @param imm16 signed 16 bit offset
833     */
834    protected void tbz(Register reg, int uimm6, int imm16) {
835        tbz(reg, uimm6, imm16, -1);
836    }
837
838    /**
839     * Test a single bit and branch if the bit is nonzero.
840     *
841     * @param reg general purpose register. May not be null, zero-register or stackpointer.
842     * @param uimm6 Unsigned 6-bit bit index.
843     * @param imm16 signed 16 bit offset
844     * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
845     */
846    protected void tbnz(Register reg, int uimm6, int imm16, int pos) {
847        assert reg.getRegisterCategory().equals(CPU);
848        assert NumUtil.isUnsignedNbit(6, uimm6);
849        assert NumUtil.isSignedNbit(18, imm16);
850        assert (imm16 & 3) == 0;
851        // size bit is overloaded as top bit of uimm6 bit index
852        int size = (((uimm6 >> 5) & 1) == 0 ? 32 : 64);
853        // remaining 5 bits are encoded lower down
854        int uimm5 = uimm6 >> 1;
855        int offset = (imm16 & NumUtil.getNbitNumberInt(16)) >> 2;
856        InstructionType type = generalFromSize(size);
857        int encoding = type.encoding | TBNZ.encoding | (uimm5 << 19) | (offset << 5) | rd(reg);
858        if (pos == -1) {
859            emitInt(encoding);
860        } else {
861            emitInt(encoding, pos);
862        }
863    }
864
865    /**
866     * Test a single bit and branch if the bit is zero.
867     *
868     * @param reg general purpose register. May not be null, zero-register or stackpointer.
869     * @param uimm6 Unsigned 6-bit bit index.
870     * @param imm16 signed 16 bit offset
871     * @param pos Position at which instruction is inserted into buffer. -1 means insert at end.
872     */
873    protected void tbz(Register reg, int uimm6, int imm16, int pos) {
874        assert reg.getRegisterCategory().equals(CPU);
875        assert NumUtil.isUnsignedNbit(6, uimm6);
876        assert NumUtil.isSignedNbit(18, imm16);
877        assert (imm16 & 3) == 0;
878        // size bit is overloaded as top bit of uimm6 bit index
879        int size = (((uimm6 >> 5) & 1) == 0 ? 32 : 64);
880        // remaining 5 bits are encoded lower down
881        int uimm5 = uimm6 >> 1;
882        int offset = (imm16 & NumUtil.getNbitNumberInt(16)) >> 2;
883        InstructionType type = generalFromSize(size);
884        int encoding = type.encoding | TBZ.encoding | (uimm5 << 19) | (offset << 5) | rd(reg);
885        if (pos == -1) {
886            emitInt(encoding);
887        } else {
888            emitInt(encoding, pos);
889        }
890    }
891
892    private void conditionalBranchInstruction(Register reg, int imm21, InstructionType type, Instruction instr, int pos) {
893        assert reg.getRegisterCategory().equals(CPU);
894        int instrEncoding = instr.encoding | CompareBranchOp;
895        if (pos == -1) {
896            emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg));
897        } else {
898            emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg), pos);
899        }
900    }
901
902    private static int getConditionalBranchImm(int imm21) {
903        assert NumUtil.isSignedNbit(21, imm21) && (imm21 & 0x3) == 0 : "Immediate has to be 21bit signed number and word aligned";
904        int imm = (imm21 & NumUtil.getNbitNumberInt(21)) >> 2;
905        return imm << ConditionalBranchImmOffset;
906    }
907
908    /* Unconditional Branch (immediate) (5.2.2) */
909
910    /**
911     * @param imm28 Signed 28-bit offset, has to be word aligned.
912     */
913    protected void b(int imm28) {
914        unconditionalBranchImmInstruction(imm28, Instruction.B, -1);
915    }
916
917    /**
918     *
919     * @param imm28 Signed 28-bit offset, has to be word aligned.
920     * @param pos Position where instruction is inserted into code buffer.
921     */
922    protected void b(int imm28, int pos) {
923        unconditionalBranchImmInstruction(imm28, Instruction.B, pos);
924    }
925
926    /**
927     * Branch and link return address to register X30.
928     *
929     * @param imm28 Signed 28-bit offset, has to be word aligned.
930     */
931    public void bl(int imm28) {
932        unconditionalBranchImmInstruction(imm28, Instruction.BL, -1);
933    }
934
935    private void unconditionalBranchImmInstruction(int imm28, Instruction instr, int pos) {
936        assert NumUtil.isSignedNbit(28, imm28) && (imm28 & 0x3) == 0 : "Immediate has to be 28bit signed number and word aligned";
937        int imm = (imm28 & NumUtil.getNbitNumberInt(28)) >> 2;
938        int instrEncoding = instr.encoding | UnconditionalBranchImmOp;
939        if (pos == -1) {
940            emitInt(instrEncoding | imm);
941        } else {
942            emitInt(instrEncoding | imm, pos);
943        }
944    }
945
946    /* Unconditional Branch (register) (5.2.3) */
947
948    /**
949     * Branches to address in register and writes return address into register X30.
950     *
951     * @param reg general purpose register. May not be null, zero-register or stackpointer.
952     */
953    public void blr(Register reg) {
954        unconditionalBranchRegInstruction(BLR, reg);
955    }
956
957    /**
958     * Branches to address in register.
959     *
960     * @param reg general purpose register. May not be null, zero-register or stackpointer.
961     */
962    protected void br(Register reg) {
963        unconditionalBranchRegInstruction(BR, reg);
964    }
965
966    /**
967     * Return to address in register.
968     *
969     * @param reg general purpose register. May not be null, zero-register or stackpointer.
970     */
971    public void ret(Register reg) {
972        unconditionalBranchRegInstruction(RET, reg);
973    }
974
975    private void unconditionalBranchRegInstruction(Instruction instr, Register reg) {
976        assert reg.getRegisterCategory().equals(CPU);
977        assert !reg.equals(zr);
978        assert !reg.equals(sp);
979        emitInt(instr.encoding | UnconditionalBranchRegOp | rs1(reg));
980    }
981
982    /* Load-Store Single Register (5.3.1) */
983
984    /**
985     * Loads a srcSize value from address into rt zero-extending it.
986     *
987     * @param srcSize size of memory read in bits. Must be 8, 16, 32 or 64.
988     * @param rt general purpose register. May not be null or stackpointer.
989     * @param address all addressing modes allowed. May not be null.
990     */
991    public void ldr(int srcSize, Register rt, AArch64Address address) {
992        assert rt.getRegisterCategory().equals(CPU);
993        assert srcSize == 8 || srcSize == 16 || srcSize == 32 || srcSize == 64;
994        int transferSize = NumUtil.log2Ceil(srcSize / 8);
995        loadStoreInstruction(LDR, rt, address, General32, transferSize);
996    }
997
998    /**
999     * Loads a srcSize value from address into rt sign-extending it.
1000     *
1001     * @param targetSize size of target register in bits. Must be 32 or 64.
1002     * @param srcSize size of memory read in bits. Must be 8, 16 or 32, but may not be equivalent to
1003     *            targetSize.
1004     * @param rt general purpose register. May not be null or stackpointer.
1005     * @param address all addressing modes allowed. May not be null.
1006     */
1007    protected void ldrs(int targetSize, int srcSize, Register rt, AArch64Address address) {
1008        assert rt.getRegisterCategory().equals(CPU);
1009        assert (srcSize == 8 || srcSize == 16 || srcSize == 32) && srcSize != targetSize;
1010        int transferSize = NumUtil.log2Ceil(srcSize / 8);
1011        loadStoreInstruction(LDRS, rt, address, generalFromSize(targetSize), transferSize);
1012    }
1013
1014    public enum PrefetchMode {
1015        PLDL1KEEP(0b00000),
1016        PLDL1STRM(0b00001),
1017        PLDL2KEEP(0b00010),
1018        PLDL2STRM(0b00011),
1019        PLDL3KEEP(0b00100),
1020        PLDL3STRM(0b00101),
1021
1022        PLIL1KEEP(0b01000),
1023        PLIL1STRM(0b01001),
1024        PLIL2KEEP(0b01010),
1025        PLIL2STRM(0b01011),
1026        PLIL3KEEP(0b01100),
1027        PLIL3STRM(0b01101),
1028
1029        PSTL1KEEP(0b10000),
1030        PSTL1STRM(0b10001),
1031        PSTL2KEEP(0b10010),
1032        PSTL2STRM(0b10011),
1033        PSTL3KEEP(0b10100),
1034        PSTL3STRM(0b10101);
1035
1036        private final int encoding;
1037
1038        PrefetchMode(int encoding) {
1039            this.encoding = encoding;
1040        }
1041
1042        private static PrefetchMode[] modes = {
1043                        PLDL1KEEP,
1044                        PLDL1STRM,
1045                        PLDL2KEEP,
1046                        PLDL2STRM,
1047                        PLDL3KEEP,
1048                        PLDL3STRM,
1049
1050                        null,
1051                        null,
1052
1053                        PLIL1KEEP,
1054                        PLIL1STRM,
1055                        PLIL2KEEP,
1056                        PLIL2STRM,
1057                        PLIL3KEEP,
1058                        PLIL3STRM,
1059
1060                        null,
1061                        null,
1062
1063                        PSTL1KEEP,
1064                        PSTL1STRM,
1065                        PSTL2KEEP,
1066                        PSTL2STRM,
1067                        PSTL3KEEP,
1068                        PSTL3STRM
1069        };
1070
1071        public static PrefetchMode lookup(int enc) {
1072            assert enc >= 00 && enc < modes.length;
1073            return modes[enc];
1074        }
1075
1076        public Register toRegister() {
1077            return cpuRegisters.get(encoding);
1078        }
1079    }
1080
1081    /*
1082     * implements a prefetch at a 64-bit aligned address using a scaled 12 bit or unscaled 9 bit
1083     * displacement addressing mode
1084     *
1085     * @param rt general purpose register. May not be null, zr or stackpointer.
1086     *
1087     * @param address only displacement addressing modes allowed. May not be null.
1088     */
1089    public void prfm(AArch64Address address, PrefetchMode mode) {
1090        assert (address.getAddressingMode() == AddressingMode.IMMEDIATE_SCALED ||
1091                        address.getAddressingMode() == AddressingMode.IMMEDIATE_UNSCALED ||
1092                        address.getAddressingMode() == AddressingMode.REGISTER_OFFSET);
1093        assert mode != null;
1094        final int srcSize = 64;
1095        final int transferSize = NumUtil.log2Ceil(srcSize / 8);
1096        final Register rt = mode.toRegister();
1097        // this looks weird but that's because loadStoreInstruction is weird
1098        // instruction select fields are size [31:30], v [26] and opc [25:24]
1099        // prfm requires size == 0b11, v == 0b0 and opc == 0b11
1100        // passing LDRS ensures opc[1] == 0b1
1101        // (n.b. passing LDR/STR makes no difference to opc[1:0]!!)
1102        // passing General64 ensures opc[0] == 0b1 and v = 0b0
1103        // (n.b. passing General32 ensures opc[0] == 0b0 and v = 0b0)
1104        // srcSize 64 ensures size == 0b11
1105        loadStoreInstruction(LDRS, rt, address, General64, transferSize);
1106    }
1107
1108    /**
1109     * Stores register rt into memory pointed by address.
1110     *
1111     * @param destSize number of bits written to memory. Must be 8, 16, 32 or 64.
1112     * @param rt general purpose register. May not be null or stackpointer.
1113     * @param address all addressing modes allowed. May not be null.
1114     */
1115    public void str(int destSize, Register rt, AArch64Address address) {
1116        assert rt.getRegisterCategory().equals(CPU);
1117        assert destSize == 8 || destSize == 16 || destSize == 32 || destSize == 64;
1118        int transferSize = NumUtil.log2Ceil(destSize / 8);
1119        loadStoreInstruction(STR, rt, address, General64, transferSize);
1120    }
1121
1122    private void loadStoreInstruction(Instruction instr, Register reg, AArch64Address address, InstructionType type, int log2TransferSize) {
1123        assert log2TransferSize >= 0 && log2TransferSize < 4;
1124        int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1125        int is32Bit = type.width == 32 ? 1 << ImmediateSizeOffset : 0;
1126        int isFloat = !type.isGeneral ? 1 << LoadStoreFpFlagOffset : 0;
1127        int memop = instr.encoding | transferSizeEncoding | is32Bit | isFloat | rt(reg);
1128        switch (address.getAddressingMode()) {
1129            case IMMEDIATE_SCALED:
1130                emitInt(memop | LoadStoreScaledOp | address.getImmediate() << LoadStoreScaledImmOffset | rs1(address.getBase()));
1131                break;
1132            case IMMEDIATE_UNSCALED:
1133                emitInt(memop | LoadStoreUnscaledOp | address.getImmediate() << LoadStoreUnscaledImmOffset | rs1(address.getBase()));
1134                break;
1135            case BASE_REGISTER_ONLY:
1136                emitInt(memop | LoadStoreScaledOp | rs1(address.getBase()));
1137                break;
1138            case EXTENDED_REGISTER_OFFSET:
1139            case REGISTER_OFFSET:
1140                ExtendType extendType = address.getAddressingMode() == AddressingMode.EXTENDED_REGISTER_OFFSET ? address.getExtendType() : ExtendType.UXTX;
1141                boolean shouldScale = address.isScaled() && log2TransferSize != 0;
1142                emitInt(memop | LoadStoreRegisterOp | rs2(address.getOffset()) | extendType.encoding << ExtendTypeOffset | (shouldScale ? 1 : 0) << LoadStoreScaledRegOffset | rs1(address.getBase()));
1143                break;
1144            case PC_LITERAL:
1145                assert log2TransferSize >= 2 : "PC literal loads only works for load/stores of 32-bit and larger";
1146                transferSizeEncoding = (log2TransferSize - 2) << LoadStoreTransferSizeOffset;
1147                emitInt(transferSizeEncoding | isFloat | LoadLiteralOp | rd(reg) | address.getImmediate() << LoadLiteralImmeOffset);
1148                break;
1149            case IMMEDIATE_POST_INDEXED:
1150                emitInt(memop | LoadStorePostIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset);
1151                break;
1152            case IMMEDIATE_PRE_INDEXED:
1153                emitInt(memop | LoadStorePreIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset);
1154                break;
1155            default:
1156                throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode());
1157        }
1158    }
1159
1160    /**
1161     * Load Pair of Registers calculates an address from a base register value and an immediate
1162     * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from
1163     * two registers.
1164     */
1165    public void ldp(int size, Register rt, Register rt2, AArch64Address address) {
1166        assert size == 32 || size == 64;
1167        loadStorePairInstruction(LDP, rt, rt2, address, generalFromSize(size));
1168    }
1169
1170    /**
1171     * Store Pair of Registers calculates an address from a base register value and an immediate
1172     * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from
1173     * two registers.
1174     */
1175    public void stp(int size, Register rt, Register rt2, AArch64Address address) {
1176        assert size == 32 || size == 64;
1177        loadStorePairInstruction(STP, rt, rt2, address, generalFromSize(size));
1178    }
1179
1180    private void loadStorePairInstruction(Instruction instr, Register rt, Register rt2, AArch64Address address, InstructionType type) {
1181        int scaledOffset = maskField(7, address.getImmediateRaw());  // LDP/STP use a 7-bit scaled
1182                                                                     // offset
1183        int memop = type.encoding | instr.encoding | scaledOffset << LoadStorePairImm7Offset | rt2(rt2) | rn(address.getBase()) | rt(rt);
1184        switch (address.getAddressingMode()) {
1185            case IMMEDIATE_SCALED:
1186                emitInt(memop | LoadStorePairOp | (0b010 << 23));
1187                break;
1188            case IMMEDIATE_POST_INDEXED:
1189                emitInt(memop | LoadStorePairOp | (0b001 << 23));
1190                break;
1191            case IMMEDIATE_PRE_INDEXED:
1192                emitInt(memop | LoadStorePairOp | (0b011 << 23));
1193                break;
1194            default:
1195                throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode());
1196        }
1197    }
1198
1199    /* Load-Store Exclusive (5.3.6) */
1200
1201    /**
1202     * Load address exclusive. Natural alignment of address is required.
1203     *
1204     * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1205     * @param rt general purpose register. May not be null or stackpointer.
1206     * @param rn general purpose register.
1207     */
1208    protected void ldxr(int size, Register rt, Register rn) {
1209        assert size == 8 || size == 16 || size == 32 || size == 64;
1210        int transferSize = NumUtil.log2Ceil(size / 8);
1211        exclusiveLoadInstruction(LDXR, rt, rn, transferSize);
1212    }
1213
1214    /**
1215     * Store address exclusive. Natural alignment of address is required. rs and rt may not point to
1216     * the same register.
1217     *
1218     * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1219     * @param rs general purpose register. Set to exclusive access status. 0 means success,
1220     *            everything else failure. May not be null, or stackpointer.
1221     * @param rt general purpose register. May not be null or stackpointer.
1222     * @param rn general purpose register.
1223     */
1224    protected void stxr(int size, Register rs, Register rt, Register rn) {
1225        assert size == 8 || size == 16 || size == 32 || size == 64;
1226        int transferSize = NumUtil.log2Ceil(size / 8);
1227        exclusiveStoreInstruction(STXR, rs, rt, rn, transferSize);
1228    }
1229
1230    /* Load-Acquire/Store-Release (5.3.7) */
1231
1232    /* non exclusive access */
1233    /**
1234     * Load acquire. Natural alignment of address is required.
1235     *
1236     * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1237     * @param rt general purpose register. May not be null or stackpointer.
1238     * @param rn general purpose register.
1239     */
1240    protected void ldar(int size, Register rt, Register rn) {
1241        assert size == 8 || size == 16 || size == 32 || size == 64;
1242        int transferSize = NumUtil.log2Ceil(size / 8);
1243        exclusiveLoadInstruction(LDAR, rt, rn, transferSize);
1244    }
1245
1246    /**
1247     * Store-release. Natural alignment of address is required.
1248     *
1249     * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1250     * @param rt general purpose register. May not be null or stackpointer.
1251     * @param rn general purpose register.
1252     */
1253    protected void stlr(int size, Register rt, Register rn) {
1254        assert size == 8 || size == 16 || size == 32 || size == 64;
1255        int transferSize = NumUtil.log2Ceil(size / 8);
1256        // Hack: Passing the zero-register means it is ignored when building the encoding.
1257        exclusiveStoreInstruction(STLR, r0, rt, rn, transferSize);
1258    }
1259
1260    /* exclusive access */
1261    /**
1262     * Load acquire exclusive. Natural alignment of address is required.
1263     *
1264     * @param size size of memory read in bits. Must be 8, 16, 32 or 64.
1265     * @param rt general purpose register. May not be null or stackpointer.
1266     * @param rn general purpose register.
1267     */
1268    public void ldaxr(int size, Register rt, Register rn) {
1269        assert size == 8 || size == 16 || size == 32 || size == 64;
1270        int transferSize = NumUtil.log2Ceil(size / 8);
1271        exclusiveLoadInstruction(LDAXR, rt, rn, transferSize);
1272    }
1273
1274    /**
1275     * Store-release exclusive. Natural alignment of address is required. rs and rt may not point to
1276     * the same register.
1277     *
1278     * @param size size of bits written to memory. Must be 8, 16, 32 or 64.
1279     * @param rs general purpose register. Set to exclusive access status. 0 means success,
1280     *            everything else failure. May not be null, or stackpointer.
1281     * @param rt general purpose register. May not be null or stackpointer.
1282     * @param rn general purpose register.
1283     */
1284    public void stlxr(int size, Register rs, Register rt, Register rn) {
1285        assert size == 8 || size == 16 || size == 32 || size == 64;
1286        int transferSize = NumUtil.log2Ceil(size / 8);
1287        exclusiveStoreInstruction(STLXR, rs, rt, rn, transferSize);
1288    }
1289
1290    private void exclusiveLoadInstruction(Instruction instr, Register reg, Register rn, int log2TransferSize) {
1291        assert log2TransferSize >= 0 && log2TransferSize < 4;
1292        assert reg.getRegisterCategory().equals(CPU);
1293        int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1294        emitInt(transferSizeEncoding | instr.encoding | 1 << ImmediateSizeOffset | rn(rn) | rt(reg));
1295    }
1296
1297    /**
1298     * Stores data from rt into address and sets rs to the returned exclusive access status.
1299     *
1300     * @param rs general purpose register into which the exclusive access status is written. May not
1301     *            be null.
1302     * @param rt general purpose register containing data to be written to memory at address. May
1303     *            not be null
1304     * @param rn general purpose register containing the address specifying where rt is written to.
1305     * @param log2TransferSize log2Ceil of memory transfer size.
1306     */
1307    private void exclusiveStoreInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize) {
1308        assert log2TransferSize >= 0 && log2TransferSize < 4;
1309        assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt);
1310        int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset;
1311        emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt));
1312    }
1313
1314    /* PC-relative Address Calculation (5.4.4) */
1315
1316    /**
1317     * Address of page: sign extends 21-bit offset, shifts it left by 12 and adds it to the value of
1318     * the PC with its bottom 12-bits cleared, writing the result to dst.
1319     *
1320     * @param dst general purpose register. May not be null, zero-register or stackpointer.
1321     * @param imm Signed 33-bit offset with lower 12bits clear.
1322     */
1323    // protected void adrp(Register dst, long imm) {
1324    // assert (imm & NumUtil.getNbitNumberInt(12)) == 0 : "Lower 12-bit of immediate must be zero.";
1325    // assert NumUtil.isSignedNbit(33, imm);
1326    // addressCalculationInstruction(dst, (int) (imm >>> 12), Instruction.ADRP);
1327    // }
1328
1329    /**
1330     * Adds a 21-bit signed offset to the program counter and writes the result to dst.
1331     *
1332     * @param dst general purpose register. May not be null, zero-register or stackpointer.
1333     * @param imm21 Signed 21-bit offset.
1334     */
1335    public void adr(Register dst, int imm21) {
1336        emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21));
1337    }
1338
1339    public void adr(Register dst, int imm21, int pos) {
1340        emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21), pos);
1341    }
1342
1343    private static int getPcRelativeImmEncoding(int imm21) {
1344        assert NumUtil.isSignedNbit(21, imm21);
1345        int imm = imm21 & NumUtil.getNbitNumberInt(21);
1346        // higher 19 bit
1347        int immHi = (imm >> 2) << PcRelImmHiOffset;
1348        // lower 2 bit
1349        int immLo = (imm & 0x3) << PcRelImmLoOffset;
1350        return immHi | immLo;
1351    }
1352
1353    /* Arithmetic (Immediate) (5.4.1) */
1354
1355    /**
1356     * dst = src + aimm.
1357     *
1358     * @param size register size. Has to be 32 or 64.
1359     * @param dst general purpose register. May not be null or zero-register.
1360     * @param src general purpose register. May not be null or zero-register.
1361     * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1362     *            the lower 12-bit cleared.
1363     */
1364    protected void add(int size, Register dst, Register src, int aimm) {
1365        assert !dst.equals(zr);
1366        assert !src.equals(zr);
1367        addSubImmInstruction(ADD, dst, src, aimm, generalFromSize(size));
1368    }
1369
1370    /**
1371     * dst = src + aimm and sets condition flags.
1372     *
1373     * @param size register size. Has to be 32 or 64.
1374     * @param dst general purpose register. May not be null or stackpointer.
1375     * @param src general purpose register. May not be null or zero-register.
1376     * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1377     *            the lower 12-bit cleared.
1378     */
1379    protected void adds(int size, Register dst, Register src, int aimm) {
1380        assert !dst.equals(sp);
1381        assert !src.equals(zr);
1382        addSubImmInstruction(ADDS, dst, src, aimm, generalFromSize(size));
1383    }
1384
1385    /**
1386     * dst = src - aimm.
1387     *
1388     * @param size register size. Has to be 32 or 64.
1389     * @param dst general purpose register. May not be null or zero-register.
1390     * @param src general purpose register. May not be null or zero-register.
1391     * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1392     *            the lower 12-bit cleared.
1393     */
1394    protected void sub(int size, Register dst, Register src, int aimm) {
1395        assert !dst.equals(zr);
1396        assert !src.equals(zr);
1397        addSubImmInstruction(SUB, dst, src, aimm, generalFromSize(size));
1398    }
1399
1400    /**
1401     * dst = src - aimm and sets condition flags.
1402     *
1403     * @param size register size. Has to be 32 or 64.
1404     * @param dst general purpose register. May not be null or stackpointer.
1405     * @param src general purpose register. May not be null or zero-register.
1406     * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with
1407     *            the lower 12-bit cleared.
1408     */
1409    protected void subs(int size, Register dst, Register src, int aimm) {
1410        assert !dst.equals(sp);
1411        assert !src.equals(zr);
1412        addSubImmInstruction(SUBS, dst, src, aimm, generalFromSize(size));
1413    }
1414
1415    private void addSubImmInstruction(Instruction instr, Register dst, Register src, int aimm, InstructionType type) {
1416        emitInt(type.encoding | instr.encoding | AddSubImmOp | encodeAimm(aimm) | rd(dst) | rs1(src));
1417    }
1418
1419    /**
1420     * Encodes arithmetic immediate.
1421     *
1422     * @param imm Immediate has to be either an unsigned 12-bit value or an unsigned 24-bit value
1423     *            with the lower 12 bits zero.
1424     * @return Representation of immediate for use with arithmetic instructions.
1425     */
1426    private static int encodeAimm(int imm) {
1427        assert isAimm(imm) : "Immediate has to be legal arithmetic immediate value " + imm;
1428        if (NumUtil.isUnsignedNbit(12, imm)) {
1429            return imm << ImmediateOffset;
1430        } else {
1431            // First 12-bit are zero, so shift immediate 12-bit and set flag to indicate
1432            // shifted immediate value.
1433            return (imm >>> 12 << ImmediateOffset) | AddSubShift12;
1434        }
1435    }
1436
1437    /**
1438     * Checks whether immediate can be encoded as an arithmetic immediate.
1439     *
1440     * @param imm Immediate has to be either an unsigned 12bit value or un unsigned 24bit value with
1441     *            the lower 12 bits 0.
1442     * @return true if valid arithmetic immediate, false otherwise.
1443     */
1444    protected static boolean isAimm(int imm) {
1445        return NumUtil.isUnsignedNbit(12, imm) || NumUtil.isUnsignedNbit(12, imm >>> 12) && (imm & 0xfff) == 0;
1446    }
1447
1448    /* Logical (immediate) (5.4.2) */
1449
1450    /**
1451     * dst = src & bimm.
1452     *
1453     * @param size register size. Has to be 32 or 64.
1454     * @param dst general purpose register. May not be null or zero-register.
1455     * @param src general purpose register. May not be null or stack-pointer.
1456     * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1457     */
1458    public void and(int size, Register dst, Register src, long bimm) {
1459        assert !dst.equals(zr);
1460        assert !src.equals(sp);
1461        logicalImmInstruction(AND, dst, src, bimm, generalFromSize(size));
1462    }
1463
1464    /**
1465     * dst = src & bimm and sets condition flags.
1466     *
1467     * @param size register size. Has to be 32 or 64.
1468     * @param dst general purpose register. May not be null or stack-pointer.
1469     * @param src general purpose register. May not be null or stack-pointer.
1470     * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1471     */
1472    public void ands(int size, Register dst, Register src, long bimm) {
1473        assert !dst.equals(sp);
1474        assert !src.equals(sp);
1475        logicalImmInstruction(ANDS, dst, src, bimm, generalFromSize(size));
1476    }
1477
1478    /**
1479     * dst = src ^ bimm.
1480     *
1481     * @param size register size. Has to be 32 or 64.
1482     * @param dst general purpose register. May not be null or zero-register.
1483     * @param src general purpose register. May not be null or stack-pointer.
1484     * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1485     */
1486    public void eor(int size, Register dst, Register src, long bimm) {
1487        assert !dst.equals(zr);
1488        assert !src.equals(sp);
1489        logicalImmInstruction(EOR, dst, src, bimm, generalFromSize(size));
1490    }
1491
1492    /**
1493     * dst = src | bimm.
1494     *
1495     * @param size register size. Has to be 32 or 64.
1496     * @param dst general purpose register. May not be null or zero-register.
1497     * @param src general purpose register. May not be null or stack-pointer.
1498     * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition.
1499     */
1500    protected void orr(int size, Register dst, Register src, long bimm) {
1501        assert !dst.equals(zr);
1502        assert !src.equals(sp);
1503        logicalImmInstruction(ORR, dst, src, bimm, generalFromSize(size));
1504    }
1505
1506    private void logicalImmInstruction(Instruction instr, Register dst, Register src, long bimm, InstructionType type) {
1507        // Mask higher bits off, since we always pass longs around even for the 32-bit instruction.
1508        long bimmValue;
1509        if (type == General32) {
1510            assert (bimm >> 32) == 0 || (bimm >> 32) == -1L : "Higher order bits for 32-bit instruction must either all be 0 or 1.";
1511            bimmValue = bimm & NumUtil.getNbitNumberLong(32);
1512        } else {
1513            bimmValue = bimm;
1514        }
1515        int immEncoding = LogicalImmediateTable.getLogicalImmEncoding(type == General64, bimmValue);
1516        emitInt(type.encoding | instr.encoding | LogicalImmOp | immEncoding | rd(dst) | rs1(src));
1517    }
1518
1519    /* Move (wide immediate) (5.4.3) */
1520
1521    /**
1522     * dst = uimm16 << shiftAmt.
1523     *
1524     * @param size register size. Has to be 32 or 64.
1525     * @param dst general purpose register. May not be null, stackpointer or zero-register.
1526     * @param uimm16 16-bit unsigned immediate
1527     * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller
1528     *            than size.
1529     */
1530    protected void movz(int size, Register dst, int uimm16, int shiftAmt) {
1531        moveWideImmInstruction(MOVZ, dst, uimm16, shiftAmt, generalFromSize(size));
1532    }
1533
1534    /**
1535     * dst = ~(uimm16 << shiftAmt).
1536     *
1537     * @param size register size. Has to be 32 or 64.
1538     * @param dst general purpose register. May not be null, stackpointer or zero-register.
1539     * @param uimm16 16-bit unsigned immediate
1540     * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller
1541     *            than size.
1542     */
1543    protected void movn(int size, Register dst, int uimm16, int shiftAmt) {
1544        moveWideImmInstruction(MOVN, dst, uimm16, shiftAmt, generalFromSize(size));
1545    }
1546
1547    /**
1548     * dst<pos+15:pos> = uimm16.
1549     *
1550     * @param size register size. Has to be 32 or 64.
1551     * @param dst general purpose register. May not be null, stackpointer or zero-register.
1552     * @param uimm16 16-bit unsigned immediate
1553     * @param pos position into which uimm16 is inserted. Can be any multiple of 16 smaller than
1554     *            size.
1555     */
1556    protected void movk(int size, Register dst, int uimm16, int pos) {
1557        moveWideImmInstruction(MOVK, dst, uimm16, pos, generalFromSize(size));
1558    }
1559
1560    private void moveWideImmInstruction(Instruction instr, Register dst, int uimm16, int shiftAmt, InstructionType type) {
1561        assert dst.getRegisterCategory().equals(CPU);
1562        assert NumUtil.isUnsignedNbit(16, uimm16) : "Immediate has to be unsigned 16bit";
1563        assert shiftAmt == 0 || shiftAmt == 16 || (type == InstructionType.General64 && (shiftAmt == 32 || shiftAmt == 48)) : "Invalid shift amount: " + shiftAmt;
1564        int shiftValue = shiftAmt >> 4;
1565        emitInt(type.encoding | instr.encoding | MoveWideImmOp | rd(dst) | uimm16 << MoveWideImmOffset | shiftValue << MoveWideShiftOffset);
1566    }
1567
1568    /* Bitfield Operations (5.4.5) */
1569
1570    /**
1571     * Bitfield move.
1572     *
1573     * @param size register size. Has to be 32 or 64.
1574     * @param dst general purpose register. May not be null, stackpointer or zero-register.
1575     * @param src general purpose register. May not be null, stackpointer or zero-register.
1576     * @param r must be in the range 0 to size - 1
1577     * @param s must be in the range 0 to size - 1
1578     */
1579    protected void bfm(int size, Register dst, Register src, int r, int s) {
1580        bitfieldInstruction(BFM, dst, src, r, s, generalFromSize(size));
1581    }
1582
1583    /**
1584     * Unsigned bitfield move.
1585     *
1586     * @param size register size. Has to be 32 or 64.
1587     * @param dst general purpose register. May not be null, stackpointer or zero-register.
1588     * @param src general purpose register. May not be null, stackpointer or zero-register.
1589     * @param r must be in the range 0 to size - 1
1590     * @param s must be in the range 0 to size - 1
1591     */
1592    protected void ubfm(int size, Register dst, Register src, int r, int s) {
1593        bitfieldInstruction(UBFM, dst, src, r, s, generalFromSize(size));
1594    }
1595
1596    /**
1597     * Signed bitfield move.
1598     *
1599     * @param size register size. Has to be 32 or 64.
1600     * @param dst general purpose register. May not be null, stackpointer or zero-register.
1601     * @param src general purpose register. May not be null, stackpointer or zero-register.
1602     * @param r must be in the range 0 to size - 1
1603     * @param s must be in the range 0 to size - 1
1604     */
1605    protected void sbfm(int size, Register dst, Register src, int r, int s) {
1606        bitfieldInstruction(SBFM, dst, src, r, s, generalFromSize(size));
1607    }
1608
1609    private void bitfieldInstruction(Instruction instr, Register dst, Register src, int r, int s, InstructionType type) {
1610        assert !dst.equals(sp) && !dst.equals(zr);
1611        assert !src.equals(sp) && !src.equals(zr);
1612        assert s >= 0 && s < type.width && r >= 0 && r < type.width;
1613        int sf = type == General64 ? 1 << ImmediateSizeOffset : 0;
1614        emitInt(type.encoding | instr.encoding | BitfieldImmOp | sf | r << ImmediateRotateOffset | s << ImmediateOffset | rd(dst) | rs1(src));
1615    }
1616
1617    /* Extract (Immediate) (5.4.6) */
1618
1619    /**
1620     * Extract. dst = src1:src2<lsb+31:lsb>
1621     *
1622     * @param size register size. Has to be 32 or 64.
1623     * @param dst general purpose register. May not be null or stackpointer.
1624     * @param src1 general purpose register. May not be null or stackpointer.
1625     * @param src2 general purpose register. May not be null or stackpointer.
1626     * @param lsb must be in range 0 to size - 1.
1627     */
1628    protected void extr(int size, Register dst, Register src1, Register src2, int lsb) {
1629        assert !dst.equals(sp);
1630        assert !src1.equals(sp);
1631        assert !src2.equals(sp);
1632        InstructionType type = generalFromSize(size);
1633        assert lsb >= 0 && lsb < type.width;
1634        int sf = type == General64 ? 1 << ImmediateSizeOffset : 0;
1635        emitInt(type.encoding | EXTR.encoding | sf | lsb << ImmediateOffset | rd(dst) | rs1(src1) | rs2(src2));
1636    }
1637
1638    /* Arithmetic (shifted register) (5.5.1) */
1639
1640    /**
1641     * dst = src1 + shiftType(src2, imm).
1642     *
1643     * @param size register size. Has to be 32 or 64.
1644     * @param dst general purpose register. May not be null or stackpointer.
1645     * @param src1 general purpose register. May not be null or stackpointer.
1646     * @param src2 general purpose register. May not be null or stackpointer.
1647     * @param shiftType any type but ROR.
1648     * @param imm must be in range 0 to size - 1.
1649     */
1650    protected void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1651        addSubShiftedInstruction(ADD, dst, src1, src2, shiftType, imm, generalFromSize(size));
1652    }
1653
1654    /**
1655     * dst = src1 + shiftType(src2, imm) and sets condition flags.
1656     *
1657     * @param size register size. Has to be 32 or 64.
1658     * @param dst general purpose register. May not be null or stackpointer.
1659     * @param src1 general purpose register. May not be null or stackpointer.
1660     * @param src2 general purpose register. May not be null or stackpointer.
1661     * @param shiftType any type but ROR.
1662     * @param imm must be in range 0 to size - 1.
1663     */
1664    public void adds(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1665        addSubShiftedInstruction(ADDS, dst, src1, src2, shiftType, imm, generalFromSize(size));
1666    }
1667
1668    /**
1669     * dst = src1 - shiftType(src2, imm).
1670     *
1671     * @param size register size. Has to be 32 or 64.
1672     * @param dst general purpose register. May not be null or stackpointer.
1673     * @param src1 general purpose register. May not be null or stackpointer.
1674     * @param src2 general purpose register. May not be null or stackpointer.
1675     * @param shiftType any type but ROR.
1676     * @param imm must be in range 0 to size - 1.
1677     */
1678    protected void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1679        addSubShiftedInstruction(SUB, dst, src1, src2, shiftType, imm, generalFromSize(size));
1680    }
1681
1682    /**
1683     * dst = src1 - shiftType(src2, imm) and sets condition flags.
1684     *
1685     * @param size register size. Has to be 32 or 64.
1686     * @param dst general purpose register. May not be null or stackpointer.
1687     * @param src1 general purpose register. May not be null or stackpointer.
1688     * @param src2 general purpose register. May not be null or stackpointer.
1689     * @param shiftType any type but ROR.
1690     * @param imm must be in range 0 to size - 1.
1691     */
1692    public void subs(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) {
1693        addSubShiftedInstruction(SUBS, dst, src1, src2, shiftType, imm, generalFromSize(size));
1694    }
1695
1696    private void addSubShiftedInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int imm, InstructionType type) {
1697        assert shiftType != ShiftType.ROR;
1698        assert imm >= 0 && imm < type.width;
1699        emitInt(type.encoding | instr.encoding | AddSubShiftedOp | imm << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1700    }
1701
1702    /* Arithmetic (extended register) (5.5.2) */
1703    /**
1704     * dst = src1 + extendType(src2) << imm.
1705     *
1706     * @param size register size. Has to be 32 or 64.
1707     * @param dst general purpose register. May not be null or zero-register..
1708     * @param src1 general purpose register. May not be null or zero-register.
1709     * @param src2 general purpose register. May not be null or stackpointer.
1710     * @param extendType defines how src2 is extended to the same size as src1.
1711     * @param shiftAmt must be in range 0 to 4.
1712     */
1713    public void add(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1714        assert !dst.equals(zr);
1715        assert !src1.equals(zr);
1716        assert !src2.equals(sp);
1717        addSubExtendedInstruction(ADD, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1718    }
1719
1720    /**
1721     * dst = src1 + extendType(src2) << imm and sets condition flags.
1722     *
1723     * @param size register size. Has to be 32 or 64.
1724     * @param dst general purpose register. May not be null or stackpointer..
1725     * @param src1 general purpose register. May not be null or zero-register.
1726     * @param src2 general purpose register. May not be null or stackpointer.
1727     * @param extendType defines how src2 is extended to the same size as src1.
1728     * @param shiftAmt must be in range 0 to 4.
1729     */
1730    protected void adds(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1731        assert !dst.equals(sp);
1732        assert !src1.equals(zr);
1733        assert !src2.equals(sp);
1734        addSubExtendedInstruction(ADDS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1735    }
1736
1737    /**
1738     * dst = src1 - extendType(src2) << imm.
1739     *
1740     * @param size register size. Has to be 32 or 64.
1741     * @param dst general purpose register. May not be null or zero-register..
1742     * @param src1 general purpose register. May not be null or zero-register.
1743     * @param src2 general purpose register. May not be null or stackpointer.
1744     * @param extendType defines how src2 is extended to the same size as src1.
1745     * @param shiftAmt must be in range 0 to 4.
1746     */
1747    protected void sub(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1748        assert !dst.equals(zr);
1749        assert !src1.equals(zr);
1750        assert !src2.equals(sp);
1751        addSubExtendedInstruction(SUB, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1752    }
1753
1754    /**
1755     * dst = src1 - extendType(src2) << imm and sets flags.
1756     *
1757     * @param size register size. Has to be 32 or 64.
1758     * @param dst general purpose register. May not be null or stackpointer..
1759     * @param src1 general purpose register. May not be null or zero-register.
1760     * @param src2 general purpose register. May not be null or stackpointer.
1761     * @param extendType defines how src2 is extended to the same size as src1.
1762     * @param shiftAmt must be in range 0 to 4.
1763     */
1764    public void subs(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) {
1765        assert !dst.equals(sp);
1766        assert !src1.equals(zr);
1767        assert !src2.equals(sp);
1768        addSubExtendedInstruction(SUBS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size));
1769    }
1770
1771    private void addSubExtendedInstruction(Instruction instr, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt, InstructionType type) {
1772        assert shiftAmt >= 0 && shiftAmt <= 4;
1773        emitInt(type.encoding | instr.encoding | AddSubExtendedOp | shiftAmt << ImmediateOffset | extendType.encoding << ExtendTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1774    }
1775
1776    /* Logical (shifted register) (5.5.3) */
1777    /**
1778     * dst = src1 & shiftType(src2, imm).
1779     *
1780     * @param size register size. Has to be 32 or 64.
1781     * @param dst general purpose register. May not be null or stackpointer.
1782     * @param src1 general purpose register. May not be null or stackpointer.
1783     * @param src2 general purpose register. May not be null or stackpointer.
1784     * @param shiftType all types allowed, may not be null.
1785     * @param shiftAmt must be in range 0 to size - 1.
1786     */
1787    protected void and(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1788        logicalRegInstruction(AND, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1789    }
1790
1791    /**
1792     * dst = src1 & shiftType(src2, imm) and sets condition flags.
1793     *
1794     * @param size register size. Has to be 32 or 64.
1795     * @param dst general purpose register. May not be null or stackpointer.
1796     * @param src1 general purpose register. May not be null or stackpointer.
1797     * @param src2 general purpose register. May not be null or stackpointer.
1798     * @param shiftType all types allowed, may not be null.
1799     * @param shiftAmt must be in range 0 to size - 1.
1800     */
1801    protected void ands(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1802        logicalRegInstruction(ANDS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1803    }
1804
1805    /**
1806     * dst = src1 & ~(shiftType(src2, imm)).
1807     *
1808     * @param size register size. Has to be 32 or 64.
1809     * @param dst general purpose register. May not be null or stackpointer.
1810     * @param src1 general purpose register. May not be null or stackpointer.
1811     * @param src2 general purpose register. May not be null or stackpointer.
1812     * @param shiftType all types allowed, may not be null.
1813     * @param shiftAmt must be in range 0 to size - 1.
1814     */
1815    protected void bic(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1816        logicalRegInstruction(BIC, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1817    }
1818
1819    /**
1820     * dst = src1 & ~(shiftType(src2, imm)) and sets condition flags.
1821     *
1822     * @param size register size. Has to be 32 or 64.
1823     * @param dst general purpose register. May not be null or stackpointer.
1824     * @param src1 general purpose register. May not be null or stackpointer.
1825     * @param src2 general purpose register. May not be null or stackpointer.
1826     * @param shiftType all types allowed, may not be null.
1827     * @param shiftAmt must be in range 0 to size - 1.
1828     */
1829    protected void bics(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1830        logicalRegInstruction(BICS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1831    }
1832
1833    /**
1834     * dst = src1 ^ ~(shiftType(src2, imm)).
1835     *
1836     * @param size register size. Has to be 32 or 64.
1837     * @param dst general purpose register. May not be null or stackpointer.
1838     * @param src1 general purpose register. May not be null or stackpointer.
1839     * @param src2 general purpose register. May not be null or stackpointer.
1840     * @param shiftType all types allowed, may not be null.
1841     * @param shiftAmt must be in range 0 to size - 1.
1842     */
1843    protected void eon(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1844        logicalRegInstruction(EON, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1845    }
1846
1847    /**
1848     * dst = src1 ^ shiftType(src2, imm).
1849     *
1850     * @param size register size. Has to be 32 or 64.
1851     * @param dst general purpose register. May not be null or stackpointer.
1852     * @param src1 general purpose register. May not be null or stackpointer.
1853     * @param src2 general purpose register. May not be null or stackpointer.
1854     * @param shiftType all types allowed, may not be null.
1855     * @param shiftAmt must be in range 0 to size - 1.
1856     */
1857    protected void eor(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1858        logicalRegInstruction(EOR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1859    }
1860
1861    /**
1862     * dst = src1 | shiftType(src2, imm).
1863     *
1864     * @param size register size. Has to be 32 or 64.
1865     * @param dst general purpose register. May not be null or stackpointer.
1866     * @param src1 general purpose register. May not be null or stackpointer.
1867     * @param src2 general purpose register. May not be null or stackpointer.
1868     * @param shiftType all types allowed, may not be null.
1869     * @param shiftAmt must be in range 0 to size - 1.
1870     */
1871    protected void orr(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1872        logicalRegInstruction(ORR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1873    }
1874
1875    /**
1876     * dst = src1 | ~(shiftType(src2, imm)).
1877     *
1878     * @param size register size. Has to be 32 or 64.
1879     * @param dst general purpose register. May not be null or stackpointer.
1880     * @param src1 general purpose register. May not be null or stackpointer.
1881     * @param src2 general purpose register. May not be null or stackpointer.
1882     * @param shiftType all types allowed, may not be null.
1883     * @param shiftAmt must be in range 0 to size - 1.
1884     */
1885    protected void orn(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) {
1886        logicalRegInstruction(ORN, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size));
1887    }
1888
1889    private void logicalRegInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt, InstructionType type) {
1890        assert !dst.equals(sp);
1891        assert !src1.equals(sp);
1892        assert !src2.equals(sp);
1893        assert shiftAmt >= 0 && shiftAmt < type.width;
1894        emitInt(type.encoding | instr.encoding | LogicalShiftOp | shiftAmt << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2));
1895    }
1896
1897    /* Variable Shift (5.5.4) */
1898    /**
1899     * dst = src1 >> (src2 & log2(size)).
1900     *
1901     * @param size register size. Has to be 32 or 64.
1902     * @param dst general purpose register. May not be null or stackpointer.
1903     * @param src1 general purpose register. May not be null or stackpointer.
1904     * @param src2 general purpose register. May not be null or stackpointer.
1905     */
1906    protected void asr(int size, Register dst, Register src1, Register src2) {
1907        dataProcessing2SourceOp(ASRV, dst, src1, src2, generalFromSize(size));
1908    }
1909
1910    /**
1911     * dst = src1 << (src2 & log2(size)).
1912     *
1913     * @param size register size. Has to be 32 or 64.
1914     * @param dst general purpose register. May not be null or stackpointer.
1915     * @param src1 general purpose register. May not be null or stackpointer.
1916     * @param src2 general purpose register. May not be null or stackpointer.
1917     */
1918    protected void lsl(int size, Register dst, Register src1, Register src2) {
1919        dataProcessing2SourceOp(LSLV, dst, src1, src2, generalFromSize(size));
1920    }
1921
1922    /**
1923     * dst = src1 >>> (src2 & log2(size)).
1924     *
1925     * @param size register size. Has to be 32 or 64.
1926     * @param dst general purpose register. May not be null or stackpointer.
1927     * @param src1 general purpose register. May not be null or stackpointer.
1928     * @param src2 general purpose register. May not be null or stackpointer.
1929     */
1930    protected void lsr(int size, Register dst, Register src1, Register src2) {
1931        dataProcessing2SourceOp(LSRV, dst, src1, src2, generalFromSize(size));
1932    }
1933
1934    /**
1935     * dst = rotateRight(src1, (src2 & log2(size))).
1936     *
1937     * @param size register size. Has to be 32 or 64.
1938     * @param dst general purpose register. May not be null or stackpointer.
1939     * @param src1 general purpose register. May not be null or stackpointer.
1940     * @param src2 general purpose register. May not be null or stackpointer.
1941     */
1942    protected void ror(int size, Register dst, Register src1, Register src2) {
1943        dataProcessing2SourceOp(RORV, dst, src1, src2, generalFromSize(size));
1944    }
1945
1946    /* Bit Operations (5.5.5) */
1947
1948    /**
1949     * Counts leading sign bits. Sets Wd to the number of consecutive bits following the topmost bit
1950     * in dst, that are the same as the topmost bit. The count does not include the topmost bit
1951     * itself , so the result will be in the range 0 to size-1 inclusive.
1952     *
1953     * @param size register size. Has to be 32 or 64.
1954     * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1955     * @param src source register. May not be null, zero-register or the stackpointer.
1956     */
1957    protected void cls(int size, Register dst, Register src) {
1958        dataProcessing1SourceOp(CLS, dst, src, generalFromSize(size));
1959    }
1960
1961    /**
1962     * Counts leading zeros.
1963     *
1964     * @param size register size. Has to be 32 or 64.
1965     * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1966     * @param src source register. May not be null, zero-register or the stackpointer.
1967     */
1968    public void clz(int size, Register dst, Register src) {
1969        dataProcessing1SourceOp(CLZ, dst, src, generalFromSize(size));
1970    }
1971
1972    /**
1973     * Reverses bits.
1974     *
1975     * @param size register size. Has to be 32 or 64.
1976     * @param dst general purpose register. May not be null, zero-register or the stackpointer.
1977     * @param src source register. May not be null, zero-register or the stackpointer.
1978     */
1979    public void rbit(int size, Register dst, Register src) {
1980        dataProcessing1SourceOp(RBIT, dst, src, generalFromSize(size));
1981    }
1982
1983    /**
1984     * Reverses bytes.
1985     *
1986     * @param size register size. Has to be 32 or 64.
1987     * @param dst general purpose register. May not be null or the stackpointer.
1988     * @param src source register. May not be null or the stackpointer.
1989     */
1990    public void rev(int size, Register dst, Register src) {
1991        if (size == 64) {
1992            dataProcessing1SourceOp(REVX, dst, src, generalFromSize(size));
1993        } else {
1994            assert size == 32;
1995            dataProcessing1SourceOp(REVW, dst, src, generalFromSize(size));
1996        }
1997    }
1998
1999    /* Conditional Data Processing (5.5.6) */
2000
2001    /**
2002     * Conditional select. dst = src1 if condition else src2.
2003     *
2004     * @param size register size. Has to be 32 or 64.
2005     * @param dst general purpose register. May not be null or the stackpointer.
2006     * @param src1 general purpose register. May not be null or the stackpointer.
2007     * @param src2 general purpose register. May not be null or the stackpointer.
2008     * @param condition any condition flag. May not be null.
2009     */
2010    protected void csel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2011        conditionalSelectInstruction(CSEL, dst, src1, src2, condition, generalFromSize(size));
2012    }
2013
2014    /**
2015     * Conditional select negate. dst = src1 if condition else -src2.
2016     *
2017     * @param size register size. Has to be 32 or 64.
2018     * @param dst general purpose register. May not be null or the stackpointer.
2019     * @param src1 general purpose register. May not be null or the stackpointer.
2020     * @param src2 general purpose register. May not be null or the stackpointer.
2021     * @param condition any condition flag. May not be null.
2022     */
2023    protected void csneg(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2024        conditionalSelectInstruction(CSNEG, dst, src1, src2, condition, generalFromSize(size));
2025    }
2026
2027    /**
2028     * Conditional increase. dst = src1 if condition else src2 + 1.
2029     *
2030     * @param size register size. Has to be 32 or 64.
2031     * @param dst general purpose register. May not be null or the stackpointer.
2032     * @param src1 general purpose register. May not be null or the stackpointer.
2033     * @param src2 general purpose register. May not be null or the stackpointer.
2034     * @param condition any condition flag. May not be null.
2035     */
2036    protected void csinc(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2037        conditionalSelectInstruction(CSINC, dst, src1, src2, condition, generalFromSize(size));
2038    }
2039
2040    private void conditionalSelectInstruction(Instruction instr, Register dst, Register src1, Register src2, ConditionFlag condition, InstructionType type) {
2041        assert !dst.equals(sp);
2042        assert !src1.equals(sp);
2043        assert !src2.equals(sp);
2044        emitInt(type.encoding | instr.encoding | ConditionalSelectOp | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset);
2045    }
2046
2047    /* Integer Multiply/Divide (5.6) */
2048
2049    /**
2050     * dst = src1 * src2 + src3.
2051     *
2052     * @param size register size. Has to be 32 or 64.
2053     * @param dst general purpose register. May not be null or the stackpointer.
2054     * @param src1 general purpose register. May not be null or the stackpointer.
2055     * @param src2 general purpose register. May not be null or the stackpointer.
2056     * @param src3 general purpose register. May not be null or the stackpointer.
2057     */
2058    protected void madd(int size, Register dst, Register src1, Register src2, Register src3) {
2059        mulInstruction(MADD, dst, src1, src2, src3, generalFromSize(size));
2060    }
2061
2062    /**
2063     * dst = src3 - src1 * src2.
2064     *
2065     * @param size register size. Has to be 32 or 64.
2066     * @param dst general purpose register. May not be null or the stackpointer.
2067     * @param src1 general purpose register. May not be null or the stackpointer.
2068     * @param src2 general purpose register. May not be null or the stackpointer.
2069     * @param src3 general purpose register. May not be null or the stackpointer.
2070     */
2071    protected void msub(int size, Register dst, Register src1, Register src2, Register src3) {
2072        mulInstruction(MSUB, dst, src1, src2, src3, generalFromSize(size));
2073    }
2074
2075    /**
2076     * Signed multiply high. dst = (src1 * src2)[127:64]
2077     *
2078     * @param dst general purpose register. May not be null or the stackpointer.
2079     * @param src1 general purpose register. May not be null or the stackpointer.
2080     * @param src2 general purpose register. May not be null or the stackpointer.
2081     */
2082    protected void smulh(Register dst, Register src1, Register src2) {
2083        assert !dst.equals(sp);
2084        assert !src1.equals(sp);
2085        assert !src2.equals(sp);
2086        emitInt(0b10011011010 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2087    }
2088
2089    /**
2090     * unsigned multiply high. dst = (src1 * src2)[127:64]
2091     *
2092     * @param dst general purpose register. May not be null or the stackpointer.
2093     * @param src1 general purpose register. May not be null or the stackpointer.
2094     * @param src2 general purpose register. May not be null or the stackpointer.
2095     */
2096    protected void umulh(Register dst, Register src1, Register src2) {
2097        assert !dst.equals(sp);
2098        assert !src1.equals(sp);
2099        assert !src2.equals(sp);
2100        emitInt(0b10011011110 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2101    }
2102
2103    /**
2104     * unsigned multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2)
2105     *
2106     * @param dst general purpose register. May not be null or the stackpointer.
2107     * @param src1 general purpose register. May not be null or the stackpointer.
2108     * @param src2 general purpose register. May not be null or the stackpointer.
2109     * @param src3 general purpose register. May not be null or the stackpointer.
2110     */
2111    protected void umaddl(Register dst, Register src1, Register src2, Register src3) {
2112        assert !dst.equals(sp);
2113        assert !src1.equals(sp);
2114        assert !src2.equals(sp);
2115        assert !src3.equals(sp);
2116        emitInt(0b10011011101 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset);
2117    }
2118
2119    /**
2120     * signed multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2)
2121     *
2122     * @param dst general purpose register. May not be null or the stackpointer.
2123     * @param src1 general purpose register. May not be null or the stackpointer.
2124     * @param src2 general purpose register. May not be null or the stackpointer.
2125     * @param src3 general purpose register. May not be null or the stackpointer.
2126     */
2127    public void smaddl(Register dst, Register src1, Register src2, Register src3) {
2128        assert !dst.equals(sp);
2129        assert !src1.equals(sp);
2130        assert !src2.equals(sp);
2131        assert !src3.equals(sp);
2132        emitInt(0b10011011001 << 21 | dst.encoding | rs1(src1) | rs2(src2) | rs3(src3));
2133    }
2134
2135    private void mulInstruction(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) {
2136        assert !dst.equals(sp);
2137        assert !src1.equals(sp);
2138        assert !src2.equals(sp);
2139        assert !src3.equals(sp);
2140        emitInt(type.encoding | instr.encoding | MulOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3));
2141    }
2142
2143    /**
2144     * Signed divide. dst = src1 / src2.
2145     *
2146     * @param size register size. Has to be 32 or 64.
2147     * @param dst general purpose register. May not be null or the stackpointer.
2148     * @param src1 general purpose register. May not be null or the stackpointer.
2149     * @param src2 general purpose register. May not be null or the stackpointer.
2150     */
2151    public void sdiv(int size, Register dst, Register src1, Register src2) {
2152        dataProcessing2SourceOp(SDIV, dst, src1, src2, generalFromSize(size));
2153    }
2154
2155    /**
2156     * Unsigned divide. dst = src1 / src2.
2157     *
2158     * @param size register size. Has to be 32 or 64.
2159     * @param dst general purpose register. May not be null or the stackpointer.
2160     * @param src1 general purpose register. May not be null or the stackpointer.
2161     * @param src2 general purpose register. May not be null or the stackpointer.
2162     */
2163    public void udiv(int size, Register dst, Register src1, Register src2) {
2164        dataProcessing2SourceOp(UDIV, dst, src1, src2, generalFromSize(size));
2165    }
2166
2167    private void dataProcessing1SourceOp(Instruction instr, Register dst, Register src, InstructionType type) {
2168        emitInt(type.encoding | instr.encoding | DataProcessing1SourceOp | rd(dst) | rs1(src));
2169    }
2170
2171    private void dataProcessing2SourceOp(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) {
2172        assert !dst.equals(sp);
2173        assert !src1.equals(sp);
2174        assert !src2.equals(sp);
2175        emitInt(type.encoding | instr.encoding | DataProcessing2SourceOp | rd(dst) | rs1(src1) | rs2(src2));
2176    }
2177
2178    /* Floating point operations */
2179
2180    /* Load-Store Single FP register (5.7.1.1) */
2181    /**
2182     * Floating point load.
2183     *
2184     * @param size number of bits read from memory into rt. Must be 32 or 64.
2185     * @param rt floating point register. May not be null.
2186     * @param address all addressing modes allowed. May not be null.
2187     */
2188    public void fldr(int size, Register rt, AArch64Address address) {
2189        assert rt.getRegisterCategory().equals(SIMD);
2190        assert size == 32 || size == 64;
2191        int transferSize = NumUtil.log2Ceil(size / 8);
2192        loadStoreInstruction(LDR, rt, address, InstructionType.FP32, transferSize);
2193    }
2194
2195    /**
2196     * Floating point store.
2197     *
2198     * @param size number of bits read from memory into rt. Must be 32 or 64.
2199     * @param rt floating point register. May not be null.
2200     * @param address all addressing modes allowed. May not be null.
2201     */
2202    public void fstr(int size, Register rt, AArch64Address address) {
2203        assert rt.getRegisterCategory().equals(SIMD);
2204        assert size == 32 || size == 64;
2205        int transferSize = NumUtil.log2Ceil(size / 8);
2206        loadStoreInstruction(STR, rt, address, InstructionType.FP64, transferSize);
2207    }
2208
2209    /* Floating-point Move (register) (5.7.2) */
2210
2211    /**
2212     * Floating point move.
2213     *
2214     * @param size register size. Has to be 32 or 64.
2215     * @param dst floating point register. May not be null.
2216     * @param src floating point register. May not be null.
2217     */
2218    protected void fmov(int size, Register dst, Register src) {
2219        fpDataProcessing1Source(FMOV, dst, src, floatFromSize(size));
2220    }
2221
2222    /**
2223     * Move size bits from floating point register unchanged to general purpose register.
2224     *
2225     * @param size number of bits read from memory into rt. Must be 32 or 64.
2226     * @param dst general purpose register. May not be null, stack-pointer or zero-register
2227     * @param src floating point register. May not be null.
2228     */
2229    protected void fmovFpu2Cpu(int size, Register dst, Register src) {
2230        assert dst.getRegisterCategory().equals(CPU);
2231        assert src.getRegisterCategory().equals(SIMD);
2232        fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVFPU2CPU);
2233    }
2234
2235    /**
2236     * Move size bits from general purpose register unchanged to floating point register.
2237     *
2238     * @param size register size. Has to be 32 or 64.
2239     * @param dst floating point register. May not be null.
2240     * @param src general purpose register. May not be null or stack-pointer.
2241     */
2242    protected void fmovCpu2Fpu(int size, Register dst, Register src) {
2243        assert dst.getRegisterCategory().equals(SIMD);
2244        assert src.getRegisterCategory().equals(CPU);
2245        fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVCPU2FPU);
2246    }
2247
2248    private void fmovCpuFpuInstruction(Register dst, Register src, boolean is64bit, Instruction instr) {
2249        int sf = is64bit ? FP64.encoding | General64.encoding : FP32.encoding | General32.encoding;
2250        emitInt(sf | instr.encoding | FpConvertOp | rd(dst) | rs1(src));
2251    }
2252
2253    /* Floating-point Move (immediate) (5.7.3) */
2254
2255    /**
2256     * Move immediate into register.
2257     *
2258     * @param size register size. Has to be 32 or 64.
2259     * @param dst floating point register. May not be null.
2260     * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be
2261     *            loaded, i.e. (float) imm == imm must be true. In all cases
2262     *            {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true
2263     *            depending on size.
2264     */
2265    protected void fmov(int size, Register dst, double imm) {
2266        assert dst.getRegisterCategory().equals(SIMD);
2267        InstructionType type = floatFromSize(size);
2268        int immEncoding;
2269        if (type == FP64) {
2270            immEncoding = getDoubleImmediate(imm);
2271        } else {
2272            assert imm == (float) imm : "float mov must use an immediate that can be represented using a float.";
2273            immEncoding = getFloatImmediate((float) imm);
2274        }
2275        emitInt(type.encoding | FMOV.encoding | FpImmOp | immEncoding | rd(dst));
2276    }
2277
2278    private static int getDoubleImmediate(double imm) {
2279        assert isDoubleImmediate(imm);
2280        // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
2281        // 0000.0000.0000.0000.0000.0000.0000.0000
2282        long repr = Double.doubleToRawLongBits(imm);
2283        int a = (int) (repr >>> 63) << 7;
2284        int b = (int) ((repr >>> 61) & 0x1) << 6;
2285        int cToH = (int) (repr >>> 48) & 0x3f;
2286        return (a | b | cToH) << FpImmOffset;
2287    }
2288
2289    protected static boolean isDoubleImmediate(double imm) {
2290        // Valid values will have the form:
2291        // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
2292        // 0000.0000.0000.0000.0000.0000.0000.0000
2293        long bits = Double.doubleToRawLongBits(imm);
2294        // lower 48 bits are cleared
2295        if ((bits & NumUtil.getNbitNumberLong(48)) != 0) {
2296            return false;
2297        }
2298        // bits[61..54] are all set or all cleared.
2299        long pattern = (bits >> 54) & NumUtil.getNbitNumberLong(7);
2300        if (pattern != 0 && pattern != NumUtil.getNbitNumberLong(7)) {
2301            return false;
2302        }
2303        // bits[62] and bits[61] are opposites.
2304        return ((bits ^ (bits << 1)) & (1L << 62)) != 0;
2305    }
2306
2307    private static int getFloatImmediate(float imm) {
2308        assert isFloatImmediate(imm);
2309        // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
2310        int repr = Float.floatToRawIntBits(imm);
2311        int a = (repr >>> 31) << 7;
2312        int b = ((repr >>> 29) & 0x1) << 6;
2313        int cToH = (repr >>> 19) & NumUtil.getNbitNumberInt(6);
2314        return (a | b | cToH) << FpImmOffset;
2315    }
2316
2317    protected static boolean isFloatImmediate(float imm) {
2318        // Valid values will have the form:
2319        // aBbb.bbbc.defg.h000.0000.0000.0000.0000
2320        int bits = Float.floatToRawIntBits(imm);
2321        // lower 20 bits are cleared.
2322        if ((bits & NumUtil.getNbitNumberInt(19)) != 0) {
2323            return false;
2324        }
2325        // bits[29..25] are all set or all cleared
2326        int pattern = (bits >> 25) & NumUtil.getNbitNumberInt(5);
2327        if (pattern != 0 && pattern != NumUtil.getNbitNumberInt(5)) {
2328            return false;
2329        }
2330        // bits[29] and bits[30] have to be opposite
2331        return ((bits ^ (bits << 1)) & (1 << 30)) != 0;
2332    }
2333
2334    /* Convert Floating-point Precision (5.7.4.1) */
2335    /* Converts float to double and vice-versa */
2336
2337    /**
2338     * Convert float to double and vice-versa.
2339     *
2340     * @param srcSize size of source register in bits.
2341     * @param dst floating point register. May not be null.
2342     * @param src floating point register. May not be null.
2343     */
2344    public void fcvt(int srcSize, Register dst, Register src) {
2345        if (srcSize == 32) {
2346            fpDataProcessing1Source(FCVTDS, dst, src, floatFromSize(srcSize));
2347        } else {
2348            fpDataProcessing1Source(FCVTSD, dst, src, floatFromSize(srcSize));
2349        }
2350    }
2351
2352    /* Convert to Integer (5.7.4.2) */
2353
2354    /**
2355     * Convert floating point to integer. Rounds towards zero.
2356     *
2357     * @param targetSize size of integer register. 32 or 64.
2358     * @param srcSize size of floating point register. 32 or 64.
2359     * @param dst general purpose register. May not be null, the zero-register or the stackpointer.
2360     * @param src floating point register. May not be null.
2361     */
2362    public void fcvtzs(int targetSize, int srcSize, Register dst, Register src) {
2363        assert !dst.equals(zr) && !dst.equals(sp);
2364        assert src.getRegisterCategory().equals(SIMD);
2365        fcvtCpuFpuInstruction(FCVTZS, dst, src, generalFromSize(targetSize), floatFromSize(srcSize));
2366    }
2367
2368    /* Convert from Integer (5.7.4.2) */
2369    /**
2370     * Converts integer to floating point. Uses rounding mode defined by FCPR.
2371     *
2372     * @param targetSize size of floating point register. 32 or 64.
2373     * @param srcSize size of integer register. 32 or 64.
2374     * @param dst floating point register. May not be null.
2375     * @param src general purpose register. May not be null or the stackpointer.
2376     */
2377    public void scvtf(int targetSize, int srcSize, Register dst, Register src) {
2378        assert dst.getRegisterCategory().equals(SIMD);
2379        assert !src.equals(sp);
2380        fcvtCpuFpuInstruction(SCVTF, dst, src, floatFromSize(targetSize), generalFromSize(srcSize));
2381    }
2382
2383    private void fcvtCpuFpuInstruction(Instruction instr, Register dst, Register src, InstructionType type1, InstructionType type2) {
2384        emitInt(type1.encoding | type2.encoding | instr.encoding | FpConvertOp | rd(dst) | rs1(src));
2385    }
2386
2387    /* Floating-point Round to Integral (5.7.5) */
2388
2389    /**
2390     * Rounds floating-point to integral. Rounds towards zero.
2391     *
2392     * @param size register size.
2393     * @param dst floating point register. May not be null.
2394     * @param src floating point register. May not be null.
2395     */
2396    protected void frintz(int size, Register dst, Register src) {
2397        fpDataProcessing1Source(FRINTZ, dst, src, floatFromSize(size));
2398    }
2399
2400    /* Floating-point Arithmetic (1 source) (5.7.6) */
2401
2402    /**
2403     * dst = |src|.
2404     *
2405     * @param size register size.
2406     * @param dst floating point register. May not be null.
2407     * @param src floating point register. May not be null.
2408     */
2409    public void fabs(int size, Register dst, Register src) {
2410        fpDataProcessing1Source(FABS, dst, src, floatFromSize(size));
2411    }
2412
2413    /**
2414     * dst = -neg.
2415     *
2416     * @param size register size.
2417     * @param dst floating point register. May not be null.
2418     * @param src floating point register. May not be null.
2419     */
2420    public void fneg(int size, Register dst, Register src) {
2421        fpDataProcessing1Source(FNEG, dst, src, floatFromSize(size));
2422    }
2423
2424    /**
2425     * dst = Sqrt(src).
2426     *
2427     * @param size register size.
2428     * @param dst floating point register. May not be null.
2429     * @param src floating point register. May not be null.
2430     */
2431    public void fsqrt(int size, Register dst, Register src) {
2432        fpDataProcessing1Source(FSQRT, dst, src, floatFromSize(size));
2433    }
2434
2435    private void fpDataProcessing1Source(Instruction instr, Register dst, Register src, InstructionType type) {
2436        assert dst.getRegisterCategory().equals(SIMD);
2437        assert src.getRegisterCategory().equals(SIMD);
2438        emitInt(type.encoding | instr.encoding | Fp1SourceOp | rd(dst) | rs1(src));
2439    }
2440
2441    /* Floating-point Arithmetic (2 source) (5.7.7) */
2442
2443    /**
2444     * dst = src1 + src2.
2445     *
2446     * @param size register size.
2447     * @param dst floating point register. May not be null.
2448     * @param src1 floating point register. May not be null.
2449     * @param src2 floating point register. May not be null.
2450     */
2451    public void fadd(int size, Register dst, Register src1, Register src2) {
2452        fpDataProcessing2Source(FADD, dst, src1, src2, floatFromSize(size));
2453    }
2454
2455    /**
2456     * dst = src1 - src2.
2457     *
2458     * @param size register size.
2459     * @param dst floating point register. May not be null.
2460     * @param src1 floating point register. May not be null.
2461     * @param src2 floating point register. May not be null.
2462     */
2463    public void fsub(int size, Register dst, Register src1, Register src2) {
2464        fpDataProcessing2Source(FSUB, dst, src1, src2, floatFromSize(size));
2465    }
2466
2467    /**
2468     * dst = src1 * src2.
2469     *
2470     * @param size register size.
2471     * @param dst floating point register. May not be null.
2472     * @param src1 floating point register. May not be null.
2473     * @param src2 floating point register. May not be null.
2474     */
2475    public void fmul(int size, Register dst, Register src1, Register src2) {
2476        fpDataProcessing2Source(FMUL, dst, src1, src2, floatFromSize(size));
2477    }
2478
2479    /**
2480     * dst = src1 / src2.
2481     *
2482     * @param size register size.
2483     * @param dst floating point register. May not be null.
2484     * @param src1 floating point register. May not be null.
2485     * @param src2 floating point register. May not be null.
2486     */
2487    public void fdiv(int size, Register dst, Register src1, Register src2) {
2488        fpDataProcessing2Source(FDIV, dst, src1, src2, floatFromSize(size));
2489    }
2490
2491    private void fpDataProcessing2Source(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) {
2492        assert dst.getRegisterCategory().equals(SIMD);
2493        assert src1.getRegisterCategory().equals(SIMD);
2494        assert src2.getRegisterCategory().equals(SIMD);
2495        emitInt(type.encoding | instr.encoding | Fp2SourceOp | rd(dst) | rs1(src1) | rs2(src2));
2496    }
2497
2498    /* Floating-point Multiply-Add (5.7.9) */
2499
2500    /**
2501     * dst = src1 * src2 + src3.
2502     *
2503     * @param size register size.
2504     * @param dst floating point register. May not be null.
2505     * @param src1 floating point register. May not be null.
2506     * @param src2 floating point register. May not be null.
2507     * @param src3 floating point register. May not be null.
2508     */
2509    protected void fmadd(int size, Register dst, Register src1, Register src2, Register src3) {
2510        fpDataProcessing3Source(FMADD, dst, src1, src2, src3, floatFromSize(size));
2511    }
2512
2513    /**
2514     * dst = src3 - src1 * src2.
2515     *
2516     * @param size register size.
2517     * @param dst floating point register. May not be null.
2518     * @param src1 floating point register. May not be null.
2519     * @param src2 floating point register. May not be null.
2520     * @param src3 floating point register. May not be null.
2521     */
2522    protected void fmsub(int size, Register dst, Register src1, Register src2, Register src3) {
2523        fpDataProcessing3Source(FMSUB, dst, src1, src2, src3, floatFromSize(size));
2524    }
2525
2526    private void fpDataProcessing3Source(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) {
2527        assert dst.getRegisterCategory().equals(SIMD);
2528        assert src1.getRegisterCategory().equals(SIMD);
2529        assert src2.getRegisterCategory().equals(SIMD);
2530        assert src3.getRegisterCategory().equals(SIMD);
2531        emitInt(type.encoding | instr.encoding | Fp3SourceOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3));
2532    }
2533
2534    /* Floating-point Comparison (5.7.10) */
2535
2536    /**
2537     * Compares src1 to src2.
2538     *
2539     * @param size register size.
2540     * @param src1 floating point register. May not be null.
2541     * @param src2 floating point register. May not be null.
2542     */
2543    public void fcmp(int size, Register src1, Register src2) {
2544        assert src1.getRegisterCategory().equals(SIMD);
2545        assert src2.getRegisterCategory().equals(SIMD);
2546        InstructionType type = floatFromSize(size);
2547        emitInt(type.encoding | FCMP.encoding | FpCmpOp | rs1(src1) | rs2(src2));
2548    }
2549
2550    /**
2551     * Conditional compare. NZCV = fcmp(src1, src2) if condition else uimm4.
2552     *
2553     * @param size register size.
2554     * @param src1 floating point register. May not be null.
2555     * @param src2 floating point register. May not be null.
2556     * @param uimm4 condition flags that are used if condition is false.
2557     * @param condition every condition allowed. May not be null.
2558     */
2559    public void fccmp(int size, Register src1, Register src2, int uimm4, ConditionFlag condition) {
2560        assert NumUtil.isUnsignedNbit(4, uimm4);
2561        assert src1.getRegisterCategory().equals(SIMD);
2562        assert src2.getRegisterCategory().equals(SIMD);
2563        InstructionType type = floatFromSize(size);
2564        emitInt(type.encoding | FCCMP.encoding | uimm4 | condition.encoding << ConditionalConditionOffset | rs1(src1) | rs2(src2));
2565    }
2566
2567    /**
2568     * Compare register to 0.0 .
2569     *
2570     * @param size register size.
2571     * @param src floating point register. May not be null.
2572     */
2573    public void fcmpZero(int size, Register src) {
2574        assert src.getRegisterCategory().equals(SIMD);
2575        InstructionType type = floatFromSize(size);
2576        emitInt(type.encoding | FCMPZERO.encoding | FpCmpOp | rs1(src));
2577    }
2578
2579    /* Floating-point Conditional Select (5.7.11) */
2580
2581    /**
2582     * Conditional select. dst = src1 if condition else src2.
2583     *
2584     * @param size register size.
2585     * @param dst floating point register. May not be null.
2586     * @param src1 floating point register. May not be null.
2587     * @param src2 floating point register. May not be null.
2588     * @param condition every condition allowed. May not be null.
2589     */
2590    protected void fcsel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) {
2591        assert dst.getRegisterCategory().equals(SIMD);
2592        assert src1.getRegisterCategory().equals(SIMD);
2593        assert src2.getRegisterCategory().equals(SIMD);
2594        InstructionType type = floatFromSize(size);
2595        emitInt(type.encoding | FCSEL.encoding | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset);
2596    }
2597
2598    /* Debug exceptions (5.9.1.2) */
2599
2600    /**
2601     * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as
2602     * UNALLOCATED instruction.
2603     *
2604     * @param uimm16 Arbitrary 16-bit unsigned payload.
2605     */
2606    protected void hlt(int uimm16) {
2607        exceptionInstruction(HLT, uimm16);
2608    }
2609
2610    /**
2611     * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher
2612     * exception level.
2613     *
2614     * @param uimm16 Arbitrary 16-bit unsigned payload.
2615     */
2616    protected void brk(int uimm16) {
2617        exceptionInstruction(BRK, uimm16);
2618    }
2619
2620    private void exceptionInstruction(Instruction instr, int uimm16) {
2621        assert NumUtil.isUnsignedNbit(16, uimm16);
2622        emitInt(instr.encoding | ExceptionOp | uimm16 << SystemImmediateOffset);
2623    }
2624
2625    /* Architectural hints (5.9.4) */
2626    public enum SystemHint {
2627        NOP(0x0),
2628        YIELD(0x1),
2629        WFE(0x2),
2630        WFI(0x3),
2631        SEV(0x4),
2632        SEVL(0x5);
2633
2634        private final int encoding;
2635
2636        SystemHint(int encoding) {
2637            this.encoding = encoding;
2638        }
2639    }
2640
2641    /**
2642     * Architectural hints.
2643     *
2644     * @param hint Can be any of the defined hints. May not be null.
2645     */
2646    protected void hint(SystemHint hint) {
2647        emitInt(HINT.encoding | hint.encoding << SystemImmediateOffset);
2648    }
2649
2650    /**
2651     * Clear Exclusive: clears the local record of the executing processor that an address has had a
2652     * request for an exclusive access.
2653     */
2654    protected void clrex() {
2655        emitInt(CLREX.encoding);
2656    }
2657
2658    /**
2659     * Possible barrier definitions for Aarch64. LOAD_LOAD and LOAD_STORE map to the same underlying
2660     * barrier.
2661     *
2662     * We only need synchronization across the inner shareable domain (see B2-90 in the Reference
2663     * documentation).
2664     */
2665    public enum BarrierKind {
2666        LOAD_LOAD(0x9, "ISHLD"),
2667        LOAD_STORE(0x9, "ISHLD"),
2668        STORE_STORE(0xA, "ISHST"),
2669        ANY_ANY(0xB, "ISH");
2670
2671        public final int encoding;
2672        public final String optionName;
2673
2674        BarrierKind(int encoding, String optionName) {
2675            this.encoding = encoding;
2676            this.optionName = optionName;
2677        }
2678    }
2679
2680    /**
2681     * Data Memory Barrier.
2682     *
2683     * @param barrierKind barrier that is issued. May not be null.
2684     */
2685    public void dmb(BarrierKind barrierKind) {
2686        emitInt(DMB.encoding | BarrierOp | barrierKind.encoding << BarrierKindOffset);
2687    }
2688
2689}
2690