1/*
2 * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23package org.graalvm.compiler.lir.amd64;
24
25import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL;
26import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
27import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK;
28import static jdk.vm.ci.code.ValueUtil.asRegister;
29
30import org.graalvm.compiler.asm.Label;
31import org.graalvm.compiler.asm.amd64.AMD64Address;
32import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
33import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
34import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
35import org.graalvm.compiler.core.common.LIRKind;
36import org.graalvm.compiler.debug.GraalError;
37import org.graalvm.compiler.lir.LIRInstructionClass;
38import org.graalvm.compiler.lir.Opcode;
39import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
40import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
41import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
42
43import jdk.vm.ci.amd64.AMD64;
44import jdk.vm.ci.amd64.AMD64.CPUFeature;
45import jdk.vm.ci.amd64.AMD64Kind;
46import jdk.vm.ci.code.Register;
47import jdk.vm.ci.meta.AllocatableValue;
48import jdk.vm.ci.meta.Value;
49
50public final class AMD64MathIntrinsicUnaryOp extends AMD64LIRInstruction {
51    public static final LIRInstructionClass<AMD64MathIntrinsicUnaryOp> TYPE = LIRInstructionClass.create(AMD64MathIntrinsicUnaryOp.class);
52
53    public enum UnaryIntrinsicOpcode {
54        LOG,
55        LOG10,
56        SIN,
57        COS,
58        TAN,
59        EXP
60    }
61
62    @Opcode private final UnaryIntrinsicOpcode opcode;
63    @Def protected Value result;
64    @Use protected Value input;
65    @Temp({REG, ILLEGAL}) protected Value xmm1Temp = Value.ILLEGAL;
66    @Temp({REG, ILLEGAL}) protected Value xmm2Temp = Value.ILLEGAL;
67    @Temp({REG, ILLEGAL}) protected Value xmm3Temp = Value.ILLEGAL;
68    @Temp({REG, ILLEGAL}) protected Value xmm4Temp = Value.ILLEGAL;
69    @Temp({REG, ILLEGAL}) protected Value xmm5Temp = Value.ILLEGAL;
70    @Temp({REG, ILLEGAL}) protected Value xmm6Temp = Value.ILLEGAL;
71    @Temp({REG, ILLEGAL}) protected Value xmm7Temp = Value.ILLEGAL;
72    @Temp({REG, ILLEGAL}) protected Value xmm8Temp = Value.ILLEGAL;
73    @Temp({REG, ILLEGAL}) protected Value xmm9Temp = Value.ILLEGAL;
74    @Temp({REG, ILLEGAL}) protected Value xmm10Temp = Value.ILLEGAL;
75    @Temp({REG, ILLEGAL}) protected Value gpr1Temp = Value.ILLEGAL;
76    @Temp({REG, ILLEGAL}) protected Value gpr2Temp = Value.ILLEGAL;
77    @Temp protected AllocatableValue rcxTemp;
78    @Temp({REG, ILLEGAL}) protected Value gpr4Temp = Value.ILLEGAL;
79    @Temp({REG, ILLEGAL}) protected Value gpr5Temp = Value.ILLEGAL;
80    @Temp({REG, ILLEGAL}) protected Value gpr6Temp = Value.ILLEGAL;
81    @Temp({REG, ILLEGAL}) protected Value gpr7Temp = Value.ILLEGAL;
82    @Temp({REG, ILLEGAL}) protected Value gpr8Temp = Value.ILLEGAL;
83    @Temp({REG, ILLEGAL}) protected Value gpr9Temp = Value.ILLEGAL;
84    @Temp({REG, ILLEGAL}) protected Value gpr10Temp = Value.ILLEGAL;
85    @Temp({STACK, ILLEGAL}) protected Value stackTemp = Value.ILLEGAL;
86
87    CompilationResultBuilder internalCrb;
88
89    public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input, Value stackTemp) {
90        super(TYPE);
91        this.opcode = opcode;
92        this.result = result;
93        this.input = input;
94        if (opcode == UnaryIntrinsicOpcode.LOG || opcode == UnaryIntrinsicOpcode.LOG10 ||
95                        opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS ||
96                        opcode == UnaryIntrinsicOpcode.TAN || opcode == UnaryIntrinsicOpcode.EXP) {
97            this.gpr1Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
98            this.gpr2Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
99            this.rcxTemp = AMD64.rcx.asValue(LIRKind.value(AMD64Kind.QWORD));
100            this.gpr4Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
101            this.xmm1Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
102            this.xmm2Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
103            this.xmm3Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
104            this.xmm4Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
105            this.xmm5Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
106            this.xmm6Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
107            this.xmm7Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
108
109            if (opcode == UnaryIntrinsicOpcode.EXP) {
110                this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
111                this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
112                this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
113                this.xmm10Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
114            }
115
116            if (opcode == UnaryIntrinsicOpcode.TAN) {
117                this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
118                this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
119                this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
120                this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
121                this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
122                this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
123            }
124
125            if (opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS) {
126                this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
127                this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
128                this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
129                this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
130                this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
131                this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
132                this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
133                this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
134            }
135
136            this.stackTemp = stackTemp;
137        }
138    }
139
140    public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input) {
141        this(tool, opcode, result, input, Value.ILLEGAL);
142    }
143
144    private void setCrb(CompilationResultBuilder crb) {
145        internalCrb = crb;
146    }
147
148    private AMD64Address externalAddress(ArrayDataPointerConstant curPtr) {
149        return (AMD64Address) internalCrb.recordDataReferenceInCode(curPtr);
150    }
151
152    @Override
153    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
154        switch (opcode) {
155            case LOG:
156                logIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm);
157                break;
158            case LOG10:
159                log10Intrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm);
160                break;
161            case SIN:
162                sinIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm);
163                break;
164            case COS:
165                cosIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm);
166                break;
167            case TAN:
168                tanIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm);
169                break;
170            case EXP:
171                expIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm);
172                break;
173            default:
174                throw GraalError.shouldNotReachHere();
175        }
176    }
177
178    private static int[] logTwoTable = {
179                    0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800,
180                    0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40,
181                    0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd,
182                    0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4,
183                    0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00,
184                    0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15,
185                    0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7,
186                    0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79,
187                    0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800,
188                    0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 0xad9d8c00, 0x3fe35028,
189                    0x9527e6ac, 0x3d10b83f, 0x44340800, 0x3fe315da, 0xc5a0ed9c,
190                    0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5,
191                    0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00,
192                    0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0,
193                    0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3,
194                    0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde,
195                    0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800,
196                    0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8,
197                    0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742,
198                    0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b,
199                    0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400,
200                    0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a,
201                    0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935,
202                    0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5,
203                    0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000,
204                    0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a,
205                    0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a,
206                    0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf,
207                    0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800,
208                    0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60,
209                    0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d,
210                    0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525,
211                    0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800,
212                    0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344,
213                    0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0,
214                    0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313,
215                    0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000,
216                    0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855,
217                    0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a,
218                    0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d,
219                    0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 0xe6af1800,
220                    0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 0x3fd68ac8,
221                    0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00,
222                    0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a,
223                    0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000,
224                    0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0,
225                    0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675,
226                    0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436,
227                    0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000,
228                    0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1,
229                    0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4,
230                    0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1,
231                    0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800,
232                    0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c,
233                    0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c,
234                    0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b,
235                    0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800,
236                    0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c,
237                    0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11,
238                    0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227,
239                    0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000,
240                    0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb,
241                    0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6,
242                    0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d,
243                    0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000,
244                    0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a,
245                    0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1,
246                    0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb,
247                    0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000,
248                    0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e,
249                    0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c,
250                    0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8,
251                    0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000,
252                    0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc,
253                    0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1,
254                    0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23,
255                    0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000,
256                    0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598,
257                    0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd,
258                    0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068,
259                    0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000,
260                    0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8,
261                    0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8,
262                    0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c,
263                    0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000,
264                    0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a,
265                    0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef,
266                    0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e,
267                    0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000,
268                    0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598,
269                    0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b,
270                    0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2,
271                    0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000,
272                    0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94,
273                    0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28,
274                    0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a,
275                    0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000,
276                    0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925,
277                    0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe,
278                    0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b,
279                    0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000,
280                    0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805,
281                    0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000,
282                    0x80000000
283    };
284
285    private static int[] logTwoData = {
286                    0xfefa3800, 0x3fa62e42, 0x93c76730, 0x3ceef357
287    };
288
289    private static int[] coeffLogTwoData = {
290                    0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000, 0x3d6fb175,
291                    0xbfc5555e, 0x55555555, 0x3fd55555, 0x9999999a, 0x3fc99999,
292                    0x00000000, 0xbfe00000
293    };
294
    /*
     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
     * Source Code
     *
     * ALGORITHM DESCRIPTION - LOG()
     * ---------------------
     *
     * x = 2^k * mx, mx in [1,2)
     *
     * Get B ~ 1/mx based on the output of the rcpps instruction (B0):
     * B = int((B0*2^7+0.5))/2^7
     *
     * Reduced argument: r = B*mx - 1.0 (computed accurately in high and low parts)
     *
     * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6), where
     * - p(r) is a degree 7 polynomial,
     * - -log(B) is read from the data table (high, low parts),
     * - the result is formed from high and low parts.
     *
     * Special cases:
     * - log(NaN) = quiet NaN, and raise invalid exception
     * - log(+INF) = that INF
     * - log(0) = -INF with divide-by-zero exception raised
     * - log(1) = +0
     * - log(x) = NaN with invalid exception raised if x < -0, including -INF
     */
316
317    public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
318        ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16);
319        ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16);
320        ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16);
321
322        Label bb0 = new Label();
323        Label bb1 = new Label();
324        Label bb2 = new Label();
325        Label bb3 = new Label();
326        Label bb4 = new Label();
327        Label bb5 = new Label();
328        Label bb6 = new Label();
329        Label bb7 = new Label();
330        Label bb8 = new Label();
331
332        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
333        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
334        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
335        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
336
337        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
338        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
339        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
340        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
341        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
342        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
343        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
344
345        AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
346
347        setCrb(crb);
348        masm.movdq(stackSlot, value);
349        if (dest.encoding != value.encoding) {
350            masm.movdqu(dest, value);
351        }
352        masm.movq(gpr1, 0x3ff0000000000000L);
353        masm.movdq(temp2, gpr1);
354        masm.movq(gpr3, 0x77f0000000000000L);
355        masm.movdq(temp3, gpr3);
356        masm.movl(gpr2, 32768);
357        masm.movdl(temp4, gpr2);
358        masm.movq(gpr2, 0xffffe00000000000L);
359        masm.movdq(temp5, gpr2);
360        masm.movdqu(temp1, value);
361        masm.pextrw(gpr1, dest, 3);
362        masm.por(dest, temp2);
363        masm.movl(gpr2, 16352);
364        masm.psrlq(dest, 27);
365        masm.leaq(gpr4, externalAddress(logTwoTablePtr));
366        masm.psrld(dest, 2);
367        masm.rcpps(dest, dest);
368        masm.psllq(temp1, 12);
369        masm.pshufd(temp6, temp5, 0xE4);
370        masm.psrlq(temp1, 12);
371        masm.subl(gpr1, 16);
372        masm.cmpl(gpr1, 32736);
373        masm.jcc(ConditionFlag.AboveEqual, bb0);
374
375        masm.bind(bb1);
376        masm.paddd(dest, temp4);
377        masm.por(temp1, temp3);
378        masm.movdl(gpr3, dest);
379        masm.psllq(dest, 29);
380        masm.pand(temp5, temp1);
381        masm.pand(dest, temp6);
382        masm.subsd(temp1, temp5);
383        masm.mulpd(temp5, dest);
384        masm.andl(gpr1, 32752);
385        masm.subl(gpr1, gpr2);
386        masm.cvtsi2sdl(temp7, gpr1);
387        masm.mulsd(temp1, dest);
388        masm.movdq(temp6, externalAddress(logTwoDataPtr));                                    // 0xfefa3800,
389                                                                                              // 0x3fa62e42
390        masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr));                              // 0x92492492,
391                                                                                              // 0x3fc24924,
392                                                                                              // 0x00000000,
393                                                                                              // 0xbfd00000
394        masm.subsd(temp5, temp2);
395        masm.andl(gpr3, 16711680);
396        masm.shrl(gpr3, 12);
397        masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0));
398        masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr));
399        masm.movdqu(temp4, new AMD64Address(gpr4, 16));                                       // 0x3d6fb175,
400                                                                                              // 0xbfc5555e,
401                                                                                              // 0x55555555,
402                                                                                              // 0x3fd55555
403        masm.addsd(temp1, temp5);
404        masm.movdqu(temp2, new AMD64Address(gpr4, 32));                                       // 0x9999999a,
405                                                                                              // 0x3fc99999,
406                                                                                              // 0x00000000,
407                                                                                              // 0xbfe00000
408        masm.mulsd(temp6, temp7);
409        if (masm.supports(CPUFeature.SSE3)) {
410            masm.movddup(temp5, temp1);
411        } else {
412            masm.movdqu(temp5, temp1);
413            masm.movlhps(temp5, temp5);
414        }
415        masm.leaq(gpr4, externalAddress(logTwoDataPtr));
416        masm.mulsd(temp7, new AMD64Address(gpr4, 8));                                         // 0x93c76730,
417                                                                                              // 0x3ceef357
418        masm.mulsd(temp3, temp1);
419        masm.addsd(dest, temp6);
420        masm.mulpd(temp4, temp5);
421        masm.mulpd(temp5, temp5);
422        if (masm.supports(CPUFeature.SSE3)) {
423            masm.movddup(temp6, dest);
424        } else {
425            masm.movdqu(temp6, dest);
426            masm.movlhps(temp6, temp6);
427        }
428        masm.addsd(dest, temp1);
429        masm.addpd(temp4, temp2);
430        masm.mulpd(temp3, temp5);
431        masm.subsd(temp6, dest);
432        masm.mulsd(temp4, temp1);
433        masm.pshufd(temp2, dest, 0xEE);
434        masm.addsd(temp1, temp6);
435        masm.mulsd(temp5, temp5);
436        masm.addsd(temp7, temp2);
437        masm.addpd(temp4, temp3);
438        masm.addsd(temp1, temp7);
439        masm.mulpd(temp4, temp5);
440        masm.addsd(temp1, temp4);
441        masm.pshufd(temp5, temp4, 0xEE);
442        masm.addsd(temp1, temp5);
443        masm.addsd(dest, temp1);
444        masm.jmp(bb8);
445
446        masm.bind(bb0);
447        masm.movdq(dest, stackSlot);
448        masm.movdq(temp1, stackSlot);
449        masm.addl(gpr1, 16);
450        masm.cmpl(gpr1, 32768);
451        masm.jcc(ConditionFlag.AboveEqual, bb2);
452
453        masm.cmpl(gpr1, 16);
454        masm.jcc(ConditionFlag.Below, bb3);
455
456        masm.bind(bb4);
457        masm.addsd(dest, dest);
458        masm.jmp(bb8);
459
460        masm.bind(bb5);
461        masm.jcc(ConditionFlag.Above, bb4);
462
463        masm.cmpl(gpr3, 0);
464        masm.jcc(ConditionFlag.Above, bb4);
465
466        masm.jmp(bb6);
467
468        masm.bind(bb3);
469        masm.xorpd(temp1, temp1);
470        masm.addsd(temp1, dest);
471        masm.movdl(gpr3, temp1);
472        masm.psrlq(temp1, 32);
473        masm.movdl(gpr2, temp1);
474        masm.orl(gpr3, gpr2);
475        masm.cmpl(gpr3, 0);
476        masm.jcc(ConditionFlag.Equal, bb7);
477
478        masm.xorpd(temp1, temp1);
479        masm.movl(gpr1, 18416);
480        masm.pinsrw(temp1, gpr1, 3);
481        masm.mulsd(dest, temp1);
482        masm.movdqu(temp1, dest);
483        masm.pextrw(gpr1, dest, 3);
484        masm.por(dest, temp2);
485        masm.psrlq(dest, 27);
486        masm.movl(gpr2, 18416);
487        masm.psrld(dest, 2);
488        masm.rcpps(dest, dest);
489        masm.psllq(temp1, 12);
490        masm.pshufd(temp6, temp5, 0xE4);
491        masm.psrlq(temp1, 12);
492        masm.jmp(bb1);
493
494        masm.bind(bb2);
495        masm.movdl(gpr3, temp1);
496        masm.psrlq(temp1, 32);
497        masm.movdl(gpr2, temp1);
498        masm.addl(gpr2, gpr2);
499        masm.cmpl(gpr2, -2097152);
500        masm.jcc(ConditionFlag.AboveEqual, bb5);
501
502        masm.orl(gpr3, gpr2);
503        masm.cmpl(gpr3, 0);
504        masm.jcc(ConditionFlag.Equal, bb7);
505
506        masm.bind(bb6);
507        masm.xorpd(temp1, temp1);
508        masm.xorpd(dest, dest);
509        masm.movl(gpr1, 32752);
510        masm.pinsrw(temp1, gpr1, 3);
511        masm.mulsd(dest, temp1);
512        masm.jmp(bb8);
513
514        masm.bind(bb7);
515        masm.xorpd(temp1, temp1);
516        masm.xorpd(dest, dest);
517        masm.movl(gpr1, 49136);
518        masm.pinsrw(dest, gpr1, 3);
519        masm.divsd(dest, temp1);
520
521        masm.bind(bb8);
522    }
523
524    private static int[] highmaskLogTen = {
525                    0xf8000000, 0xffffffff, 0x00000000, 0xffffe000
526    };
527
528    private static int[] logTenE = {
529                    0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
530    };
531
532    private static int[] logTenTable = {
533                    0x509f7800, 0x3fd34413, 0x1f12b358, 0x3d1fef31, 0x80333400,
534                    0x3fd32418, 0xc671d9d0, 0xbcf542bf, 0x51195000, 0x3fd30442,
535                    0x78a4b0c3, 0x3d18216a, 0x6fc79400, 0x3fd2e490, 0x80fa389d,
536                    0xbc902869, 0x89d04000, 0x3fd2c502, 0x75c2f564, 0x3d040754,
537                    0x4ddd1c00, 0x3fd2a598, 0xd219b2c3, 0xbcfa1d84, 0x6baa7c00,
538                    0x3fd28651, 0xfd9abec1, 0x3d1be6d3, 0x94028800, 0x3fd2672d,
539                    0xe289a455, 0xbd1ede5e, 0x78b86400, 0x3fd2482c, 0x6734d179,
540                    0x3d1fe79b, 0xcca3c800, 0x3fd2294d, 0x981a40b8, 0xbced34ea,
541                    0x439c5000, 0x3fd20a91, 0xcc392737, 0xbd1a9cc3, 0x92752c00,
542                    0x3fd1ebf6, 0x03c9afe7, 0x3d1e98f8, 0x6ef8dc00, 0x3fd1cd7d,
543                    0x71dae7f4, 0x3d08a86c, 0x8fe4dc00, 0x3fd1af25, 0xee9185a1,
544                    0xbcff3412, 0xace59400, 0x3fd190ee, 0xc2cab353, 0x3cf17ed9,
545                    0x7e925000, 0x3fd172d8, 0x6952c1b2, 0x3cf1521c, 0xbe694400,
546                    0x3fd154e2, 0xcacb79ca, 0xbd0bdc78, 0x26cbac00, 0x3fd1370d,
547                    0xf71f4de1, 0xbd01f8be, 0x72fa0800, 0x3fd11957, 0x55bf910b,
548                    0x3c946e2b, 0x5f106000, 0x3fd0fbc1, 0x39e639c1, 0x3d14a84b,
549                    0xa802a800, 0x3fd0de4a, 0xd3f31d5d, 0xbd178385, 0x0b992000,
550                    0x3fd0c0f3, 0x3843106f, 0xbd1f602f, 0x486ce800, 0x3fd0a3ba,
551                    0x8819497c, 0x3cef987a, 0x1de49400, 0x3fd086a0, 0x1caa0467,
552                    0x3d0faec7, 0x4c30cc00, 0x3fd069a4, 0xa4424372, 0xbd1618fc,
553                    0x94490000, 0x3fd04cc6, 0x946517d2, 0xbd18384b, 0xb7e84000,
554                    0x3fd03006, 0xe0109c37, 0xbd19a6ac, 0x798a0c00, 0x3fd01364,
555                    0x5121e864, 0xbd164cf7, 0x38ce8000, 0x3fcfedbf, 0x46214d1a,
556                    0xbcbbc402, 0xc8e62000, 0x3fcfb4ef, 0xdab93203, 0x3d1e0176,
557                    0x2cb02800, 0x3fcf7c5a, 0x2a2ea8e4, 0xbcfec86a, 0xeeeaa000,
558                    0x3fcf43fd, 0xc18e49a4, 0x3cf110a8, 0x9bb6e800, 0x3fcf0bda,
559                    0x923cc9c0, 0xbd15ce99, 0xc093f000, 0x3fced3ef, 0x4d4b51e9,
560                    0x3d1a04c7, 0xec58f800, 0x3fce9c3c, 0x163cad59, 0x3cac8260,
561                    0x9a907000, 0x3fce2d7d, 0x3fa93646, 0x3ce4a1c0, 0x37311000,
562                    0x3fcdbf99, 0x32abd1fd, 0x3d07ea9d, 0x6744b800, 0x3fcd528c,
563                    0x4dcbdfd4, 0xbd1b08e2, 0xe36de800, 0x3fcce653, 0x0b7b7f7f,
564                    0xbd1b8f03, 0x77506800, 0x3fcc7aec, 0xa821c9fb, 0x3d13c163,
565                    0x00ff8800, 0x3fcc1053, 0x536bca76, 0xbd074ee5, 0x70719800,
566                    0x3fcba684, 0xd7da9b6b, 0xbd1fbf16, 0xc6f8d800, 0x3fcb3d7d,
567                    0xe2220bb3, 0x3d1a295d, 0x16c15800, 0x3fcad53c, 0xe724911e,
568                    0xbcf55822, 0x82533800, 0x3fca6dbc, 0x6d982371, 0x3cac567c,
569                    0x3c19e800, 0x3fca06fc, 0x84d17d80, 0x3d1da204, 0x85ef8000,
570                    0x3fc9a0f8, 0x54466a6a, 0xbd002204, 0xb0ac2000, 0x3fc93bae,
571                    0xd601fd65, 0x3d18840c, 0x1bb9b000, 0x3fc8d71c, 0x7bf58766,
572                    0xbd14f897, 0x34aae800, 0x3fc8733e, 0x3af6ac24, 0xbd0f5c45,
573                    0x76d68000, 0x3fc81012, 0x4303e1a1, 0xbd1f9a80, 0x6af57800,
574                    0x3fc7ad96, 0x43fbcb46, 0x3cf4c33e, 0xa6c51000, 0x3fc74bc7,
575                    0x70f0eac5, 0xbd192e3b, 0xccab9800, 0x3fc6eaa3, 0xc0093dfe,
576                    0xbd0faf15, 0x8b60b800, 0x3fc68a28, 0xde78d5fd, 0xbc9ea4ee,
577                    0x9d987000, 0x3fc62a53, 0x962bea6e, 0xbd194084, 0xc9b0e800,
578                    0x3fc5cb22, 0x888dd999, 0x3d1fe201, 0xe1634800, 0x3fc56c93,
579                    0x16ada7ad, 0x3d1b1188, 0xc176c000, 0x3fc50ea4, 0x4159b5b5,
580                    0xbcf09c08, 0x51766000, 0x3fc4b153, 0x84393d23, 0xbcf6a89c,
581                    0x83695000, 0x3fc4549d, 0x9f0b8bbb, 0x3d1c4b8c, 0x538d5800,
582                    0x3fc3f881, 0xf49df747, 0x3cf89b99, 0xc8138000, 0x3fc39cfc,
583                    0xd503b834, 0xbd13b99f, 0xf0df0800, 0x3fc3420d, 0xf011b386,
584                    0xbd05d8be, 0xe7466800, 0x3fc2e7b2, 0xf39c7bc2, 0xbd1bb94e,
585                    0xcdd62800, 0x3fc28de9, 0x05e6d69b, 0xbd10ed05, 0xd015d800,
586                    0x3fc234b0, 0xe29b6c9d, 0xbd1ff967, 0x224ea800, 0x3fc1dc06,
587                    0x727711fc, 0xbcffb30d, 0x01540000, 0x3fc183e8, 0x39786c5a,
588                    0x3cc23f57, 0xb24d9800, 0x3fc12c54, 0xc905a342, 0x3d003a1d,
589                    0x82835800, 0x3fc0d54a, 0x9b9920c0, 0x3d03b25a, 0xc72ac000,
590                    0x3fc07ec7, 0x46f26a24, 0x3cf0fa41, 0xdd35d800, 0x3fc028ca,
591                    0x41d9d6dc, 0x3d034a65, 0x52474000, 0x3fbfa6a4, 0x44f66449,
592                    0x3d19cad3, 0x2da3d000, 0x3fbefcb8, 0x67832999, 0x3d18400f,
593                    0x32a10000, 0x3fbe53ce, 0x9c0e3b1a, 0xbcff62fd, 0x556b7000,
594                    0x3fbdabe3, 0x02976913, 0xbcf8243b, 0x97e88000, 0x3fbd04f4,
595                    0xec793797, 0x3d1c0578, 0x09647000, 0x3fbc5eff, 0x05fc0565,
596                    0xbd1d799e, 0xc6426000, 0x3fbbb9ff, 0x4625f5ed, 0x3d1f5723,
597                    0xf7afd000, 0x3fbb15f3, 0xdd5aae61, 0xbd1a7e1e, 0xd358b000,
598                    0x3fba72d8, 0x3314e4d3, 0x3d17bc91, 0x9b1f5000, 0x3fb9d0ab,
599                    0x9a4d514b, 0x3cf18c9b, 0x9cd4e000, 0x3fb92f69, 0x7e4496ab,
600                    0x3cf1f96d, 0x31f4f000, 0x3fb88f10, 0xf56479e7, 0x3d165818,
601                    0xbf628000, 0x3fb7ef9c, 0x26bf486d, 0xbd1113a6, 0xb526b000,
602                    0x3fb7510c, 0x1a1c3384, 0x3ca9898d, 0x8e31e000, 0x3fb6b35d,
603                    0xb3875361, 0xbd0661ac, 0xd01de000, 0x3fb6168c, 0x2a7cacfa,
604                    0xbd1bdf10, 0x0af23000, 0x3fb57a98, 0xff868816, 0x3cf046d0,
605                    0xd8ea0000, 0x3fb4df7c, 0x1515fbe7, 0xbd1fd529, 0xde3b2000,
606                    0x3fb44538, 0x6e59a132, 0x3d1faeee, 0xc8df9000, 0x3fb3abc9,
607                    0xf1322361, 0xbd198807, 0x505f1000, 0x3fb3132d, 0x0888e6ab,
608                    0x3d1e5380, 0x359bd000, 0x3fb27b61, 0xdfbcbb22, 0xbcfe2724,
609                    0x429ee000, 0x3fb1e463, 0x6eb4c58c, 0xbcfe4dd6, 0x4a673000,
610                    0x3fb14e31, 0x4ce1ac9b, 0x3d1ba691, 0x28b96000, 0x3fb0b8c9,
611                    0x8c7813b8, 0xbd0b3872, 0xc1f08000, 0x3fb02428, 0xc2bc8c2c,
612                    0x3cb5ea6b, 0x05a1a000, 0x3faf209c, 0x72e8f18e, 0xbce8df84,
613                    0xc0b5e000, 0x3fadfa6d, 0x9fdef436, 0x3d087364, 0xaf416000,
614                    0x3facd5c2, 0x1068c3a9, 0x3d0827e7, 0xdb356000, 0x3fabb296,
615                    0x120a34d3, 0x3d101a9f, 0x5dfea000, 0x3faa90e6, 0xdaded264,
616                    0xbd14c392, 0x6034c000, 0x3fa970ad, 0x1c9d06a9, 0xbd1b705e,
617                    0x194c6000, 0x3fa851e8, 0x83996ad9, 0xbd0117bc, 0xcf4ac000,
618                    0x3fa73492, 0xb1a94a62, 0xbca5ea42, 0xd67b4000, 0x3fa618a9,
619                    0x75aed8ca, 0xbd07119b, 0x9126c000, 0x3fa4fe29, 0x5291d533,
620                    0x3d12658f, 0x6f4d4000, 0x3fa3e50e, 0xcd2c5cd9, 0x3d1d5c70,
621                    0xee608000, 0x3fa2cd54, 0xd1008489, 0x3d1a4802, 0x9900e000,
622                    0x3fa1b6f9, 0x54fb5598, 0xbd16593f, 0x06bb6000, 0x3fa0a1f9,
623                    0x64ef57b4, 0xbd17636b, 0xb7940000, 0x3f9f1c9f, 0xee6a4737,
624                    0x3cb5d479, 0x91aa0000, 0x3f9cf7f5, 0x3a16373c, 0x3d087114,
625                    0x156b8000, 0x3f9ad5ed, 0x836c554a, 0x3c6900b0, 0xd4764000,
626                    0x3f98b67f, 0xed12f17b, 0xbcffc974, 0x77dec000, 0x3f9699a7,
627                    0x232ce7ea, 0x3d1e35bb, 0xbfbf4000, 0x3f947f5d, 0xd84ffa6e,
628                    0x3d0e0a49, 0x82c7c000, 0x3f92679c, 0x8d170e90, 0xbd14d9f2,
629                    0xadd20000, 0x3f90525d, 0x86d9f88e, 0x3cdeb986, 0x86f10000,
630                    0x3f8c7f36, 0xb9e0a517, 0x3ce29faa, 0xb75c8000, 0x3f885e9e,
631                    0x542568cb, 0xbd1f7bdb, 0x46b30000, 0x3f8442e8, 0xb954e7d9,
632                    0x3d1e5287, 0xb7e60000, 0x3f802c07, 0x22da0b17, 0xbd19fb27,
633                    0x6c8b0000, 0x3f7833e3, 0x821271ef, 0xbd190f96, 0x29910000,
634                    0x3f701936, 0xbc3491a5, 0xbd1bcf45, 0x354a0000, 0x3f600fe3,
635                    0xc0ff520a, 0xbd19d71c, 0x00000000, 0x00000000, 0x00000000,
636                    0x00000000
637    };
638
639    private static int[] logTwoLogTenData = {
                    // Two 64-bit constants, each written as two 32-bit words. log10Intrinsic loads the
                    // first 8 bytes as a scalar and reads the second constant at byte offset 8; both are
                    // multiplied by the converted exponent term.
                    // NOTE(review): read low word first, the bit patterns are 0x3f934413509f7800 and
                    // 0x3cdfef311f12b358, which look like the high and low parts of log10(2) / 16
                    // (the exponent term is scaled by 16) -- confirm.
640                    0x509f7800, 0x3f934413, 0x1f12b358, 0x3cdfef31
641    };
642
643    private static int[] coeffLogTenData = {
                    // Twelve 32-bit words = six 64-bit constants. log10Intrinsic reads them as three
                    // 16-byte pairs at byte offsets 0, 16 and 32.
                    // NOTE(review): presumably the coefficients of the polynomial p(r) from the LOG10
                    // algorithm description -- confirm the coefficient order before editing.
644                    0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309, 0x385593b1,
645                    0xc025c917, 0xdc963467, 0x3ffc6a02, 0x7f9d3aa1, 0x4016ab9f,
646                    0xdc77b115, 0xbff27af2
647    };
648
649    /*
650     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
651     * Source Code
652     *
653     * ALGORITHM DESCRIPTION - LOG10() ---------------------
654     *
655     * Let x=2^k * mx, mx in [1,2)
656     *
657     * Get B~1/mx based on the output of rcpss instruction (B0): B = int((B0*LH*2^7+0.5))/2^7,
658     * where LH is a short approximation for log10(e)
659     *
660     * Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
661     *
662     * Result: k*log10(2) - log(B) + p(r), where p(r) is a degree 7 polynomial and -log(B) is read
663     * from the data table (high, low parts). The result is formed from high and low parts.
664     *
665     * Special cases: log10(0) = -INF with divide-by-zero exception raised; log10(1) = +0; log10(x) =
666     * NaN with invalid exception raised if x < -0, including -INF; log10(+INF) = +INF
667     *
668     */
669
670    public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        /*
         * Emits the AMD64 instruction sequence for the LOG10 algorithm described in the comment that
         * precedes this method. The input double is read from value; every exit path leaves its
         * result in dest and ends at label bb8.
         *
         * dest  - XMM register that receives the result (may be the same register as value)
         * value - XMM register holding the input
         * crb   - used to resolve the stack temporary; also handed to setCrb before the first
         *         externalAddress lookup
         * masm  - assembler the instructions are emitted into
         *
         * The statement order below is the emitted instruction order: do not reorder statements,
         * several branches rely on flags or register contents set many lines earlier.
         */

        // Wrap the constant tables so the emitted code can address them.
        // NOTE(review): the second constructor argument (16) is presumably the required alignment.
671        ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16);
672        ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16);
673        ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16);
674        ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16);
675        ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16);
676
        // Label roles, as used below:
        // bb0 - dispatch for inputs rejected by the range check on the top 16 bits
        // bb1 - main computation (also re-entered from bb3 after scaling)
        // bb2 - input has the sign bit set
        // bb3 - exponent field is zero (zero or denormal input)
        // bb4 - returns dest + dest
        // bb5 - sign bit set and exponent field all ones
        // bb6 - produces 0 * Inf
        // bb7 - produces -1 / 0
        // bb8 - common exit
677        Label bb0 = new Label();
678        Label bb1 = new Label();
679        Label bb2 = new Label();
680        Label bb3 = new Label();
681        Label bb4 = new Label();
682        Label bb5 = new Label();
683        Label bb6 = new Label();
684        Label bb7 = new Label();
685        Label bb8 = new Label();
686
        // General-purpose scratch registers (gpr3 is taken from rcxTemp).
687        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
688        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
689        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
690        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
691
        // XMM scratch registers.
692        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
693        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
694        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
695        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
696        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
697        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
698        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
699
700        AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
701
702        setCrb(crb);
        // Save the unmodified input; the special-case path at bb0 reloads it from here.
703        masm.movdq(stackSlot, value);
        // Work on a copy of the input in dest unless both already name the same register.
704        if (dest.encoding != value.encoding) {
705            masm.movdqu(dest, value);
706        }
707        masm.movdqu(temp5, externalAddress(highmaskLogTenPtr));                               // 0xf8000000,
708                                                                                              // 0xffffffff,
709                                                                                              // 0x00000000,
710                                                                                              // 0xffffe000
        // temp2 = 0x3FF0_0000_0000_0000 (16368 == 0x3FF0 placed in the top 16-bit word), i.e. 1.0.
711        masm.xorpd(temp2, temp2);
712        masm.movl(gpr1, 16368);
713        masm.pinsrw(temp2, gpr1, 3);
        // 1054736384 == 0x3EDE0000, the single-precision bit pattern of 0.43359375.
        // NOTE(review): presumably LH, the short approximation of log10(e) from the description.
714        masm.movl(gpr2, 1054736384);
715        masm.movdl(temp7, gpr2);
        // temp3 = 0x77F0 in the top 16-bit word; OR-ed into the isolated mantissa at bb1.
716        masm.xorpd(temp3, temp3);
717        masm.movl(gpr3, 30704);
718        masm.pinsrw(temp3, gpr3, 3);
        // temp4 = 0x8000 in the low dword; added to dest with paddd at bb1.
719        masm.movl(gpr3, 32768);
720        masm.movdl(temp4, gpr3);
721        masm.movdqu(temp1, value);
        // gpr1 = top 16 bits of the input: sign, 11 exponent bits, 4 mantissa bits.
722        masm.pextrw(gpr1, dest, 3);
723        masm.por(dest, temp2);
        // 16352 == 0x3FE0; subtracted from the masked exponent field at bb1.
724        masm.movl(gpr2, 16352);
725        masm.psrlq(dest, 27);
726        masm.movdqu(temp2, externalAddress(logTenEPtr));                                      // 0x00000000,
727                                                                                              // 0x3fdbc000,
728                                                                                              // 0xbf2e4108,
729                                                                                              // 0x3f5a7a6c
730        masm.psrld(dest, 2);
731        masm.rcpps(dest, dest);
        // Shifting left then right by 12 clears sign and exponent, leaving the 52 mantissa bits.
732        masm.psllq(temp1, 12);
        // 0x4E swaps the two 64-bit halves: temp6 = temp5 with halves exchanged.
733        masm.pshufd(temp6, temp5, 0x4E);
734        masm.psrlq(temp1, 12);
        // Unsigned range check on the top word w: (w - 16) >= 0x7FE0 holds when w < 16 (exponent
        // field zero) or w >= 0x7FF0 (exponent all ones, or sign bit set). Those inputs go to bb0.
735        masm.subl(gpr1, 16);
736        masm.cmpl(gpr1, 32736);
737        masm.jcc(ConditionFlag.AboveEqual, bb0);
738
        // Main path.
739        masm.bind(bb1);
740        masm.mulss(dest, temp7);
741        masm.por(temp1, temp3);
742        masm.andpd(temp5, temp1);
743        masm.paddd(dest, temp4);
744        masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr));                              // 0xc1a5f12e,
745                                                                                              // 0x40358874,
746                                                                                              // 0x64d4ef0d,
747                                                                                              // 0xc0089309
748        masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr));
749        masm.movdqu(temp4, new AMD64Address(gpr4, 16));                                       // 0x385593b1,
750                                                                                              // 0xc025c917,
751                                                                                              // 0xdc963467,
752                                                                                              // 0x3ffc6a02
753        masm.subsd(temp1, temp5);
        // Keep the low 32 bits of dest; they become the table index further down.
754        masm.movdl(gpr3, dest);
755        masm.psllq(dest, 29);
756        masm.andpd(dest, temp6);
757        masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr));                              // 0x509f7800,
758                                                                                              // 0x3f934413
        // Exponent term: mask the exponent field (0x7FF0) and subtract the bias held in gpr2.
        // Coming from the range check, gpr1 is (w - 16) and gpr2 is 0x3FE0, which gives
        // 16 * (biased exponent - 1023). Coming from bb3, gpr1 is the unadjusted top word of the
        // rescaled input and gpr2 is 18416.
759        masm.andl(gpr1, 32752);
760        masm.subl(gpr1, gpr2);
761        masm.cvtsi2sdl(temp7, gpr1);
762        masm.mulpd(temp5, dest);
763        masm.mulsd(temp1, dest);
764        masm.subsd(temp5, temp2);
765        masm.movdqu(temp2, new AMD64Address(gpr4, 32));                                       // 0x7f9d3aa1,
766                                                                                              // 0x4016ab9f,
767                                                                                              // 0xdc77b115,
768                                                                                              // 0xbff27af2
        // Table lookup: ((gpr3 & 0xFF0000) >> 12) is a multiple of 16, used as a byte offset into
        // logTenTable (displaced by -1504) to load one 16-byte entry into dest.
769        masm.leaq(gpr4, externalAddress(logTenTablePtr));
770        masm.andl(gpr3, 16711680);
771        masm.shrl(gpr3, 12);
772        masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504));
773        masm.addsd(temp1, temp5);
774        masm.mulsd(temp6, temp7);
        // 0x44 copies the low 64-bit half of temp1 into both halves of temp5.
775        masm.pshufd(temp5, temp1, 0x44);
776        masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr));
777        masm.mulsd(temp7, new AMD64Address(gpr4, 8));                                         // 0x1f12b358,
778                                                                                              // 0x3cdfef31
779        masm.mulsd(temp3, temp1);
780        masm.addsd(dest, temp6);
781        masm.mulpd(temp4, temp5);
782        masm.leaq(gpr4, externalAddress(logTenEPtr));
783        masm.movdq(temp6, new AMD64Address(gpr4, 8));                                         // 0xbf2e4108,
784                                                                                              // 0x3f5a7a6c
785        masm.mulpd(temp5, temp5);
786        masm.addpd(temp4, temp2);
787        masm.mulpd(temp3, temp5);
        // 0xE4 is the identity shuffle: temp2 = copy of dest.
788        masm.pshufd(temp2, dest, 0xE4);
789        masm.addsd(dest, temp1);
790        masm.mulsd(temp4, temp1);
791        masm.subsd(temp2, dest);
792        masm.mulsd(temp6, temp1);
793        masm.addsd(temp1, temp2);
        // 0xEE copies the high 64-bit half of dest into both halves of temp2.
794        masm.pshufd(temp2, dest, 0xEE);
795        masm.mulsd(temp5, temp5);
796        masm.addsd(temp7, temp2);
797        masm.addsd(temp1, temp6);
798        masm.addpd(temp4, temp3);
799        masm.addsd(temp1, temp7);
800        masm.mulpd(temp4, temp5);
801        masm.addsd(temp1, temp4);
802        masm.pshufd(temp5, temp4, 0xEE);
803        masm.addsd(temp1, temp5);
        // Final sum: the accumulated low-order terms in temp1 are added to dest.
804        masm.addsd(dest, temp1);
805        masm.jmp(bb8);
806
        // Special inputs: reload the original value and undo the earlier "- 16" on gpr1.
807        masm.bind(bb0);
808        masm.movdq(dest, stackSlot);
809        masm.movdq(temp1, stackSlot);
810        masm.addl(gpr1, 16);
        // Top word >= 0x8000: the sign bit is set.
811        masm.cmpl(gpr1, 32768);
812        masm.jcc(ConditionFlag.AboveEqual, bb2);
813
        // Top word < 16: the exponent field is zero (zero or denormal).
814        masm.cmpl(gpr1, 16);
815        masm.jcc(ConditionFlag.Below, bb3);
816
        // Remaining case here: sign clear and exponent all ones (+Inf or NaN). x + x yields +Inf
        // for +Inf and a NaN for NaN. Also reached from bb5 for NaNs with the sign bit set.
817        masm.bind(bb4);
818        masm.addsd(dest, dest);
819        masm.jmp(bb8);
820
        // Only reached by the jump in bb2, so the flags tested first are still those of the
        // compare of gpr2 against 0xFFE00000: "above" means upper mantissa bits are set (NaN).
821        masm.bind(bb5);
822        masm.jcc(ConditionFlag.Above, bb4);
823
        // Upper mantissa bits are zero; any set bit in the low 32 bits also means NaN.
824        masm.cmpl(gpr3, 0);
825        masm.jcc(ConditionFlag.Above, bb4);
826
        // Otherwise the input is -Inf.
827        masm.jmp(bb6);
828
        // Exponent field is zero: tell zero apart from denormal.
829        masm.bind(bb3);
830        masm.xorpd(temp1, temp1);
831        masm.addsd(temp1, dest);
832        masm.movdl(gpr3, temp1);
833        masm.psrlq(temp1, 32);
834        masm.movdl(gpr2, temp1);
        // OR of both 32-bit halves is zero only for a zero value.
835        masm.orl(gpr3, gpr2);
836        masm.cmpl(gpr3, 0);
837        masm.jcc(ConditionFlag.Equal, bb7);
838
        // Denormal: multiply by 2^128 (18416 == 0x47F0 in the top word), then repeat the setup done
        // before bb1 on the rescaled value, with gpr2 = 18416 as the bias for the exponent term.
839        masm.xorpd(temp1, temp1);
840        masm.xorpd(temp2, temp2);
841        masm.movl(gpr1, 18416);
842        masm.pinsrw(temp1, gpr1, 3);
843        masm.mulsd(dest, temp1);
844        masm.movl(gpr1, 16368);
845        masm.pinsrw(temp2, gpr1, 3);
846        masm.movdqu(temp1, dest);
847        masm.pextrw(gpr1, dest, 3);
848        masm.por(dest, temp2);
849        masm.movl(gpr2, 18416);
850        masm.psrlq(dest, 27);
851        masm.movdqu(temp2, externalAddress(logTenEPtr));                                      // 0x00000000,
852                                                                                              // 0x3fdbc000,
853                                                                                              // 0xbf2e4108,
854                                                                                              // 0x3f5a7a6c
855        masm.psrld(dest, 2);
856        masm.rcpps(dest, dest);
857        masm.psllq(temp1, 12);
        // temp5 still holds the mask loaded at the top of the method.
858        masm.pshufd(temp6, temp5, 0x4E);
859        masm.psrlq(temp1, 12);
860        masm.jmp(bb1);
861
        // Sign bit set. gpr3 = low 32 bits, gpr2 = high 32 bits shifted left by one (drops the sign).
862        masm.bind(bb2);
863        masm.movdl(gpr3, temp1);
864        masm.psrlq(temp1, 32);
865        masm.movdl(gpr2, temp1);
866        masm.addl(gpr2, gpr2);
        // -2097152 == 0xFFE00000: unsigned >= means the 11 exponent bits are all ones.
867        masm.cmpl(gpr2, -2097152);
868        masm.jcc(ConditionFlag.AboveEqual, bb5);
869
        // All remaining bits zero: the input is -0.
870        masm.orl(gpr3, gpr2);
871        masm.cmpl(gpr3, 0);
872        masm.jcc(ConditionFlag.Equal, bb7);
873
        // Negative non-zero input (or -Inf via bb5): dest = 0 * Inf (32752 == 0x7FF0), i.e. NaN.
874        masm.bind(bb6);
875        masm.xorpd(temp1, temp1);
876        masm.xorpd(dest, dest);
877        masm.movl(gpr1, 32752);
878        masm.pinsrw(temp1, gpr1, 3);
879        masm.mulsd(dest, temp1);
880        masm.jmp(bb8);
881
        // Zero input: dest = -1.0 / 0.0 (49136 == 0xBFF0), i.e. -Inf via a real division by zero.
882        masm.bind(bb7);
883        masm.xorpd(temp1, temp1);
884        masm.xorpd(dest, dest);
885        masm.movl(gpr1, 49136);
886        masm.pinsrw(dest, gpr1, 3);
887        masm.divsd(dest, temp1);
888
        // Common exit; the result is in dest.
889        masm.bind(bb8);
890    }
891
892    /*
893     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
894     * Source Code
895     *
896     * ALGORITHM DESCRIPTION - SIN() ---------------------
897     *
898     * 1. RANGE REDUCTION
899     *
900     * We perform an initial range reduction from X to r with
901     *
902     * X =~= N * pi/32 + r
903     *
904     * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this,
905     * the range reduction is insufficiently accurate. For extremely small inputs, denormalization
906     * can occur internally, impacting performance. This means that the main path is actually only
907     * taken for 2^-252 <= |X| < 90112.
908     *
909     * To avoid branches, we perform the range reduction to full accuracy each time.
910     *
911     * X - N * (P_1 + P_2 + P_3)
912     *
913     * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit
914     * number. Together, these approximate pi well enough for all cases in the restricted range.
915     *
916     * The main reduction sequence is:
917     *
918     * y = 32/pi * x; N = integer(y) (computed by adding and subtracting off SHIFTER)
919     *
920     * m_1 = N * P_1; m_2 = N * P_2; r_1 = x - m_1; r = r_1 - m_2 (this r can be used for most of the
921     * calculation)
922     *
923     * c_1 = r_1 - r; m_3 = N * P_3; c_2 = c_1 - m_2; c = c_2 - m_3
924     *
925     * 2. MAIN ALGORITHM
926     *
927     * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored
928     * values are: sigma: closest power of 2 to cos(B); C_hl: 53-bit cos(B) - sigma; S_hi + S_lo:
929     * 2 * 53-bit sin(B)
930     *
931     * The computation is organized as follows:
932     *
933     * sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma) + sin(B) * [cos(r + c) - 1] +
934     * cos(B) * [sin(r + c) - r]
935     *
936     * which is approximately:
937     *
938     * [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c] + (C_hl + sigma) *
939     * [(sin(r) - r) + c]
940     *
941     * and this is what is actually computed. We separate this sum into four parts:
942     *
943     * hi + med + pols + corr
944     *
945     * where
946     *
947     * hi = S_hi + sigma * r; med = C_hl * r; pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r);
948     * corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
949     *
950     * 3. POLYNOMIAL
951     *
952     * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely,
953     * since it is quite small, so we exploit parallelism to the fullest.
954     *
955     * psc4 = SC_4 * r_1; msc4 = psc4 * r; r2 = r * r; msc2 = SC_2 * r2; r4 = r2 * r2; psc3 = SC_3 + msc4;
956     * psc1 = SC_1 + msc2; msc3 = r4 * psc3; sincospols = psc1 + msc3; pols = sincospols * <S_hi * r^2
957     * | (C_hl + sigma) * r^3>
958     *
959     * 4. CORRECTION TERM
960     *
961     * This is where the "c" component of the range reduction is taken into account; recall that
962     * just "r" is used for most of the calculation.
963     *
964     * -c = m_3 - c_2; -d = S_hi * r - (C_hl + sigma); corr = -c * -d + S_lo
965     *
966     * 5. COMPENSATED SUMMATIONS
967     *
968     * The two successive compensated summations add up the high and medium parts, leaving just the
969     * low parts to add up at the end.
970     *
971     * rs = sigma * r; res_int = S_hi + rs; k_0 = S_hi - res_int; k_2 = k_0 + rs; med = C_hl * r;
972     * res_hi = res_int + med; k_1 = res_int - res_hi; k_3 = k_1 + med
973     *
974     * 6. FINAL SUMMATION
975     *
976     * We now add up all the small parts:
977     *
978     * res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
979     *
980     * Now the overall result is just:
981     *
982     * res_hi + res_lo
983     *
984     * 7. SMALL ARGUMENTS
985     *
986     * If |x| < SNN (SNN meaning the smallest normal number), we simply perform
987     * 0.1111111...1111 (binary) * x. For SNN <= |x|, we do 2^-55 * (2^55 * x - x).
988     *
989     * Special cases: sin(NaN) = quiet NaN, and raise invalid exception; sin(INF) = NaN and raise
990     * invalid exception; sin(+/-0) = +/-0
991     *
992     */
993
994    public int[] oneHalf = {
                    // Read as (low word, high word) pairs this is 0x3fe0000000000000 twice, the IEEE-754
                    // double bit pattern of 0.5. sinIntrinsic wraps this array in an
                    // ArrayDataPointerConstant with second argument 16.
995                    0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
996    };
997
998    public int[] pTwo = {
                    // The same (low word, high word) pair twice: 0x3d90b4611a600000.
                    // NOTE(review): presumably P_2 of the range reduction in the SIN description, stored
                    // in both halves of a 16-byte value -- confirm against sinIntrinsic.
999                    0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461
1000    };
1001
1002    public int[] scFour = {
                    // Two 64-bit patterns: 0x3ec71de3a556c734 and 0x3efa01a01a01a01a.
                    // NOTE(review): presumably the SC_4 coefficient pair of the SIN description; the
                    // values look like 1/9! and 1/8! -- confirm.
1003                    0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0
1004    };
1005
1006    public int[] cTable = {
                    // 512 32-bit words, i.e. 256 64-bit constants. sinIntrinsic wraps this array in an
                    // ArrayDataPointerConstant with second argument 16.
                    // NOTE(review): appears to be 64 entries of four doubles (32 bytes) each, matching
                    // "M = N mod 64" in the SIN description; in the entries checked, the last double of an
                    // entry is a signed power of two (e.g. 0x3ff0000000000000 = 1.0), presumably sigma.
                    // Confirm the per-entry layout against the address arithmetic in sinIntrinsic before
                    // editing. Values must stay bit-exact.
1007                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1008                    0x00000000, 0x00000000, 0x3ff00000, 0x176d6d31, 0xbf73b92e,
1009                    0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000,
1010                    0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0x3fc8f8b8,
1011                    0xc0000000, 0xbc626d19, 0x00000000, 0x3ff00000, 0x939d225a,
1012                    0xbfa60bea, 0x2ed59f06, 0x3fd29406, 0xa0000000, 0xbc75d28d,
1013                    0x00000000, 0x3ff00000, 0x866b95cf, 0xbfb37ca1, 0xa6aea963,
1014                    0x3fd87de2, 0xe0000000, 0xbc672ced, 0x00000000, 0x3ff00000,
1015                    0x73fa1279, 0xbfbe3a68, 0x3806f63b, 0x3fde2b5d, 0x20000000,
1016                    0x3c5e0d89, 0x00000000, 0x3ff00000, 0x5bc57974, 0xbfc59267,
1017                    0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000,
1018                    0x3ff00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0x3fe44cf3,
1019                    0x20000000, 0x3c68076a, 0x00000000, 0x3ff00000, 0x99fcef32,
1020                    0x3fca8279, 0x667f3bcd, 0x3fe6a09e, 0x20000000, 0xbc8bdd34,
1021                    0x00000000, 0x3fe00000, 0x94247758, 0x3fc133cc, 0x6b151741,
1022                    0x3fe8bc80, 0x20000000, 0xbc82c5e1, 0x00000000, 0x3fe00000,
1023                    0x9ae68c87, 0x3fac73b3, 0x290ea1a3, 0x3fea9b66, 0xe0000000,
1024                    0x3c39f630, 0x00000000, 0x3fe00000, 0x7f909c4e, 0xbf9d4a2c,
1025                    0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000,
1026                    0x3fe00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0x3fed906b,
1027                    0x20000000, 0x3c7457e6, 0x00000000, 0x3fe00000, 0x76acf82d,
1028                    0x3fa4a031, 0x56c62dda, 0x3fee9f41, 0xe0000000, 0x3c8760b1,
1029                    0x00000000, 0x3fd00000, 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0,
1030                    0x3fef6297, 0x20000000, 0x3c756217, 0x00000000, 0x3fd00000,
1031                    0x0f592f50, 0xbf9ba165, 0xa3d12526, 0x3fefd88d, 0x40000000,
1032                    0xbc887df6, 0x00000000, 0x3fc00000, 0x00000000, 0x00000000,
1033                    0x00000000, 0x3ff00000, 0x00000000, 0x00000000, 0x00000000,
1034                    0x00000000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0x3fefd88d,
1035                    0x40000000, 0xbc887df6, 0x00000000, 0xbfc00000, 0x0e5967d5,
1036                    0x3fac1d1f, 0xcff75cb0, 0x3fef6297, 0x20000000, 0x3c756217,
1037                    0x00000000, 0xbfd00000, 0x76acf82d, 0xbfa4a031, 0x56c62dda,
1038                    0x3fee9f41, 0xe0000000, 0x3c8760b1, 0x00000000, 0xbfd00000,
1039                    0x65455a75, 0x3fbe0875, 0xcf328d46, 0x3fed906b, 0x20000000,
1040                    0x3c7457e6, 0x00000000, 0xbfe00000, 0x7f909c4e, 0x3f9d4a2c,
1041                    0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000,
1042                    0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0x3fea9b66,
1043                    0xe0000000, 0x3c39f630, 0x00000000, 0xbfe00000, 0x94247758,
1044                    0xbfc133cc, 0x6b151741, 0x3fe8bc80, 0x20000000, 0xbc82c5e1,
1045                    0x00000000, 0xbfe00000, 0x99fcef32, 0xbfca8279, 0x667f3bcd,
1046                    0x3fe6a09e, 0x20000000, 0xbc8bdd34, 0x00000000, 0xbfe00000,
1047                    0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, 0x3fe44cf3, 0x20000000,
1048                    0x3c68076a, 0x00000000, 0xbff00000, 0x5bc57974, 0x3fc59267,
1049                    0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000,
1050                    0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0x3fde2b5d,
1051                    0x20000000, 0x3c5e0d89, 0x00000000, 0xbff00000, 0x866b95cf,
1052                    0x3fb37ca1, 0xa6aea963, 0x3fd87de2, 0xe0000000, 0xbc672ced,
1053                    0x00000000, 0xbff00000, 0x939d225a, 0x3fa60bea, 0x2ed59f06,
1054                    0x3fd29406, 0xa0000000, 0xbc75d28d, 0x00000000, 0xbff00000,
1055                    0x011469fb, 0x3f93ad06, 0x3c69a60b, 0x3fc8f8b8, 0xc0000000,
1056                    0xbc626d19, 0x00000000, 0xbff00000, 0x176d6d31, 0x3f73b92e,
1057                    0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000,
1058                    0xbff00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1059                    0x00000000, 0x00000000, 0x00000000, 0xbff00000, 0x176d6d31,
1060                    0x3f73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718,
1061                    0x00000000, 0xbff00000, 0x011469fb, 0x3f93ad06, 0x3c69a60b,
1062                    0xbfc8f8b8, 0xc0000000, 0x3c626d19, 0x00000000, 0xbff00000,
1063                    0x939d225a, 0x3fa60bea, 0x2ed59f06, 0xbfd29406, 0xa0000000,
1064                    0x3c75d28d, 0x00000000, 0xbff00000, 0x866b95cf, 0x3fb37ca1,
1065                    0xa6aea963, 0xbfd87de2, 0xe0000000, 0x3c672ced, 0x00000000,
1066                    0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0xbfde2b5d,
1067                    0x20000000, 0xbc5e0d89, 0x00000000, 0xbff00000, 0x5bc57974,
1068                    0x3fc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd,
1069                    0x00000000, 0xbff00000, 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6,
1070                    0xbfe44cf3, 0x20000000, 0xbc68076a, 0x00000000, 0xbff00000,
1071                    0x99fcef32, 0xbfca8279, 0x667f3bcd, 0xbfe6a09e, 0x20000000,
1072                    0x3c8bdd34, 0x00000000, 0xbfe00000, 0x94247758, 0xbfc133cc,
1073                    0x6b151741, 0xbfe8bc80, 0x20000000, 0x3c82c5e1, 0x00000000,
1074                    0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0xbfea9b66,
1075                    0xe0000000, 0xbc39f630, 0x00000000, 0xbfe00000, 0x7f909c4e,
1076                    0x3f9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1,
1077                    0x00000000, 0xbfe00000, 0x65455a75, 0x3fbe0875, 0xcf328d46,
1078                    0xbfed906b, 0x20000000, 0xbc7457e6, 0x00000000, 0xbfe00000,
1079                    0x76acf82d, 0xbfa4a031, 0x56c62dda, 0xbfee9f41, 0xe0000000,
1080                    0xbc8760b1, 0x00000000, 0xbfd00000, 0x0e5967d5, 0x3fac1d1f,
1081                    0xcff75cb0, 0xbfef6297, 0x20000000, 0xbc756217, 0x00000000,
1082                    0xbfd00000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0xbfefd88d,
1083                    0x40000000, 0x3c887df6, 0x00000000, 0xbfc00000, 0x00000000,
1084                    0x00000000, 0x00000000, 0xbff00000, 0x00000000, 0x00000000,
1085                    0x00000000, 0x00000000, 0x0f592f50, 0xbf9ba165, 0xa3d12526,
1086                    0xbfefd88d, 0x40000000, 0x3c887df6, 0x00000000, 0x3fc00000,
1087                    0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, 0xbfef6297, 0x20000000,
1088                    0xbc756217, 0x00000000, 0x3fd00000, 0x76acf82d, 0x3fa4a031,
1089                    0x56c62dda, 0xbfee9f41, 0xe0000000, 0xbc8760b1, 0x00000000,
1090                    0x3fd00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0xbfed906b,
1091                    0x20000000, 0xbc7457e6, 0x00000000, 0x3fe00000, 0x7f909c4e,
1092                    0xbf9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1,
1093                    0x00000000, 0x3fe00000, 0x9ae68c87, 0x3fac73b3, 0x290ea1a3,
1094                    0xbfea9b66, 0xe0000000, 0xbc39f630, 0x00000000, 0x3fe00000,
1095                    0x94247758, 0x3fc133cc, 0x6b151741, 0xbfe8bc80, 0x20000000,
1096                    0x3c82c5e1, 0x00000000, 0x3fe00000, 0x99fcef32, 0x3fca8279,
1097                    0x667f3bcd, 0xbfe6a09e, 0x20000000, 0x3c8bdd34, 0x00000000,
1098                    0x3fe00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0xbfe44cf3,
1099                    0x20000000, 0xbc68076a, 0x00000000, 0x3ff00000, 0x5bc57974,
1100                    0xbfc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd,
1101                    0x00000000, 0x3ff00000, 0x73fa1279, 0xbfbe3a68, 0x3806f63b,
1102                    0xbfde2b5d, 0x20000000, 0xbc5e0d89, 0x00000000, 0x3ff00000,
1103                    0x866b95cf, 0xbfb37ca1, 0xa6aea963, 0xbfd87de2, 0xe0000000,
1104                    0x3c672ced, 0x00000000, 0x3ff00000, 0x939d225a, 0xbfa60bea,
1105                    0x2ed59f06, 0xbfd29406, 0xa0000000, 0x3c75d28d, 0x00000000,
1106                    0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0xbfc8f8b8,
1107                    0xc0000000, 0x3c626d19, 0x00000000, 0x3ff00000, 0x176d6d31,
1108                    0xbf73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718,
1109                    0x00000000, 0x3ff00000
1110    };
1111
1112    public int[] scTwo = {
                    // Two 64-bit patterns: 0x3f81111111111111 and 0x3fa5555555555555.
                    // NOTE(review): presumably the SC_2 coefficient pair of the SIN description; the
                    // values look like 1/120 and 1/24 -- confirm.
1113                    0x11111111, 0x3f811111, 0x55555555, 0x3fa55555
1114    };
1115
1116    public int[] scThree = {
                    // Two 64-bit patterns: 0xbf2a01a01a01a01a and 0xbf56c16c16c16c17.
                    // NOTE(review): presumably the SC_3 coefficient pair of the SIN description; the
                    // values look like -1/5040 and -1/720 -- confirm.
1117                    0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c
1118    };
1119
1120    public int[] scOne = {
                    // Two 64-bit patterns: 0xbfc5555555555555 and 0xbfe0000000000000 (the latter is -0.5).
                    // NOTE(review): presumably the SC_1 coefficient pair of the SIN description; the first
                    // value looks like -1/6 -- confirm.
1121                    0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000
1122    };
1123
1124    public int[] piInvTable = {
                    // 41 32-bit words. sinIntrinsic wraps this array in an ArrayDataPointerConstant with
                    // second argument 16.
                    // NOTE(review): after the two leading zero words, the sequence 0xa2f9836e, 0x4e441529,
                    // 0xfc2757d1, ... looks like the hexadecimal expansion of 2/pi, presumably used for
                    // reducing arguments outside the main-path range -- confirm against sinIntrinsic.
1125                    0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1,
1126                    0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561,
1127                    0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c,
1128                    0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1129                    0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff,
1130                    0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7,
1131                    0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7,
1132                    0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab,
1133                    0xf0cfbc21
1134    };
1135
1136    public int[] piFour = {
                    // Two 64-bit patterns: 0x3fe921fb40000000 and 0x3e64442d18469899.
                    // NOTE(review): presumably a high/low split of pi/4 (the first value is about
                    // 0.785398) -- confirm.
1137                    0x40000000, 0x3fe921fb, 0x18469899, 0x3e64442d
1138    };
1139
1140    public int[] piThirtyTwoInv = {
                    // One 64-bit pattern, 0x40245f306dc9c883 (about 10.1859). sinIntrinsic wraps this array
                    // in an ArrayDataPointerConstant with second argument 8.
                    // NOTE(review): presumably 32/pi, the factor in "y = 32/pi * x" -- confirm.
1141                    0x6dc9c883, 0x40245f30
1142    };
1143
1144    public int[] shifter = {
                    // One 64-bit pattern, 0x4338000000000000, which is the double 1.5 * 2^52.
                    // NOTE(review): presumably the SHIFTER that is added and subtracted to compute
                    // N = integer(y) in the SIN description -- confirm.
1145                    0x00000000, 0x43380000
1146    };
1147
1148    public int[] signMask = {
                    // One 64-bit pattern, 0x8000000000000000: only the sign bit of a double is set.
1149                    0x00000000, 0x80000000
1150    };
1151
1152    public int[] pThree = {
                    // One 64-bit pattern, 0x3b63198a2e037073.
                    // NOTE(review): presumably P_3, the 53-bit third part of the range-reduction constant
                    // in the SIN description -- confirm.
1153                    0x2e037073, 0x3b63198a
1154    };
1155
1156    public int[] allOnes = {
                    // One 64-bit pattern, 0x3fefffffffffffff: the largest double below 1.0 (all mantissa
                    // bits set).
                    // NOTE(review): presumably the multiplier for |x| < SNN in the "SMALL ARGUMENTS" part
                    // of the SIN description -- confirm.
1157                    0xffffffff, 0x3fefffff
1158    };
1159
1160    public int[] twoPowFiftyFive = {
                    // One 64-bit pattern, 0x4360000000000000, which is the double 2^55.
1161                    0x00000000, 0x43600000
1162    };
1163
1164    public int[] twoPowFiftyFiveM = {
                    // One 64-bit pattern, 0x3c80000000000000, which is the double 2^-55.
1165                    0x00000000, 0x3c800000
1166    };
1167
1168    public int[] pOne = {
                    // One 64-bit pattern, 0x3fb921fb54400000 (about 0.0981747, close to pi/32).
                    // NOTE(review): presumably P_1, the leading 32-bit part of the range-reduction constant
                    // in the SIN description -- confirm.
1169                    0x54400000, 0x3fb921fb
1170    };
1171
    /**
     * Emits the AMD64 instruction sequence that computes the sine of the double held in
     * {@code value} and leaves the result in the low lane of {@code dest}.
     *
     * <p>
     * The emitted code dispatches on the upper 32 bits of the argument's bit pattern:
     * <ul>
     * <li>main path: the argument is reduced by a multiple N of pi/32, a {@code cTable} entry is
     * selected with the low 6 bits of N, and the result is assembled from that entry and the
     * polynomial coefficient tables;</li>
     * <li>arguments whose masked upper word lies below the main-path window: {@code dest} keeps
     * the argument (multiplied by {@code allOnes} when the biased exponent field is zero);</li>
     * <li>infinities and NaNs: {@code dest} becomes 0.0 / 0.0;</li>
     * <li>remaining (large) arguments: an integer multi-word multiplication of the significand
     * with a window of {@code piInvTable} yields a reduced argument, after which the same
     * evaluation as on the main path is emitted a second time.</li>
     * </ul>
     *
     * <p>
     * The instruction order is a hand-scheduled interleaving; several conditional jumps consume
     * flags set by an earlier instruction, so statements must not be reordered casually.
     *
     * @param dest XMM register receiving the result; also used as a working register throughout
     * @param value XMM register holding the argument; copied to {@code dest} first if they differ
     * @param crb compilation result builder, used to resolve {@code stackTemp} and handed to
     *            {@code setCrb}
     * @param masm assembler that receives the instructions
     */
    public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        // Wrap the constant tables so they can be addressed through externalAddress(...).
        // NOTE(review): the second constructor argument is presumably the required alignment in
        // bytes - confirm against ArrayDataPointerConstant.
        ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
        ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
        ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
        ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
        ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
        ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
        ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
        ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
        ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
        ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
        ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8);
        ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
        ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
        ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8);
        ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8);
        ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8);
        ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);

        // Branch targets; bb15 is the common exit bound at the very end of the method.
        Label bb0 = new Label();
        Label bb1 = new Label();
        Label bb2 = new Label();
        Label bb4 = new Label();
        Label bb5 = new Label();
        Label bb6 = new Label();
        Label bb8 = new Label();
        Label bb9 = new Label();
        Label bb10 = new Label();
        Label bb11 = new Label();
        Label bb12 = new Label();
        Label bb13 = new Label();
        Label bb14 = new Label();
        Label bb15 = new Label();

        // General-purpose scratch registers. gpr3 is taken from rcxTemp; it is the register that
        // holds the shift amount before the count-less shll/shrl/shlq/shrq calls below.
        // NOTE(review): those forms presumably shift by CL - confirm against AMD64Assembler.
        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
        Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
        Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
        Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
        Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
        Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
        Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);

        // XMM scratch registers.
        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
        Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
        Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);

        AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);

        // Spill the argument so its upper word can be read back as an integer, and make dest
        // the working copy of the argument.
        setCrb(crb);
        masm.movsd(stackSlot, value);
        if (dest.encoding != value.encoding) {
            masm.movdqu(dest, value);
        }

        // gpr1 = upper 32 bits (sign, exponent, top mantissa bits) of the argument.
        masm.leaq(gpr1, stackSlot);
        masm.movl(gpr1, new AMD64Address(gpr1, 4));
        masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));                                // 0x6dc9c883,
                                                                                              // 0x40245f30
        // The value loaded into temp2 here is overwritten before it is read on every path.
        masm.movdq(temp2, externalAddress(shifterPtr));                                       // 0x00000000,
                                                                                              // 0x43380000

        // Range check on the upper word: mask with 0x7FFF0000 (drops the sign bit), subtract
        // 0x30300000 and compare unsigned against 0x10C50000. Anything outside that window
        // leaves the main path through bb0.
        masm.andl(gpr1, 2147418112);
        masm.subl(gpr1, 808452096);
        masm.cmpl(gpr1, 281346048);
        masm.jcc(ConditionFlag.Above, bb0);

        // Main path. temp1 = x * (32/pi); add 0.5 carrying the sign of x and truncate, so that
        // gpr4 = N (nearest integer) and temp1 = (double) N.
        masm.mulsd(temp1, dest);
        masm.movdqu(temp5, externalAddress(oneHalfPtr));                                      // 0x00000000,
                                                                                              // 0x3fe00000,
                                                                                              // 0x00000000,
                                                                                              // 0x3fe00000
        masm.movdq(temp4, externalAddress(signMaskPtr));                                      // 0x00000000,
                                                                                              // 0x80000000
        masm.pand(temp4, dest);
        masm.por(temp5, temp4);
        masm.addpd(temp1, temp5);
        masm.cvttsd2sil(gpr4, temp1);
        masm.cvtsi2sdl(temp1, gpr4);
        masm.movdqu(temp6, externalAddress(pTwoPtr));                                         // 0x1a600000,
                                                                                              // 0x3d90b461,
                                                                                              // 0x1a600000,
                                                                                              // 0x3d90b461
        // Same bit pattern as the pOne table, materialized as an immediate.
        masm.movq(gpr7, 0x3fb921fb54400000L);
        masm.movdq(temp3, gpr7);
        masm.movdqu(temp5, externalAddress(scFourPtr));                                       // 0xa556c734,
                                                                                              // 0x3ec71de3,
                                                                                              // 0x1a01a01a,
                                                                                              // 0x3efa01a0
        masm.pshufd(temp4, dest, 0x44);
        masm.mulsd(temp3, temp1);
        // Duplicate N into both lanes of temp1.
        if (masm.supports(CPUFeature.SSE3)) {
            masm.movddup(temp1, temp1);
        } else {
            masm.movlhps(temp1, temp1);
        }
        // Select the cTable entry from the low 6 bits of N; entries are addressed with a
        // 32-byte stride (shift by 5) and read at offsets 0, 16 and 24 below.
        masm.andl(gpr4, 63);
        masm.shll(gpr4, 5);
        masm.leaq(gpr1, externalAddress(cTablePtr));
        masm.addq(gpr1, gpr4);
        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
        masm.mulpd(temp6, temp1);
        masm.mulsd(temp1, externalAddress(pThreePtr));                                        // 0x2e037073,
                                                                                              // 0x3b63198a
        // Interleaved evaluation: subtraction of the N * P_i products from x, the polynomial
        // in the reduced argument, and the summation of the table terms.
        masm.subsd(temp4, temp3);
        masm.subsd(dest, temp3);
        if (masm.supports(CPUFeature.SSE3)) {
            masm.movddup(temp3, temp4);
        } else {
            masm.movdqu(temp3, temp4);
            masm.movlhps(temp3, temp3);
        }
        masm.subsd(temp4, temp6);
        masm.pshufd(dest, dest, 0x44);
        masm.pshufd(temp7, temp8, 0xE);
        masm.movdqu(temp2, temp8);
        masm.movdqu(temp9, temp7);
        masm.mulpd(temp5, dest);
        masm.subpd(dest, temp6);
        masm.mulsd(temp7, temp4);
        masm.subsd(temp3, temp4);
        masm.mulpd(temp5, dest);
        masm.mulpd(dest, dest);
        masm.subsd(temp3, temp6);
        masm.movdqu(temp6, externalAddress(scTwoPtr));                                        // 0x11111111,
                                                                                              // 0x3f811111,
                                                                                              // 0x55555555,
                                                                                              // 0x3fa55555
        masm.subsd(temp1, temp3);
        masm.movdq(temp3, new AMD64Address(gpr1, 24));
        masm.addsd(temp2, temp3);
        masm.subsd(temp7, temp2);
        masm.mulsd(temp2, temp4);
        masm.mulpd(temp6, dest);
        masm.mulsd(temp3, temp4);
        masm.mulpd(temp2, dest);
        masm.mulpd(dest, dest);
        masm.addpd(temp5, externalAddress(scThreePtr));                                       // 0x1a01a01a,
                                                                                              // 0xbf2a01a0,
                                                                                              // 0x16c16c17,
                                                                                              // 0xbf56c16c
        masm.mulsd(temp4, temp8);
        masm.addpd(temp6, externalAddress(scOnePtr));                                         // 0x55555555,
                                                                                              // 0xbfc55555,
                                                                                              // 0x00000000,
                                                                                              // 0xbfe00000
        masm.mulpd(temp5, dest);
        masm.movdqu(dest, temp3);
        masm.addsd(temp3, temp9);
        masm.mulpd(temp1, temp7);
        masm.movdqu(temp7, temp4);
        masm.addsd(temp4, temp3);
        masm.addpd(temp6, temp5);
        masm.subsd(temp9, temp3);
        masm.subsd(temp3, temp4);
        masm.addsd(temp1, new AMD64Address(gpr1, 16));
        masm.mulpd(temp6, temp2);
        masm.addsd(temp9, dest);
        masm.addsd(temp3, temp7);
        masm.addsd(temp1, temp9);
        masm.addsd(temp1, temp3);
        masm.addsd(temp1, temp6);
        masm.unpckhpd(temp6, temp6);
        masm.movdqu(dest, temp4);
        masm.addsd(temp1, temp6);
        masm.addsd(dest, temp1);
        masm.jmp(bb15);

        // Infinity or NaN argument: produce 0.0 / 0.0.
        masm.bind(bb14);
        masm.xorpd(temp1, temp1);
        masm.xorpd(dest, dest);
        masm.divsd(dest, temp1);
        masm.jmp(bb15);

        // Outside the main-path window. The flags of the cmpl above are still live: a signed
        // "greater" means the upper word is above the window (large argument, bb1); otherwise
        // the argument is below the window.
        masm.bind(bb0);
        masm.jcc(ConditionFlag.Greater, bb1);

        // 3325 == 0xCFD is what remains of (0 - 0x30300000) >>> 20, i.e. the biased exponent
        // field of the argument is zero.
        masm.shrl(gpr1, 20);
        masm.cmpl(gpr1, 3325);
        masm.jcc(ConditionFlag.NotEqual, bb2);

        masm.mulsd(dest, externalAddress(allOnesPtr));                                        // 0xffffffff,
                                                                                              // 0x3fefffff
        masm.jmp(bb15);

        // Tiny argument with a non-zero exponent field: dest is left unchanged (the result is
        // the argument itself). temp3 = (2^55 * x - x) * 2^-55 is computed but not copied
        // anywhere. NOTE(review): presumably evaluated only for its floating-point exception
        // side effects - confirm.
        masm.bind(bb2);
        masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr));                               // 0x00000000,
                                                                                              // 0x43600000
        masm.mulsd(temp3, dest);
        masm.subsd(temp3, dest);
        masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr));                              // 0x00000000,
                                                                                              // 0x3c800000
        masm.jmp(bb15);

        // Large argument. gpr3 = bits 48..63 of the argument; 0x7FF0 selects the exponent
        // field, and an all-ones exponent means infinity or NaN.
        masm.bind(bb1);
        masm.pextrw(gpr3, dest, 3);
        masm.andl(gpr3, 32752);
        masm.cmpl(gpr3, 32752);
        masm.jcc(ConditionFlag.Equal, bb14);

        // Turn the exponent into a 4-byte-aligned byte offset (mask 0xFFFC) and make gpr3 point
        // at the corresponding word of piInvTable.
        masm.subl(gpr3, 16224);
        masm.shrl(gpr3, 7);
        masm.andl(gpr3, 65532);
        masm.leaq(gpr10, externalAddress(piInvTablePtr));
        masm.addq(gpr3, gpr10);
        // Split the raw bits of the argument: gpr4 = low 32 bits of the mantissa, gpr1 = the
        // upper 20 mantissa bits with the implicit leading one set as bit 20.
        masm.movdq(gpr1, dest);
        masm.movl(gpr9, new AMD64Address(gpr3, 20));
        masm.movl(gpr7, new AMD64Address(gpr3, 24));
        masm.movl(gpr4, gpr1);
        masm.shrq(gpr1, 21);
        masm.orl(gpr1, Integer.MIN_VALUE);
        masm.shrl(gpr1, 11);
        // Partial products of the two significand halves with seven consecutive table words
        // (offsets 24 down to 0), accumulated with explicit carry propagation
        // (movl to take the low half, shrq 32 to take the carry, addq to accumulate).
        masm.movl(gpr8, gpr9);
        masm.imulq(gpr9, gpr4);
        masm.imulq(gpr8, gpr1);
        masm.imulq(gpr7, gpr1);
        masm.movl(gpr5, new AMD64Address(gpr3, 16));
        masm.movl(gpr6, new AMD64Address(gpr3, 12));
        masm.movl(gpr10, gpr9);
        masm.shrq(gpr9, 32);
        masm.addq(gpr8, gpr9);
        masm.addq(gpr10, gpr7);
        masm.movl(gpr7, gpr10);
        masm.shrq(gpr10, 32);
        masm.addq(gpr8, gpr10);
        masm.movl(gpr9, gpr5);
        masm.imulq(gpr5, gpr4);
        masm.imulq(gpr9, gpr1);
        masm.movl(gpr10, gpr6);
        masm.imulq(gpr6, gpr4);
        masm.movl(gpr2, gpr5);
        masm.shrq(gpr5, 32);
        masm.addq(gpr8, gpr2);
        masm.movl(gpr2, gpr8);
        masm.shrq(gpr8, 32);
        masm.addq(gpr9, gpr5);
        masm.addq(gpr9, gpr8);
        masm.shlq(gpr2, 32);
        masm.orq(gpr7, gpr2);
        masm.imulq(gpr10, gpr1);
        masm.movl(gpr8, new AMD64Address(gpr3, 8));
        masm.movl(gpr5, new AMD64Address(gpr3, 4));
        masm.movl(gpr2, gpr6);
        masm.shrq(gpr6, 32);
        masm.addq(gpr9, gpr2);
        masm.movl(gpr2, gpr9);
        masm.shrq(gpr9, 32);
        masm.addq(gpr10, gpr6);
        masm.addq(gpr10, gpr9);
        masm.movq(gpr6, gpr8);
        masm.imulq(gpr8, gpr4);
        masm.imulq(gpr6, gpr1);
        masm.movl(gpr9, gpr8);
        masm.shrq(gpr8, 32);
        masm.addq(gpr10, gpr9);
        masm.movl(gpr9, gpr10);
        masm.shrq(gpr10, 32);
        masm.addq(gpr6, gpr8);
        masm.addq(gpr6, gpr10);
        masm.movq(gpr8, gpr5);
        masm.imulq(gpr5, gpr4);
        masm.imulq(gpr8, gpr1);
        masm.shlq(gpr9, 32);
        masm.orq(gpr9, gpr2);
        masm.movl(gpr1, new AMD64Address(gpr3, 0));
        masm.movl(gpr10, gpr5);
        masm.shrq(gpr5, 32);
        masm.addq(gpr6, gpr10);
        masm.movl(gpr10, gpr6);
        masm.shrq(gpr6, 32);
        masm.addq(gpr8, gpr5);
        masm.addq(gpr8, gpr6);
        masm.imulq(gpr4, gpr1);
        // gpr2 = top 16 bits of the argument again. gpr3 becomes
        // 8 * (byte offset into piInvTable) + 19 - (unbiased exponent); gpr5 keeps the
        // argument's sign bit (0x8000 of the top word).
        masm.pextrw(gpr2, dest, 3);
        masm.leaq(gpr6, externalAddress(piInvTablePtr));
        masm.subq(gpr3, gpr6);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, 19);
        masm.movl(gpr5, 32768);
        masm.andl(gpr5, gpr2);
        masm.shrl(gpr2, 4);
        masm.andl(gpr2, 2047);
        masm.subl(gpr2, 1023);
        masm.subl(gpr3, gpr2);
        masm.addq(gpr8, gpr4);
        masm.movl(gpr4, gpr3);
        masm.addl(gpr4, 32);
        masm.cmpl(gpr3, 1);
        masm.jcc(ConditionFlag.Less, bb4);

        // gpr3 >= 1: shift the top product word by the computed amount and test bit 28 of its
        // low 29 bits (bb5 handles the case where that bit is set).
        masm.negl(gpr3);
        masm.addl(gpr3, 29);
        masm.shll(gpr8);
        masm.movl(gpr6, gpr8);
        masm.andl(gpr8, 536870911);
        masm.testl(gpr8, 268435456);
        masm.jcc(ConditionFlag.NotEqual, bb5);

        masm.shrl(gpr8);
        masm.movl(gpr2, 0);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);

        // Common continuation for bb4/bb5/bb13: the value to normalize is in gpr8 (most
        // significant), gpr9, gpr7.
        masm.bind(bb6);

        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.Equal, bb8);

        // Normalize: gpr10 = index of the highest set bit of gpr8, gpr3 = 29 - index. The
        // flags of this subl are also consumed by the jcc at bb10.
        masm.bind(bb9);
        masm.bsrq(gpr10, gpr8);
        masm.movl(gpr3, 29);
        masm.subl(gpr3, gpr10);
        masm.jcc(ConditionFlag.LessEqual, bb10);

        // gpr3 > 0: shift the multi-word value left by gpr3, refilling from the lower words
        // with a right shift by 64 - gpr3; gpr4 tracks the accumulated shift.
        masm.shlq(gpr8);
        masm.movq(gpr1, gpr9);
        masm.shlq(gpr9);
        masm.addl(gpr4, gpr3);
        masm.negl(gpr3);
        masm.addl(gpr3, 64);
        masm.shrq(gpr1);
        masm.shrq(gpr7);
        masm.orq(gpr8, gpr1);
        masm.orq(gpr9, gpr7);

        // Convert the two integer words to doubles, scale each by a power of two whose exponent
        // word is built from gpr4 (with sign bits from gpr5 and gpr2) and inserted with pinsrw,
        // then multiply by the two parts of piFour. The result is left in dest with a low-order
        // companion in temp6.
        masm.bind(bb11);
        masm.cvtsi2sdq(dest, gpr8);
        masm.shrq(gpr9, 1);
        masm.cvtsi2sdq(temp3, gpr9);
        masm.xorpd(temp4, temp4);
        masm.shll(gpr4, 4);
        masm.negl(gpr4);
        masm.addl(gpr4, 16368);
        masm.orl(gpr4, gpr5);
        masm.xorl(gpr4, gpr2);
        masm.pinsrw(temp4, gpr4, 3);
        masm.leaq(gpr1, externalAddress(piFourPtr));
        masm.movdqu(temp2, new AMD64Address(gpr1, 0));                                        // 0x40000000,
                                                                                              // 0x3fe921fb,
                                                                                              // 0x18469899,
                                                                                              // 0x3e64442d
        masm.xorpd(temp5, temp5);
        masm.subl(gpr4, 1008);
        masm.pinsrw(temp5, gpr4, 3);
        masm.mulsd(dest, temp4);
        masm.shll(gpr5, 16);
        masm.sarl(gpr5, 31);
        masm.mulsd(temp3, temp5);
        masm.movdqu(temp1, dest);
        masm.pshufd(temp6, temp2, 0xE);
        masm.mulsd(dest, temp2);
        masm.shrl(gpr6, 29);
        masm.addsd(temp1, temp3);
        masm.mulsd(temp3, temp2);
        masm.addl(gpr6, gpr5);
        masm.xorl(gpr6, gpr5);
        masm.mulsd(temp6, temp1);
        masm.movl(gpr1, gpr6);
        masm.addsd(temp6, temp3);
        masm.movdqu(temp2, dest);
        masm.addsd(dest, temp6);
        masm.subsd(temp2, dest);
        masm.addsd(temp6, temp2);

        // Second copy of the main-path evaluation, applied to the reduced value in dest. Here
        // the pi/32 leading part is loaded from pOne, the cTable index additionally includes
        // gpr1 << 3, and temp6 (the low-order companion from above) is subtracted from temp1.
        masm.bind(bb12);
        masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));                                // 0x6dc9c883,
                                                                                              // 0x40245f30
        masm.mulsd(temp1, dest);
        masm.movdq(temp5, externalAddress(oneHalfPtr));                                       // 0x00000000,
                                                                                              // 0x3fe00000,
                                                                                              // 0x00000000,
                                                                                              // 0x3fe00000
        masm.movdq(temp4, externalAddress(signMaskPtr));                                      // 0x00000000,
                                                                                              // 0x80000000
        masm.pand(temp4, dest);
        masm.por(temp5, temp4);
        masm.addpd(temp1, temp5);
        masm.cvttsd2sil(gpr4, temp1);
        masm.cvtsi2sdl(temp1, gpr4);
        masm.movdq(temp3, externalAddress(pOnePtr));                                          // 0x54400000,
                                                                                              // 0x3fb921fb
        masm.movdqu(temp2, externalAddress(pTwoPtr));                                         // 0x1a600000,
                                                                                              // 0x3d90b461,
                                                                                              // 0x1a600000,
                                                                                              // 0x3d90b461
        masm.mulsd(temp3, temp1);
        masm.unpcklpd(temp1, temp1);
        masm.shll(gpr1, 3);
        masm.addl(gpr4, 1865216);
        masm.movdqu(temp4, dest);
        masm.addl(gpr4, gpr1);
        masm.andl(gpr4, 63);
        masm.movdqu(temp5, externalAddress(scFourPtr));                                       // 0xa556c734,
                                                                                              // 0x3ec71de3,
                                                                                              // 0x1a01a01a,
                                                                                              // 0x3efa01a0
        masm.leaq(gpr1, externalAddress(cTablePtr));
        masm.shll(gpr4, 5);
        masm.addq(gpr1, gpr4);
        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
        masm.mulpd(temp2, temp1);
        masm.subsd(dest, temp3);
        masm.mulsd(temp1, externalAddress(pThreePtr));                                        // 0x2e037073,
                                                                                              // 0x3b63198a
        masm.subsd(temp4, temp3);
        masm.unpcklpd(dest, dest);
        masm.movdqu(temp3, temp4);
        masm.subsd(temp4, temp2);
        masm.mulpd(temp5, dest);
        masm.subpd(dest, temp2);
        masm.pshufd(temp7, temp8, 0xE);
        masm.movdqu(temp9, temp7);
        masm.mulsd(temp7, temp4);
        masm.subsd(temp3, temp4);
        masm.mulpd(temp5, dest);
        masm.mulpd(dest, dest);
        masm.subsd(temp3, temp2);
        masm.movdqu(temp2, temp8);
        masm.subsd(temp1, temp3);
        masm.movdq(temp3, new AMD64Address(gpr1, 24));
        masm.addsd(temp2, temp3);
        masm.subsd(temp7, temp2);
        masm.subsd(temp1, temp6);
        masm.movdqu(temp6, externalAddress(scTwoPtr));                                        // 0x11111111,
                                                                                              // 0x3f811111,
                                                                                              // 0x55555555,
                                                                                              // 0x3fa55555
        masm.mulsd(temp2, temp4);
        masm.mulpd(temp6, dest);
        masm.mulsd(temp3, temp4);
        masm.mulpd(temp2, dest);
        masm.mulpd(dest, dest);
        masm.addpd(temp5, externalAddress(scThreePtr));                                       // 0x1a01a01a,
                                                                                              // 0xbf2a01a0,
                                                                                              // 0x16c16c17,
                                                                                              // 0xbf56c16c
        masm.mulsd(temp4, temp8);
        masm.addpd(temp6, externalAddress(scOnePtr));                                         // 0x55555555,
                                                                                              // 0xbfc55555,
                                                                                              // 0x00000000,
                                                                                              // 0xbfe00000
        masm.mulpd(temp5, dest);
        masm.movdqu(dest, temp3);
        masm.addsd(temp3, temp9);
        masm.mulpd(temp1, temp7);
        masm.movdqu(temp7, temp4);
        masm.addsd(temp4, temp3);
        masm.addpd(temp6, temp5);
        masm.subsd(temp9, temp3);
        masm.subsd(temp3, temp4);
        masm.addsd(temp1, new AMD64Address(gpr1, 16));
        masm.mulpd(temp6, temp2);
        masm.addsd(temp9, dest);
        masm.addsd(temp3, temp7);
        masm.addsd(temp1, temp9);
        masm.addsd(temp1, temp3);
        masm.addsd(temp1, temp6);
        masm.unpckhpd(temp6, temp6);
        masm.movdqu(dest, temp4);
        masm.addsd(temp1, temp6);
        masm.addsd(dest, temp1);
        masm.jmp(bb15);

        // The most significant word is zero: move the lower words up by 64 bits (gpr4 += 64
        // records the shift) and retry the normalization, at most twice.
        masm.bind(bb8);
        masm.addl(gpr4, 64);
        masm.movq(gpr8, gpr9);
        masm.movq(gpr9, gpr7);
        masm.movl(gpr7, 0);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.NotEqual, bb9);

        masm.addl(gpr4, 64);
        masm.movq(gpr8, gpr9);
        masm.movq(gpr9, gpr7);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.NotEqual, bb9);

        // Every word is zero: continue with a reduced value of 0 and a zero companion.
        masm.xorpd(dest, dest);
        masm.xorpd(temp6, temp6);
        masm.jmp(bb12);

        // Reached with the flags of "gpr3 = 29 - highest bit index" from bb9: zero means no
        // shift is needed; negative means the value is shifted right by -gpr3, with the bits
        // leaving gpr8 moved into the top of gpr9.
        masm.bind(bb10);
        masm.jcc(ConditionFlag.Equal, bb11);

        masm.negl(gpr3);
        masm.shrq(gpr9);
        masm.movq(gpr1, gpr8);
        masm.shrq(gpr8);
        masm.subl(gpr4, gpr3);
        masm.negl(gpr3);
        masm.addl(gpr3, 64);
        masm.shlq(gpr1);
        masm.orq(gpr9, gpr1);
        masm.jmp(bb11);

        // gpr3 < 1: combine gpr8 and gpr10 into one 64-bit word, shift it left by -gpr3 and
        // test bit 31 of the result (bb13 handles the case where that bit is set).
        masm.bind(bb4);
        masm.negl(gpr3);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);
        masm.shlq(gpr8);
        masm.movq(gpr6, gpr8);
        masm.testl(gpr8, Integer.MIN_VALUE);
        masm.jcc(ConditionFlag.NotEqual, bb13);

        masm.shrl(gpr8);
        masm.movl(gpr2, 0);
        masm.shrq(gpr6, 3);
        masm.jmp(bb6);

        // Tested bit was set (gpr3 >= 1 case): replace the three-word value by
        // (shifted constant in gpr2) - value using a subq/sbbq/sbbq borrow chain, and record
        // 0x8000 in gpr2, which is later XORed into the exponent word at bb11.
        // NOTE(review): presumably this folds the value around the nearest multiple and flips
        // the sign - confirm.
        masm.bind(bb5);
        masm.shrl(gpr8);
        masm.movl(gpr2, 536870912);
        masm.shrl(gpr2);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);
        masm.shlq(gpr2, 32);
        masm.addl(gpr6, 536870912);
        masm.movl(gpr3, 0);
        masm.movl(gpr10, 0);
        masm.subq(gpr3, gpr7);
        masm.sbbq(gpr10, gpr9);
        masm.sbbq(gpr2, gpr8);
        masm.movq(gpr7, gpr3);
        masm.movq(gpr9, gpr10);
        masm.movq(gpr8, gpr2);
        masm.movl(gpr2, 32768);
        masm.jmp(bb6);

        // Tested bit was set (gpr3 < 1 case): same subtraction chain as bb5, starting from
        // 0x100000000 shifted right by the amount in gpr3.
        masm.bind(bb13);
        masm.shrl(gpr8);
        masm.movq(gpr2, 0x100000000L);
        masm.shrq(gpr2);
        masm.movl(gpr3, 0);
        masm.movl(gpr10, 0);
        masm.subq(gpr3, gpr7);
        masm.sbbq(gpr10, gpr9);
        masm.sbbq(gpr2, gpr8);
        masm.movq(gpr7, gpr3);
        masm.movq(gpr9, gpr10);
        masm.movq(gpr8, gpr2);
        masm.movl(gpr2, 32768);
        masm.shrq(gpr6, 3);
        masm.addl(gpr6, 536870912);
        masm.jmp(bb6);

        // Common exit of all paths; the result is in dest.
        masm.bind(bb15);
    }
1727
1728    /*
1729     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
1730     * Source Code
1731     *
     * ALGORITHM DESCRIPTION - COS()
     * -----------------------------
1733     *
1734     * 1. RANGE REDUCTION
1735     *
1736     * We perform an initial range reduction from X to r with
1737     *
1738     * X =~= N * pi/32 + r
1739     *
1740     * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this,
1741     * the range reduction is insufficiently accurate. For extremely small inputs, denormalization
1742     * can occur internally, impacting performance. This means that the main path is actually only
1743     * taken for 2^-252 <= |X| < 90112.
1744     *
1745     * To avoid branches, we perform the range reduction to full accuracy each time.
1746     *
1747     * X - N * (P_1 + P_2 + P_3)
1748     *
1749     * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit
     * number. Together, these approximate pi/32 well enough for all cases in the restricted range.
1751     *
1752     * The main reduction sequence is:
1753     *
     *   y = 32/pi * x
     *   N = integer(y)
     * (computed by adding and subtracting off SHIFTER)
     *
     *   m_1 = N * P_1
     *   m_2 = N * P_2
     *   r_1 = x - m_1
     *   r   = r_1 - m_2
     * (this r can be used for most of the calculation)
     *
     *   c_1 = r_1 - r
     *   m_3 = N * P_3
     *   c_2 = c_1 - m_2
     *   c   = c_2 - m_3
1760     *
1761     * 2. MAIN ALGORITHM
1762     *
     * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored
     * values are:
     *
     *   sigma        closest power of 2 to cos(B)
     *   C_hl         53-bit cos(B) - sigma
     *   S_hi + S_lo  2 * 53-bit sin(B)
1766     *
1767     * The computation is organized as follows:
1768     *
1769     * sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma) + sin(B) * [cos(r + c) - 1] +
1770     * cos(B) * [sin(r + c) - r]
1771     *
1772     * which is approximately:
1773     *
1774     * [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c] + (C_hl + sigma) *
1775     * [(sin(r) - r) + c]
1776     *
1777     * and this is what is actually computed. We separate this sum into four parts:
1778     *
1779     * hi + med + pols + corr
1780     *
1781     * where
1782     *
     *   hi   = S_hi + sigma * r
     *   med  = C_hl * r
     *   pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
     *   corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
1785     *
1786     * 3. POLYNOMIAL
1787     *
1788     * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely,
1789     * since it is quite small, so we exploit parallelism to the fullest.
1790     *
     *   psc4       = SC_4 * r_1
     *   msc4       = psc4 * r
     *   r2         = r * r
     *   msc2       = SC_2 * r2
     *   r4         = r2 * r2
     *   psc3       = SC_3 + msc4
     *   psc1       = SC_1 + msc2
     *   msc3       = r4 * psc3
     *   sincospols = psc1 + msc3
     *   pols       = sincospols * <S_hi * r^2 | (C_hl + sigma) * r^3>
1794     *
1795     * 4. CORRECTION TERM
1796     *
1797     * This is where the "c" component of the range reduction is taken into account; recall that
1798     * just "r" is used for most of the calculation.
1799     *
     *   -c   = m_3 - c_2
     *   -d   = S_hi * r - (C_hl + sigma)
     *   corr = -c * -d + S_lo
1801     *
1802     * 5. COMPENSATED SUMMATIONS
1803     *
1804     * The two successive compensated summations add up the high and medium parts, leaving just the
1805     * low parts to add up at the end.
1806     *
     *   rs      = sigma * r
     *   res_int = S_hi + rs
     *   k_0     = S_hi - res_int
     *   k_2     = k_0 + rs
     *   med     = C_hl * r
     *   res_hi  = res_int + med
     *   k_1     = res_int - res_hi
     *   k_3     = k_1 + med
1809     *
1810     * 6. FINAL SUMMATION
1811     *
1812     * We now add up all the small parts:
1813     *
1814     * res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
1815     *
1816     * Now the overall result is just:
1817     *
1818     * res_hi + res_lo
1819     *
1820     * 7. SMALL ARGUMENTS
1821     *
1822     * Inputs with |X| < 2^-252 are treated specially as 1 - |x|.
1823     *
     * Special cases:
     *
     *   cos(NaN) = quiet NaN, and raise invalid exception
     *   cos(INF) = NaN and raise invalid exception
     *   cos(0)   = 1
1826     *
1827     */
1828
    /**
     * One IEEE 754 double stored as {low 32-bit word, high 32-bit word}: 0x3ff0000000000000,
     * i.e. 1.0. Wrapped as a data constant by {@code cosIntrinsic}.
     */
    public int[] one = {
                    0x00000000, 0x3ff00000
    };
1832
1833    public void cosIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            /*
             * Emits the cos() instruction sequence into masm. The argument arrives in value; every exit
             * path leaves the result in dest and ends at bb13.
             *
             * Control flow of the emitted code, by label:
             *   (fall-through) main path for arguments inside the exponent window checked below
             *   bb0   argument outside that window; tiny arguments fall through to 1.0 - |x|
             *   bb1   large arguments: Inf/NaN branch to bb14, all others are reduced with piInvTable
             *   bb3, bb4, bb12, bb5..bb10   steps of the large-argument reduction
             *   bb11  evaluation of the reduced argument (appears to mirror the main path)
             *   bb14  produces NaN as 0.0 / 0.0
             *   bb13  common exit
             *
             * The instruction order is a hand-scheduled sequence; statements must not be reordered.
             */
            // Constant tables referenced by the emitted code through externalAddress(...).
1834        ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
1835        ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
1836        ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
1837        ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
1838        ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
1839        ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
1840        ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
1841        ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
1842        ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
1843        ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
1844        ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
1845        ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
1846        ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);
1847        ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);
1848
            // Branch targets of the emitted code.
1849        Label bb0 = new Label();
1850        Label bb1 = new Label();
1851        Label bb3 = new Label();
1852        Label bb4 = new Label();
1853        Label bb5 = new Label();
1854        Label bb6 = new Label();
1855        Label bb7 = new Label();
1856        Label bb8 = new Label();
1857        Label bb9 = new Label();
1858        Label bb10 = new Label();
1859        Label bb11 = new Label();
1860        Label bb12 = new Label();
1861        Label bb13 = new Label();
1862        Label bb14 = new Label();
1863
            // Scratch general-purpose registers taken from this op's temp operands. gpr3 is backed by
            // rcxTemp. NOTE(review): presumably because the one-operand shift forms used below
            // (shll/shrl/shlq/shrq with a single register) take their count from CL - confirm.
1864        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
1865        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
1866        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
1867        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
1868        Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
1869        Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
1870        Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
1871        Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
1872        Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
1873        Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
1874
            // Scratch XMM registers taken from this op's temp operands.
1875        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
1876        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
1877        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
1878        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
1879        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
1880        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
1881        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
1882        Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
1883        Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
1884
            // Memory slot through which the raw bits of the argument are read back.
1885        AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
1886
1887        setCrb(crb);
            // Spill the argument and, when dest is a different register, copy it into dest, which is the
            // working register from here on.
1888        masm.movdq(stackSlot, value);
1889        if (dest.encoding != value.encoding) {
1890            masm.movdqu(dest, value);
1891        }
1892
            // gpr1 = the 32-bit word at offset 4 of the spilled double (sign, exponent and upper
            // mantissa bits).
1893        masm.leaq(gpr1, stackSlot);
1894        masm.movl(gpr1, new AMD64Address(gpr1, 4));
1895        masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));                              // 0x6dc9c883,
1896                                                                                            // 0x40245f30
1897
            // Range check: keep bits 16..30 (0x7fff0000), rebase by 0x30300000 and compare unsigned
            // against 0x10c50000. Anything outside that window leaves the main path through bb0.
1898        masm.andl(gpr1, 2147418112);
1899        masm.subl(gpr1, 808452096);
1900        masm.cmpl(gpr1, 281346048);
1901        masm.jcc(ConditionFlag.Above, bb0);
1902
            // Main path. temp1 = x * piThirtyTwoInv; adding 0.5 with the sign of x and truncating rounds
            // it to the nearest integer, which is kept in gpr4 and, converted back to double, in temp1.
1903        masm.mulsd(temp1, dest);
1904        masm.movdqu(temp5, externalAddress(oneHalfPtr));                                    // 0x00000000,
1905                                                                                            // 0x3fe00000,
1906                                                                                            // 0x00000000,
1907                                                                                            // 0x3fe00000
1908        masm.movdq(temp4, externalAddress(signMaskPtr));                                    // 0x00000000,
1909                                                                                            // 0x80000000
1910        masm.pand(temp4, dest);
1911        masm.por(temp5, temp4);
1912        masm.addpd(temp1, temp5);
1913        masm.cvttsd2sil(gpr4, temp1);
1914        masm.cvtsi2sdl(temp1, gpr4);
1915        masm.movdqu(temp2, externalAddress(pTwoPtr));                                       // 0x1a600000,
1916                                                                                            // 0x3d90b461,
1917                                                                                            // 0x1a600000,
1918                                                                                            // 0x3d90b461
1919        masm.movdq(temp3, externalAddress(pOnePtr));                                        // 0x54400000,
1920                                                                                            // 0x3fb921fb
1921        masm.mulsd(temp3, temp1);
1922        masm.unpcklpd(temp1, temp1);
            // Table index = (gpr4 + 1865232) & 63; each cTable entry is 32 bytes (shift by 5). gpr1
            // ends up pointing at the selected entry. Note that 1865232 is congruent to 16 modulo 64.
1923        masm.addq(gpr4, 1865232);
1924        masm.movdqu(temp4, dest);
1925        masm.andq(gpr4, 63);
1926        masm.movdqu(temp5, externalAddress(scFourPtr));                                     // 0xa556c734,
1927                                                                                            // 0x3ec71de3,
1928                                                                                            // 0x1a01a01a,
1929                                                                                            // 0x3efa01a0
1930        masm.leaq(gpr1, externalAddress(cTablePtr));
1931        masm.shlq(gpr4, 5);
1932        masm.addq(gpr1, gpr4);
1933        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
            // From here to the jump: subtract the rounded multiple using the pOne/pTwo/pThree pieces,
            // evaluate the scOne..scFour polynomials and combine them with the table entry at gpr1.
1934        masm.mulpd(temp2, temp1);
1935        masm.subsd(dest, temp3);
1936        masm.mulsd(temp1, externalAddress(pThreePtr));                                      // 0x2e037073,
1937                                                                                            // 0x3b63198a
1938        masm.subsd(temp4, temp3);
1939        masm.unpcklpd(dest, dest);
1940        masm.movdqu(temp3, temp4);
1941        masm.subsd(temp4, temp2);
1942        masm.mulpd(temp5, dest);
1943        masm.subpd(dest, temp2);
1944        masm.pshufd(temp7, temp8, 0xE);
1945        masm.movdqu(temp6, externalAddress(scTwoPtr));                                      // 0x11111111,
1946                                                                                            // 0x3f811111,
1947                                                                                            // 0x55555555,
1948                                                                                            // 0x3fa55555
1949        masm.mulsd(temp7, temp4);
1950        masm.subsd(temp3, temp4);
1951        masm.mulpd(temp5, dest);
1952        masm.mulpd(dest, dest);
1953        masm.subsd(temp3, temp2);
1954        masm.movdqu(temp2, temp8);
1955        masm.subsd(temp1, temp3);
1956        masm.movdq(temp3, new AMD64Address(gpr1, 24));
1957        masm.addsd(temp2, temp3);
1958        masm.subsd(temp7, temp2);
1959        masm.mulsd(temp2, temp4);
1960        masm.mulpd(temp6, dest);
1961        masm.mulsd(temp3, temp4);
1962        masm.mulpd(temp2, dest);
1963        masm.mulpd(dest, dest);
1964        masm.addpd(temp5, externalAddress(scThreePtr));                                     // 0x1a01a01a,
1965                                                                                            // 0xbf2a01a0,
1966                                                                                            // 0x16c16c17,
1967                                                                                            // 0xbf56c16c
1968        masm.mulsd(temp4, temp8);
1969        masm.pshufd(temp9, temp8, 0xE);
1970        masm.addpd(temp6, externalAddress(scOnePtr));                                       // 0x55555555,
1971                                                                                            // 0xbfc55555,
1972                                                                                            // 0x00000000,
1973                                                                                            // 0xbfe00000
1974        masm.mulpd(temp5, dest);
1975        masm.movdqu(dest, temp3);
1976        masm.addsd(temp3, temp9);
1977        masm.mulpd(temp1, temp7);
1978        masm.movdqu(temp7, temp4);
1979        masm.addsd(temp4, temp3);
1980        masm.addpd(temp6, temp5);
1981        masm.subsd(temp9, temp3);
1982        masm.subsd(temp3, temp4);
1983        masm.addsd(temp1, new AMD64Address(gpr1, 16));
1984        masm.mulpd(temp6, temp2);
1985        masm.addsd(dest, temp9);
1986        masm.addsd(temp3, temp7);
1987        masm.addsd(dest, temp1);
1988        masm.addsd(dest, temp3);
1989        masm.addsd(dest, temp6);
1990        masm.unpckhpd(temp6, temp6);
1991        masm.addsd(dest, temp6);
1992        masm.addsd(dest, temp4);
1993        masm.jmp(bb13);
1994
            // bb14: reached for Inf/NaN inputs; 0.0 / 0.0 yields NaN in dest.
1995        masm.bind(bb14);
1996        masm.xorpd(temp1, temp1);
1997        masm.xorpd(dest, dest);
1998        masm.divsd(dest, temp1);
1999        masm.jmp(bb13);
2000
            // bb0: the flags are still those of the cmpl in the range check. Signed-greater means the
            // argument lies above the window (bb1); otherwise it lies below it.
2001        masm.bind(bb0);
2002        masm.jcc(ConditionFlag.Greater, bb1);
2003
            // Small argument: clear the sign bit (top 16-bit word & 0x7fff) and return 1.0 - |x|.
2004        masm.pextrw(gpr1, dest, 3);
2005        masm.andl(gpr1, 32767);
2006        masm.pinsrw(dest, gpr1, 3);
2007        masm.movdq(temp1, externalAddress(onePtr));                                         // 0x00000000,
2008                                                                                            // 0x3ff00000
2009        masm.subsd(temp1, dest);
2010        masm.movdqu(dest, temp1);
2011        masm.jmp(bb13);
2012
            // bb1: isolate the exponent field (0x7ff0 in the top 16-bit word); all ones means Inf or NaN.
2013        masm.bind(bb1);
2014        masm.pextrw(gpr3, dest, 3);
2015        masm.andl(gpr3, 32752);
2016        masm.cmpl(gpr3, 32752);
2017        masm.jcc(ConditionFlag.Equal, bb14);
2018
            // Large argument. Turn the exponent into a 4-byte-aligned offset into piInvTable (gpr3 then
            // holds the entry address) and split the raw bits of x: gpr4 = low 32 bits, gpr1 = bits
            // 32..52 with bit 52 (the implicit leading one) forced on.
2019        masm.subl(gpr3, 16224);
2020        masm.shrl(gpr3, 7);
2021        masm.andl(gpr3, 65532);
2022        masm.leaq(gpr10, externalAddress(piInvTablePtr));
2023        masm.addq(gpr3, gpr10);
2024        masm.movdq(gpr1, dest);
2025        masm.movl(gpr9, new AMD64Address(gpr3, 20));
2026        masm.movl(gpr7, new AMD64Address(gpr3, 24));
2027        masm.movl(gpr4, gpr1);
2028        masm.shrq(gpr1, 21);
2029        masm.orl(gpr1, Integer.MIN_VALUE);
2030        masm.shrl(gpr1, 11);
            // Multiply both halves by successive 32-bit table words (offsets 24 down to 0), folding the
            // upper 32 bits of each partial sum into the next one (movl keeps the low half, shrq 32
            // extracts the carry part).
2031        masm.movl(gpr8, gpr9);
2032        masm.imulq(gpr9, gpr4);
2033        masm.imulq(gpr8, gpr1);
2034        masm.imulq(gpr7, gpr1);
2035        masm.movl(gpr5, new AMD64Address(gpr3, 16));
2036        masm.movl(gpr6, new AMD64Address(gpr3, 12));
2037        masm.movl(gpr10, gpr9);
2038        masm.shrq(gpr9, 32);
2039        masm.addq(gpr8, gpr9);
2040        masm.addq(gpr10, gpr7);
2041        masm.movl(gpr7, gpr10);
2042        masm.shrq(gpr10, 32);
2043        masm.addq(gpr8, gpr10);
2044        masm.movl(gpr9, gpr5);
2045        masm.imulq(gpr5, gpr4);
2046        masm.imulq(gpr9, gpr1);
2047        masm.movl(gpr10, gpr6);
2048        masm.imulq(gpr6, gpr4);
2049        masm.movl(gpr2, gpr5);
2050        masm.shrq(gpr5, 32);
2051        masm.addq(gpr8, gpr2);
2052        masm.movl(gpr2, gpr8);
2053        masm.shrq(gpr8, 32);
2054        masm.addq(gpr9, gpr5);
2055        masm.addq(gpr9, gpr8);
2056        masm.shlq(gpr2, 32);
2057        masm.orq(gpr7, gpr2);
2058        masm.imulq(gpr10, gpr1);
2059        masm.movl(gpr8, new AMD64Address(gpr3, 8));
2060        masm.movl(gpr5, new AMD64Address(gpr3, 4));
2061        masm.movl(gpr2, gpr6);
2062        masm.shrq(gpr6, 32);
2063        masm.addq(gpr9, gpr2);
2064        masm.movl(gpr2, gpr9);
2065        masm.shrq(gpr9, 32);
2066        masm.addq(gpr10, gpr6);
2067        masm.addq(gpr10, gpr9);
2068        masm.movq(gpr6, gpr8);
2069        masm.imulq(gpr8, gpr4);
2070        masm.imulq(gpr6, gpr1);
2071        masm.movl(gpr9, gpr8);
2072        masm.shrq(gpr8, 32);
2073        masm.addq(gpr10, gpr9);
2074        masm.movl(gpr9, gpr10);
2075        masm.shrq(gpr10, 32);
2076        masm.addq(gpr6, gpr8);
2077        masm.addq(gpr6, gpr10);
2078        masm.movq(gpr8, gpr5);
2079        masm.imulq(gpr5, gpr4);
2080        masm.imulq(gpr8, gpr1);
2081        masm.shlq(gpr9, 32);
2082        masm.orq(gpr9, gpr2);
2083        masm.movl(gpr1, new AMD64Address(gpr3, 0));
2084        masm.movl(gpr10, gpr5);
2085        masm.shrq(gpr5, 32);
2086        masm.addq(gpr6, gpr10);
2087        masm.movl(gpr10, gpr6);
2088        masm.shrq(gpr6, 32);
2089        masm.addq(gpr8, gpr5);
2090        masm.addq(gpr8, gpr6);
2091        masm.imulq(gpr4, gpr1);
            // gpr3 = (table offset * 8 + 19) - unbiased exponent of x; gpr5 keeps the sign bit (0x8000
            // of the top word); gpr4 = gpr3 + 32.
2092        masm.pextrw(gpr2, dest, 3);
2093        masm.leaq(gpr6, externalAddress(piInvTablePtr));
2094        masm.subq(gpr3, gpr6);
2095        masm.addl(gpr3, gpr3);
2096        masm.addl(gpr3, gpr3);
2097        masm.addl(gpr3, gpr3);
2098        masm.addl(gpr3, 19);
2099        masm.movl(gpr5, 32768);
2100        masm.andl(gpr5, gpr2);
2101        masm.shrl(gpr2, 4);
2102        masm.andl(gpr2, 2047);
2103        masm.subl(gpr2, 1023);
2104        masm.subl(gpr3, gpr2);
2105        masm.addq(gpr8, gpr4);
2106        masm.movl(gpr4, gpr3);
2107        masm.addl(gpr4, 32);
2108        masm.cmpl(gpr3, 1);
2109        masm.jcc(ConditionFlag.Less, bb3);
2110
            // gpr3 >= 1: gpr6 saves the shifted product word, gpr8 keeps its low 29 bits; bit 28
            // (268435456) selects the negating variant at bb4.
2111        masm.negl(gpr3);
2112        masm.addl(gpr3, 29);
2113        masm.shll(gpr8);
2114        masm.movl(gpr6, gpr8);
2115        masm.andl(gpr8, 536870911);
2116        masm.testl(gpr8, 268435456);
2117        masm.jcc(ConditionFlag.NotEqual, bb4);
2118
2119        masm.shrl(gpr8);
2120        masm.movl(gpr2, 0);
2121        masm.shlq(gpr8, 32);
2122        masm.orq(gpr8, gpr10);
2123
2124        masm.bind(bb5);
2125
            // bb6: if the top word of the gpr8:gpr9:gpr7 chain is zero, bb7 shifts the chain up.
2126        masm.bind(bb6);
2127        masm.cmpq(gpr8, 0);
2128        masm.jcc(ConditionFlag.Equal, bb7);
2129
            // bb8: bsrq gives the index of the highest set bit of gpr8; gpr3 = 29 - index. Positive means
            // shift the chain left (below); zero or negative is handled at bb9.
2130        masm.bind(bb8);
2131        masm.bsrq(gpr10, gpr8);
2132        masm.movl(gpr3, 29);
2133        masm.subl(gpr3, gpr10);
2134        masm.jcc(ConditionFlag.LessEqual, bb9);
2135
2136        masm.shlq(gpr8);
2137        masm.movq(gpr1, gpr9);
2138        masm.shlq(gpr9);
2139        masm.addl(gpr4, gpr3);
2140        masm.negl(gpr3);
2141        masm.addl(gpr3, 64);
2142        masm.shrq(gpr1);
2143        masm.shrq(gpr7);
2144        masm.orq(gpr8, gpr1);
2145        masm.orq(gpr9, gpr7);
2146
            // bb10: convert the two integer words to double (dest, temp3), build two scale factors by
            // inserting exponent words derived from gpr4 (with sign bits from gpr5 and gpr2) via pinsrw,
            // and multiply by the piFour constants; gpr1 receives an index adjustment used at bb11.
2147        masm.bind(bb10);
2148        masm.cvtsi2sdq(dest, gpr8);
2149        masm.shrq(gpr9, 1);
2150        masm.cvtsi2sdq(temp3, gpr9);
2151        masm.xorpd(temp4, temp4);
2152        masm.shll(gpr4, 4);
2153        masm.negl(gpr4);
2154        masm.addl(gpr4, 16368);
2155        masm.orl(gpr4, gpr5);
2156        masm.xorl(gpr4, gpr2);
2157        masm.pinsrw(temp4, gpr4, 3);
2158        masm.leaq(gpr2, externalAddress(piFourPtr));
2159        masm.movdqu(temp2, new AMD64Address(gpr2, 0));                                      // 0x40000000,
2160                                                                                            // 0x3fe921fb,
2161                                                                                            // 0x18469899,
2162                                                                                            // 0x3e64442d
2163        masm.xorpd(temp5, temp5);
2164        masm.subl(gpr4, 1008);
2165        masm.pinsrw(temp5, gpr4, 3);
2166        masm.mulsd(dest, temp4);
2167        masm.shll(gpr5, 16);
2168        masm.sarl(gpr5, 31);
2169        masm.mulsd(temp3, temp5);
2170        masm.movdqu(temp1, dest);
2171        masm.mulsd(dest, temp2);
2172        masm.pshufd(temp6, temp2, 0xE);
2173        masm.shrl(gpr6, 29);
2174        masm.addsd(temp1, temp3);
2175        masm.mulsd(temp3, temp2);
2176        masm.addl(gpr6, gpr5);
2177        masm.xorl(gpr6, gpr5);
2178        masm.mulsd(temp6, temp1);
2179        masm.movl(gpr1, gpr6);
2180        masm.addsd(temp6, temp3);
2181        masm.movdqu(temp2, dest);
2182        masm.addsd(dest, temp6);
2183        masm.subsd(temp2, dest);
2184        masm.addsd(temp6, temp2);
2185
            // bb11: evaluate on the reduced argument in dest, with temp6 as an extra low-order term
            // (it is subtracted from temp1 below). Same constants as the main path; the table index
            // additionally includes gpr1 * 8.
2186        masm.bind(bb11);
2187        masm.movq(temp1, externalAddress(piThirtyTwoInvPtr));                               // 0x6dc9c883,
2188                                                                                            // 0x40245f30
2189        masm.mulsd(temp1, dest);
2190        masm.movdq(temp5, externalAddress(oneHalfPtr));                                     // 0x00000000,
2191                                                                                            // 0x3fe00000,
2192                                                                                            // 0x00000000,
2193                                                                                            // 0x3fe00000
2194        masm.movdq(temp4, externalAddress(signMaskPtr));                                    // 0x00000000,
2195                                                                                            // 0x80000000
2196        masm.pand(temp4, dest);
2197        masm.por(temp5, temp4);
2198        masm.addpd(temp1, temp5);
2199        masm.cvttsd2siq(gpr4, temp1);
2200        masm.cvtsi2sdq(temp1, gpr4);
2201        masm.movdq(temp3, externalAddress(pOnePtr));                                        // 0x54400000,
2202                                                                                            // 0x3fb921fb
2203        masm.movdqu(temp2, externalAddress(pTwoPtr));                                       // 0x1a600000,
2204                                                                                            // 0x3d90b461,
2205                                                                                            // 0x1a600000,
2206                                                                                            // 0x3d90b461
2207        masm.mulsd(temp3, temp1);
2208        masm.unpcklpd(temp1, temp1);
2209        masm.shll(gpr1, 3);
2210        masm.addl(gpr4, 1865232);
2211        masm.movdqu(temp4, dest);
2212        masm.addl(gpr4, gpr1);
2213        masm.andl(gpr4, 63);
2214        masm.movdqu(temp5, externalAddress(scFourPtr));                                     // 0xa556c734,
2215                                                                                            // 0x3ec71de3,
2216                                                                                            // 0x1a01a01a,
2217                                                                                            // 0x3efa01a0
2218        masm.leaq(gpr1, externalAddress(cTablePtr));
2219        masm.shll(gpr4, 5);
2220        masm.addq(gpr1, gpr4);
2221        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
2222        masm.mulpd(temp2, temp1);
2223        masm.subsd(dest, temp3);
2224        masm.mulsd(temp1, externalAddress(pThreePtr));                                      // 0x2e037073,
2225                                                                                            // 0x3b63198a
2226        masm.subsd(temp4, temp3);
2227        masm.unpcklpd(dest, dest);
2228        masm.movdqu(temp3, temp4);
2229        masm.subsd(temp4, temp2);
2230        masm.mulpd(temp5, dest);
2231        masm.pshufd(temp7, temp8, 0xE);
2232        masm.movdqu(temp9, temp7);
2233        masm.subpd(dest, temp2);
2234        masm.mulsd(temp7, temp4);
2235        masm.subsd(temp3, temp4);
2236        masm.mulpd(temp5, dest);
2237        masm.mulpd(dest, dest);
2238        masm.subsd(temp3, temp2);
2239        masm.movdqu(temp2, temp8);
2240        masm.subsd(temp1, temp3);
2241        masm.movdq(temp3, new AMD64Address(gpr1, 24));
2242        masm.addsd(temp2, temp3);
2243        masm.subsd(temp7, temp2);
2244        masm.subsd(temp1, temp6);
2245        masm.movdqu(temp6, externalAddress(scTwoPtr));                                      // 0x11111111,
2246                                                                                            // 0x3f811111,
2247                                                                                            // 0x55555555,
2248                                                                                            // 0x3fa55555
2249        masm.mulsd(temp2, temp4);
2250        masm.mulpd(temp6, dest);
2251        masm.mulsd(temp3, temp4);
2252        masm.mulpd(temp2, dest);
2253        masm.mulpd(dest, dest);
2254        masm.addpd(temp5, externalAddress(scThreePtr));                                     // 0x1a01a01a,
2255                                                                                            // 0xbf2a01a0,
2256                                                                                            // 0x16c16c17,
2257                                                                                            // 0xbf56c16c
2258        masm.mulsd(temp4, temp8);
2259        masm.addpd(temp6, externalAddress(scOnePtr));                                       // 0x55555555,
2260                                                                                            // 0xbfc55555,
2261                                                                                            // 0x00000000,
2262                                                                                            // 0xbfe00000
2263        masm.mulpd(temp5, dest);
2264        masm.movdqu(dest, temp3);
2265        masm.addsd(temp3, temp9);
2266        masm.mulpd(temp1, temp7);
2267        masm.movdqu(temp7, temp4);
2268        masm.addsd(temp4, temp3);
2269        masm.addpd(temp6, temp5);
2270        masm.subsd(temp9, temp3);
2271        masm.subsd(temp3, temp4);
2272        masm.addsd(temp1, new AMD64Address(gpr1, 16));
2273        masm.mulpd(temp6, temp2);
2274        masm.addsd(temp9, dest);
2275        masm.addsd(temp3, temp7);
2276        masm.addsd(temp1, temp9);
2277        masm.addsd(temp1, temp3);
2278        masm.addsd(temp1, temp6);
2279        masm.unpckhpd(temp6, temp6);
2280        masm.movdqu(dest, temp4);
2281        masm.addsd(temp1, temp6);
2282        masm.addsd(dest, temp1);
2283        masm.jmp(bb13);
2284
            // bb7: the top word is zero. Move the chain up by one 64-bit word (gpr4 += 64) and retry at
            // bb8, at most twice; if every word is zero, continue at bb11 with dest = temp6 = 0.0.
2285        masm.bind(bb7);
2286        masm.addl(gpr4, 64);
2287        masm.movq(gpr8, gpr9);
2288        masm.movq(gpr9, gpr7);
2289        masm.movl(gpr7, 0);
2290        masm.cmpq(gpr8, 0);
2291        masm.jcc(ConditionFlag.NotEqual, bb8);
2292
2293        masm.addl(gpr4, 64);
2294        masm.movq(gpr8, gpr9);
2295        masm.movq(gpr9, gpr7);
2296        masm.cmpq(gpr8, 0);
2297        masm.jcc(ConditionFlag.NotEqual, bb8);
2298
2299        masm.xorpd(dest, dest);
2300        masm.xorpd(temp6, temp6);
2301        masm.jmp(bb11);
2302
            // bb9: the flags are those of the subl at bb8. Equal means no shift is needed; otherwise the
            // chain is shifted right and gpr4 is reduced by the shift amount.
2303        masm.bind(bb9);
2304        masm.jcc(ConditionFlag.Equal, bb10);
2305
2306        masm.negl(gpr3);
2307        masm.shrq(gpr9);
2308        masm.movq(gpr1, gpr8);
2309        masm.shrq(gpr8);
2310        masm.subl(gpr4, gpr3);
2311        masm.negl(gpr3);
2312        masm.addl(gpr3, 64);
2313        masm.shlq(gpr1);
2314        masm.orq(gpr9, gpr1);
2315        masm.jmp(bb10);
2316
            // bb3: variant for gpr3 < 1; bit 31 of the shifted word selects the negating variant bb12.
2317        masm.bind(bb3);
2318        masm.negl(gpr3);
2319        masm.shlq(gpr8, 32);
2320        masm.orq(gpr8, gpr10);
2321        masm.shlq(gpr8);
2322        masm.movq(gpr6, gpr8);
2323        masm.testl(gpr8, Integer.MIN_VALUE);
2324        masm.jcc(ConditionFlag.NotEqual, bb12);
2325
2326        masm.shrl(gpr8);
2327        masm.movl(gpr2, 0);
2328        masm.shrq(gpr6, 3);
2329        masm.jmp(bb6);
2330
            // bb4: replace the chain by (gpr2:0:0) - (gpr8:gpr9:gpr7) using subq/sbbq with borrow, add
            // 536870912 to gpr6 and record 32768 in gpr2 before rejoining at bb5.
2331        masm.bind(bb4);
2332        masm.shrl(gpr8);
2333        masm.movl(gpr2, 536870912);
2334        masm.shrl(gpr2);
2335        masm.shlq(gpr8, 32);
2336        masm.orq(gpr8, gpr10);
2337        masm.shlq(gpr2, 32);
2338        masm.addl(gpr6, 536870912);
2339        masm.movl(gpr3, 0);
2340        masm.movl(gpr10, 0);
2341        masm.subq(gpr3, gpr7);
2342        masm.sbbq(gpr10, gpr9);
2343        masm.sbbq(gpr2, gpr8);
2344        masm.movq(gpr7, gpr3);
2345        masm.movq(gpr9, gpr10);
2346        masm.movq(gpr8, gpr2);
2347        masm.movl(gpr2, 32768);
2348        masm.jmp(bb5);
2349
            // bb12: same borrow-chain negation as bb4 for the bb3 variant; rejoins at bb6.
2350        masm.bind(bb12);
2351        masm.shrl(gpr8);
2352        masm.movq(gpr2, 0x100000000L);
2353        masm.shrq(gpr2);
2354        masm.movl(gpr3, 0);
2355        masm.movl(gpr10, 0);
2356        masm.subq(gpr3, gpr7);
2357        masm.sbbq(gpr10, gpr9);
2358        masm.sbbq(gpr2, gpr8);
2359        masm.movq(gpr7, gpr3);
2360        masm.movq(gpr9, gpr10);
2361        masm.movq(gpr8, gpr2);
2362        masm.movl(gpr2, 32768);
2363        masm.shrq(gpr6, 3);
2364        masm.addl(gpr6, 536870912);
2365        masm.jmp(bb6);
2366
            // bb13: common exit; the result is in dest.
2367        masm.bind(bb13);
2368    }
2369
2370    /*
2371     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
2372     * Source Code
2373     *
2374     * ALGORITHM DESCRIPTION - TAN() ---------------------
2375     *
2376     * Polynomials coefficients and other constants.
2377     *
2378     * Note that in this algorithm, there is a different polynomial for each breakpoint, so there
2379     * are 32 sets of polynomial coefficients as well as 32 instances of the other constants.
2380     *
2381     * The polynomial coefficients and constants are offset from the start of the main block as
2382     * follows:
2383     *
2384     * 0: c8 | c0; 16: c9 | c1; 32: c10 | c2; 48: c11 | c3; 64: c12 | c4; 80: c13 | c5;
2385     * 96: c14 | c6; 112: c15 | c7; 128: T_hi; 136: T_lo; 144: Sigma; 152: T_hl; 160: Tau;
         * 168: Mask; 176: (end of block)
2386     *
2387     * The total table size is therefore 5632 bytes.
2388     *
2389     * Note that c0 and c1 are always zero. We could try storing other constants here, and just
2390     * loading the low part of the SIMD register in these cases, after ensuring the high part is
2391     * zero.
2392     *
2393     * The higher terms of the polynomial are computed in the *low* part of the SIMD register. This
2394     * is so we can overlap the multiplication by r^8 and the unpacking of the other part.
2395     *
2396     * The constants are: T_hi + T_lo = accurate constant term in power series; Sigma + T_hl =
2397     * accurate coefficient of r in power series (Sigma = 1 bit); Tau = multiplier for the
2398     * reciprocal, always -1 or 0.
2399     *
2400     * The basic reconstruction formula using these constants is:
2401     *
2402     * High = tau * recip_hi + t_hi; Med = (sgn * r + t_hl * r)_hi; Low = (sgn * r + t_hl * r)_lo +
2403     * tau * recip_lo + T_lo + (T_hl + sigma) * c + pol
2404     *
2405     * where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15
2406     *
2407     * (c0 = c1 = 0, but using them keeps SIMD regularity)
2408     *
2409     * We then do a compensated sum High + Med, add the low parts together and then do the final
2410     * sum.
2411     *
2412     * Here recip_hi + recip_lo is an accurate reciprocal of the remainder modulo pi/2
2413     *
2414     * Special cases: tan(NaN) = quiet NaN, and raise invalid exception; tan(INF) = NaN and
2415     * raise invalid exception; tan(+/-0) = +/-0.
2416     *
2417     */
2418
2419    private static int[] oneHalfTan = {
                        // Two packed IEEE-754 doubles, both 0x3fe0000000000000 (0.5); each is stored low
                        // 32-bit word first.
2420                    0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
2421    };
2422
2423    private static int[] mulSixteen = {
                        // Two packed IEEE-754 doubles, low word first: 0x4030000000000000 (16.0) followed by
                        // 0x3ff0000000000000 (1.0).
2424                    0x00000000, 0x40300000, 0x00000000, 0x3ff00000
2425    };
2426
2427    private static int[] signMaskTan = {
                        // Two packed 64-bit masks, low word first, each with only bit 63 set (the IEEE-754
                        // double sign bit).
2428                    0x00000000, 0x80000000, 0x00000000, 0x80000000
2429    };
2430
2431    private static int[] piThirtyTwoInvTan = {
                        // Two packed IEEE-754 doubles, low word first: 0x3fe45f306dc9c883 (about 0.6366, i.e.
                        // 2/pi) followed by 0x40245f306dc9c883 (same significand scaled by 16, about 10.186,
                        // i.e. 32/pi).
2432                    0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30
2433    };
2434
2435    private static int[] pOneTan = {
                        // Two packed IEEE-754 doubles, low word first, sharing the high word 0x3fb921fb (about
                        // 0.0982, i.e. pi/32); the low words 0x54444000 and 0x54440000 differ only in their
                        // trailing bits. NOTE(review): presumably the leading pieces of a multi-part pi/32
                        // split - confirm against the tan intrinsic that reads this table.
2436                    0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb
2437    };
2438
2439    private static int[] pTwoTan = {
                        // Two packed IEEE-754 doubles, low word first: 0xbd32e7b967674000 (negative) and
                        // 0x3d468c234c4c0000 (positive). NOTE(review): presumably the second pieces of the
                        // pi/32 split whose leading pieces are in pOneTan - confirm against the tan intrinsic.
2440                    0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23
2441    };
2442
2443    private static int[] pThreeTan = {
                        // Two packed IEEE-754 doubles, low word first: 0x3aa8a2e03707344a and
                        // 0x3ae98a2e03707345. NOTE(review): presumably the third (smallest) pieces of the
                        // pi/32 split continued from pOneTan and pTwoTan - confirm against the tan intrinsic.
2444                    0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e
2445    };
2446
2447    private static int[] cTableTan = {
2448                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x882c10fa,
2449                    0x3f9664f4, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
2450                    0x00000000, 0x00000000, 0x55e6c23d, 0x3f8226e3, 0x55555555,
2451                    0x3fd55555, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
2452                    0x0e157de0, 0x3f6d6d3d, 0x11111111, 0x3fc11111, 0x00000000,
2453                    0x00000000, 0x00000000, 0x00000000, 0x452b75e3, 0x3f57da36,
2454                    0x1ba1ba1c, 0x3faba1ba, 0x00000000, 0x00000000, 0x00000000,
2455                    0x00000000, 0x00000000, 0x3ff00000, 0x00000000, 0x00000000,
2456                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b,
2457                    0x3f953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea,
2458                    0x00000000, 0x00000000, 0xda5b7511, 0x3f85ad63, 0xdc230b9b,
2459                    0x3fb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9,
2460                    0x77bb08ba, 0x3f757c85, 0xb6247521, 0x3fb1381e, 0x5922170c,
2461                    0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0x3f64e391,
2462                    0x3e666320, 0x3fa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06,
2463                    0x3fafa8ae, 0x8c5b2da2, 0x3fb936bb, 0x4e88f7a5, 0x3c587d05,
2464                    0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000,
2465                    0x00000000, 0x00000000, 0x00000000, 0x5a279ea3, 0x3faa3407,
2466                    0x00000000, 0x00000000, 0x432d65fa, 0x3fa70153, 0x00000000,
2467                    0x00000000, 0x891a4602, 0x3f9d03ef, 0xd62ca5f8, 0x3fca77d9,
2468                    0xb35f4628, 0x3f97a265, 0x433258fa, 0x3fd8cf51, 0xb58fd909,
2469                    0x3f8f88e3, 0x01771cea, 0x3fc2b154, 0xf3562f8e, 0x3f888f57,
2470                    0xc028a723, 0x3fc7370f, 0x20b7f9f0, 0x3f80f44c, 0x214368e9,
2471                    0x3fb6dfaa, 0x28891863, 0x3f79b4b6, 0x172dbbf0, 0x3fb6cb8e,
2472                    0xe0553158, 0x3fc975f5, 0x593fe814, 0x3c2ef5d3, 0x00000000,
2473                    0x3ff00000, 0x03dec550, 0x3fa44203, 0x00000000, 0x00000000,
2474                    0x00000000, 0x00000000, 0x9314533e, 0x3fbb8ec5, 0x00000000,
2475                    0x00000000, 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000,
2476                    0xdcb427fd, 0x3fb13950, 0xd87ab0bb, 0x3fd5335e, 0xce0ae8a5,
2477                    0x3fabb382, 0x79143126, 0x3fddba41, 0x5f2b28d4, 0x3fa552f1,
2478                    0x59f21a6d, 0x3fd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa,
2479                    0x3fd0576c, 0x8f2c2950, 0x3f9a4898, 0xc0b3f22c, 0x3fc59462,
2480                    0x1883a4b8, 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc,
2481                    0x3fd36a08, 0x1dce993d, 0xbc6d704d, 0x00000000, 0x3ff00000,
2482                    0x2b82ab63, 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000,
2483                    0x00000000, 0x56f37042, 0x3fccfc56, 0x00000000, 0x00000000,
2484                    0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 0x3d0e7c5d,
2485                    0x3fc50533, 0x9bed9b2e, 0x3fdf0ed9, 0x5fe7c47c, 0x3fc1f250,
2486                    0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0x3fbe5c71, 0x86362c20,
2487                    0x3fda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 0x3fd911bd,
2488                    0xb56658be, 0x3fb5e4c7, 0x93a2fd76, 0x3fd3c092, 0xda271794,
2489                    0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 0x3fda8279,
2490                    0xb68c1467, 0x3c708b2f, 0x00000000, 0x3ff00000, 0x980c4337,
2491                    0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
2492                    0xcc03e501, 0x3fdff10f, 0x00000000, 0x00000000, 0x44a4e845,
2493                    0x3fddb63b, 0x00000000, 0x00000000, 0x3768ad9f, 0x3fdb72a4,
2494                    0x3dd01cca, 0x3fe5fdb9, 0xa61d2811, 0x3fd972b2, 0x5645ad0b,
2495                    0x3fe977f9, 0xd013b3ab, 0x3fd78ca3, 0xbf0bf914, 0x3fe4f192,
2496                    0x4d53e730, 0x3fd5d060, 0x3f8b9000, 0x3fe49933, 0xe2b82f08,
2497                    0x3fd4322a, 0x5936a835, 0x3fe27ae1, 0xb1c61c9b, 0x3fd2b3fb,
2498                    0xef478605, 0x3fe1659e, 0x190834ec, 0x3fe11ab7, 0xcdb625ea,
2499                    0xbc8e564b, 0x00000000, 0x3ff00000, 0xb07217e3, 0x3fd248f1,
2500                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0,
2501                    0x3ff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58,
2502                    0x00000000, 0x00000000, 0xff691fa2, 0x3ff3972e, 0xe93463bd,
2503                    0x3feeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10,
2504                    0xa04e8ea3, 0x3ff4541a, 0x386accd3, 0x3ff1369e, 0x222a66dd,
2505                    0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0x3ff5178f,
2506                    0xddaa0031, 0x3ff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d,
2507                    0x3ff29311, 0x2ab7f990, 0x3fe561b8, 0x209c7df1, 0x3c87a8c5,
2508                    0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000,
2509                    0x00000000, 0x00000000, 0x00000000, 0xc7ab4d5a, 0x40085e24,
2510                    0x00000000, 0x00000000, 0xe93ea75d, 0x400b963d, 0x00000000,
2511                    0x00000000, 0x94a7f25a, 0x400f37e2, 0x4b6261cb, 0x3ff5f984,
2512                    0x5a9dd812, 0x4011aab0, 0x74c30018, 0x3ffaf5a5, 0x7f2ce8e3,
2513                    0x4013fe8b, 0xfe8e54fa, 0x3ffd7334, 0x670d618d, 0x4016a10c,
2514                    0x4db97058, 0x4000e012, 0x24df44dd, 0x40199c5f, 0x697d6ece,
2515                    0x4003006e, 0x83298b82, 0x401cfc4d, 0x19d490d6, 0x40058c19,
2516                    0x2ae42850, 0x3fea4300, 0x118e20e6, 0xbc7a6db8, 0x00000000,
2517                    0x40000000, 0xe33345b8, 0xbfd4e526, 0x00000000, 0x00000000,
2518                    0x00000000, 0x00000000, 0x65965966, 0x40219659, 0x00000000,
2519                    0x00000000, 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000,
2520                    0x83cd3723, 0x402c8342, 0x00000000, 0x40000000, 0x55e6c23d,
2521                    0x403226e3, 0x55555555, 0x40055555, 0x34451939, 0x40371c96,
2522                    0xaaaaaaab, 0x400aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111,
2523                    0x40111111, 0xa738201f, 0x4042bbce, 0x05b05b06, 0x4015b05b,
2524                    0x452b75e3, 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000,
2525                    0x3ff00000, 0x00000000, 0x00000000, 0x00000000, 0x40000000,
2526                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
2527                    0x00000000, 0x4f48b8d3, 0xbf33eaf9, 0x00000000, 0x00000000,
2528                    0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 0xd0258911,
2529                    0xbf0abaf3, 0x23e49fe9, 0xbfab5a8c, 0x2d53222e, 0x3ef60d15,
2530                    0x21169451, 0x3fa172b2, 0xbb254dbc, 0xbee1d3b5, 0xdbf93b8e,
2531                    0xbf84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 0x3f743924,
2532                    0x794a8297, 0xbeb7b7b9, 0xe015f797, 0xbf5d41f5, 0xe41a4a56,
2533                    0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 0xbfce49ce,
2534                    0x8c743719, 0x3d1eb860, 0x00000000, 0x00000000, 0x1b4863cf,
2535                    0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8,
2536                    0x535ad890, 0xbf2b9320, 0x00000000, 0x00000000, 0x018fdf1f,
2537                    0x3f16d61d, 0x00000000, 0x00000000, 0x0359f1be, 0xbf0139e4,
2538                    0xa4317c6d, 0xbfa67e17, 0x82672d0f, 0x3eebb405, 0x2f1b621e,
2539                    0x3f9f455b, 0x51ccf238, 0xbed55317, 0xf437b9ac, 0xbf804bee,
2540                    0xc791a2b5, 0x3ec0e993, 0x919a1db2, 0x3f7080c2, 0x336a5b0e,
2541                    0xbeaa48a2, 0x0a268358, 0xbf55a443, 0xdfd978e4, 0x3e94b61f,
2542                    0xd7767a58, 0x3f431806, 0x2aea0000, 0xbfc9bbe8, 0x7723ea61,
2543                    0xbd3a2369, 0x00000000, 0x00000000, 0xdf7796ff, 0x3fd6e642,
2544                    0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce,
2545                    0xbf231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0,
2546                    0x00000000, 0x00000000, 0x790b4cbc, 0xbef66191, 0x848a46c6,
2547                    0xbfa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea,
2548                    0xfdd299ef, 0xbec9dd1a, 0x3f8dbaaf, 0xbf793363, 0x309fc6ea,
2549                    0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0xbe9dae11,
2550                    0x3e5c67b3, 0xbf4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4,
2551                    0x3f3d1eb1, 0x29cfc000, 0xbfc549ce, 0xbf159358, 0xbd397b33,
2552                    0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000,
2553                    0x3ff00000, 0x00000000, 0xfffffff8, 0x7d98a556, 0xbf1a3958,
2554                    0x00000000, 0x00000000, 0x9d88dc01, 0x3f0704c2, 0x00000000,
2555                    0x00000000, 0x73742a2b, 0xbeed054a, 0x58844587, 0xbf9c2a13,
2556                    0x55688a79, 0x3ed7a326, 0xee33f1d6, 0x3f9a48f4, 0xa8dc9888,
2557                    0xbebf8939, 0xaad4b5b8, 0xbf72f746, 0x9102efa1, 0x3ea88f82,
2558                    0xdabc29cf, 0x3f678228, 0x9289afb8, 0xbe90f456, 0x741fb4ed,
2559                    0xbf46f3a3, 0xa97f6663, 0x3e79b4bf, 0xca89ff3f, 0x3f36db70,
2560                    0xa8a2a000, 0xbfc0ee13, 0x3da24be1, 0xbd338b9f, 0x00000000,
2561                    0x00000000, 0x11cd6c69, 0x3fd601fd, 0x00000000, 0x3ff00000,
2562                    0x00000000, 0xfffffff8, 0x1a154b97, 0xbf116b01, 0x00000000,
2563                    0x00000000, 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000,
2564                    0xb93820c8, 0xbee264d4, 0xbb6cbb18, 0xbf94ab8c, 0x888d4d92,
2565                    0x3ed0568b, 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0xbeb2f950,
2566                    0x22cf9f74, 0xbf6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce,
2567                    0x3f64aad7, 0x637b73af, 0xbe83487c, 0xe522591a, 0xbf3fc092,
2568                    0xa158e8bc, 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000,
2569                    0xbfb9477f, 0xc2c2d2bc, 0xbd135ef9, 0x00000000, 0x00000000,
2570                    0xf2fdb123, 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000,
2571                    0xfffffff8, 0xc41acb64, 0xbf05448d, 0x00000000, 0x00000000,
2572                    0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 0x9e42962d,
2573                    0xbed5aea5, 0x2579f8ef, 0xbf8b2398, 0x288a1ed9, 0x3ec81441,
2574                    0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0xbea57cd3, 0x5766336f,
2575                    0xbf617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 0x3f62c646,
2576                    0x6b8fb29c, 0xbe74e3a3, 0xdc4c0409, 0xbf33f952, 0x9bffe365,
2577                    0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 0xbfb0cc62,
2578                    0x016b907f, 0xbd119cbc, 0x00000000, 0x00000000, 0xe6b9d8fa,
2579                    0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8,
2580                    0x5daf22a6, 0xbef429d7, 0x00000000, 0x00000000, 0x06bca545,
2581                    0x3ef7a27d, 0x00000000, 0x00000000, 0x7211c19a, 0xbec41c3e,
2582                    0x956ed53e, 0xbf7ae3f4, 0xee750e72, 0x3ec3901b, 0x91d443f5,
2583                    0x3f96f713, 0x36661e6c, 0xbe936e09, 0x506f9381, 0xbf5122e8,
2584                    0xcb6dd43f, 0x3e9041b9, 0x6698b2ff, 0x3f61b0c7, 0x576bf12b,
2585                    0xbe625a8a, 0xe5a0e9dc, 0xbf23499d, 0x110384dd, 0x3e5b1c2c,
2586                    0x68d43db6, 0x3f2cb899, 0x6ecac000, 0xbfa0c414, 0xcd7dd58c,
2587                    0x3d13500f, 0x00000000, 0x00000000, 0x85a2c8fb, 0x3fd55fe0,
2588                    0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0x00000000,
2589                    0x00000000, 0x00000000, 0x00000000, 0x2bf70ebe, 0x3ef66a8f,
2590                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
2591                    0x00000000, 0xd644267f, 0x3ec22805, 0x16c16c17, 0x3f96c16c,
2592                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc4e09162,
2593                    0x3e8d6db2, 0xbc011567, 0x3f61566a, 0x00000000, 0x00000000,
2594                    0x00000000, 0x00000000, 0x1f79955c, 0x3e57da4e, 0x9334ef0b,
2595                    0x3f2bbd77, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
2596                    0x00000000, 0x00000000, 0x55555555, 0x3fd55555, 0x00000000,
2597                    0x3ff00000, 0x00000000, 0xfffffff8, 0x5daf22a6, 0x3ef429d7,
2598                    0x00000000, 0x00000000, 0x06bca545, 0x3ef7a27d, 0x00000000,
2599                    0x00000000, 0x7211c19a, 0x3ec41c3e, 0x956ed53e, 0x3f7ae3f4,
2600                    0xee750e72, 0x3ec3901b, 0x91d443f5, 0x3f96f713, 0x36661e6c,
2601                    0x3e936e09, 0x506f9381, 0x3f5122e8, 0xcb6dd43f, 0x3e9041b9,
2602                    0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 0x3e625a8a, 0xe5a0e9dc,
2603                    0x3f23499d, 0x110384dd, 0x3e5b1c2c, 0x68d43db6, 0x3f2cb899,
2604                    0x6ecac000, 0x3fa0c414, 0xcd7dd58c, 0xbd13500f, 0x00000000,
2605                    0x00000000, 0x85a2c8fb, 0x3fd55fe0, 0x00000000, 0x3ff00000,
2606                    0x00000000, 0xfffffff8, 0xc41acb64, 0x3f05448d, 0x00000000,
2607                    0x00000000, 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000,
2608                    0x9e42962d, 0x3ed5aea5, 0x2579f8ef, 0x3f8b2398, 0x288a1ed9,
2609                    0x3ec81441, 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0x3ea57cd3,
2610                    0x5766336f, 0x3f617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8,
2611                    0x3f62c646, 0x6b8fb29c, 0x3e74e3a3, 0xdc4c0409, 0x3f33f952,
2612                    0x9bffe365, 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000,
2613                    0x3fb0cc62, 0x016b907f, 0x3d119cbc, 0x00000000, 0x00000000,
2614                    0xe6b9d8fa, 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000,
2615                    0xfffffff8, 0x1a154b97, 0x3f116b01, 0x00000000, 0x00000000,
2616                    0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 0xb93820c8,
2617                    0x3ee264d4, 0xbb6cbb18, 0x3f94ab8c, 0x888d4d92, 0x3ed0568b,
2618                    0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0x3eb2f950, 0x22cf9f74,
2619                    0x3f6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 0x3f64aad7,
2620                    0x637b73af, 0x3e83487c, 0xe522591a, 0x3f3fc092, 0xa158e8bc,
2621                    0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 0x3fb9477f,
2622                    0xc2c2d2bc, 0x3d135ef9, 0x00000000, 0x00000000, 0xf2fdb123,
2623                    0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8,
2624                    0x7d98a556, 0x3f1a3958, 0x00000000, 0x00000000, 0x9d88dc01,
2625                    0x3f0704c2, 0x00000000, 0x00000000, 0x73742a2b, 0x3eed054a,
2626                    0x58844587, 0x3f9c2a13, 0x55688a79, 0x3ed7a326, 0xee33f1d6,
2627                    0x3f9a48f4, 0xa8dc9888, 0x3ebf8939, 0xaad4b5b8, 0x3f72f746,
2628                    0x9102efa1, 0x3ea88f82, 0xdabc29cf, 0x3f678228, 0x9289afb8,
2629                    0x3e90f456, 0x741fb4ed, 0x3f46f3a3, 0xa97f6663, 0x3e79b4bf,
2630                    0xca89ff3f, 0x3f36db70, 0xa8a2a000, 0x3fc0ee13, 0x3da24be1,
2631                    0x3d338b9f, 0x00000000, 0x00000000, 0x11cd6c69, 0x3fd601fd,
2632                    0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce,
2633                    0x3f231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0,
2634                    0x00000000, 0x00000000, 0x790b4cbc, 0x3ef66191, 0x848a46c6,
2635                    0x3fa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea,
2636                    0xfdd299ef, 0x3ec9dd1a, 0x3f8dbaaf, 0x3f793363, 0x309fc6ea,
2637                    0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0x3e9dae11,
2638                    0x3e5c67b3, 0x3f4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4,
2639                    0x3f3d1eb1, 0x29cfc000, 0x3fc549ce, 0xbf159358, 0x3d397b33,
2640                    0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000,
2641                    0x3ff00000, 0x00000000, 0xfffffff8, 0x535ad890, 0x3f2b9320,
2642                    0x00000000, 0x00000000, 0x018fdf1f, 0x3f16d61d, 0x00000000,
2643                    0x00000000, 0x0359f1be, 0x3f0139e4, 0xa4317c6d, 0x3fa67e17,
2644                    0x82672d0f, 0x3eebb405, 0x2f1b621e, 0x3f9f455b, 0x51ccf238,
2645                    0x3ed55317, 0xf437b9ac, 0x3f804bee, 0xc791a2b5, 0x3ec0e993,
2646                    0x919a1db2, 0x3f7080c2, 0x336a5b0e, 0x3eaa48a2, 0x0a268358,
2647                    0x3f55a443, 0xdfd978e4, 0x3e94b61f, 0xd7767a58, 0x3f431806,
2648                    0x2aea0000, 0x3fc9bbe8, 0x7723ea61, 0x3d3a2369, 0x00000000,
2649                    0x00000000, 0xdf7796ff, 0x3fd6e642, 0x00000000, 0x3ff00000,
2650                    0x00000000, 0xfffffff8, 0x4f48b8d3, 0x3f33eaf9, 0x00000000,
2651                    0x00000000, 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000,
2652                    0xd0258911, 0x3f0abaf3, 0x23e49fe9, 0x3fab5a8c, 0x2d53222e,
2653                    0x3ef60d15, 0x21169451, 0x3fa172b2, 0xbb254dbc, 0x3ee1d3b5,
2654                    0xdbf93b8e, 0x3f84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7,
2655                    0x3f743924, 0x794a8297, 0x3eb7b7b9, 0xe015f797, 0x3f5d41f5,
2656                    0xe41a4a56, 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000,
2657                    0x3fce49ce, 0x8c743719, 0xbd1eb860, 0x00000000, 0x00000000,
2658                    0x1b4863cf, 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000,
2659                    0xfffffff8, 0x65965966, 0xc0219659, 0x00000000, 0x00000000,
2660                    0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 0x83cd3723,
2661                    0xc02c8342, 0x00000000, 0xc0000000, 0x55e6c23d, 0x403226e3,
2662                    0x55555555, 0x40055555, 0x34451939, 0xc0371c96, 0xaaaaaaab,
2663                    0xc00aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 0x40111111,
2664                    0xa738201f, 0xc042bbce, 0x05b05b06, 0xc015b05b, 0x452b75e3,
2665                    0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 0xbff00000,
2666                    0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x00000000,
2667                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
2668                    0xc7ab4d5a, 0xc0085e24, 0x00000000, 0x00000000, 0xe93ea75d,
2669                    0x400b963d, 0x00000000, 0x00000000, 0x94a7f25a, 0xc00f37e2,
2670                    0x4b6261cb, 0xbff5f984, 0x5a9dd812, 0x4011aab0, 0x74c30018,
2671                    0x3ffaf5a5, 0x7f2ce8e3, 0xc013fe8b, 0xfe8e54fa, 0xbffd7334,
2672                    0x670d618d, 0x4016a10c, 0x4db97058, 0x4000e012, 0x24df44dd,
2673                    0xc0199c5f, 0x697d6ece, 0xc003006e, 0x83298b82, 0x401cfc4d,
2674                    0x19d490d6, 0x40058c19, 0x2ae42850, 0xbfea4300, 0x118e20e6,
2675                    0x3c7a6db8, 0x00000000, 0x40000000, 0xe33345b8, 0xbfd4e526,
2676                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0,
2677                    0xbff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58,
2678                    0x00000000, 0x00000000, 0xff691fa2, 0xbff3972e, 0xe93463bd,
2679                    0xbfeeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10,
2680                    0xa04e8ea3, 0xbff4541a, 0x386accd3, 0xbff1369e, 0x222a66dd,
2681                    0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0xbff5178f,
2682                    0xddaa0031, 0xbff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d,
2683                    0x3ff29311, 0x2ab7f990, 0xbfe561b8, 0x209c7df1, 0xbc87a8c5,
2684                    0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000,
2685                    0x00000000, 0x00000000, 0x00000000, 0xcc03e501, 0xbfdff10f,
2686                    0x00000000, 0x00000000, 0x44a4e845, 0x3fddb63b, 0x00000000,
2687                    0x00000000, 0x3768ad9f, 0xbfdb72a4, 0x3dd01cca, 0xbfe5fdb9,
2688                    0xa61d2811, 0x3fd972b2, 0x5645ad0b, 0x3fe977f9, 0xd013b3ab,
2689                    0xbfd78ca3, 0xbf0bf914, 0xbfe4f192, 0x4d53e730, 0x3fd5d060,
2690                    0x3f8b9000, 0x3fe49933, 0xe2b82f08, 0xbfd4322a, 0x5936a835,
2691                    0xbfe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 0xef478605, 0x3fe1659e,
2692                    0x190834ec, 0xbfe11ab7, 0xcdb625ea, 0x3c8e564b, 0x00000000,
2693                    0x3ff00000, 0xb07217e3, 0x3fd248f1, 0x00000000, 0x00000000,
2694                    0x00000000, 0x00000000, 0x56f37042, 0xbfccfc56, 0x00000000,
2695                    0x00000000, 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000,
2696                    0x3d0e7c5d, 0xbfc50533, 0x9bed9b2e, 0xbfdf0ed9, 0x5fe7c47c,
2697                    0x3fc1f250, 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0xbfbe5c71,
2698                    0x86362c20, 0xbfda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091,
2699                    0x3fd911bd, 0xb56658be, 0xbfb5e4c7, 0x93a2fd76, 0xbfd3c092,
2700                    0xda271794, 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32,
2701                    0xbfda8279, 0xb68c1467, 0xbc708b2f, 0x00000000, 0x3ff00000,
2702                    0x980c4337, 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000,
2703                    0x00000000, 0x9314533e, 0xbfbb8ec5, 0x00000000, 0x00000000,
2704                    0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 0xdcb427fd,
2705                    0xbfb13950, 0xd87ab0bb, 0xbfd5335e, 0xce0ae8a5, 0x3fabb382,
2706                    0x79143126, 0x3fddba41, 0x5f2b28d4, 0xbfa552f1, 0x59f21a6d,
2707                    0xbfd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 0x3fd0576c,
2708                    0x8f2c2950, 0xbf9a4898, 0xc0b3f22c, 0xbfc59462, 0x1883a4b8,
2709                    0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 0xbfd36a08,
2710                    0x1dce993d, 0x3c6d704d, 0x00000000, 0x3ff00000, 0x2b82ab63,
2711                    0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
2712                    0x5a279ea3, 0xbfaa3407, 0x00000000, 0x00000000, 0x432d65fa,
2713                    0x3fa70153, 0x00000000, 0x00000000, 0x891a4602, 0xbf9d03ef,
2714                    0xd62ca5f8, 0xbfca77d9, 0xb35f4628, 0x3f97a265, 0x433258fa,
2715                    0x3fd8cf51, 0xb58fd909, 0xbf8f88e3, 0x01771cea, 0xbfc2b154,
2716                    0xf3562f8e, 0x3f888f57, 0xc028a723, 0x3fc7370f, 0x20b7f9f0,
2717                    0xbf80f44c, 0x214368e9, 0xbfb6dfaa, 0x28891863, 0x3f79b4b6,
2718                    0x172dbbf0, 0x3fb6cb8e, 0xe0553158, 0xbfc975f5, 0x593fe814,
2719                    0xbc2ef5d3, 0x00000000, 0x3ff00000, 0x03dec550, 0x3fa44203,
2720                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b,
2721                    0xbf953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea,
2722                    0x00000000, 0x00000000, 0xda5b7511, 0xbf85ad63, 0xdc230b9b,
2723                    0xbfb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9,
2724                    0x77bb08ba, 0xbf757c85, 0xb6247521, 0xbfb1381e, 0x5922170c,
2725                    0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0xbf64e391,
2726                    0x3e666320, 0xbfa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06,
2727                    0x3fafa8ae, 0x8c5b2da2, 0xbfb936bb, 0x4e88f7a5, 0xbc587d05,
2728                    0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000,
2729                    0x00000000, 0x00000000, 0x00000000
2730    };
2731
2732    private static int[] maskThirtyFiveTan = {
2733                    0xfffc0000, 0xffffffff, 0x00000000, 0x00000000
2734    };
2735
2736    private static int[] qElevenTan = {
2737                    0xb8fe4d77, 0x3f82609a
2738    };
2739
2740    private static int[] qNineTan = {
2741                    0xbf847a43, 0x3f9664a0
2742    };
2743
2744    private static int[] qSevenTan = {
2745                    0x52c4c8ab, 0x3faba1ba
2746    };
2747
2748    private static int[] qFiveTan = {
2749                    0x11092746, 0x3fc11111
2750    };
2751
2752    private static int[] qThreeTan = {
2753                    0x55555612, 0x3fd55555
2754    };
2755
2756    private static int[] piInvTableTan = {
2757                    0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1,
2758                    0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561,
2759                    0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c,
2760                    0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
2761                    0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff,
2762                    0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7,
2763                    0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7,
2764                    0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab,
2765                    0xf0cfbc21
2766    };
2767
2768    private static int[] piFourTan = {
2769                    0x00000000, 0x3fe921fb, 0x4611a626, 0x3e85110b
2770    };
2771
2772    private static int[] qqTwoTan = {
2773                    0x676733af, 0x3d32e7b9
2774    };
2775
2776    private static int[] twoPowFiftyFiveTan = {
2777                    0x00000000, 0x43600000
2778    };
2779
2780    private static int[] twoPowMFiftyFiveTan = {
2781                    0x00000000, 0x3c800000
2782    };
2783
2784    public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
2785        ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16);
2786        ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16);
2787        ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16);
2788        ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16);
2789        ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16);
2790        ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16);
2791        ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16);
2792        ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16);
2793        ArrayDataPointerConstant maskThirtyFiveTanPtr = new ArrayDataPointerConstant(maskThirtyFiveTan, 16);
2794        ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16);
2795        ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16);
2796        ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8);
2797        ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16);
2798        ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16);
2799        ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16);
2800        ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8);
2801        ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8);
2802        ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);
2803        ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8);
2804        ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8);
2805
2806        Label bb0 = new Label();
2807        Label bb1 = new Label();
2808        Label bb2 = new Label();
2809        Label bb3 = new Label();
2810        Label bb5 = new Label();
2811        Label bb6 = new Label();
2812        Label bb8 = new Label();
2813        Label bb9 = new Label();
2814        Label bb10 = new Label();
2815        Label bb11 = new Label();
2816        Label bb12 = new Label();
2817        Label bb13 = new Label();
2818        Label bb14 = new Label();
2819        Label bb15 = new Label();
2820
2821        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
2822        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
2823        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
2824        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
2825        Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
2826        Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
2827        Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
2828        Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
2829        Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
2830        Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
2831
2832        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
2833        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
2834        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
2835        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
2836        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
2837        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
2838        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
2839
2840        setCrb(crb);
2841        if (dest.encoding != value.encoding) {
2842            masm.movdqu(dest, value);
2843        }
2844
2845        masm.pextrw(gpr1, dest, 3);
2846        masm.andl(gpr1, 32767);
2847        masm.subl(gpr1, 16314);
2848        masm.cmpl(gpr1, 270);
2849        masm.jcc(ConditionFlag.Above, bb0);
2850
2851        masm.movdqu(temp5, externalAddress(oneHalfTanPtr));                                     // 0x00000000,
2852                                                                                                // 0x3fe00000,
2853                                                                                                // 0x00000000,
2854                                                                                                // 0x3fe00000
2855        masm.movdqu(temp6, externalAddress(mulSixteenPtr));                                     // 0x00000000,
2856                                                                                                // 0x40300000,
2857                                                                                                // 0x00000000,
2858                                                                                                // 0x3ff00000
2859        masm.unpcklpd(dest, dest);
2860        masm.movdqu(temp4, externalAddress(signMaskTanPtr));                                    // 0x00000000,
2861                                                                                                // 0x80000000,
2862                                                                                                // 0x00000000,
2863                                                                                                // 0x80000000
2864        masm.andpd(temp4, dest);
2865        masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));                              // 0x6dc9c883,
2866                                                                                                // 0x3fe45f30,
2867                                                                                                // 0x6dc9c883,
2868                                                                                                // 0x40245f30
2869        masm.mulpd(temp1, dest);
2870        masm.por(temp5, temp4);
2871        masm.addpd(temp1, temp5);
2872        masm.movdqu(temp7, temp1);
2873        masm.unpckhpd(temp7, temp7);
2874        masm.cvttsd2sil(gpr4, temp7);
2875        masm.cvttpd2dq(temp1, temp1);
2876        masm.cvtdq2pd(temp1, temp1);
2877        masm.mulpd(temp1, temp6);
2878        masm.movdqu(temp3, externalAddress(pOneTanPtr));                                        // 0x54444000,
2879                                                                                                // 0x3fb921fb,
2880                                                                                                // 0x54440000,
2881                                                                                                // 0x3fb921fb
2882        masm.movdq(temp5, externalAddress(qqTwoTanPtr));                                        // 0x676733af,
2883                                                                                                // 0x3d32e7b9
2884        masm.addq(gpr4, 469248);
2885        masm.movdqu(temp4, externalAddress(pTwoTanPtr));                                        // 0x67674000,
2886                                                                                                // 0xbd32e7b9,
2887                                                                                                // 0x4c4c0000,
2888                                                                                                // 0x3d468c23
2889        masm.mulpd(temp3, temp1);
2890        masm.andq(gpr4, 31);
2891        masm.mulsd(temp5, temp1);
2892        masm.movq(gpr3, gpr4);
2893        masm.mulpd(temp4, temp1);
2894        masm.shlq(gpr3, 1);
2895        masm.subpd(dest, temp3);
2896        masm.mulpd(temp1, externalAddress(pThreeTanPtr));                                       // 0x3707344a,
2897                                                                                                // 0x3aa8a2e0,
2898                                                                                                // 0x03707345,
2899                                                                                                // 0x3ae98a2e
2900        masm.addq(gpr4, gpr3);
2901        masm.shlq(gpr3, 2);
2902        masm.addq(gpr4, gpr3);
2903        masm.addsd(temp5, dest);
2904        masm.movdqu(temp2, dest);
2905        masm.subpd(dest, temp4);
2906        masm.movdq(temp6, externalAddress(onePtr));                                             // 0x00000000,
2907                                                                                                // 0x3ff00000
2908        masm.shlq(gpr4, 4);
2909        masm.leaq(gpr1, externalAddress(cTableTanPtr));
2910        masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));                               // 0xfffc0000,
2911                                                                                                // 0xffffffff,
2912                                                                                                // 0x00000000,
2913                                                                                                // 0x00000000
2914        masm.movdqu(temp3, dest);
2915        masm.addq(gpr1, gpr4);
2916        masm.subpd(temp2, dest);
2917        masm.unpckhpd(dest, dest);
2918        masm.divsd(temp6, temp5);
2919        masm.subpd(temp2, temp4);
2920        masm.movdqu(temp7, new AMD64Address(gpr1, 16));
2921        masm.subsd(temp3, temp5);
2922        masm.mulpd(temp7, dest);
2923        masm.subpd(temp2, temp1);
2924        masm.movdqu(temp1, new AMD64Address(gpr1, 48));
2925        masm.mulpd(temp1, dest);
2926        masm.movdqu(temp4, new AMD64Address(gpr1, 96));
2927        masm.mulpd(temp4, dest);
2928        masm.addsd(temp2, temp3);
2929        masm.movdqu(temp3, dest);
2930        masm.mulpd(dest, dest);
2931        masm.addpd(temp7, new AMD64Address(gpr1, 0));
2932        masm.addpd(temp1, new AMD64Address(gpr1, 32));
2933        masm.mulpd(temp1, dest);
2934        masm.addpd(temp4, new AMD64Address(gpr1, 80));
2935        masm.addpd(temp7, temp1);
2936        masm.movdqu(temp1, new AMD64Address(gpr1, 112));
2937        masm.mulpd(temp1, dest);
2938        masm.mulpd(dest, dest);
2939        masm.addpd(temp4, temp1);
2940        masm.movdqu(temp1, new AMD64Address(gpr1, 64));
2941        masm.mulpd(temp1, dest);
2942        masm.addpd(temp7, temp1);
2943        masm.movdqu(temp1, temp3);
2944        masm.mulpd(temp3, dest);
2945        masm.mulsd(dest, dest);
2946        masm.mulpd(temp1, new AMD64Address(gpr1, 144));
2947        masm.mulpd(temp4, temp3);
2948        masm.movdqu(temp3, temp1);
2949        masm.addpd(temp7, temp4);
2950        masm.movdqu(temp4, temp1);
2951        masm.mulsd(dest, temp7);
2952        masm.unpckhpd(temp7, temp7);
2953        masm.addsd(dest, temp7);
2954        masm.unpckhpd(temp1, temp1);
2955        masm.addsd(temp3, temp1);
2956        masm.subsd(temp4, temp3);
2957        masm.addsd(temp1, temp4);
2958        masm.movdqu(temp4, temp2);
2959        masm.movdq(temp7, new AMD64Address(gpr1, 144));
2960        masm.unpckhpd(temp2, temp2);
2961        masm.addsd(temp7, new AMD64Address(gpr1, 152));
2962        masm.mulsd(temp7, temp2);
2963        masm.addsd(temp7, new AMD64Address(gpr1, 136));
2964        masm.addsd(temp7, temp1);
2965        masm.addsd(dest, temp7);
2966        masm.movdq(temp7, externalAddress(onePtr));                                             // 0x00000000,
2967                                                                                                // 0x3ff00000
2968        masm.mulsd(temp4, temp6);
2969        masm.movdq(temp2, new AMD64Address(gpr1, 168));
2970        masm.andpd(temp2, temp6);
2971        masm.mulsd(temp5, temp2);
2972        masm.mulsd(temp6, new AMD64Address(gpr1, 160));
2973        masm.subsd(temp7, temp5);
2974        masm.subsd(temp2, new AMD64Address(gpr1, 128));
2975        masm.subsd(temp7, temp4);
2976        masm.mulsd(temp7, temp6);
2977        masm.movdqu(temp4, temp3);
2978        masm.subsd(temp3, temp2);
2979        masm.addsd(temp2, temp3);
2980        masm.subsd(temp4, temp2);
2981        masm.addsd(dest, temp4);
2982        masm.subsd(dest, temp7);
2983        masm.addsd(dest, temp3);
2984        masm.jmp(bb15);
2985
2986        masm.bind(bb0);
2987        masm.jcc(ConditionFlag.Greater, bb1);
2988
2989        masm.pextrw(gpr1, dest, 3);
2990        masm.movl(gpr4, gpr1);
2991        masm.andl(gpr1, 32752);
2992        masm.jcc(ConditionFlag.Equal, bb2);
2993
2994        masm.andl(gpr4, 32767);
2995        masm.cmpl(gpr4, 15904);
2996        masm.jcc(ConditionFlag.Below, bb3);
2997
2998        masm.movdqu(temp2, dest);
2999        masm.movdqu(temp3, dest);
3000        masm.movdq(temp1, externalAddress(qElevenTanPtr));                                      // 0xb8fe4d77,
3001                                                                                                // 0x3f82609a
3002        masm.mulsd(temp2, dest);
3003        masm.mulsd(temp3, temp2);
3004        masm.mulsd(temp1, temp2);
3005        masm.addsd(temp1, externalAddress(qNineTanPtr));                                        // 0xbf847a43,
3006                                                                                                // 0x3f9664a0
3007        masm.mulsd(temp1, temp2);
3008        masm.addsd(temp1, externalAddress(qSevenTanPtr));                                       // 0x52c4c8ab,
3009                                                                                                // 0x3faba1ba
3010        masm.mulsd(temp1, temp2);
3011        masm.addsd(temp1, externalAddress(qFiveTanPtr));                                        // 0x11092746,
3012                                                                                                // 0x3fc11111
3013        masm.mulsd(temp1, temp2);
3014        masm.addsd(temp1, externalAddress(qThreeTanPtr));                                       // 0x55555612,
3015                                                                                                // 0x3fd55555
3016        masm.mulsd(temp1, temp3);
3017        masm.addsd(dest, temp1);
3018        masm.jmp(bb15);
3019
3020        masm.bind(bb3);
3021        masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr));                              // 0x00000000,
3022                                                                                                // 0x43600000
3023        masm.mulsd(temp3, dest);
3024        masm.addsd(dest, temp3);
3025        masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr));                              // 0x00000000,
3026                                                                                                // 0x3c800000
3027        masm.jmp(bb15);
3028
3029        masm.bind(bb14);
3030        masm.xorpd(temp1, temp1);
3031        masm.xorpd(dest, dest);
3032        masm.divsd(dest, temp1);
3033        masm.jmp(bb15);
3034
3035        masm.bind(bb2);
3036        masm.movdqu(temp1, dest);
3037        masm.mulsd(temp1, temp1);
3038        masm.jmp(bb15);
3039
3040        masm.bind(bb1);
3041        masm.pextrw(gpr3, dest, 3);
3042        masm.andl(gpr3, 32752);
3043        masm.cmpl(gpr3, 32752);
3044        masm.jcc(ConditionFlag.Equal, bb14);
3045
3046        masm.subl(gpr3, 16224);
3047        masm.shrl(gpr3, 7);
3048        masm.andl(gpr3, 65532);
3049        masm.leaq(gpr10, externalAddress(piInvTableTanPtr));
3050        masm.addq(gpr3, gpr10);
3051        masm.movdq(gpr1, dest);
3052        masm.movl(gpr9, new AMD64Address(gpr3, 20));
3053        masm.movl(gpr7, new AMD64Address(gpr3, 24));
3054        masm.movl(gpr4, gpr1);
3055        masm.shrq(gpr1, 21);
3056        masm.orl(gpr1, Integer.MIN_VALUE);
3057        masm.shrl(gpr1, 11);
3058        masm.movl(gpr8, gpr9);
3059        masm.imulq(gpr9, gpr4);
3060        masm.imulq(gpr8, gpr1);
3061        masm.imulq(gpr7, gpr1);
3062        masm.movl(gpr5, new AMD64Address(gpr3, 16));
3063        masm.movl(gpr6, new AMD64Address(gpr3, 12));
3064        masm.movl(gpr10, gpr9);
3065        masm.shrq(gpr9, 32);
3066        masm.addq(gpr8, gpr9);
3067        masm.addq(gpr10, gpr7);
3068        masm.movl(gpr7, gpr10);
3069        masm.shrq(gpr10, 32);
3070        masm.addq(gpr8, gpr10);
3071        masm.movl(gpr9, gpr5);
3072        masm.imulq(gpr5, gpr4);
3073        masm.imulq(gpr9, gpr1);
3074        masm.movl(gpr10, gpr6);
3075        masm.imulq(gpr6, gpr4);
3076        masm.movl(gpr2, gpr5);
3077        masm.shrq(gpr5, 32);
3078        masm.addq(gpr8, gpr2);
3079        masm.movl(gpr2, gpr8);
3080        masm.shrq(gpr8, 32);
3081        masm.addq(gpr9, gpr5);
3082        masm.addq(gpr9, gpr8);
3083        masm.shlq(gpr2, 32);
3084        masm.orq(gpr7, gpr2);
3085        masm.imulq(gpr10, gpr1);
3086        masm.movl(gpr8, new AMD64Address(gpr3, 8));
3087        masm.movl(gpr5, new AMD64Address(gpr3, 4));
3088        masm.movl(gpr2, gpr6);
3089        masm.shrq(gpr6, 32);
3090        masm.addq(gpr9, gpr2);
3091        masm.movl(gpr2, gpr9);
3092        masm.shrq(gpr9, 32);
3093        masm.addq(gpr10, gpr6);
3094        masm.addq(gpr10, gpr9);
3095        masm.movq(gpr6, gpr8);
3096        masm.imulq(gpr8, gpr4);
3097        masm.imulq(gpr6, gpr1);
3098        masm.movl(gpr9, gpr8);
3099        masm.shrq(gpr8, 32);
3100        masm.addq(gpr10, gpr9);
3101        masm.movl(gpr9, gpr10);
3102        masm.shrq(gpr10, 32);
3103        masm.addq(gpr6, gpr8);
3104        masm.addq(gpr6, gpr10);
3105        masm.movq(gpr8, gpr5);
3106        masm.imulq(gpr5, gpr4);
3107        masm.imulq(gpr8, gpr1);
3108        masm.shlq(gpr9, 32);
3109        masm.orq(gpr9, gpr2);
3110        masm.movl(gpr1, new AMD64Address(gpr3, 0));
3111        masm.movl(gpr10, gpr5);
3112        masm.shrq(gpr5, 32);
3113        masm.addq(gpr6, gpr10);
3114        masm.movl(gpr10, gpr6);
3115        masm.shrq(gpr6, 32);
3116        masm.addq(gpr8, gpr5);
3117        masm.addq(gpr8, gpr6);
3118        masm.imulq(gpr4, gpr1);
3119        masm.pextrw(gpr2, dest, 3);
3120        masm.leaq(gpr6, externalAddress(piInvTableTanPtr));
3121        masm.subq(gpr3, gpr6);
3122        masm.addl(gpr3, gpr3);
3123        masm.addl(gpr3, gpr3);
3124        masm.addl(gpr3, gpr3);
3125        masm.addl(gpr3, 19);
3126        masm.movl(gpr5, 32768);
3127        masm.andl(gpr5, gpr2);
3128        masm.shrl(gpr2, 4);
3129        masm.andl(gpr2, 2047);
3130        masm.subl(gpr2, 1023);
3131        masm.subl(gpr3, gpr2);
3132        masm.addq(gpr8, gpr4);
3133        masm.movl(gpr4, gpr3);
3134        masm.addl(gpr4, 32);
3135        masm.cmpl(gpr3, 0);
3136        masm.jcc(ConditionFlag.Less, bb5);
3137
3138        masm.negl(gpr3);
3139        masm.addl(gpr3, 29);
3140        masm.shll(gpr8);
3141        masm.movl(gpr6, gpr8);
3142        masm.andl(gpr8, 1073741823);
3143        masm.testl(gpr8, 536870912);
3144        masm.jcc(ConditionFlag.NotEqual, bb6);
3145
3146        masm.shrl(gpr8);
3147        masm.movl(gpr2, 0);
3148        masm.shlq(gpr8, 32);
3149        masm.orq(gpr8, gpr10);
3150
3151        masm.bind(bb8);
3152        masm.cmpq(gpr8, 0);
3153        masm.jcc(ConditionFlag.Equal, bb9);
3154
3155        masm.bind(bb10);
3156        masm.bsrq(gpr10, gpr8);
3157        masm.movl(gpr3, 29);
3158        masm.subl(gpr3, gpr10);
3159        masm.jcc(ConditionFlag.LessEqual, bb11);
3160
3161        masm.shlq(gpr8);
3162        masm.movq(gpr1, gpr9);
3163        masm.shlq(gpr9);
3164        masm.addl(gpr4, gpr3);
3165        masm.negl(gpr3);
3166        masm.addl(gpr3, 64);
3167        masm.shrq(gpr1);
3168        masm.shrq(gpr7);
3169        masm.orq(gpr8, gpr1);
3170        masm.orq(gpr9, gpr7);
3171
3172        masm.bind(bb12);
3173        masm.cvtsi2sdq(dest, gpr8);
3174        masm.shrq(gpr9, 1);
3175        masm.cvtsi2sdq(temp3, gpr9);
3176        masm.xorpd(temp4, temp4);
3177        masm.shll(gpr4, 4);
3178        masm.negl(gpr4);
3179        masm.addl(gpr4, 16368);
3180        masm.orl(gpr4, gpr5);
3181        masm.xorl(gpr4, gpr2);
3182        masm.pinsrw(temp4, gpr4, 3);
3183        masm.leaq(gpr1, externalAddress(piFourTanPtr));
3184        masm.movdq(temp2, new AMD64Address(gpr1, 0));                                           // 0x00000000,
3185                                                                                                // 0x3fe921fb,
3186        masm.movdq(temp7, new AMD64Address(gpr1, 8));                                           // 0x4611a626,
3187                                                                                                // 0x3e85110b
3188        masm.xorpd(temp5, temp5);
3189        masm.subl(gpr4, 1008);
3190        masm.pinsrw(temp5, gpr4, 3);
3191        masm.mulsd(dest, temp4);
3192        masm.shll(gpr5, 16);
3193        masm.sarl(gpr5, 31);
3194        masm.mulsd(temp3, temp5);
3195        masm.movdqu(temp1, dest);
3196        masm.mulsd(dest, temp2);
3197        masm.shrl(gpr6, 30);
3198        masm.addsd(temp1, temp3);
3199        masm.mulsd(temp3, temp2);
3200        masm.addl(gpr6, gpr5);
3201        masm.xorl(gpr6, gpr5);
3202        masm.mulsd(temp7, temp1);
3203        masm.movl(gpr1, gpr6);
3204        masm.addsd(temp7, temp3);
3205        masm.movdqu(temp2, dest);
3206        masm.addsd(dest, temp7);
3207        masm.subsd(temp2, dest);
3208        masm.addsd(temp7, temp2);
3209        masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));                              // 0x6dc9c883,
3210                                                                                                // 0x3fe45f30,
3211                                                                                                // 0x6dc9c883,
3212                                                                                                // 0x40245f30
3213        if (masm.supports(CPUFeature.SSE3)) {
3214            masm.movddup(dest, dest);
3215        } else {
3216            masm.movlhps(dest, dest);
3217        }
3218        masm.movdqu(temp4, externalAddress(signMaskTanPtr));                                    // 0x00000000,
3219                                                                                                // 0x80000000,
3220                                                                                                // 0x00000000,
3221                                                                                                // 0x80000000
3222        masm.andpd(temp4, dest);
3223        masm.mulpd(temp1, dest);
3224        if (masm.supports(CPUFeature.SSE3)) {
3225            masm.movddup(temp7, temp7);
3226        } else {
3227            masm.movlhps(temp7, temp7);
3228        }
3229        masm.movdqu(temp5, externalAddress(oneHalfTanPtr));                                     // 0x00000000,
3230                                                                                                // 0x3fe00000,
3231                                                                                                // 0x00000000,
3232                                                                                                // 0x3fe00000
3233        masm.movdqu(temp6, externalAddress(mulSixteenPtr));                                     // 0x00000000,
3234                                                                                                // 0x40300000,
3235                                                                                                // 0x00000000,
3236                                                                                                // 0x3ff00000
3237        masm.por(temp5, temp4);
3238        masm.addpd(temp1, temp5);
3239        masm.movdqu(temp5, temp1);
3240        masm.unpckhpd(temp5, temp5);
3241        masm.cvttsd2sil(gpr4, temp5);
3242        masm.cvttpd2dq(temp1, temp1);
3243        masm.cvtdq2pd(temp1, temp1);
3244        masm.mulpd(temp1, temp6);
3245        masm.movdqu(temp3, externalAddress(pOneTanPtr));                                        // 0x54444000,
3246                                                                                                // 0x3fb921fb,
3247                                                                                                // 0x54440000,
3248                                                                                                // 0x3fb921fb
3249        masm.movdq(temp5, externalAddress(qqTwoTanPtr));                                        // 0x676733af,
3250                                                                                                // 0x3d32e7b9
3251        masm.shll(gpr1, 4);
3252        masm.addl(gpr4, 469248);
3253        masm.movdqu(temp4, externalAddress(pTwoTanPtr));                                        // 0x67674000,
3254                                                                                                // 0xbd32e7b9,
3255                                                                                                // 0x4c4c0000,
3256                                                                                                // 0x3d468c23
3257        masm.mulpd(temp3, temp1);
3258        masm.addl(gpr4, gpr1);
3259        masm.andl(gpr4, 31);
3260        masm.mulsd(temp5, temp1);
3261        masm.movl(gpr3, gpr4);
3262        masm.mulpd(temp4, temp1);
3263        masm.shll(gpr3, 1);
3264        masm.subpd(dest, temp3);
3265        masm.mulpd(temp1, externalAddress(pThreeTanPtr));                                       // 0x3707344a,
3266                                                                                                // 0x3aa8a2e0,
3267                                                                                                // 0x03707345,
3268                                                                                                // 0x3ae98a2e
3269        masm.addl(gpr4, gpr3);
3270        masm.shll(gpr3, 2);
3271        masm.addl(gpr4, gpr3);
3272        masm.addsd(temp5, dest);
3273        masm.movdqu(temp2, dest);
3274        masm.subpd(dest, temp4);
3275        masm.movdq(temp6, externalAddress(onePtr));                                             // 0x00000000,
3276                                                                                                // 0x3ff00000
3277        masm.shll(gpr4, 4);
3278        masm.leaq(gpr1, externalAddress(cTableTanPtr));
3279        masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));                               // 0xfffc0000,
3280                                                                                                // 0xffffffff,
3281                                                                                                // 0x00000000,
3282                                                                                                // 0x00000000
3283        masm.movdqu(temp3, dest);
3284        masm.addq(gpr1, gpr4);
3285        masm.subpd(temp2, dest);
3286        masm.unpckhpd(dest, dest);
3287        masm.divsd(temp6, temp5);
3288        masm.subpd(temp2, temp4);
3289        masm.subsd(temp3, temp5);
3290        masm.subpd(temp2, temp1);
3291        masm.movdqu(temp1, new AMD64Address(gpr1, 48));
3292        masm.addpd(temp2, temp7);
3293        masm.movdqu(temp7, new AMD64Address(gpr1, 16));
3294        masm.mulpd(temp7, dest);
3295        masm.movdqu(temp4, new AMD64Address(gpr1, 96));
3296        masm.mulpd(temp1, dest);
3297        masm.mulpd(temp4, dest);
3298        masm.addsd(temp2, temp3);
3299        masm.movdqu(temp3, dest);
3300        masm.mulpd(dest, dest);
3301        masm.addpd(temp7, new AMD64Address(gpr1, 0));
3302        masm.addpd(temp1, new AMD64Address(gpr1, 32));
3303        masm.mulpd(temp1, dest);
3304        masm.addpd(temp4, new AMD64Address(gpr1, 80));
3305        masm.addpd(temp7, temp1);
3306        masm.movdqu(temp1, new AMD64Address(gpr1, 112));
3307        masm.mulpd(temp1, dest);
3308        masm.mulpd(dest, dest);
3309        masm.addpd(temp4, temp1);
3310        masm.movdqu(temp1, new AMD64Address(gpr1, 64));
3311        masm.mulpd(temp1, dest);
3312        masm.addpd(temp7, temp1);
3313        masm.movdqu(temp1, temp3);
3314        masm.mulpd(temp3, dest);
3315        masm.mulsd(dest, dest);
3316        masm.mulpd(temp1, new AMD64Address(gpr1, 144));
3317        masm.mulpd(temp4, temp3);
3318        masm.movdqu(temp3, temp1);
3319        masm.addpd(temp7, temp4);
3320        masm.movdqu(temp4, temp1);
3321        masm.mulsd(dest, temp7);
3322        masm.unpckhpd(temp7, temp7);
3323        masm.addsd(dest, temp7);
3324        masm.unpckhpd(temp1, temp1);
3325        masm.addsd(temp3, temp1);
3326        masm.subsd(temp4, temp3);
3327        masm.addsd(temp1, temp4);
3328        masm.movdqu(temp4, temp2);
3329        masm.movdq(temp7, new AMD64Address(gpr1, 144));
3330        masm.unpckhpd(temp2, temp2);
3331        masm.addsd(temp7, new AMD64Address(gpr1, 152));
3332        masm.mulsd(temp7, temp2);
3333        masm.addsd(temp7, new AMD64Address(gpr1, 136));
3334        masm.addsd(temp7, temp1);
3335        masm.addsd(dest, temp7);
3336        masm.movdq(temp7, externalAddress(onePtr));                                             // 0x00000000,
3337                                                                                                // 0x3ff00000
3338        masm.mulsd(temp4, temp6);
3339        masm.movdq(temp2, new AMD64Address(gpr1, 168));
3340        masm.andpd(temp2, temp6);
3341        masm.mulsd(temp5, temp2);
3342        masm.mulsd(temp6, new AMD64Address(gpr1, 160));
3343        masm.subsd(temp7, temp5);
3344        masm.subsd(temp2, new AMD64Address(gpr1, 128));
3345        masm.subsd(temp7, temp4);
3346        masm.mulsd(temp7, temp6);
3347        masm.movdqu(temp4, temp3);
3348        masm.subsd(temp3, temp2);
3349        masm.addsd(temp2, temp3);
3350        masm.subsd(temp4, temp2);
3351        masm.addsd(dest, temp4);
3352        masm.subsd(dest, temp7);
3353        masm.addsd(dest, temp3);
3354        masm.jmp(bb15);
3355
3356        masm.bind(bb9);
3357        masm.addl(gpr4, 64);
3358        masm.movq(gpr8, gpr9);
3359        masm.movq(gpr9, gpr7);
3360        masm.movl(gpr7, 0);
3361        masm.cmpq(gpr8, 0);
3362        masm.jcc(ConditionFlag.NotEqual, bb10);
3363
3364        masm.addl(gpr4, 64);
3365        masm.movq(gpr8, gpr9);
3366        masm.movq(gpr9, gpr7);
3367        masm.cmpq(gpr8, 0);
3368        masm.jcc(ConditionFlag.NotEqual, bb10);
3369
3370        masm.jmp(bb12);
3371
3372        masm.bind(bb11);
3373        masm.jcc(ConditionFlag.Equal, bb12);
3374
3375        masm.negl(gpr3);
3376        masm.shrq(gpr9);
3377        masm.movq(gpr1, gpr8);
3378        masm.shrq(gpr8);
3379        masm.subl(gpr4, gpr3);
3380        masm.negl(gpr3);
3381        masm.addl(gpr3, 64);
3382        masm.shlq(gpr1);
3383        masm.orq(gpr9, gpr1);
3384        masm.jmp(bb12);
3385
3386        masm.bind(bb5);
3387        masm.notl(gpr3);
3388        masm.shlq(gpr8, 32);
3389        masm.orq(gpr8, gpr10);
3390        masm.shlq(gpr8);
3391        masm.movq(gpr6, gpr8);
3392        masm.testl(gpr8, Integer.MIN_VALUE);
3393        masm.jcc(ConditionFlag.NotEqual, bb13);
3394
3395        masm.shrl(gpr8);
3396        masm.movl(gpr2, 0);
3397        masm.shrq(gpr6, 2);
3398        masm.jmp(bb8);
3399
3400        masm.bind(bb6);
3401        masm.shrl(gpr8);
3402        masm.movl(gpr2, 1073741824);
3403        masm.shrl(gpr2);
3404        masm.shlq(gpr8, 32);
3405        masm.orq(gpr8, gpr10);
3406        masm.shlq(gpr2, 32);
3407        masm.addl(gpr6, 1073741824);
3408        masm.movl(gpr3, 0);
3409        masm.movl(gpr10, 0);
3410        masm.subq(gpr3, gpr7);
3411        masm.sbbq(gpr10, gpr9);
3412        masm.sbbq(gpr2, gpr8);
3413        masm.movq(gpr7, gpr3);
3414        masm.movq(gpr9, gpr10);
3415        masm.movq(gpr8, gpr2);
3416        masm.movl(gpr2, 32768);
3417        masm.jmp(bb8);
3418
3419        masm.bind(bb13);
3420        masm.shrl(gpr8);
3421        masm.movq(gpr2, 0x100000000L);
3422        masm.shrq(gpr2);
3423        masm.movl(gpr3, 0);
3424        masm.movl(gpr10, 0);
3425        masm.subq(gpr3, gpr7);
3426        masm.sbbq(gpr10, gpr9);
3427        masm.sbbq(gpr2, gpr8);
3428        masm.movq(gpr7, gpr3);
3429        masm.movq(gpr9, gpr10);
3430        masm.movq(gpr8, gpr2);
3431        masm.movl(gpr2, 32768);
3432        masm.shrq(gpr6, 2);
3433        masm.addl(gpr6, 1073741824);
3434        masm.jmp(bb8);
3435
3436        masm.bind(bb15);
3437    }
3438
    /*
     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
     * Source Code
     *
     * ALGORITHM DESCRIPTION - EXP()
     * ---------------------
     *
     * Description: Let K = 64 (table size). Then
     *
     *     e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
     *
     * where
     *
     *     x = m*log(2)/K + y,   y in [-log(2)/K..log(2)/K]
     *     m = n*K + j,          m, n, j - signed integers, j in [-K/2..K/2]
     *
     * The values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
     *
     * P(y) is a minimax polynomial approximation of exp(x)-1 on the small interval
     * [-log(2)/K..log(2)/K] (its coefficients were calculated with Maple V).
     *
     * To avoid problems with arithmetic overflow and underflow, the value of 2^n is safely
     * computed as 2^n1 * 2^n2, where n1 is in [-BIAS/2..BIAS/2] and BIAS is the value of the
     * exponent bias.
     *
     * Special cases:
     *
     *     exp(NaN)  = NaN
     *     exp(+INF) = +INF
     *     exp(-INF) = 0
     *     exp(x)    = 1 for subnormals
     *
     * For a finite argument, only exp(0) = 1 is exact.
     *
     * For IEEE double:
     *
     *     if x >  709.782712893383973096 then exp(x) overflows
     *     if x < -745.133219101941108420 then exp(x) underflows
     *
     */
3460
3461    private static int[] cvExp = {
3462                    0x652b82fe, 0x40571547, 0x652b82fe, 0x40571547, 0xfefa0000,
3463                    0x3f862e42, 0xfefa0000, 0x3f862e42, 0xbc9e3b3a, 0x3d1cf79a,
3464                    0xbc9e3b3a, 0x3d1cf79a, 0xfffffffe, 0x3fdfffff, 0xfffffffe,
3465                    0x3fdfffff, 0xe3289860, 0x3f56c15c, 0x555b9e25, 0x3fa55555,
3466                    0xc090cf0f, 0x3f811115, 0x55548ba1, 0x3fc55555
3467    };
3468
3469    private static int[] shifterExp = {
3470                    0x00000000, 0x43380000, 0x00000000, 0x43380000
3471    };
3472
3473    private static int[] mMaskExp = {
3474                    0xffffffc0, 0x00000000, 0xffffffc0, 0x00000000
3475    };
3476
3477    private static int[] biasExp = {
3478                    0x0000ffc0, 0x00000000, 0x0000ffc0, 0x00000000
3479    };
3480
3481    private static int[] tblAddrExp = {
3482                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0e03754d,
3483                    0x3cad7bbf, 0x3e778060, 0x00002c9a, 0x3567f613, 0x3c8cd252,
3484                    0xd3158574, 0x000059b0, 0x61e6c861, 0x3c60f74e, 0x18759bc8,
3485                    0x00008745, 0x5d837b6c, 0x3c979aa6, 0x6cf9890f, 0x0000b558,
3486                    0x702f9cd1, 0x3c3ebe3d, 0x32d3d1a2, 0x0000e3ec, 0x1e63bcd8,
3487                    0x3ca3516e, 0xd0125b50, 0x00011301, 0x26f0387b, 0x3ca4c554,
3488                    0xaea92ddf, 0x0001429a, 0x62523fb6, 0x3ca95153, 0x3c7d517a,
3489                    0x000172b8, 0x3f1353bf, 0x3c8b898c, 0xeb6fcb75, 0x0001a35b,
3490                    0x3e3a2f5f, 0x3c9aecf7, 0x3168b9aa, 0x0001d487, 0x44a6c38d,
3491                    0x3c8a6f41, 0x88628cd6, 0x0002063b, 0xe3a8a894, 0x3c968efd,
3492                    0x6e756238, 0x0002387a, 0x981fe7f2, 0x3c80472b, 0x65e27cdd,
3493                    0x00026b45, 0x6d09ab31, 0x3c82f7e1, 0xf51fdee1, 0x00029e9d,
3494                    0x720c0ab3, 0x3c8b3782, 0xa6e4030b, 0x0002d285, 0x4db0abb6,
3495                    0x3c834d75, 0x0a31b715, 0x000306fe, 0x5dd3f84a, 0x3c8fdd39,
3496                    0xb26416ff, 0x00033c08, 0xcc187d29, 0x3ca12f8c, 0x373aa9ca,
3497                    0x000371a7, 0x738b5e8b, 0x3ca7d229, 0x34e59ff6, 0x0003a7db,
3498                    0xa72a4c6d, 0x3c859f48, 0x4c123422, 0x0003dea6, 0x259d9205,
3499                    0x3ca8b846, 0x21f72e29, 0x0004160a, 0x60c2ac12, 0x3c4363ed,
3500                    0x6061892d, 0x00044e08, 0xdaa10379, 0x3c6ecce1, 0xb5c13cd0,
3501                    0x000486a2, 0xbb7aafb0, 0x3c7690ce, 0xd5362a27, 0x0004bfda,
3502                    0x9b282a09, 0x3ca083cc, 0x769d2ca6, 0x0004f9b2, 0xc1aae707,
3503                    0x3ca509b0, 0x569d4f81, 0x0005342b, 0x18fdd78e, 0x3c933505,
3504                    0x36b527da, 0x00056f47, 0xe21c5409, 0x3c9063e1, 0xdd485429,
3505                    0x0005ab07, 0x2b64c035, 0x3c9432e6, 0x15ad2148, 0x0005e76f,
3506                    0x99f08c0a, 0x3ca01284, 0xb03a5584, 0x0006247e, 0x0073dc06,
3507                    0x3c99f087, 0x82552224, 0x00066238, 0x0da05571, 0x3c998d4d,
3508                    0x667f3bcc, 0x0006a09e, 0x86ce4786, 0x3ca52bb9, 0x3c651a2e,
3509                    0x0006dfb2, 0x206f0dab, 0x3ca32092, 0xe8ec5f73, 0x00071f75,
3510                    0x8e17a7a6, 0x3ca06122, 0x564267c8, 0x00075feb, 0x461e9f86,
3511                    0x3ca244ac, 0x73eb0186, 0x0007a114, 0xabd66c55, 0x3c65ebe1,
3512                    0x36cf4e62, 0x0007e2f3, 0xbbff67d0, 0x3c96fe9f, 0x994cce12,
3513                    0x00082589, 0x14c801df, 0x3c951f14, 0x9b4492ec, 0x000868d9,
3514                    0xc1f0eab4, 0x3c8db72f, 0x422aa0db, 0x0008ace5, 0x59f35f44,
3515                    0x3c7bf683, 0x99157736, 0x0008f1ae, 0x9c06283c, 0x3ca360ba,
3516                    0xb0cdc5e4, 0x00093737, 0x20f962aa, 0x3c95e8d1, 0x9fde4e4f,
3517                    0x00097d82, 0x2b91ce27, 0x3c71affc, 0x82a3f090, 0x0009c491,
3518                    0x589a2ebd, 0x3c9b6d34, 0x7b5de564, 0x000a0c66, 0x9ab89880,
3519                    0x3c95277c, 0xb23e255c, 0x000a5503, 0x6e735ab3, 0x3c846984,
3520                    0x5579fdbf, 0x000a9e6b, 0x92cb3387, 0x3c8c1a77, 0x995ad3ad,
3521                    0x000ae89f, 0xdc2d1d96, 0x3ca22466, 0xb84f15fa, 0x000b33a2,
3522                    0xb19505ae, 0x3ca1112e, 0xf2fb5e46, 0x000b7f76, 0x0a5fddcd,
3523                    0x3c74ffd7, 0x904bc1d2, 0x000bcc1e, 0x30af0cb3, 0x3c736eae,
3524                    0xdd85529c, 0x000c199b, 0xd10959ac, 0x3c84e08f, 0x2e57d14b,
3525                    0x000c67f1, 0x6c921968, 0x3c676b2c, 0xdcef9069, 0x000cb720,
3526                    0x36df99b3, 0x3c937009, 0x4a07897b, 0x000d072d, 0xa63d07a7,
3527                    0x3c74a385, 0xdcfba487, 0x000d5818, 0xd5c192ac, 0x3c8e5a50,
3528                    0x03db3285, 0x000da9e6, 0x1c4a9792, 0x3c98bb73, 0x337b9b5e,
3529                    0x000dfc97, 0x603a88d3, 0x3c74b604, 0xe78b3ff6, 0x000e502e,
3530                    0x92094926, 0x3c916f27, 0xa2a490d9, 0x000ea4af, 0x41aa2008,
3531                    0x3c8ec3bc, 0xee615a27, 0x000efa1b, 0x31d185ee, 0x3c8a64a9,
3532                    0x5b6e4540, 0x000f5076, 0x4d91cd9d, 0x3c77893b, 0x819e90d8,
3533                    0x000fa7c1
3534    };
3535
3536    private static int[] allOnesExp = {
3537                    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
3538    };
3539
3540    private static int[] expBias = {
3541                    0x00000000, 0x3ff00000, 0x00000000, 0x3ff00000
3542    };
3543
3544    private static int[] xMaxExp = {
3545                    0xffffffff, 0x7fefffff
3546    };
3547
3548    private static int[] xMinExp = {
3549                    0x00000000, 0x00100000
3550    };
3551
3552    private static int[] infExp = {
3553                    0x00000000, 0x7ff00000
3554    };
3555
3556    private static int[] zeroExp = {
3557                    0x00000000, 0x00000000
3558    };
3559
3560    public void expIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
3561        ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 16);
3562        ArrayDataPointerConstant cvExpPtr = new ArrayDataPointerConstant(cvExp, 16);
3563        ArrayDataPointerConstant shifterExpPtr = new ArrayDataPointerConstant(shifterExp, 8);
3564        ArrayDataPointerConstant mMaskExpPtr = new ArrayDataPointerConstant(mMaskExp, 16);
3565        ArrayDataPointerConstant biasExpPtr = new ArrayDataPointerConstant(biasExp, 16);
3566        ArrayDataPointerConstant tblAddrExpPtr = new ArrayDataPointerConstant(tblAddrExp, 16);
3567        ArrayDataPointerConstant expBiasPtr = new ArrayDataPointerConstant(expBias, 8);
3568        ArrayDataPointerConstant xMaxExpPtr = new ArrayDataPointerConstant(xMaxExp, 8);
3569        ArrayDataPointerConstant xMinExpPtr = new ArrayDataPointerConstant(xMinExp, 8);
3570        ArrayDataPointerConstant infExpPtr = new ArrayDataPointerConstant(infExp, 8);
3571        ArrayDataPointerConstant zeroExpPtr = new ArrayDataPointerConstant(zeroExp, 8);
3572        ArrayDataPointerConstant allOnesExpPtr = new ArrayDataPointerConstant(allOnesExp, 8);
3573
3574        Label bb0 = new Label();
3575        Label bb1 = new Label();
3576        Label bb2 = new Label();
3577        Label bb3 = new Label();
3578        Label bb4 = new Label();
3579        Label bb5 = new Label();
3580        Label bb7 = new Label();
3581        Label bb8 = new Label();
3582        Label bb9 = new Label();
3583        Label bb10 = new Label();
3584        Label bb11 = new Label();
3585        Label bb12 = new Label();
3586        Label bb14 = new Label();
3587
3588        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
3589        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
3590        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
3591        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
3592        Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
3593
3594        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
3595        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
3596        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
3597        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
3598        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
3599        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
3600        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
3601        Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
3602        Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
3603        Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE);
3604
3605        AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
3606
3607        setCrb(crb);
3608        masm.movsd(stackSlot, value);
3609        if (dest.encoding != value.encoding) {
3610            masm.movdqu(dest, value);
3611        }
3612
3613        masm.movdqu(temp9, externalAddress(mMaskExpPtr));                                // 0xffffffc0,
3614                                                                                         // 0x00000000,
3615                                                                                         // 0xffffffc0,
3616                                                                                         // 0x00000000
3617        masm.movdqu(temp10, externalAddress(biasExpPtr));                                // 0x0000ffc0,
3618                                                                                         // 0x00000000,
3619                                                                                         // 0x0000ffc0,
3620                                                                                         // 0x00000000
3621        masm.unpcklpd(dest, dest);
3622        masm.leaq(gpr5, stackSlot);
3623        masm.leaq(gpr2, externalAddress(cvExpPtr));
3624        masm.movdqu(temp1, new AMD64Address(gpr2, 0));                                   // 0x652b82fe,
3625                                                                                         // 0x40571547,
3626                                                                                         // 0x652b82fe,
3627                                                                                         // 0x40571547
3628        masm.movdqu(temp6, externalAddress(shifterExpPtr));                              // 0x00000000,
3629                                                                                         // 0x43380000,
3630                                                                                         // 0x00000000,
3631                                                                                         // 0x43380000
3632        masm.movdqu(temp2, new AMD64Address(gpr2, 16));                                  // 0xfefa0000,
3633                                                                                         // 0x3f862e42,
3634                                                                                         // 0xfefa0000,
3635                                                                                         // 0x3f862e42
3636        masm.movdqu(temp3, new AMD64Address(gpr2, 32));                                  // 0xbc9e3b3a,
3637                                                                                         // 0x3d1cf79a,
3638                                                                                         // 0xbc9e3b3a,
3639                                                                                         // 0x3d1cf79a
3640        masm.pextrw(gpr1, dest, 3);
3641        masm.andl(gpr1, 32767);
3642        masm.movl(gpr4, 16527);
3643        masm.subl(gpr4, gpr1);
3644        masm.subl(gpr1, 15504);
3645        masm.orl(gpr4, gpr1);
3646        masm.cmpl(gpr4, Integer.MIN_VALUE);
3647        masm.jcc(ConditionFlag.AboveEqual, bb0);
3648
3649        masm.leaq(gpr4, externalAddress(tblAddrExpPtr));
3650        masm.movdqu(temp8, new AMD64Address(gpr2, 48));                                  // 0xfffffffe,
3651                                                                                         // 0x3fdfffff,
3652                                                                                         // 0xfffffffe,
3653                                                                                         // 0x3fdfffff
3654        masm.movdqu(temp4, new AMD64Address(gpr2, 64));                                  // 0xe3289860,
3655                                                                                         // 0x3f56c15c,
3656                                                                                         // 0x555b9e25,
3657                                                                                         // 0x3fa55555
3658        masm.movdqu(temp5, new AMD64Address(gpr2, 80));                                  // 0xc090cf0f,
3659                                                                                         // 0x3f811115,
3660                                                                                         // 0x55548ba1,
3661                                                                                         // 0x3fc55555
3662        masm.mulpd(temp1, dest);
3663        masm.addpd(temp1, temp6);
3664        masm.movapd(temp7, temp1);
3665        masm.movdl(gpr1, temp1);
3666        masm.pand(temp7, temp9);
3667        masm.subpd(temp1, temp6);
3668        masm.mulpd(temp2, temp1);
3669        masm.mulpd(temp3, temp1);
3670        masm.paddq(temp7, temp10);
3671        masm.subpd(dest, temp2);
3672        masm.movl(gpr3, gpr1);
3673        masm.andl(gpr3, 63);
3674        masm.shll(gpr3, 4);
3675        masm.movdqu(temp2, new AMD64Address(gpr3, gpr4, Scale.Times1, 0));
3676        masm.sarl(gpr1, 6);
3677        masm.psllq(temp7, 46);
3678        masm.subpd(dest, temp3);
3679        masm.mulpd(temp4, dest);
3680        masm.movl(gpr4, gpr1);
3681        masm.movapd(temp6, dest);
3682        masm.movapd(temp1, dest);
3683        masm.mulpd(temp6, temp6);
3684        masm.mulpd(dest, temp6);
3685        masm.addpd(temp5, temp4);
3686        masm.mulsd(dest, temp6);
3687        masm.mulpd(temp6, temp8);
3688        masm.addsd(temp1, temp2);
3689        masm.unpckhpd(temp2, temp2);
3690        masm.mulpd(dest, temp5);
3691        masm.addsd(temp1, dest);
3692        masm.por(temp2, temp7);
3693        masm.unpckhpd(dest, dest);
3694        masm.addsd(dest, temp1);
3695        masm.addsd(dest, temp6);
3696        masm.addl(gpr4, 894);
3697        masm.cmpl(gpr4, 1916);
3698        masm.jcc(ConditionFlag.Above, bb1);
3699
3700        masm.mulsd(dest, temp2);
3701        masm.addsd(dest, temp2);
3702        masm.jmp(bb14);
3703
3704        masm.bind(bb1);
3705        masm.movdqu(temp6, externalAddress(expBiasPtr));                                 // 0x00000000,
3706                                                                                         // 0x3ff00000,
3707                                                                                         // 0x00000000,
3708                                                                                         // 0x3ff00000
3709        masm.xorpd(temp3, temp3);
3710        masm.movdqu(temp4, externalAddress(allOnesExpPtr));                              // 0xffffffff,
3711                                                                                         // 0xffffffff,
3712                                                                                         // 0xffffffff,
3713                                                                                         // 0xffffffff
3714        masm.movl(gpr4, -1022);
3715        masm.subl(gpr4, gpr1);
3716        masm.movdl(temp5, gpr4);
3717        masm.psllq(temp4, temp5);
3718        masm.movl(gpr3, gpr1);
3719        masm.sarl(gpr1, 1);
3720        masm.pinsrw(temp3, gpr1, 3);
3721        masm.psllq(temp3, 4);
3722        masm.psubd(temp2, temp3);
3723        masm.mulsd(dest, temp2);
3724        masm.cmpl(gpr4, 52);
3725        masm.jcc(ConditionFlag.Greater, bb2);
3726
3727        masm.pand(temp4, temp2);
3728        masm.paddd(temp3, temp6);
3729        masm.subsd(temp2, temp4);
3730        masm.addsd(dest, temp2);
3731        masm.cmpl(gpr3, 1023);
3732        masm.jcc(ConditionFlag.GreaterEqual, bb3);
3733
3734        masm.pextrw(gpr3, dest, 3);
3735        masm.andl(gpr3, 32768);
3736        masm.orl(gpr4, gpr3);
3737        masm.cmpl(gpr4, 0);
3738        masm.jcc(ConditionFlag.Equal, bb4);
3739
3740        masm.movapd(temp6, dest);
3741        masm.addsd(dest, temp4);
3742        masm.mulsd(dest, temp3);
3743        masm.pextrw(gpr3, dest, 3);
3744        masm.andl(gpr3, 32752);
3745        masm.cmpl(gpr3, 0);
3746        masm.jcc(ConditionFlag.Equal, bb5);
3747
3748        masm.jmp(bb14);
3749
3750        masm.bind(bb5);
3751        masm.mulsd(temp6, temp3);
3752        masm.mulsd(temp4, temp3);
3753        masm.movdqu(dest, temp6);
3754        masm.pxor(temp6, temp4);
3755        masm.psrad(temp6, 31);
3756        masm.pshufd(temp6, temp6, 85);
3757        masm.psllq(dest, 1);
3758        masm.psrlq(dest, 1);
3759        masm.pxor(dest, temp6);
3760        masm.psrlq(temp6, 63);
3761        masm.paddq(dest, temp6);
3762        masm.paddq(dest, temp4);
3763        masm.jmp(bb14);
3764
3765        masm.bind(bb4);
3766        masm.addsd(dest, temp4);
3767        masm.mulsd(dest, temp3);
3768        masm.jmp(bb14);
3769
3770        masm.bind(bb3);
3771        masm.addsd(dest, temp4);
3772        masm.mulsd(dest, temp3);
3773        masm.pextrw(gpr3, dest, 3);
3774        masm.andl(gpr3, 32752);
3775        masm.cmpl(gpr3, 32752);
3776        masm.jcc(ConditionFlag.AboveEqual, bb7);
3777
3778        masm.jmp(bb14);
3779
3780        masm.bind(bb2);
3781        masm.paddd(temp3, temp6);
3782        masm.addpd(dest, temp2);
3783        masm.mulsd(dest, temp3);
3784        masm.jmp(bb14);
3785
3786        masm.bind(bb8);
3787        masm.movsd(dest, externalAddress(xMaxExpPtr));                                   // 0xffffffff,
3788                                                                                         // 0x7fefffff
3789        masm.movsd(temp8, externalAddress(xMinExpPtr));                                  // 0x00000000,
3790                                                                                         // 0x00100000
3791        masm.cmpl(gpr1, 2146435072);
3792        masm.jcc(ConditionFlag.AboveEqual, bb9);
3793
3794        masm.movl(gpr1, new AMD64Address(gpr5, 4));
3795        masm.cmpl(gpr1, Integer.MIN_VALUE);
3796        masm.jcc(ConditionFlag.AboveEqual, bb10);
3797
3798        masm.mulsd(dest, dest);
3799
3800        masm.bind(bb7);
3801        masm.jmp(bb14);
3802
3803        masm.bind(bb10);
3804        masm.mulsd(dest, temp8);
3805        masm.jmp(bb14);
3806
3807        masm.bind(bb9);
3808        masm.movl(gpr4, stackSlot);
3809        masm.cmpl(gpr1, 2146435072);
3810        masm.jcc(ConditionFlag.Above, bb11);
3811
3812        masm.cmpl(gpr4, 0);
3813        masm.jcc(ConditionFlag.NotEqual, bb11);
3814
3815        masm.movl(gpr1, new AMD64Address(gpr5, 4));
3816        masm.cmpl(gpr1, 2146435072);
3817        masm.jcc(ConditionFlag.NotEqual, bb12);
3818
3819        masm.movsd(dest, externalAddress(infExpPtr));                                    // 0x00000000,
3820                                                                                         // 0x7ff00000
3821        masm.jmp(bb14);
3822
3823        masm.bind(bb12);
3824        masm.movsd(dest, externalAddress(zeroExpPtr));                                   // 0x00000000,
3825                                                                                         // 0x00000000
3826        masm.jmp(bb14);
3827
3828        masm.bind(bb11);
3829        masm.movsd(dest, stackSlot);
3830        masm.addsd(dest, dest);
3831        masm.jmp(bb14);
3832
3833        masm.bind(bb0);
3834        masm.movl(gpr1, new AMD64Address(gpr5, 4));
3835        masm.andl(gpr1, 2147483647);
3836        masm.cmpl(gpr1, 1083179008);
3837        masm.jcc(ConditionFlag.AboveEqual, bb8);
3838
3839        masm.addsd(dest, externalAddress(onePtr));                                       // 0x00000000,
3840                                                                                         // 0x3ff00000
3841        masm.bind(bb14);
3842    }
3843}
3844