1/* 2 * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23package org.graalvm.compiler.lir.amd64; 24 25import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; 26import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; 27import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; 28import static jdk.vm.ci.code.ValueUtil.asRegister; 29 30import org.graalvm.compiler.asm.Label; 31import org.graalvm.compiler.asm.amd64.AMD64Address; 32import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; 33import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; 34import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; 35import org.graalvm.compiler.core.common.LIRKind; 36import org.graalvm.compiler.debug.GraalError; 37import org.graalvm.compiler.lir.LIRInstructionClass; 38import org.graalvm.compiler.lir.Opcode; 39import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; 40import org.graalvm.compiler.lir.asm.CompilationResultBuilder; 41import org.graalvm.compiler.lir.gen.LIRGeneratorTool; 42 43import jdk.vm.ci.amd64.AMD64; 44import jdk.vm.ci.amd64.AMD64.CPUFeature; 45import jdk.vm.ci.amd64.AMD64Kind; 46import jdk.vm.ci.code.Register; 47import jdk.vm.ci.meta.AllocatableValue; 48import jdk.vm.ci.meta.Value; 49 50public final class AMD64MathIntrinsicUnaryOp extends AMD64LIRInstruction { 51 public static final LIRInstructionClass<AMD64MathIntrinsicUnaryOp> TYPE = LIRInstructionClass.create(AMD64MathIntrinsicUnaryOp.class); 52 53 public enum UnaryIntrinsicOpcode { 54 LOG, 55 LOG10, 56 SIN, 57 COS, 58 TAN, 59 EXP 60 } 61 62 @Opcode private final UnaryIntrinsicOpcode opcode; 63 @Def protected Value result; 64 @Use protected Value input; 65 @Temp({REG, ILLEGAL}) protected Value xmm1Temp = Value.ILLEGAL; 66 @Temp({REG, ILLEGAL}) protected Value xmm2Temp = Value.ILLEGAL; 67 @Temp({REG, ILLEGAL}) protected Value xmm3Temp = Value.ILLEGAL; 68 @Temp({REG, ILLEGAL}) protected Value xmm4Temp = Value.ILLEGAL; 69 @Temp({REG, ILLEGAL}) protected Value xmm5Temp = Value.ILLEGAL; 70 
@Temp({REG, ILLEGAL}) protected Value xmm6Temp = Value.ILLEGAL; 71 @Temp({REG, ILLEGAL}) protected Value xmm7Temp = Value.ILLEGAL; 72 @Temp({REG, ILLEGAL}) protected Value xmm8Temp = Value.ILLEGAL; 73 @Temp({REG, ILLEGAL}) protected Value xmm9Temp = Value.ILLEGAL; 74 @Temp({REG, ILLEGAL}) protected Value xmm10Temp = Value.ILLEGAL; 75 @Temp({REG, ILLEGAL}) protected Value gpr1Temp = Value.ILLEGAL; 76 @Temp({REG, ILLEGAL}) protected Value gpr2Temp = Value.ILLEGAL; 77 @Temp protected AllocatableValue rcxTemp; 78 @Temp({REG, ILLEGAL}) protected Value gpr4Temp = Value.ILLEGAL; 79 @Temp({REG, ILLEGAL}) protected Value gpr5Temp = Value.ILLEGAL; 80 @Temp({REG, ILLEGAL}) protected Value gpr6Temp = Value.ILLEGAL; 81 @Temp({REG, ILLEGAL}) protected Value gpr7Temp = Value.ILLEGAL; 82 @Temp({REG, ILLEGAL}) protected Value gpr8Temp = Value.ILLEGAL; 83 @Temp({REG, ILLEGAL}) protected Value gpr9Temp = Value.ILLEGAL; 84 @Temp({REG, ILLEGAL}) protected Value gpr10Temp = Value.ILLEGAL; 85 @Temp({STACK, ILLEGAL}) protected Value stackTemp = Value.ILLEGAL; 86 87 CompilationResultBuilder internalCrb; 88 89 public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input, Value stackTemp) { 90 super(TYPE); 91 this.opcode = opcode; 92 this.result = result; 93 this.input = input; 94 if (opcode == UnaryIntrinsicOpcode.LOG || opcode == UnaryIntrinsicOpcode.LOG10 || 95 opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS || 96 opcode == UnaryIntrinsicOpcode.TAN || opcode == UnaryIntrinsicOpcode.EXP) { 97 this.gpr1Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 98 this.gpr2Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 99 this.rcxTemp = AMD64.rcx.asValue(LIRKind.value(AMD64Kind.QWORD)); 100 this.gpr4Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 101 this.xmm1Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 102 this.xmm2Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 103 
this.xmm3Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 104 this.xmm4Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 105 this.xmm5Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 106 this.xmm6Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 107 this.xmm7Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 108 109 if (opcode == UnaryIntrinsicOpcode.EXP) { 110 this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 111 this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 112 this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 113 this.xmm10Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 114 } 115 116 if (opcode == UnaryIntrinsicOpcode.TAN) { 117 this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 118 this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 119 this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 120 this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 121 this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 122 this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 123 } 124 125 if (opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS) { 126 this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 127 this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 128 this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 129 this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 130 this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 131 this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 132 this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 133 this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 134 } 135 136 this.stackTemp = stackTemp; 137 } 138 } 139 140 public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input) { 141 
this(tool, opcode, result, input, Value.ILLEGAL); 142 } 143 144 private void setCrb(CompilationResultBuilder crb) { 145 internalCrb = crb; 146 } 147 148 private AMD64Address externalAddress(ArrayDataPointerConstant curPtr) { 149 return (AMD64Address) internalCrb.recordDataReferenceInCode(curPtr); 150 } 151 152 @Override 153 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { 154 switch (opcode) { 155 case LOG: 156 logIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 157 break; 158 case LOG10: 159 log10Intrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 160 break; 161 case SIN: 162 sinIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 163 break; 164 case COS: 165 cosIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 166 break; 167 case TAN: 168 tanIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 169 break; 170 case EXP: 171 expIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 172 break; 173 default: 174 throw GraalError.shouldNotReachHere(); 175 } 176 } 177 178 private static int[] logTwoTable = { 179 0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800, 180 0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40, 181 0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd, 182 0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4, 183 0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00, 184 0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15, 185 0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7, 186 0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79, 187 0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800, 188 0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 0xad9d8c00, 0x3fe35028, 189 0x9527e6ac, 0x3d10b83f, 0x44340800, 
0x3fe315da, 0xc5a0ed9c, 190 0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5, 191 0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00, 192 0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0, 193 0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3, 194 0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde, 195 0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800, 196 0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8, 197 0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742, 198 0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b, 199 0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400, 200 0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a, 201 0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935, 202 0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5, 203 0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000, 204 0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a, 205 0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a, 206 0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf, 207 0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800, 208 0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60, 209 0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d, 210 0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525, 211 0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800, 212 0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344, 213 0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0, 214 0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313, 215 0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000, 216 0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855, 217 0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a, 218 0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d, 219 0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 0xe6af1800, 220 0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 
0x3fd68ac8, 221 0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00, 222 0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a, 223 0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000, 224 0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0, 225 0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675, 226 0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436, 227 0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000, 228 0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1, 229 0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4, 230 0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1, 231 0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800, 232 0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c, 233 0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c, 234 0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b, 235 0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800, 236 0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c, 237 0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11, 238 0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227, 239 0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000, 240 0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb, 241 0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6, 242 0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d, 243 0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000, 244 0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a, 245 0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1, 246 0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb, 247 0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000, 248 0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e, 249 0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c, 250 0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8, 251 0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000, 252 
0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc, 253 0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1, 254 0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23, 255 0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000, 256 0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598, 257 0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd, 258 0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068, 259 0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000, 260 0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8, 261 0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8, 262 0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c, 263 0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000, 264 0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a, 265 0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef, 266 0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e, 267 0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000, 268 0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598, 269 0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b, 270 0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2, 271 0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000, 272 0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94, 273 0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28, 274 0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a, 275 0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000, 276 0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925, 277 0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe, 278 0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b, 279 0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000, 280 0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805, 281 0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000, 282 0x80000000 283 }; 284 285 private static int[] logTwoData = { 286 0xfefa3800, 
0x3fa62e42, 0x93c76730, 0x3ceef357 287 }; 288 289 private static int[] coeffLogTwoData = { 290 0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000, 0x3d6fb175, 291 0xbfc5555e, 0x55555555, 0x3fd55555, 0x9999999a, 0x3fc99999, 292 0x00000000, 0xbfe00000 293 }; 294 295 /* 296 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) 297 * Source Code 298 * 299 * ALGORITHM DESCRIPTION - LOG() --------------------- 300 * 301 * x=2^k * mx, mx in [1,2) 302 * 303 * Get B~1/mx based on the output of rcpps instruction (B0) B = int((B0*2^7+0.5))/2^7 304 * 305 * Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) 306 * 307 * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and p(r) is a degree 7 308 * polynomial -log(B) read from data table (high, low parts) Result is formed from high and low 309 * parts. 310 * 311 * Special cases: log(NaN) = quiet NaN, and raise invalid exception log(+INF) = that INF log(0) 312 * = -INF with divide-by-zero exception raised log(1) = +0 log(x) = NaN with invalid exception 313 * raised if x < -0, including -INF 314 * 315 */ 316 317 public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { 318 ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16); 319 ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16); 320 ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16); 321 322 Label bb0 = new Label(); 323 Label bb1 = new Label(); 324 Label bb2 = new Label(); 325 Label bb3 = new Label(); 326 Label bb4 = new Label(); 327 Label bb5 = new Label(); 328 Label bb6 = new Label(); 329 Label bb7 = new Label(); 330 Label bb8 = new Label(); 331 332 Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); 333 Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); 334 Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); 335 Register gpr4 = 
asRegister(gpr4Temp, AMD64Kind.QWORD); 336 337 Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); 338 Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); 339 Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); 340 Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); 341 Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); 342 Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); 343 Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); 344 345 AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); 346 347 setCrb(crb); 348 masm.movdq(stackSlot, value); 349 if (dest.encoding != value.encoding) { 350 masm.movdqu(dest, value); 351 } 352 masm.movq(gpr1, 0x3ff0000000000000L); 353 masm.movdq(temp2, gpr1); 354 masm.movq(gpr3, 0x77f0000000000000L); 355 masm.movdq(temp3, gpr3); 356 masm.movl(gpr2, 32768); 357 masm.movdl(temp4, gpr2); 358 masm.movq(gpr2, 0xffffe00000000000L); 359 masm.movdq(temp5, gpr2); 360 masm.movdqu(temp1, value); 361 masm.pextrw(gpr1, dest, 3); 362 masm.por(dest, temp2); 363 masm.movl(gpr2, 16352); 364 masm.psrlq(dest, 27); 365 masm.leaq(gpr4, externalAddress(logTwoTablePtr)); 366 masm.psrld(dest, 2); 367 masm.rcpps(dest, dest); 368 masm.psllq(temp1, 12); 369 masm.pshufd(temp6, temp5, 0xE4); 370 masm.psrlq(temp1, 12); 371 masm.subl(gpr1, 16); 372 masm.cmpl(gpr1, 32736); 373 masm.jcc(ConditionFlag.AboveEqual, bb0); 374 375 masm.bind(bb1); 376 masm.paddd(dest, temp4); 377 masm.por(temp1, temp3); 378 masm.movdl(gpr3, dest); 379 masm.psllq(dest, 29); 380 masm.pand(temp5, temp1); 381 masm.pand(dest, temp6); 382 masm.subsd(temp1, temp5); 383 masm.mulpd(temp5, dest); 384 masm.andl(gpr1, 32752); 385 masm.subl(gpr1, gpr2); 386 masm.cvtsi2sdl(temp7, gpr1); 387 masm.mulsd(temp1, dest); 388 masm.movdq(temp6, externalAddress(logTwoDataPtr)); // 0xfefa3800, 389 // 0x3fa62e42 390 masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr)); // 0x92492492, 391 // 0x3fc24924, 392 // 0x00000000, 393 // 0xbfd00000 394 masm.subsd(temp5, 
temp2); 395 masm.andl(gpr3, 16711680); 396 masm.shrl(gpr3, 12); 397 masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0)); 398 masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr)); 399 masm.movdqu(temp4, new AMD64Address(gpr4, 16)); // 0x3d6fb175, 400 // 0xbfc5555e, 401 // 0x55555555, 402 // 0x3fd55555 403 masm.addsd(temp1, temp5); 404 masm.movdqu(temp2, new AMD64Address(gpr4, 32)); // 0x9999999a, 405 // 0x3fc99999, 406 // 0x00000000, 407 // 0xbfe00000 408 masm.mulsd(temp6, temp7); 409 if (masm.supports(CPUFeature.SSE3)) { 410 masm.movddup(temp5, temp1); 411 } else { 412 masm.movdqu(temp5, temp1); 413 masm.movlhps(temp5, temp5); 414 } 415 masm.leaq(gpr4, externalAddress(logTwoDataPtr)); 416 masm.mulsd(temp7, new AMD64Address(gpr4, 8)); // 0x93c76730, 417 // 0x3ceef357 418 masm.mulsd(temp3, temp1); 419 masm.addsd(dest, temp6); 420 masm.mulpd(temp4, temp5); 421 masm.mulpd(temp5, temp5); 422 if (masm.supports(CPUFeature.SSE3)) { 423 masm.movddup(temp6, dest); 424 } else { 425 masm.movdqu(temp6, dest); 426 masm.movlhps(temp6, temp6); 427 } 428 masm.addsd(dest, temp1); 429 masm.addpd(temp4, temp2); 430 masm.mulpd(temp3, temp5); 431 masm.subsd(temp6, dest); 432 masm.mulsd(temp4, temp1); 433 masm.pshufd(temp2, dest, 0xEE); 434 masm.addsd(temp1, temp6); 435 masm.mulsd(temp5, temp5); 436 masm.addsd(temp7, temp2); 437 masm.addpd(temp4, temp3); 438 masm.addsd(temp1, temp7); 439 masm.mulpd(temp4, temp5); 440 masm.addsd(temp1, temp4); 441 masm.pshufd(temp5, temp4, 0xEE); 442 masm.addsd(temp1, temp5); 443 masm.addsd(dest, temp1); 444 masm.jmp(bb8); 445 446 masm.bind(bb0); 447 masm.movdq(dest, stackSlot); 448 masm.movdq(temp1, stackSlot); 449 masm.addl(gpr1, 16); 450 masm.cmpl(gpr1, 32768); 451 masm.jcc(ConditionFlag.AboveEqual, bb2); 452 453 masm.cmpl(gpr1, 16); 454 masm.jcc(ConditionFlag.Below, bb3); 455 456 masm.bind(bb4); 457 masm.addsd(dest, dest); 458 masm.jmp(bb8); 459 460 masm.bind(bb5); 461 masm.jcc(ConditionFlag.Above, bb4); 462 463 masm.cmpl(gpr3, 0); 464 
masm.jcc(ConditionFlag.Above, bb4); 465 466 masm.jmp(bb6); 467 468 masm.bind(bb3); 469 masm.xorpd(temp1, temp1); 470 masm.addsd(temp1, dest); 471 masm.movdl(gpr3, temp1); 472 masm.psrlq(temp1, 32); 473 masm.movdl(gpr2, temp1); 474 masm.orl(gpr3, gpr2); 475 masm.cmpl(gpr3, 0); 476 masm.jcc(ConditionFlag.Equal, bb7); 477 478 masm.xorpd(temp1, temp1); 479 masm.movl(gpr1, 18416); 480 masm.pinsrw(temp1, gpr1, 3); 481 masm.mulsd(dest, temp1); 482 masm.movdqu(temp1, dest); 483 masm.pextrw(gpr1, dest, 3); 484 masm.por(dest, temp2); 485 masm.psrlq(dest, 27); 486 masm.movl(gpr2, 18416); 487 masm.psrld(dest, 2); 488 masm.rcpps(dest, dest); 489 masm.psllq(temp1, 12); 490 masm.pshufd(temp6, temp5, 0xE4); 491 masm.psrlq(temp1, 12); 492 masm.jmp(bb1); 493 494 masm.bind(bb2); 495 masm.movdl(gpr3, temp1); 496 masm.psrlq(temp1, 32); 497 masm.movdl(gpr2, temp1); 498 masm.addl(gpr2, gpr2); 499 masm.cmpl(gpr2, -2097152); 500 masm.jcc(ConditionFlag.AboveEqual, bb5); 501 502 masm.orl(gpr3, gpr2); 503 masm.cmpl(gpr3, 0); 504 masm.jcc(ConditionFlag.Equal, bb7); 505 506 masm.bind(bb6); 507 masm.xorpd(temp1, temp1); 508 masm.xorpd(dest, dest); 509 masm.movl(gpr1, 32752); 510 masm.pinsrw(temp1, gpr1, 3); 511 masm.mulsd(dest, temp1); 512 masm.jmp(bb8); 513 514 masm.bind(bb7); 515 masm.xorpd(temp1, temp1); 516 masm.xorpd(dest, dest); 517 masm.movl(gpr1, 49136); 518 masm.pinsrw(dest, gpr1, 3); 519 masm.divsd(dest, temp1); 520 521 masm.bind(bb8); 522 } 523 524 private static int[] highmaskLogTen = { 525 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000 526 }; 527 528 private static int[] logTenE = { 529 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c 530 }; 531 532 private static int[] logTenTable = { 533 0x509f7800, 0x3fd34413, 0x1f12b358, 0x3d1fef31, 0x80333400, 534 0x3fd32418, 0xc671d9d0, 0xbcf542bf, 0x51195000, 0x3fd30442, 535 0x78a4b0c3, 0x3d18216a, 0x6fc79400, 0x3fd2e490, 0x80fa389d, 536 0xbc902869, 0x89d04000, 0x3fd2c502, 0x75c2f564, 0x3d040754, 537 0x4ddd1c00, 0x3fd2a598, 0xd219b2c3, 
0xbcfa1d84, 0x6baa7c00, 538 0x3fd28651, 0xfd9abec1, 0x3d1be6d3, 0x94028800, 0x3fd2672d, 539 0xe289a455, 0xbd1ede5e, 0x78b86400, 0x3fd2482c, 0x6734d179, 540 0x3d1fe79b, 0xcca3c800, 0x3fd2294d, 0x981a40b8, 0xbced34ea, 541 0x439c5000, 0x3fd20a91, 0xcc392737, 0xbd1a9cc3, 0x92752c00, 542 0x3fd1ebf6, 0x03c9afe7, 0x3d1e98f8, 0x6ef8dc00, 0x3fd1cd7d, 543 0x71dae7f4, 0x3d08a86c, 0x8fe4dc00, 0x3fd1af25, 0xee9185a1, 544 0xbcff3412, 0xace59400, 0x3fd190ee, 0xc2cab353, 0x3cf17ed9, 545 0x7e925000, 0x3fd172d8, 0x6952c1b2, 0x3cf1521c, 0xbe694400, 546 0x3fd154e2, 0xcacb79ca, 0xbd0bdc78, 0x26cbac00, 0x3fd1370d, 547 0xf71f4de1, 0xbd01f8be, 0x72fa0800, 0x3fd11957, 0x55bf910b, 548 0x3c946e2b, 0x5f106000, 0x3fd0fbc1, 0x39e639c1, 0x3d14a84b, 549 0xa802a800, 0x3fd0de4a, 0xd3f31d5d, 0xbd178385, 0x0b992000, 550 0x3fd0c0f3, 0x3843106f, 0xbd1f602f, 0x486ce800, 0x3fd0a3ba, 551 0x8819497c, 0x3cef987a, 0x1de49400, 0x3fd086a0, 0x1caa0467, 552 0x3d0faec7, 0x4c30cc00, 0x3fd069a4, 0xa4424372, 0xbd1618fc, 553 0x94490000, 0x3fd04cc6, 0x946517d2, 0xbd18384b, 0xb7e84000, 554 0x3fd03006, 0xe0109c37, 0xbd19a6ac, 0x798a0c00, 0x3fd01364, 555 0x5121e864, 0xbd164cf7, 0x38ce8000, 0x3fcfedbf, 0x46214d1a, 556 0xbcbbc402, 0xc8e62000, 0x3fcfb4ef, 0xdab93203, 0x3d1e0176, 557 0x2cb02800, 0x3fcf7c5a, 0x2a2ea8e4, 0xbcfec86a, 0xeeeaa000, 558 0x3fcf43fd, 0xc18e49a4, 0x3cf110a8, 0x9bb6e800, 0x3fcf0bda, 559 0x923cc9c0, 0xbd15ce99, 0xc093f000, 0x3fced3ef, 0x4d4b51e9, 560 0x3d1a04c7, 0xec58f800, 0x3fce9c3c, 0x163cad59, 0x3cac8260, 561 0x9a907000, 0x3fce2d7d, 0x3fa93646, 0x3ce4a1c0, 0x37311000, 562 0x3fcdbf99, 0x32abd1fd, 0x3d07ea9d, 0x6744b800, 0x3fcd528c, 563 0x4dcbdfd4, 0xbd1b08e2, 0xe36de800, 0x3fcce653, 0x0b7b7f7f, 564 0xbd1b8f03, 0x77506800, 0x3fcc7aec, 0xa821c9fb, 0x3d13c163, 565 0x00ff8800, 0x3fcc1053, 0x536bca76, 0xbd074ee5, 0x70719800, 566 0x3fcba684, 0xd7da9b6b, 0xbd1fbf16, 0xc6f8d800, 0x3fcb3d7d, 567 0xe2220bb3, 0x3d1a295d, 0x16c15800, 0x3fcad53c, 0xe724911e, 568 0xbcf55822, 0x82533800, 0x3fca6dbc, 0x6d982371, 
0x3cac567c, 569 0x3c19e800, 0x3fca06fc, 0x84d17d80, 0x3d1da204, 0x85ef8000, 570 0x3fc9a0f8, 0x54466a6a, 0xbd002204, 0xb0ac2000, 0x3fc93bae, 571 0xd601fd65, 0x3d18840c, 0x1bb9b000, 0x3fc8d71c, 0x7bf58766, 572 0xbd14f897, 0x34aae800, 0x3fc8733e, 0x3af6ac24, 0xbd0f5c45, 573 0x76d68000, 0x3fc81012, 0x4303e1a1, 0xbd1f9a80, 0x6af57800, 574 0x3fc7ad96, 0x43fbcb46, 0x3cf4c33e, 0xa6c51000, 0x3fc74bc7, 575 0x70f0eac5, 0xbd192e3b, 0xccab9800, 0x3fc6eaa3, 0xc0093dfe, 576 0xbd0faf15, 0x8b60b800, 0x3fc68a28, 0xde78d5fd, 0xbc9ea4ee, 577 0x9d987000, 0x3fc62a53, 0x962bea6e, 0xbd194084, 0xc9b0e800, 578 0x3fc5cb22, 0x888dd999, 0x3d1fe201, 0xe1634800, 0x3fc56c93, 579 0x16ada7ad, 0x3d1b1188, 0xc176c000, 0x3fc50ea4, 0x4159b5b5, 580 0xbcf09c08, 0x51766000, 0x3fc4b153, 0x84393d23, 0xbcf6a89c, 581 0x83695000, 0x3fc4549d, 0x9f0b8bbb, 0x3d1c4b8c, 0x538d5800, 582 0x3fc3f881, 0xf49df747, 0x3cf89b99, 0xc8138000, 0x3fc39cfc, 583 0xd503b834, 0xbd13b99f, 0xf0df0800, 0x3fc3420d, 0xf011b386, 584 0xbd05d8be, 0xe7466800, 0x3fc2e7b2, 0xf39c7bc2, 0xbd1bb94e, 585 0xcdd62800, 0x3fc28de9, 0x05e6d69b, 0xbd10ed05, 0xd015d800, 586 0x3fc234b0, 0xe29b6c9d, 0xbd1ff967, 0x224ea800, 0x3fc1dc06, 587 0x727711fc, 0xbcffb30d, 0x01540000, 0x3fc183e8, 0x39786c5a, 588 0x3cc23f57, 0xb24d9800, 0x3fc12c54, 0xc905a342, 0x3d003a1d, 589 0x82835800, 0x3fc0d54a, 0x9b9920c0, 0x3d03b25a, 0xc72ac000, 590 0x3fc07ec7, 0x46f26a24, 0x3cf0fa41, 0xdd35d800, 0x3fc028ca, 591 0x41d9d6dc, 0x3d034a65, 0x52474000, 0x3fbfa6a4, 0x44f66449, 592 0x3d19cad3, 0x2da3d000, 0x3fbefcb8, 0x67832999, 0x3d18400f, 593 0x32a10000, 0x3fbe53ce, 0x9c0e3b1a, 0xbcff62fd, 0x556b7000, 594 0x3fbdabe3, 0x02976913, 0xbcf8243b, 0x97e88000, 0x3fbd04f4, 595 0xec793797, 0x3d1c0578, 0x09647000, 0x3fbc5eff, 0x05fc0565, 596 0xbd1d799e, 0xc6426000, 0x3fbbb9ff, 0x4625f5ed, 0x3d1f5723, 597 0xf7afd000, 0x3fbb15f3, 0xdd5aae61, 0xbd1a7e1e, 0xd358b000, 598 0x3fba72d8, 0x3314e4d3, 0x3d17bc91, 0x9b1f5000, 0x3fb9d0ab, 599 0x9a4d514b, 0x3cf18c9b, 0x9cd4e000, 0x3fb92f69, 0x7e4496ab, 600 
0x3cf1f96d, 0x31f4f000, 0x3fb88f10, 0xf56479e7, 0x3d165818, 601 0xbf628000, 0x3fb7ef9c, 0x26bf486d, 0xbd1113a6, 0xb526b000, 602 0x3fb7510c, 0x1a1c3384, 0x3ca9898d, 0x8e31e000, 0x3fb6b35d, 603 0xb3875361, 0xbd0661ac, 0xd01de000, 0x3fb6168c, 0x2a7cacfa, 604 0xbd1bdf10, 0x0af23000, 0x3fb57a98, 0xff868816, 0x3cf046d0, 605 0xd8ea0000, 0x3fb4df7c, 0x1515fbe7, 0xbd1fd529, 0xde3b2000, 606 0x3fb44538, 0x6e59a132, 0x3d1faeee, 0xc8df9000, 0x3fb3abc9, 607 0xf1322361, 0xbd198807, 0x505f1000, 0x3fb3132d, 0x0888e6ab, 608 0x3d1e5380, 0x359bd000, 0x3fb27b61, 0xdfbcbb22, 0xbcfe2724, 609 0x429ee000, 0x3fb1e463, 0x6eb4c58c, 0xbcfe4dd6, 0x4a673000, 610 0x3fb14e31, 0x4ce1ac9b, 0x3d1ba691, 0x28b96000, 0x3fb0b8c9, 611 0x8c7813b8, 0xbd0b3872, 0xc1f08000, 0x3fb02428, 0xc2bc8c2c, 612 0x3cb5ea6b, 0x05a1a000, 0x3faf209c, 0x72e8f18e, 0xbce8df84, 613 0xc0b5e000, 0x3fadfa6d, 0x9fdef436, 0x3d087364, 0xaf416000, 614 0x3facd5c2, 0x1068c3a9, 0x3d0827e7, 0xdb356000, 0x3fabb296, 615 0x120a34d3, 0x3d101a9f, 0x5dfea000, 0x3faa90e6, 0xdaded264, 616 0xbd14c392, 0x6034c000, 0x3fa970ad, 0x1c9d06a9, 0xbd1b705e, 617 0x194c6000, 0x3fa851e8, 0x83996ad9, 0xbd0117bc, 0xcf4ac000, 618 0x3fa73492, 0xb1a94a62, 0xbca5ea42, 0xd67b4000, 0x3fa618a9, 619 0x75aed8ca, 0xbd07119b, 0x9126c000, 0x3fa4fe29, 0x5291d533, 620 0x3d12658f, 0x6f4d4000, 0x3fa3e50e, 0xcd2c5cd9, 0x3d1d5c70, 621 0xee608000, 0x3fa2cd54, 0xd1008489, 0x3d1a4802, 0x9900e000, 622 0x3fa1b6f9, 0x54fb5598, 0xbd16593f, 0x06bb6000, 0x3fa0a1f9, 623 0x64ef57b4, 0xbd17636b, 0xb7940000, 0x3f9f1c9f, 0xee6a4737, 624 0x3cb5d479, 0x91aa0000, 0x3f9cf7f5, 0x3a16373c, 0x3d087114, 625 0x156b8000, 0x3f9ad5ed, 0x836c554a, 0x3c6900b0, 0xd4764000, 626 0x3f98b67f, 0xed12f17b, 0xbcffc974, 0x77dec000, 0x3f9699a7, 627 0x232ce7ea, 0x3d1e35bb, 0xbfbf4000, 0x3f947f5d, 0xd84ffa6e, 628 0x3d0e0a49, 0x82c7c000, 0x3f92679c, 0x8d170e90, 0xbd14d9f2, 629 0xadd20000, 0x3f90525d, 0x86d9f88e, 0x3cdeb986, 0x86f10000, 630 0x3f8c7f36, 0xb9e0a517, 0x3ce29faa, 0xb75c8000, 0x3f885e9e, 631 0x542568cb, 
0xbd1f7bdb, 0x46b30000, 0x3f8442e8, 0xb954e7d9, 632 0x3d1e5287, 0xb7e60000, 0x3f802c07, 0x22da0b17, 0xbd19fb27, 633 0x6c8b0000, 0x3f7833e3, 0x821271ef, 0xbd190f96, 0x29910000, 634 0x3f701936, 0xbc3491a5, 0xbd1bcf45, 0x354a0000, 0x3f600fe3, 635 0xc0ff520a, 0xbd19d71c, 0x00000000, 0x00000000, 0x00000000, 636 0x00000000 637 }; 638 /* Two doubles consumed by log10Intrinsic: the first (offset 0) is loaded with movdq, the second (offset 8) is a mulsd memory operand; both multiply the value derived from the input's exponent word. Presumably the high and low parts of a scaled log10(2) -- TODO confirm. */ 639 private static int[] logTwoLogTenData = { 640 0x509f7800, 0x3f934413, 0x1f12b358, 0x3cdfef31 641 }; 642 /* Six doubles read by log10Intrinsic as three 16-byte loads at offsets 0, 16 and 32. Presumably the coefficients of the polynomial p(r) mentioned in the algorithm description -- TODO confirm. */ 643 private static int[] coeffLogTenData = { 644 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309, 0x385593b1, 645 0xc025c917, 0xdc963467, 0x3ffc6a02, 0x7f9d3aa1, 0x4016ab9f, 646 0xdc77b115, 0xbff27af2 647 }; 648 649 /* 650 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) 651 * Source Code 652 * 653 * ALGORITHM DESCRIPTION - LOG10() --------------------- 654 * 655 * Let x=2^k * mx, mx in [1,2) 656 * 657 * Get B~1/mx based on the output of rcpss instruction (B0) B = int((B0*LH*2^7+0.5))/2^7 LH is a 658 * short approximation for log10(e) 659 * 660 * Reduced argument: r=B*mx-LH (computed accurately in high and low parts) 661 * 662 * Result: k*log10(2) - log(B) + p(r) p(r) is a degree 7 polynomial -log(B) read from data table 663 * (high, low parts) Result is formed from high and low parts 664 * 665 * Special cases: log10(0) = -INF with divide-by-zero exception raised log10(1) = +0 log10(x) = 666 * NaN with invalid exception raised if x < -0, including -INF log10(+INF) = +INF 667 * 668 */ 669 /** Emits the AMD64 instruction sequence for the LOG10() algorithm described in the comment directly above. The input is spilled to {@code stackTemp} (the special-case path reloads it from there) and copied into {@code dest} when the two registers differ; every path leaves the result in {@code dest}. Clobbers gpr1Temp, gpr2Temp, rcxTemp (used as gpr3), gpr4Temp and xmm1Temp..xmm7Temp. Labels: bb1 = main path, bb0 = special-input dispatch, bb3 = +0 or positive denormal, bb2 = sign bit set, bb5 = negative with all-ones exponent, bb4 = return x + x, bb6 = return 0 * INF (NaN), bb7 = return -1 / 0 (-INF), bb8 = exit. The second ArrayDataPointerConstant argument (16) is presumably the required data alignment -- TODO confirm. @param dest register that receives the result @param value register holding the input @param crb used to resolve the address of stackTemp and handed to setCrb @param masm assembler the instructions are emitted into */ 670 public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { 671 ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16); 672 ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16); 673 ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16); 674 ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16); 675 
ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16); 676 677 Label bb0 = new Label(); 678 Label bb1 = new Label(); 679 Label bb2 = new Label(); 680 Label bb3 = new Label(); 681 Label bb4 = new Label(); 682 Label bb5 = new Label(); 683 Label bb6 = new Label(); 684 Label bb7 = new Label(); 685 Label bb8 = new Label(); 686 687 Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); 688 Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); 689 Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); 690 Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); 691 692 Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); 693 Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); 694 Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); 695 Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); 696 Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); 697 Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); 698 Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); 699 700 AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); 701 702 setCrb(crb); 703 masm.movdq(stackSlot, value); /* keep a copy of the input; the bb0 path reloads it from this slot */ 704 if (dest.encoding != value.encoding) { 705 masm.movdqu(dest, value); 706 } 707 masm.movdqu(temp5, externalAddress(highmaskLogTenPtr)); // 0xf8000000, 708 // 0xffffffff, 709 // 0x00000000, 710 // 0xffffe000 711 masm.xorpd(temp2, temp2); 712 masm.movl(gpr1, 16368); 713 masm.pinsrw(temp2, gpr1, 3); 714 masm.movl(gpr2, 1054736384); 715 masm.movdl(temp7, gpr2); 716 masm.xorpd(temp3, temp3); 717 masm.movl(gpr3, 30704); 718 masm.pinsrw(temp3, gpr3, 3); 719 masm.movl(gpr3, 32768); 720 masm.movdl(temp4, gpr3); 721 masm.movdqu(temp1, value); 722 masm.pextrw(gpr1, dest, 3); 723 masm.por(dest, temp2); 724 masm.movl(gpr2, 16352); 725 masm.psrlq(dest, 27); 726 masm.movdqu(temp2, externalAddress(logTenEPtr)); // 0x00000000, 727 // 0x3fdbc000, 728 // 0xbf2e4108, 729 // 0x3f5a7a6c 730 masm.psrld(dest, 2); 731 masm.rcpps(dest, dest); 732 
masm.psllq(temp1, 12); 733 masm.pshufd(temp6, temp5, 0x4E); 734 masm.psrlq(temp1, 12); 735 masm.subl(gpr1, 16); 736 masm.cmpl(gpr1, 32736); 737 masm.jcc(ConditionFlag.AboveEqual, bb0); /* unsigned test: taken when the top 16 bits of x are below 16 or at least 32752 (0x7ff0), i.e. +0, positive denormal, +INF, NaN or any input with the sign bit set */ 738 739 masm.bind(bb1); /* main path; bb3 also rejoins here after rescaling a denormal */ 740 masm.mulss(dest, temp7); 741 masm.por(temp1, temp3); 742 masm.andpd(temp5, temp1); 743 masm.paddd(dest, temp4); 744 masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr)); // 0xc1a5f12e, 745 // 0x40358874, 746 // 0x64d4ef0d, 747 // 0xc0089309 748 masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr)); 749 masm.movdqu(temp4, new AMD64Address(gpr4, 16)); // 0x385593b1, 750 // 0xc025c917, 751 // 0xdc963467, 752 // 0x3ffc6a02 753 masm.subsd(temp1, temp5); 754 masm.movdl(gpr3, dest); 755 masm.psllq(dest, 29); 756 masm.andpd(dest, temp6); 757 masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr)); // 0x509f7800, 758 // 0x3f934413 759 masm.andl(gpr1, 32752); 760 masm.subl(gpr1, gpr2); 761 masm.cvtsi2sdl(temp7, gpr1); 762 masm.mulpd(temp5, dest); 763 masm.mulsd(temp1, dest); 764 masm.subsd(temp5, temp2); 765 masm.movdqu(temp2, new AMD64Address(gpr4, 32)); // 0x7f9d3aa1, 766 // 0x4016ab9f, 767 // 0xdc77b115, 768 // 0xbff27af2 769 masm.leaq(gpr4, externalAddress(logTenTablePtr)); 770 masm.andl(gpr3, 16711680); 771 masm.shrl(gpr3, 12); 772 masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504)); 773 masm.addsd(temp1, temp5); 774 masm.mulsd(temp6, temp7); 775 masm.pshufd(temp5, temp1, 0x44); 776 masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr)); 777 masm.mulsd(temp7, new AMD64Address(gpr4, 8)); // 0x1f12b358, 778 // 0x3cdfef31 779 masm.mulsd(temp3, temp1); 780 masm.addsd(dest, temp6); 781 masm.mulpd(temp4, temp5); 782 masm.leaq(gpr4, externalAddress(logTenEPtr)); 783 masm.movdq(temp6, new AMD64Address(gpr4, 8)); // 0xbf2e4108, 784 // 0x3f5a7a6c 785 masm.mulpd(temp5, temp5); 786 masm.addpd(temp4, temp2); 787 masm.mulpd(temp3, temp5); 788 masm.pshufd(temp2, dest, 0xE4); 789 masm.addsd(dest, temp1); 790 masm.mulsd(temp4, temp1); 791 masm.subsd(temp2, 
dest); 792 masm.mulsd(temp6, temp1); 793 masm.addsd(temp1, temp2); 794 masm.pshufd(temp2, dest, 0xEE); 795 masm.mulsd(temp5, temp5); 796 masm.addsd(temp7, temp2); 797 masm.addsd(temp1, temp6); 798 masm.addpd(temp4, temp3); 799 masm.addsd(temp1, temp7); 800 masm.mulpd(temp4, temp5); 801 masm.addsd(temp1, temp4); 802 masm.pshufd(temp5, temp4, 0xEE); 803 masm.addsd(temp1, temp5); 804 masm.addsd(dest, temp1); 805 masm.jmp(bb8); 806 807 masm.bind(bb0); /* special inputs: reload x from the stack slot and classify it by its top 16 bits (gpr1, restored by the addl below) */ 808 masm.movdq(dest, stackSlot); 809 masm.movdq(temp1, stackSlot); 810 masm.addl(gpr1, 16); 811 masm.cmpl(gpr1, 32768); 812 masm.jcc(ConditionFlag.AboveEqual, bb2); 813 814 masm.cmpl(gpr1, 16); 815 masm.jcc(ConditionFlag.Below, bb3); 816 817 masm.bind(bb4); /* +INF or any NaN: return x + x */ 818 masm.addsd(dest, dest); 819 masm.jmp(bb8); 820 821 masm.bind(bb5); /* sign bit set and exponent all ones; the first jcc still sees the flags of the cmpl in bb2: any mantissa bit set (NaN) -> bb4, otherwise -INF -> bb6 */ 822 masm.jcc(ConditionFlag.Above, bb4); 823 824 masm.cmpl(gpr3, 0); 825 masm.jcc(ConditionFlag.Above, bb4); 826 827 masm.jmp(bb6); 828 829 masm.bind(bb3); /* top 16 bits below 16: +0 -> bb7; a positive denormal is multiplied by 2^128 (exponent word 18416 == 0x47f0), the setup is redone with gpr2 = 18416 instead of 16352 to compensate, and control rejoins bb1 */ 830 masm.xorpd(temp1, temp1); 831 masm.addsd(temp1, dest); 832 masm.movdl(gpr3, temp1); 833 masm.psrlq(temp1, 32); 834 masm.movdl(gpr2, temp1); 835 masm.orl(gpr3, gpr2); 836 masm.cmpl(gpr3, 0); 837 masm.jcc(ConditionFlag.Equal, bb7); 838 839 masm.xorpd(temp1, temp1); 840 masm.xorpd(temp2, temp2); 841 masm.movl(gpr1, 18416); 842 masm.pinsrw(temp1, gpr1, 3); 843 masm.mulsd(dest, temp1); 844 masm.movl(gpr1, 16368); 845 masm.pinsrw(temp2, gpr1, 3); 846 masm.movdqu(temp1, dest); 847 masm.pextrw(gpr1, dest, 3); 848 masm.por(dest, temp2); 849 masm.movl(gpr2, 18416); 850 masm.psrlq(dest, 27); 851 masm.movdqu(temp2, externalAddress(logTenEPtr)); // 0x00000000, 852 // 0x3fdbc000, 853 // 0xbf2e4108, 854 // 0x3f5a7a6c 855 masm.psrld(dest, 2); 856 masm.rcpps(dest, dest); 857 masm.psllq(temp1, 12); 858 masm.pshufd(temp6, temp5, 0x4E); 859 masm.psrlq(temp1, 12); 860 masm.jmp(bb1); 861 862 masm.bind(bb2); /* sign bit set: gpr3 = low 32 bits, gpr2 = high 32 bits doubled (drops the sign); all-ones exponent -> bb5, -0 -> bb7, any other negative falls into bb6 */ 863 masm.movdl(gpr3, temp1); 864 masm.psrlq(temp1, 32); 865 masm.movdl(gpr2, temp1); 866 masm.addl(gpr2, gpr2); 867 masm.cmpl(gpr2, -2097152); 868 
masm.jcc(ConditionFlag.AboveEqual, bb5); 869 870 masm.orl(gpr3, gpr2); 871 masm.cmpl(gpr3, 0); 872 masm.jcc(ConditionFlag.Equal, bb7); 873 874 masm.bind(bb6); /* negative non-zero input (including -INF): 0.0 * INF (exponent word 32752 == 0x7ff0) yields NaN */ 875 masm.xorpd(temp1, temp1); 876 masm.xorpd(dest, dest); 877 masm.movl(gpr1, 32752); 878 masm.pinsrw(temp1, gpr1, 3); 879 masm.mulsd(dest, temp1); 880 masm.jmp(bb8); 881 882 masm.bind(bb7); /* zero input: -1.0 (exponent word 49136 == 0xbff0) divided by 0.0 yields -INF */ 883 masm.xorpd(temp1, temp1); 884 masm.xorpd(dest, dest); 885 masm.movl(gpr1, 49136); 886 masm.pinsrw(dest, gpr1, 3); 887 masm.divsd(dest, temp1); 888 889 masm.bind(bb8); /* common exit; the result is in dest */ 890 } 891 892 /* 893 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) 894 * Source Code 895 * 896 * ALGORITHM DESCRIPTION - SIN() --------------------- 897 * 898 * 1. RANGE REDUCTION 899 * 900 * We perform an initial range reduction from X to r with 901 * 902 * X =~= N * pi/32 + r 903 * 904 * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this, 905 * the range reduction is insufficiently accurate. For extremely small inputs, denormalization 906 * can occur internally, impacting performance. This means that the main path is actually only 907 * taken for 2^-252 <= |X| < 90112. 908 * 909 * To avoid branches, we perform the range reduction to full accuracy each time. 910 * 911 * X - N * (P_1 + P_2 + P_3) 912 * 913 * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit 914 * number. Together, these approximate pi well enough for all cases in the restricted range. 915 * 916 * The main reduction sequence is: 917 * 918 * y = 32/pi * x N = integer(y) (computed by adding and subtracting off SHIFTER) 919 * 920 * m_1 = N * P_1 m_2 = N * P_2 r_1 = x - m_1 r = r_1 - m_2 (this r can be used for most of the 921 * calculation) 922 * 923 * c_1 = r_1 - r m_3 = N * P_3 c_2 = c_1 - m_2 c = c_2 - m_3 924 * 925 * 2. MAIN ALGORITHM 926 * 927 * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. 
The stored 928 * values are: sigma closest power of 2 to cos(B) C_hl 53-bit cos(B) - sigma S_hi + S_lo 2 * 929 * 53-bit sin(B) 930 * 931 * The computation is organized as follows: 932 * 933 * sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma) + sin(B) * [cos(r + c) - 1] + 934 * cos(B) * [sin(r + c) - r] 935 * 936 * which is approximately: 937 * 938 * [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c] + (C_hl + sigma) * 939 * [(sin(r) - r) + c] 940 * 941 * and this is what is actually computed. We separate this sum into four parts: 942 * 943 * hi + med + pols + corr 944 * 945 * where 946 * 947 * hi = S_hi + sigma r med = C_hl * r pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) 948 * corr = S_lo + c * ((C_hl + sigma) - S_hi * r) 949 * 950 * 3. POLYNOMIAL 951 * 952 * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely, 953 * since it is quite small, so we exploit parallelism to the fullest. 954 * 955 * psc4 = SC_4 * r_1 msc4 = psc4 * r r2 = r * r msc2 = SC_2 * r2 r4 = r2 * r2 psc3 = SC_3 + msc4 956 * psc1 = SC_1 + msc2 msc3 = r4 * psc3 sincospols = psc1 + msc3 pols = sincospols * <S_hi * r^2 957 * | (C_hl + sigma) * r^3> 958 * 959 * 4. CORRECTION TERM 960 * 961 * This is where the "c" component of the range reduction is taken into account; recall that 962 * just "r" is used for most of the calculation. 963 * 964 * -c = m_3 - c_2 -d = S_hi * r - (C_hl + sigma) corr = -c * -d + S_lo 965 * 966 * 5. COMPENSATED SUMMATIONS 967 * 968 * The two successive compensated summations add up the high and medium parts, leaving just the 969 * low parts to add up at the end. 970 * 971 * rs = sigma * r res_int = S_hi + rs k_0 = S_hi - res_int k_2 = k_0 + rs med = C_hl * r res_hi 972 * = res_int + med k_1 = res_int - res_hi k_3 = k_1 + med 973 * 974 * 6. 
FINAL SUMMATION 975 * 976 * We now add up all the small parts: 977 * 978 * res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 979 * 980 * Now the overall result is just: 981 * 982 * res_hi + res_lo 983 * 984 * 7. SMALL ARGUMENTS 985 * 986 * If |x| < SNN (SNN meaning the smallest normal number), we simply perform 0.1111111 cdots 1111 987 * * x. For SNN <= |x|, we do 2^-55 * (2^55 * x - x). 988 * 989 * Special cases: sin(NaN) = quiet NaN, and raise invalid exception sin(INF) = NaN and raise 990 * invalid exception sin(+/-0) = +/-0 991 * 992 */ 993 /* Constant tables read by sinIntrinsic (possibly by other intrinsics outside this view as well). Each double is written as two ints, low word first (oneHalf is 0x3fe0000000000000 == 0.5, stored twice). NOTE(review): unlike the private static log10 tables, these are public, non-static and non-final, so each instance gets its own mutable copy -- confirm whether that is intended. */ 994 public int[] oneHalf = { 995 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000 996 }; 997 998 public int[] pTwo = { 999 0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461 1000 }; 1001 1002 public int[] scFour = { 1003 0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0 1004 }; 1005 /* 512 ints == 64 entries of 32 bytes. sinIntrinsic selects an entry with (N & 63) << 5 and reads 16 bytes at offset 0, 8 bytes at offset 16 and 8 bytes at offset 24 of it. Judging by the first entries, an entry appears to be laid out as { C_hl, S_hi, S_lo, sigma } in the terms of the algorithm description -- TODO confirm. */ 1006 public int[] cTable = { 1007 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 1008 0x00000000, 0x00000000, 0x3ff00000, 0x176d6d31, 0xbf73b92e, 1009 0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000, 1010 0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0x3fc8f8b8, 1011 0xc0000000, 0xbc626d19, 0x00000000, 0x3ff00000, 0x939d225a, 1012 0xbfa60bea, 0x2ed59f06, 0x3fd29406, 0xa0000000, 0xbc75d28d, 1013 0x00000000, 0x3ff00000, 0x866b95cf, 0xbfb37ca1, 0xa6aea963, 1014 0x3fd87de2, 0xe0000000, 0xbc672ced, 0x00000000, 0x3ff00000, 1015 0x73fa1279, 0xbfbe3a68, 0x3806f63b, 0x3fde2b5d, 0x20000000, 1016 0x3c5e0d89, 0x00000000, 0x3ff00000, 0x5bc57974, 0xbfc59267, 1017 0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000, 1018 0x3ff00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0x3fe44cf3, 1019 0x20000000, 0x3c68076a, 0x00000000, 0x3ff00000, 0x99fcef32, 1020 0x3fca8279, 0x667f3bcd, 0x3fe6a09e, 0x20000000, 0xbc8bdd34, 1021 0x00000000, 0x3fe00000, 0x94247758, 0x3fc133cc, 0x6b151741, 1022 0x3fe8bc80, 0x20000000, 0xbc82c5e1, 0x00000000, 0x3fe00000, 1023 0x9ae68c87, 0x3fac73b3, 0x290ea1a3, 0x3fea9b66, 0xe0000000, 1024 0x3c39f630, 0x00000000, 0x3fe00000, 
0x7f909c4e, 0xbf9d4a2c, 1025 0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000, 1026 0x3fe00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0x3fed906b, 1027 0x20000000, 0x3c7457e6, 0x00000000, 0x3fe00000, 0x76acf82d, 1028 0x3fa4a031, 0x56c62dda, 0x3fee9f41, 0xe0000000, 0x3c8760b1, 1029 0x00000000, 0x3fd00000, 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, 1030 0x3fef6297, 0x20000000, 0x3c756217, 0x00000000, 0x3fd00000, 1031 0x0f592f50, 0xbf9ba165, 0xa3d12526, 0x3fefd88d, 0x40000000, 1032 0xbc887df6, 0x00000000, 0x3fc00000, 0x00000000, 0x00000000, 1033 0x00000000, 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, 1034 0x00000000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0x3fefd88d, 1035 0x40000000, 0xbc887df6, 0x00000000, 0xbfc00000, 0x0e5967d5, 1036 0x3fac1d1f, 0xcff75cb0, 0x3fef6297, 0x20000000, 0x3c756217, 1037 0x00000000, 0xbfd00000, 0x76acf82d, 0xbfa4a031, 0x56c62dda, 1038 0x3fee9f41, 0xe0000000, 0x3c8760b1, 0x00000000, 0xbfd00000, 1039 0x65455a75, 0x3fbe0875, 0xcf328d46, 0x3fed906b, 0x20000000, 1040 0x3c7457e6, 0x00000000, 0xbfe00000, 0x7f909c4e, 0x3f9d4a2c, 1041 0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000, 1042 0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0x3fea9b66, 1043 0xe0000000, 0x3c39f630, 0x00000000, 0xbfe00000, 0x94247758, 1044 0xbfc133cc, 0x6b151741, 0x3fe8bc80, 0x20000000, 0xbc82c5e1, 1045 0x00000000, 0xbfe00000, 0x99fcef32, 0xbfca8279, 0x667f3bcd, 1046 0x3fe6a09e, 0x20000000, 0xbc8bdd34, 0x00000000, 0xbfe00000, 1047 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, 0x3fe44cf3, 0x20000000, 1048 0x3c68076a, 0x00000000, 0xbff00000, 0x5bc57974, 0x3fc59267, 1049 0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000, 1050 0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0x3fde2b5d, 1051 0x20000000, 0x3c5e0d89, 0x00000000, 0xbff00000, 0x866b95cf, 1052 0x3fb37ca1, 0xa6aea963, 0x3fd87de2, 0xe0000000, 0xbc672ced, 1053 0x00000000, 0xbff00000, 0x939d225a, 0x3fa60bea, 0x2ed59f06, 1054 0x3fd29406, 0xa0000000, 0xbc75d28d, 0x00000000, 0xbff00000, 1055 0x011469fb, 
0x3f93ad06, 0x3c69a60b, 0x3fc8f8b8, 0xc0000000, 1056 0xbc626d19, 0x00000000, 0xbff00000, 0x176d6d31, 0x3f73b92e, 1057 0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000, 1058 0xbff00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 1059 0x00000000, 0x00000000, 0x00000000, 0xbff00000, 0x176d6d31, 1060 0x3f73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718, 1061 0x00000000, 0xbff00000, 0x011469fb, 0x3f93ad06, 0x3c69a60b, 1062 0xbfc8f8b8, 0xc0000000, 0x3c626d19, 0x00000000, 0xbff00000, 1063 0x939d225a, 0x3fa60bea, 0x2ed59f06, 0xbfd29406, 0xa0000000, 1064 0x3c75d28d, 0x00000000, 0xbff00000, 0x866b95cf, 0x3fb37ca1, 1065 0xa6aea963, 0xbfd87de2, 0xe0000000, 0x3c672ced, 0x00000000, 1066 0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0xbfde2b5d, 1067 0x20000000, 0xbc5e0d89, 0x00000000, 0xbff00000, 0x5bc57974, 1068 0x3fc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd, 1069 0x00000000, 0xbff00000, 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, 1070 0xbfe44cf3, 0x20000000, 0xbc68076a, 0x00000000, 0xbff00000, 1071 0x99fcef32, 0xbfca8279, 0x667f3bcd, 0xbfe6a09e, 0x20000000, 1072 0x3c8bdd34, 0x00000000, 0xbfe00000, 0x94247758, 0xbfc133cc, 1073 0x6b151741, 0xbfe8bc80, 0x20000000, 0x3c82c5e1, 0x00000000, 1074 0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0xbfea9b66, 1075 0xe0000000, 0xbc39f630, 0x00000000, 0xbfe00000, 0x7f909c4e, 1076 0x3f9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1, 1077 0x00000000, 0xbfe00000, 0x65455a75, 0x3fbe0875, 0xcf328d46, 1078 0xbfed906b, 0x20000000, 0xbc7457e6, 0x00000000, 0xbfe00000, 1079 0x76acf82d, 0xbfa4a031, 0x56c62dda, 0xbfee9f41, 0xe0000000, 1080 0xbc8760b1, 0x00000000, 0xbfd00000, 0x0e5967d5, 0x3fac1d1f, 1081 0xcff75cb0, 0xbfef6297, 0x20000000, 0xbc756217, 0x00000000, 1082 0xbfd00000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0xbfefd88d, 1083 0x40000000, 0x3c887df6, 0x00000000, 0xbfc00000, 0x00000000, 1084 0x00000000, 0x00000000, 0xbff00000, 0x00000000, 0x00000000, 1085 0x00000000, 0x00000000, 0x0f592f50, 0xbf9ba165, 0xa3d12526, 
1086 0xbfefd88d, 0x40000000, 0x3c887df6, 0x00000000, 0x3fc00000, 1087 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, 0xbfef6297, 0x20000000, 1088 0xbc756217, 0x00000000, 0x3fd00000, 0x76acf82d, 0x3fa4a031, 1089 0x56c62dda, 0xbfee9f41, 0xe0000000, 0xbc8760b1, 0x00000000, 1090 0x3fd00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0xbfed906b, 1091 0x20000000, 0xbc7457e6, 0x00000000, 0x3fe00000, 0x7f909c4e, 1092 0xbf9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1, 1093 0x00000000, 0x3fe00000, 0x9ae68c87, 0x3fac73b3, 0x290ea1a3, 1094 0xbfea9b66, 0xe0000000, 0xbc39f630, 0x00000000, 0x3fe00000, 1095 0x94247758, 0x3fc133cc, 0x6b151741, 0xbfe8bc80, 0x20000000, 1096 0x3c82c5e1, 0x00000000, 0x3fe00000, 0x99fcef32, 0x3fca8279, 1097 0x667f3bcd, 0xbfe6a09e, 0x20000000, 0x3c8bdd34, 0x00000000, 1098 0x3fe00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0xbfe44cf3, 1099 0x20000000, 0xbc68076a, 0x00000000, 0x3ff00000, 0x5bc57974, 1100 0xbfc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd, 1101 0x00000000, 0x3ff00000, 0x73fa1279, 0xbfbe3a68, 0x3806f63b, 1102 0xbfde2b5d, 0x20000000, 0xbc5e0d89, 0x00000000, 0x3ff00000, 1103 0x866b95cf, 0xbfb37ca1, 0xa6aea963, 0xbfd87de2, 0xe0000000, 1104 0x3c672ced, 0x00000000, 0x3ff00000, 0x939d225a, 0xbfa60bea, 1105 0x2ed59f06, 0xbfd29406, 0xa0000000, 0x3c75d28d, 0x00000000, 1106 0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0xbfc8f8b8, 1107 0xc0000000, 0x3c626d19, 0x00000000, 0x3ff00000, 0x176d6d31, 1108 0xbf73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718, 1109 0x00000000, 0x3ff00000 1110 }; 1111 1112 public int[] scTwo = { 1113 0x11111111, 0x3f811111, 0x55555555, 0x3fa55555 1114 }; 1115 1116 public int[] scThree = { 1117 0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c 1118 }; 1119 1120 public int[] scOne = { 1121 0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000 1122 }; 1123 /* 41 ints. On its large-argument path sinIntrinsic turns the input's exponent into a 4-byte-aligned byte offset into this table and reads the seven consecutive ints at offsets 0..24 from there as operands of its multi-word integer multiply. Presumably the binary expansion of 2/pi -- TODO confirm. */ 1124 public int[] piInvTable = { 1125 0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1, 1126 0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561, 1127 0xb7246e3a, 
0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c, 1128 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41, 1129 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 1130 0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 1131 0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 1132 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab, 1133 0xf0cfbc21 1134 }; 1135 1136 public int[] piFour = { 1137 0x40000000, 0x3fe921fb, 0x18469899, 0x3e64442d 1138 }; 1139 1140 public int[] piThirtyTwoInv = { 1141 0x6dc9c883, 0x40245f30 1142 }; 1143 1144 public int[] shifter = { 1145 0x00000000, 0x43380000 1146 }; 1147 1148 public int[] signMask = { 1149 0x00000000, 0x80000000 1150 }; 1151 1152 public int[] pThree = { 1153 0x2e037073, 0x3b63198a 1154 }; 1155 1156 public int[] allOnes = { 1157 0xffffffff, 0x3fefffff 1158 }; 1159 1160 public int[] twoPowFiftyFive = { 1161 0x00000000, 0x43600000 1162 }; 1163 1164 public int[] twoPowFiftyFiveM = { 1165 0x00000000, 0x3c800000 1166 }; 1167 /* Same bit pattern as the immediate 0x3fb921fb54400000L that the main path of sinIntrinsic moves through gpr7; only the bb12 path loads it from this array. */ 1168 public int[] pOne = { 1169 0x54400000, 0x3fb921fb 1170 }; 1171 /** Emits the AMD64 instruction sequence for the SIN() algorithm described in the comment that precedes the constant tables. The input is stored to {@code stackTemp} (its high 32 bits are reloaded from there for the range check) and copied into {@code dest} when the two registers differ; the result is returned in {@code dest}. Clobbers gpr1Temp, gpr2Temp, rcxTemp (used as gpr3), gpr4Temp..gpr10Temp and xmm1Temp..xmm9Temp. gpr3 is pinned to rcxTemp presumably because the shift calls without an explicit count (shll(gpr8), shlq(gpr8), shrq(gpr1), ...) take their count from CL -- TODO confirm. Labels: fall-through = main path; bb0 = out-of-range dispatch; bb2 and the code just before it = very small |x|; bb1 = large |x|, going to bb14 for INF/NaN and otherwise through the piInvTable-based reduction (bb4..bb13) into bb12; bb15 = exit. @param dest register that receives the result @param value register holding the input @param crb used to resolve the address of stackTemp and handed to setCrb @param masm assembler the instructions are emitted into */ 1172 public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { 1173 ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16); 1174 ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16); 1175 ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16); 1176 ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16); 1177 ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16); 1178 ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16); 1179 ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16); 1180 ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16); 1181 ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16); 1182 ArrayDataPointerConstant 
piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8); 1183 ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8); 1184 ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8); 1185 ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8); 1186 ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8); 1187 ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8); 1188 ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8); 1189 ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8); 1190 1191 Label bb0 = new Label(); 1192 Label bb1 = new Label(); 1193 Label bb2 = new Label(); 1194 Label bb4 = new Label(); 1195 Label bb5 = new Label(); 1196 Label bb6 = new Label(); 1197 Label bb8 = new Label(); 1198 Label bb9 = new Label(); 1199 Label bb10 = new Label(); 1200 Label bb11 = new Label(); 1201 Label bb12 = new Label(); 1202 Label bb13 = new Label(); 1203 Label bb14 = new Label(); 1204 Label bb15 = new Label(); 1205 1206 Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); 1207 Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); 1208 Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); 1209 Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); 1210 Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); 1211 Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD); 1212 Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD); 1213 Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD); 1214 Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD); 1215 Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD); 1216 1217 Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); 1218 Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); 1219 Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); 1220 Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); 1221 Register 
temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); 1222 Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); 1223 Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); 1224 Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); 1225 Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); 1226 1227 AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); 1228 1229 setCrb(crb); 1230 masm.movsd(stackSlot, value); 1231 if (dest.encoding != value.encoding) { 1232 masm.movdqu(dest, value); 1233 } 1234 1235 masm.leaq(gpr1, stackSlot); 1236 masm.movl(gpr1, new AMD64Address(gpr1, 4)); 1237 masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, 1238 // 0x40245f30 1239 masm.movdq(temp2, externalAddress(shifterPtr)); // 0x00000000, 1240 // 0x43380000 1241 1242 masm.andl(gpr1, 2147418112); 1243 masm.subl(gpr1, 808452096); 1244 masm.cmpl(gpr1, 281346048); 1245 masm.jcc(ConditionFlag.Above, bb0); /* gpr1 = (high word of x & 0x7fff0000) - 0x30300000; the unsigned test is taken when the masked high word lies outside [0x30300000, 0x40f50000], i.e. outside the main-path range 2^-252 <= |x| < 90112 */ 1246 1247 masm.mulsd(temp1, dest); 1248 masm.movdqu(temp5, externalAddress(oneHalfPtr)); // 0x00000000, 1249 // 0x3fe00000, 1250 // 0x00000000, 1251 // 0x3fe00000 1252 masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, 1253 // 0x80000000 1254 masm.pand(temp4, dest); 1255 masm.por(temp5, temp4); 1256 masm.addpd(temp1, temp5); 1257 masm.cvttsd2sil(gpr4, temp1); 1258 masm.cvtsi2sdl(temp1, gpr4); 1259 masm.movdqu(temp6, externalAddress(pTwoPtr)); // 0x1a600000, 1260 // 0x3d90b461, 1261 // 0x1a600000, 1262 // 0x3d90b461 1263 masm.movq(gpr7, 0x3fb921fb54400000L); 1264 masm.movdq(temp3, gpr7); 1265 masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, 1266 // 0x3ec71de3, 1267 // 0x1a01a01a, 1268 // 0x3efa01a0 1269 masm.pshufd(temp4, dest, 0x44); 1270 masm.mulsd(temp3, temp1); 1271 if (masm.supports(CPUFeature.SSE3)) { 1272 masm.movddup(temp1, temp1); 1273 } else { 1274 masm.movlhps(temp1, temp1); 1275 } 1276 masm.andl(gpr4, 63); 1277 masm.shll(gpr4, 5); 1278 masm.leaq(gpr1, externalAddress(cTablePtr)); 1279 masm.addq(gpr1, gpr4); 1280 
masm.movdqu(temp8, new AMD64Address(gpr1, 0)); 1281 masm.mulpd(temp6, temp1); 1282 masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, 1283 // 0x3b63198a 1284 masm.subsd(temp4, temp3); 1285 masm.subsd(dest, temp3); 1286 if (masm.supports(CPUFeature.SSE3)) { 1287 masm.movddup(temp3, temp4); 1288 } else { 1289 masm.movdqu(temp3, temp4); 1290 masm.movlhps(temp3, temp3); 1291 } 1292 masm.subsd(temp4, temp6); 1293 masm.pshufd(dest, dest, 0x44); 1294 masm.pshufd(temp7, temp8, 0xE); 1295 masm.movdqu(temp2, temp8); 1296 masm.movdqu(temp9, temp7); 1297 masm.mulpd(temp5, dest); 1298 masm.subpd(dest, temp6); 1299 masm.mulsd(temp7, temp4); 1300 masm.subsd(temp3, temp4); 1301 masm.mulpd(temp5, dest); 1302 masm.mulpd(dest, dest); 1303 masm.subsd(temp3, temp6); 1304 masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, 1305 // 0x3f811111, 1306 // 0x55555555, 1307 // 0x3fa55555 1308 masm.subsd(temp1, temp3); 1309 masm.movdq(temp3, new AMD64Address(gpr1, 24)); 1310 masm.addsd(temp2, temp3); 1311 masm.subsd(temp7, temp2); 1312 masm.mulsd(temp2, temp4); 1313 masm.mulpd(temp6, dest); 1314 masm.mulsd(temp3, temp4); 1315 masm.mulpd(temp2, dest); 1316 masm.mulpd(dest, dest); 1317 masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, 1318 // 0xbf2a01a0, 1319 // 0x16c16c17, 1320 // 0xbf56c16c 1321 masm.mulsd(temp4, temp8); 1322 masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, 1323 // 0xbfc55555, 1324 // 0x00000000, 1325 // 0xbfe00000 1326 masm.mulpd(temp5, dest); 1327 masm.movdqu(dest, temp3); 1328 masm.addsd(temp3, temp9); 1329 masm.mulpd(temp1, temp7); 1330 masm.movdqu(temp7, temp4); 1331 masm.addsd(temp4, temp3); 1332 masm.addpd(temp6, temp5); 1333 masm.subsd(temp9, temp3); 1334 masm.subsd(temp3, temp4); 1335 masm.addsd(temp1, new AMD64Address(gpr1, 16)); 1336 masm.mulpd(temp6, temp2); 1337 masm.addsd(temp9, dest); 1338 masm.addsd(temp3, temp7); 1339 masm.addsd(temp1, temp9); 1340 masm.addsd(temp1, temp3); 1341 masm.addsd(temp1, temp6); 1342 
masm.unpckhpd(temp6, temp6); 1343 masm.movdqu(dest, temp4); 1344 masm.addsd(temp1, temp6); 1345 masm.addsd(dest, temp1); 1346 masm.jmp(bb15); 1347 1348 masm.bind(bb14); /* INF or NaN input (exponent field all ones): 0.0 / 0.0 yields NaN */ 1349 masm.xorpd(temp1, temp1); 1350 masm.xorpd(dest, dest); 1351 masm.divsd(dest, temp1); 1352 masm.jmp(bb15); 1353 1354 masm.bind(bb0); /* still on the flags of the range-check cmpl: signed Greater means |x| is above the main-path range (bb1); otherwise |x| is below it */ 1355 masm.jcc(ConditionFlag.Greater, bb1); 1356 1357 masm.shrl(gpr1, 20); 1358 masm.cmpl(gpr1, 3325); 1359 masm.jcc(ConditionFlag.NotEqual, bb2); /* 3325 == 0xcfd is what the shift produces when the exponent field of x is zero; in that case fall through and return x * allOnes */ 1360 1361 masm.mulsd(dest, externalAddress(allOnesPtr)); // 0xffffffff, 1362 // 0x3fefffff 1363 masm.jmp(bb15); 1364 1365 masm.bind(bb2); /* small but normal |x|: computes 2^-55 * (2^55 * x - x) in temp3. NOTE(review): temp3 is never copied to dest, so this path exits with dest still holding the input x -- confirm that only the floating-point side effects of the sequence are wanted here. */ 1366 masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr)); // 0x00000000, 1367 // 0x43600000 1368 masm.mulsd(temp3, dest); 1369 masm.subsd(temp3, dest); 1370 masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr)); // 0x00000000, 1371 // 0x3c800000 1372 masm.jmp(bb15); 1373 1374 masm.bind(bb1); /* large |x|: INF/NaN -> bb14; otherwise multiply the mantissa (gpr1 = upper bits with the implicit one set, gpr4 = low 32 bits) by seven consecutive piInvTable words selected from the exponent */ 1375 masm.pextrw(gpr3, dest, 3); 1376 masm.andl(gpr3, 32752); 1377 masm.cmpl(gpr3, 32752); 1378 masm.jcc(ConditionFlag.Equal, bb14); 1379 1380 masm.subl(gpr3, 16224); 1381 masm.shrl(gpr3, 7); 1382 masm.andl(gpr3, 65532); 1383 masm.leaq(gpr10, externalAddress(piInvTablePtr)); 1384 masm.addq(gpr3, gpr10); 1385 masm.movdq(gpr1, dest); 1386 masm.movl(gpr9, new AMD64Address(gpr3, 20)); 1387 masm.movl(gpr7, new AMD64Address(gpr3, 24)); 1388 masm.movl(gpr4, gpr1); 1389 masm.shrq(gpr1, 21); 1390 masm.orl(gpr1, Integer.MIN_VALUE); 1391 masm.shrl(gpr1, 11); 1392 masm.movl(gpr8, gpr9); 1393 masm.imulq(gpr9, gpr4); 1394 masm.imulq(gpr8, gpr1); 1395 masm.imulq(gpr7, gpr1); 1396 masm.movl(gpr5, new AMD64Address(gpr3, 16)); 1397 masm.movl(gpr6, new AMD64Address(gpr3, 12)); 1398 masm.movl(gpr10, gpr9); 1399 masm.shrq(gpr9, 32); 1400 masm.addq(gpr8, gpr9); 1401 masm.addq(gpr10, gpr7); 1402 masm.movl(gpr7, gpr10); 1403 masm.shrq(gpr10, 32); 1404 masm.addq(gpr8, gpr10); 1405 masm.movl(gpr9, gpr5); 1406 masm.imulq(gpr5, gpr4); 1407 masm.imulq(gpr9, gpr1); 1408 masm.movl(gpr10, gpr6); 1409 masm.imulq(gpr6, gpr4); 
1410 masm.movl(gpr2, gpr5); 1411 masm.shrq(gpr5, 32); 1412 masm.addq(gpr8, gpr2); 1413 masm.movl(gpr2, gpr8); 1414 masm.shrq(gpr8, 32); 1415 masm.addq(gpr9, gpr5); 1416 masm.addq(gpr9, gpr8); 1417 masm.shlq(gpr2, 32); 1418 masm.orq(gpr7, gpr2); 1419 masm.imulq(gpr10, gpr1); 1420 masm.movl(gpr8, new AMD64Address(gpr3, 8)); 1421 masm.movl(gpr5, new AMD64Address(gpr3, 4)); 1422 masm.movl(gpr2, gpr6); 1423 masm.shrq(gpr6, 32); 1424 masm.addq(gpr9, gpr2); 1425 masm.movl(gpr2, gpr9); 1426 masm.shrq(gpr9, 32); 1427 masm.addq(gpr10, gpr6); 1428 masm.addq(gpr10, gpr9); 1429 masm.movq(gpr6, gpr8); 1430 masm.imulq(gpr8, gpr4); 1431 masm.imulq(gpr6, gpr1); 1432 masm.movl(gpr9, gpr8); 1433 masm.shrq(gpr8, 32); 1434 masm.addq(gpr10, gpr9); 1435 masm.movl(gpr9, gpr10); 1436 masm.shrq(gpr10, 32); 1437 masm.addq(gpr6, gpr8); 1438 masm.addq(gpr6, gpr10); 1439 masm.movq(gpr8, gpr5); 1440 masm.imulq(gpr5, gpr4); 1441 masm.imulq(gpr8, gpr1); 1442 masm.shlq(gpr9, 32); 1443 masm.orq(gpr9, gpr2); 1444 masm.movl(gpr1, new AMD64Address(gpr3, 0)); 1445 masm.movl(gpr10, gpr5); 1446 masm.shrq(gpr5, 32); 1447 masm.addq(gpr6, gpr10); 1448 masm.movl(gpr10, gpr6); 1449 masm.shrq(gpr6, 32); 1450 masm.addq(gpr8, gpr5); 1451 masm.addq(gpr8, gpr6); 1452 masm.imulq(gpr4, gpr1); 1453 masm.pextrw(gpr2, dest, 3); 1454 masm.leaq(gpr6, externalAddress(piInvTablePtr)); 1455 masm.subq(gpr3, gpr6); 1456 masm.addl(gpr3, gpr3); 1457 masm.addl(gpr3, gpr3); 1458 masm.addl(gpr3, gpr3); 1459 masm.addl(gpr3, 19); 1460 masm.movl(gpr5, 32768); 1461 masm.andl(gpr5, gpr2); 1462 masm.shrl(gpr2, 4); 1463 masm.andl(gpr2, 2047); 1464 masm.subl(gpr2, 1023); 1465 masm.subl(gpr3, gpr2); 1466 masm.addq(gpr8, gpr4); 1467 masm.movl(gpr4, gpr3); 1468 masm.addl(gpr4, 32); 1469 masm.cmpl(gpr3, 1); 1470 masm.jcc(ConditionFlag.Less, bb4); 1471 1472 masm.negl(gpr3); 1473 masm.addl(gpr3, 29); 1474 masm.shll(gpr8); 1475 masm.movl(gpr6, gpr8); 1476 masm.andl(gpr8, 536870911); 1477 masm.testl(gpr8, 268435456); 1478 
masm.jcc(ConditionFlag.NotEqual, bb5); 1479 1480 masm.shrl(gpr8); 1481 masm.movl(gpr2, 0); 1482 masm.shlq(gpr8, 32); 1483 masm.orq(gpr8, gpr10); 1484 1485 masm.bind(bb6); 1486 1487 masm.cmpq(gpr8, 0); 1488 masm.jcc(ConditionFlag.Equal, bb8); 1489 1490 masm.bind(bb9); 1491 masm.bsrq(gpr10, gpr8); 1492 masm.movl(gpr3, 29); 1493 masm.subl(gpr3, gpr10); 1494 masm.jcc(ConditionFlag.LessEqual, bb10); 1495 1496 masm.shlq(gpr8); 1497 masm.movq(gpr1, gpr9); 1498 masm.shlq(gpr9); 1499 masm.addl(gpr4, gpr3); 1500 masm.negl(gpr3); 1501 masm.addl(gpr3, 64); 1502 masm.shrq(gpr1); 1503 masm.shrq(gpr7); 1504 masm.orq(gpr8, gpr1); 1505 masm.orq(gpr9, gpr7); 1506 1507 masm.bind(bb11); 1508 masm.cvtsi2sdq(dest, gpr8); 1509 masm.shrq(gpr9, 1); 1510 masm.cvtsi2sdq(temp3, gpr9); 1511 masm.xorpd(temp4, temp4); 1512 masm.shll(gpr4, 4); 1513 masm.negl(gpr4); 1514 masm.addl(gpr4, 16368); 1515 masm.orl(gpr4, gpr5); 1516 masm.xorl(gpr4, gpr2); 1517 masm.pinsrw(temp4, gpr4, 3); 1518 masm.leaq(gpr1, externalAddress(piFourPtr)); 1519 masm.movdqu(temp2, new AMD64Address(gpr1, 0)); // 0x40000000, 1520 // 0x3fe921fb, 1521 // 0x18469899, 1522 // 0x3e64442d 1523 masm.xorpd(temp5, temp5); 1524 masm.subl(gpr4, 1008); 1525 masm.pinsrw(temp5, gpr4, 3); 1526 masm.mulsd(dest, temp4); 1527 masm.shll(gpr5, 16); 1528 masm.sarl(gpr5, 31); 1529 masm.mulsd(temp3, temp5); 1530 masm.movdqu(temp1, dest); 1531 masm.pshufd(temp6, temp2, 0xE); 1532 masm.mulsd(dest, temp2); 1533 masm.shrl(gpr6, 29); 1534 masm.addsd(temp1, temp3); 1535 masm.mulsd(temp3, temp2); 1536 masm.addl(gpr6, gpr5); 1537 masm.xorl(gpr6, gpr5); 1538 masm.mulsd(temp6, temp1); 1539 masm.movl(gpr1, gpr6); 1540 masm.addsd(temp6, temp3); 1541 masm.movdqu(temp2, dest); 1542 masm.addsd(dest, temp6); 1543 masm.subsd(temp2, dest); 1544 masm.addsd(temp6, temp2); 1545 1546 masm.bind(bb12); /* re-run the main-path evaluation on the reduced argument: high part in dest, low correction in temp6, extra quadrant bits in gpr1 */ 1547 masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, 1548 // 0x40245f30 1549 masm.mulsd(temp1, dest); 1550 masm.movdq(temp5, 
externalAddress(oneHalfPtr)); // 0x00000000, 1551 // 0x3fe00000, 1552 // 0x00000000, 1553 // 0x3fe00000 1554 masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, 1555 // 0x80000000 1556 masm.pand(temp4, dest); 1557 masm.por(temp5, temp4); 1558 masm.addpd(temp1, temp5); 1559 masm.cvttsd2sil(gpr4, temp1); 1560 masm.cvtsi2sdl(temp1, gpr4); 1561 masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, 1562 // 0x3fb921fb 1563 masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, 1564 // 0x3d90b461, 1565 // 0x1a600000, 1566 // 0x3d90b461 1567 masm.mulsd(temp3, temp1); 1568 masm.unpcklpd(temp1, temp1); 1569 masm.shll(gpr1, 3); 1570 masm.addl(gpr4, 1865216); 1571 masm.movdqu(temp4, dest); 1572 masm.addl(gpr4, gpr1); 1573 masm.andl(gpr4, 63); 1574 masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, 0x3ec71de3, 1575 // 0x1a01a01a, 0x3efa01a0 1576 masm.leaq(gpr1, externalAddress(cTablePtr)); 1577 masm.shll(gpr4, 5); 1578 masm.addq(gpr1, gpr4); 1579 masm.movdqu(temp8, new AMD64Address(gpr1, 0)); 1580 masm.mulpd(temp2, temp1); 1581 masm.subsd(dest, temp3); 1582 masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, 1583 // 0x3b63198a 1584 masm.subsd(temp4, temp3); 1585 masm.unpcklpd(dest, dest); 1586 masm.movdqu(temp3, temp4); 1587 masm.subsd(temp4, temp2); 1588 masm.mulpd(temp5, dest); 1589 masm.subpd(dest, temp2); 1590 masm.pshufd(temp7, temp8, 0xE); 1591 masm.movdqu(temp9, temp7); 1592 masm.mulsd(temp7, temp4); 1593 masm.subsd(temp3, temp4); 1594 masm.mulpd(temp5, dest); 1595 masm.mulpd(dest, dest); 1596 masm.subsd(temp3, temp2); 1597 masm.movdqu(temp2, temp8); 1598 masm.subsd(temp1, temp3); 1599 masm.movdq(temp3, new AMD64Address(gpr1, 24)); 1600 masm.addsd(temp2, temp3); 1601 masm.subsd(temp7, temp2); 1602 masm.subsd(temp1, temp6); 1603 masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, 1604 // 0x3f811111, 1605 // 0x55555555, 1606 // 0x3fa55555 1607 masm.mulsd(temp2, temp4); 1608 masm.mulpd(temp6, dest); 1609 masm.mulsd(temp3, temp4); 1610 
masm.mulpd(temp2, dest); 1611 masm.mulpd(dest, dest); 1612 masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, 1613 // 0xbf2a01a0, 1614 // 0x16c16c17, 1615 // 0xbf56c16c 1616 masm.mulsd(temp4, temp8); 1617 masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, 1618 // 0xbfc55555, 1619 // 0x00000000, 1620 // 0xbfe00000 1621 masm.mulpd(temp5, dest); 1622 masm.movdqu(dest, temp3); 1623 masm.addsd(temp3, temp9); 1624 masm.mulpd(temp1, temp7); 1625 masm.movdqu(temp7, temp4); 1626 masm.addsd(temp4, temp3); 1627 masm.addpd(temp6, temp5); 1628 masm.subsd(temp9, temp3); 1629 masm.subsd(temp3, temp4); 1630 masm.addsd(temp1, new AMD64Address(gpr1, 16)); 1631 masm.mulpd(temp6, temp2); 1632 masm.addsd(temp9, dest); 1633 masm.addsd(temp3, temp7); 1634 masm.addsd(temp1, temp9); 1635 masm.addsd(temp1, temp3); 1636 masm.addsd(temp1, temp6); 1637 masm.unpckhpd(temp6, temp6); 1638 masm.movdqu(dest, temp4); 1639 masm.addsd(temp1, temp6); 1640 masm.addsd(dest, temp1); 1641 masm.jmp(bb15); 1642 1643 masm.bind(bb8); /* top 64-bit limb (gpr8) is zero: shift the limbs up by one (gpr9 -> gpr8, gpr7 -> gpr9), add 64 to gpr4 and retry; if everything is zero, continue at bb12 with dest = temp6 = 0 */ 1644 masm.addl(gpr4, 64); 1645 masm.movq(gpr8, gpr9); 1646 masm.movq(gpr9, gpr7); 1647 masm.movl(gpr7, 0); 1648 masm.cmpq(gpr8, 0); 1649 masm.jcc(ConditionFlag.NotEqual, bb9); 1650 1651 masm.addl(gpr4, 64); 1652 masm.movq(gpr8, gpr9); 1653 masm.movq(gpr9, gpr7); 1654 masm.cmpq(gpr8, 0); 1655 masm.jcc(ConditionFlag.NotEqual, bb9); 1656 1657 masm.xorpd(dest, dest); 1658 masm.xorpd(temp6, temp6); 1659 masm.jmp(bb12); 1660 1661 masm.bind(bb10); /* reached with the flags of subl(gpr3, gpr10) in bb9: Equal means no shift is needed, otherwise the limbs are shifted right by the negated count */ 1662 masm.jcc(ConditionFlag.Equal, bb11); 1663 1664 masm.negl(gpr3); 1665 masm.shrq(gpr9); 1666 masm.movq(gpr1, gpr8); 1667 masm.shrq(gpr8); 1668 masm.subl(gpr4, gpr3); 1669 masm.negl(gpr3); 1670 masm.addl(gpr3, 64); 1671 masm.shlq(gpr1); 1672 masm.orq(gpr9, gpr1); 1673 masm.jmp(bb11); 1674 1675 masm.bind(bb4); 1676 masm.negl(gpr3); 1677 masm.shlq(gpr8, 32); 1678 masm.orq(gpr8, gpr10); 1679 masm.shlq(gpr8); 1680 masm.movq(gpr6, gpr8); 1681 masm.testl(gpr8, Integer.MIN_VALUE); 1682 masm.jcc(ConditionFlag.NotEqual, bb13); 
1683 1684 masm.shrl(gpr8); 1685 masm.movl(gpr2, 0); 1686 masm.shrq(gpr6, 3); 1687 masm.jmp(bb6); 1688 1689 masm.bind(bb5); /* subtracts the three-limb value gpr8:gpr9:gpr7 from a constant with borrow propagation (subq/sbbq/sbbq) and sets gpr2 = 32768, which bb11 later XORs into the sign position of the scale factor */ 1690 masm.shrl(gpr8); 1691 masm.movl(gpr2, 536870912); 1692 masm.shrl(gpr2); 1693 masm.shlq(gpr8, 32); 1694 masm.orq(gpr8, gpr10); 1695 masm.shlq(gpr2, 32); 1696 masm.addl(gpr6, 536870912); 1697 masm.movl(gpr3, 0); 1698 masm.movl(gpr10, 0); 1699 masm.subq(gpr3, gpr7); 1700 masm.sbbq(gpr10, gpr9); 1701 masm.sbbq(gpr2, gpr8); 1702 masm.movq(gpr7, gpr3); 1703 masm.movq(gpr9, gpr10); 1704 masm.movq(gpr8, gpr2); 1705 masm.movl(gpr2, 32768); 1706 masm.jmp(bb6); 1707 1708 masm.bind(bb13); /* counterpart of bb5 for the bb4 path: same borrow-propagating subtraction, then gpr2 = 32768 */ 1709 masm.shrl(gpr8); 1710 masm.movq(gpr2, 0x100000000L); 1711 masm.shrq(gpr2); 1712 masm.movl(gpr3, 0); 1713 masm.movl(gpr10, 0); 1714 masm.subq(gpr3, gpr7); 1715 masm.sbbq(gpr10, gpr9); 1716 masm.sbbq(gpr2, gpr8); 1717 masm.movq(gpr7, gpr3); 1718 masm.movq(gpr9, gpr10); 1719 masm.movq(gpr8, gpr2); 1720 masm.movl(gpr2, 32768); 1721 masm.shrq(gpr6, 3); 1722 masm.addl(gpr6, 536870912); 1723 masm.jmp(bb6); 1724 1725 masm.bind(bb15); /* common exit */ 1726 } 1727 1728 /* 1729 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) 1730 * Source Code 1731 * 1732 * ALGORITHM DESCRIPTION - COS() --------------------- 1733 * 1734 * 1. RANGE REDUCTION 1735 * 1736 * We perform an initial range reduction from X to r with 1737 * 1738 * X =~= N * pi/32 + r 1739 * 1740 * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this, 1741 * the range reduction is insufficiently accurate. For extremely small inputs, denormalization 1742 * can occur internally, impacting performance. This means that the main path is actually only 1743 * taken for 2^-252 <= |X| < 90112. 1744 * 1745 * To avoid branches, we perform the range reduction to full accuracy each time. 1746 * 1747 * X - N * (P_1 + P_2 + P_3) 1748 * 1749 * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit 1750 * number. 
     * Together, these approximate pi well enough for all cases in the restricted range.
     *
     * The main reduction sequence is:
     *
     *   y = 32/pi * x
     *   N = integer(y) (computed by adding and subtracting off SHIFTER)
     *
     *   m_1 = N * P_1
     *   m_2 = N * P_2
     *   r_1 = x - m_1
     *   r = r_1 - m_2 (this r can be used for most of the calculation)
     *
     *   c_1 = r_1 - r
     *   m_3 = N * P_3
     *   c_2 = c_1 - m_2
     *   c = c_2 - m_3
     *
     * 2. MAIN ALGORITHM
     *
     * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored
     * values are:
     *
     *   sigma: closest power of 2 to cos(B)
     *   C_hl: 53-bit cos(B) - sigma
     *   S_hi + S_lo: 2 * 53-bit sin(B)
     *
     * The computation is organized as follows:
     *
     *   sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma)
     *                    + sin(B) * [cos(r + c) - 1] + cos(B) * [sin(r + c) - r]
     *
     * which is approximately:
     *
     *   [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c]
     *   + (C_hl + sigma) * [(sin(r) - r) + c]
     *
     * and this is what is actually computed. We separate this sum into four parts:
     *
     *   hi + med + pols + corr
     *
     * where
     *
     *   hi = S_hi + sigma * r
     *   med = C_hl * r
     *   pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
     *   corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
     *
     * 3. POLYNOMIAL
     *
     * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely,
     * since it is quite small, so we exploit parallelism to the fullest.
     *
     *   psc4 = SC_4 * r_1
     *   msc4 = psc4 * r
     *   r2 = r * r
     *   msc2 = SC_2 * r2
     *   r4 = r2 * r2
     *   psc3 = SC_3 + msc4
     *   psc1 = SC_1 + msc2
     *   msc3 = r4 * psc3
     *   sincospols = psc1 + msc3
     *   pols = sincospols * <S_hi * r^2 | (C_hl + sigma) * r^3>
     *
     * 4. CORRECTION TERM
     *
     * This is where the "c" component of the range reduction is taken into account; recall that
     * just "r" is used for most of the calculation.
     *
     *   -c = m_3 - c_2
     *   -d = S_hi * r - (C_hl + sigma)
     *   corr = -c * -d + S_lo
     *
     * 5. COMPENSATED SUMMATIONS
     *
     * The two successive compensated summations add up the high and medium parts, leaving just the
     * low parts to add up at the end.
     *
     *   rs = sigma * r
     *   res_int = S_hi + rs
     *   k_0 = S_hi - res_int
     *   k_2 = k_0 + rs
     *   med = C_hl * r
     *   res_hi = res_int + med
     *   k_1 = res_int - res_hi
     *   k_3 = k_1 + med
     *
     * 6. FINAL SUMMATION
     *
     * We now add up all the small parts:
     *
     *   res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
     *
     * Now the overall result is just:
     *
     *   res_hi + res_lo
     *
     * 7. SMALL ARGUMENTS
     *
     * Inputs with |X| < 2^-252 are treated specially as 1 - |x|.
     *
     * Special cases:
     *   cos(NaN) = quiet NaN, and raise invalid exception
     *   cos(INF) = NaN and raise invalid exception
     *   cos(0) = 1
     *
     */

    // Bit pattern of the double 1.0, stored as two 32-bit words with the low word first.
    // Loaded through onePtr on the small-argument path of cosIntrinsic.
    // NOTE(review): unlike the neighbouring constant tables in this file (e.g. oneHalfTan, which
    // is private static), this field is public and non-static - confirm whether that is intended.
    public int[] one = {
                    0x00000000, 0x3ff00000
    };

    /**
     * Emits the AMD64 instruction sequence that computes the cosine of {@code value} into
     * {@code dest}, following the algorithm description in the comment above.
     *
     * The emitted code has these parts (labels are bound out of order):
     * <ul>
     * <li>entry: stores the input to the stack slot, reloads its upper 32 bits and range-checks
     * them; in-range inputs fall through to the table-driven main path.</li>
     * <li>{@code bb0}: out-of-range inputs. Large magnitudes branch to {@code bb1}; the rest fall
     * through and produce {@code 1.0 - |x|}.</li>
     * <li>{@code bb1}: an all-ones exponent (Inf/NaN) branches to {@code bb14}; otherwise an
     * integer multi-word reduction using {@code piInvTable} runs (via {@code bb3}..{@code bb10},
     * {@code bb12}) and rejoins a copy of the main path at {@code bb11}.</li>
     * <li>{@code bb14}: produces {@code 0.0 / 0.0}.</li>
     * <li>{@code bb13}: common exit, bound at the very end.</li>
     * </ul>
     *
     * @param dest XMM register that receives the result; may be the same register as
     *            {@code value}, in which case the initial copy is skipped
     * @param value XMM register holding the input
     * @param crb compilation result builder; used to resolve {@code stackTemp} to an address and
     *            handed to {@code setCrb}
     * @param masm assembler the instructions are emitted into
     */
    public void cosIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        // Data-section constants referenced by the emitted code (second argument: 16 or 8).
        ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
        ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
        ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
        ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
        ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
        ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
        ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
        ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
        ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
        ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
        ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
        ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
        ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);
        ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);

        // Note: there is no bb2 in this routine.
        Label bb0 = new Label();
        Label bb1 = new Label();
        Label bb3 = new Label();
        Label bb4 = new Label();
        Label bb5 = new Label();
        Label bb6 = new Label();
        Label bb7 = new Label();
        Label bb8 = new Label();
        Label bb9 = new Label();
        Label bb10 = new Label();
        Label bb11 = new Label();
        Label bb12 = new Label();
        Label bb13 = new Label();
        Label bb14 = new Label();

        // General-purpose temporaries. gpr3 is taken from rcxTemp; the single-operand shifts
        // below (e.g. shlq(gpr8)) presumably shift by CL, which would be why - confirm against
        // the assembler.
        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
        Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
        Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
        Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
        Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
        Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
        Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);

        // XMM temporaries.
        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
        Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
        Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);

        AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);

        setCrb(crb);
        // Spill the input so its upper 32 bits can be read back into a GPR below.
        masm.movdq(stackSlot, value);
        if (dest.encoding != value.encoding) {
            masm.movdqu(dest, value);
        }

        masm.leaq(gpr1, stackSlot);
        // Offset 4 of the stored double: the word holding sign, exponent and top mantissa bits.
        masm.movl(gpr1, new AMD64Address(gpr1, 4));
        masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883,
                                                               // 0x40245f30

        // Range check: ((hi & 0x7fff0000) - 0x30300000) compared unsigned against 0x10c50000.
        // Per the description above this selects 2^-252 <= |x| < 90112 for the main path;
        // everything else (too small wraps around to a large unsigned value) goes to bb0.
        masm.andl(gpr1, 2147418112);
        masm.subl(gpr1, 808452096);
        masm.cmpl(gpr1, 281346048);
        masm.jcc(ConditionFlag.Above, bb0);

        // ---- Main path ----
        // N = trunc(x * piThirtyTwoInv + (0.5 with the sign of x)).
        masm.mulsd(temp1, dest);
        masm.movdqu(temp5, externalAddress(oneHalfPtr)); // 0x00000000,
                                                         // 0x3fe00000,
                                                         // 0x00000000,
                                                         // 0x3fe00000
        masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000,
                                                         // 0x80000000
        masm.pand(temp4, dest);
        masm.por(temp5, temp4);
        masm.addpd(temp1, temp5);
        masm.cvttsd2sil(gpr4, temp1);
        masm.cvtsi2sdl(temp1, gpr4);
        masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000,
                                                      // 0x3d90b461,
                                                      // 0x1a600000,
                                                      // 0x3d90b461
        masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000,
                                                     // 0x3fb921fb
        masm.mulsd(temp3, temp1);
        masm.unpcklpd(temp1, temp1);
        // Table index = (N + 1865232) & 63; note 1865232 == 16 (mod 64). Entries are 32 bytes
        // apart (shift by 5) starting at cTable.
        masm.addq(gpr4, 1865232);
        masm.movdqu(temp4, dest);
        masm.andq(gpr4, 63);
        masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734,
                                                        // 0x3ec71de3,
                                                        // 0x1a01a01a,
                                                        // 0x3efa01a0
        masm.leaq(gpr1, externalAddress(cTablePtr));
        masm.shlq(gpr4, 5);
        masm.addq(gpr1, gpr4);
        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
        // From here on the range reduction (pOne/pTwo/pThree), the polynomial (scOne..scFour)
        // and the summations are interleaved; the instruction order and register reuse are
        // significant and must not be rearranged.
        masm.mulpd(temp2, temp1);
        masm.subsd(dest, temp3);
        masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073,
                                                       // 0x3b63198a
        masm.subsd(temp4, temp3);
        masm.unpcklpd(dest, dest);
        masm.movdqu(temp3, temp4);
        masm.subsd(temp4, temp2);
        masm.mulpd(temp5, dest);
        masm.subpd(dest, temp2);
        masm.pshufd(temp7, temp8, 0xE);
        masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111,
                                                       // 0x3f811111,
                                                       // 0x55555555,
                                                       // 0x3fa55555
        masm.mulsd(temp7, temp4);
        masm.subsd(temp3, temp4);
        masm.mulpd(temp5, dest);
        masm.mulpd(dest, dest);
        masm.subsd(temp3, temp2);
        masm.movdqu(temp2, temp8);
        masm.subsd(temp1, temp3);
        masm.movdq(temp3, new AMD64Address(gpr1, 24));
        masm.addsd(temp2, temp3);
        masm.subsd(temp7, temp2);
        masm.mulsd(temp2, temp4);
        masm.mulpd(temp6, dest);
        masm.mulsd(temp3, temp4);
        masm.mulpd(temp2, dest);
        masm.mulpd(dest, dest);
        masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a,
                                                        // 0xbf2a01a0,
                                                        // 0x16c16c17,
                                                        // 0xbf56c16c
        masm.mulsd(temp4, temp8);
        masm.pshufd(temp9, temp8, 0xE);
        masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555,
                                                      // 0xbfc55555,
                                                      // 0x00000000,
                                                      // 0xbfe00000
        masm.mulpd(temp5, dest);
        masm.movdqu(dest, temp3);
        masm.addsd(temp3, temp9);
        masm.mulpd(temp1, temp7);
        masm.movdqu(temp7, temp4);
        masm.addsd(temp4, temp3);
        masm.addpd(temp6, temp5);
        masm.subsd(temp9, temp3);
        masm.subsd(temp3, temp4);
        masm.addsd(temp1, new AMD64Address(gpr1, 16));
        masm.mulpd(temp6, temp2);
        // Final summation: accumulate the small terms into dest, add the high part last.
        masm.addsd(dest, temp9);
        masm.addsd(temp3, temp7);
        masm.addsd(dest, temp1);
        masm.addsd(dest, temp3);
        masm.addsd(dest, temp6);
        masm.unpckhpd(temp6, temp6);
        masm.addsd(dest, temp6);
        masm.addsd(dest, temp4);
        masm.jmp(bb13);

        // ---- Inf/NaN input: result is 0.0 / 0.0 ----
        masm.bind(bb14);
        masm.xorpd(temp1, temp1);
        masm.xorpd(dest, dest);
        masm.divsd(dest, temp1);
        masm.jmp(bb13);

        // ---- Out of main range ----
        // The flags still come from the cmpl above: a signed "greater" means the magnitude is
        // above the main range (bb1); otherwise it is below it and falls through.
        masm.bind(bb0);
        masm.jcc(ConditionFlag.Greater, bb1);

        // Small argument: clear the sign bit (top word & 0x7fff) and return 1.0 - |x|.
        masm.pextrw(gpr1, dest, 3);
        masm.andl(gpr1, 32767);
        masm.pinsrw(dest, gpr1, 3);
        masm.movdq(temp1, externalAddress(onePtr)); // 0x00000000,
                                                    // 0x3ff00000
        masm.subsd(temp1, dest);
        masm.movdqu(dest, temp1);
        masm.jmp(bb13);

        // ---- Large argument ----
        masm.bind(bb1);
        // Exponent field (top word & 0x7ff0) all ones: Inf or NaN.
        masm.pextrw(gpr3, dest, 3);
        masm.andl(gpr3, 32752);
        masm.cmpl(gpr3, 32752);
        masm.jcc(ConditionFlag.Equal, bb14);

        // Turn the exponent into a 4-byte-aligned byte offset (& 0xfffc) into piInvTable.
        masm.subl(gpr3, 16224);
        masm.shrl(gpr3, 7);
        masm.andl(gpr3, 65532);
        masm.leaq(gpr10, externalAddress(piInvTablePtr));
        masm.addq(gpr3, gpr10);
        // Split the raw bits of x: gpr4 = low 32 bits; gpr1 = bits 52..32 with bit 52 forced to
        // one (the top 20 mantissa bits below an explicit leading one).
        masm.movdq(gpr1, dest);
        masm.movl(gpr9, new AMD64Address(gpr3, 20));
        masm.movl(gpr7, new AMD64Address(gpr3, 24));
        masm.movl(gpr4, gpr1);
        masm.shrq(gpr1, 21);
        masm.orl(gpr1, Integer.MIN_VALUE);
        masm.shrl(gpr1, 11);
        // Multiply both halves by successive 32-bit table words (offsets 24 down to 0),
        // propagating the upper 32 bits of each partial sum into the next one. This looks like
        // a schoolbook multi-word multiplication - NOTE(review): confirm against the Intel LIBM
        // reference before changing anything here.
        masm.movl(gpr8, gpr9);
        masm.imulq(gpr9, gpr4);
        masm.imulq(gpr8, gpr1);
        masm.imulq(gpr7, gpr1);
        masm.movl(gpr5, new AMD64Address(gpr3, 16));
        masm.movl(gpr6, new AMD64Address(gpr3, 12));
        masm.movl(gpr10, gpr9);
        masm.shrq(gpr9, 32);
        masm.addq(gpr8, gpr9);
        masm.addq(gpr10, gpr7);
        masm.movl(gpr7, gpr10);
        masm.shrq(gpr10, 32);
        masm.addq(gpr8, gpr10);
        masm.movl(gpr9, gpr5);
        masm.imulq(gpr5, gpr4);
        masm.imulq(gpr9, gpr1);
        masm.movl(gpr10, gpr6);
        masm.imulq(gpr6, gpr4);
        masm.movl(gpr2, gpr5);
        masm.shrq(gpr5, 32);
        masm.addq(gpr8, gpr2);
        masm.movl(gpr2, gpr8);
        masm.shrq(gpr8, 32);
        masm.addq(gpr9, gpr5);
        masm.addq(gpr9, gpr8);
        masm.shlq(gpr2, 32);
        masm.orq(gpr7, gpr2);
        masm.imulq(gpr10, gpr1);
        masm.movl(gpr8, new AMD64Address(gpr3, 8));
        masm.movl(gpr5, new AMD64Address(gpr3, 4));
        masm.movl(gpr2, gpr6);
        masm.shrq(gpr6, 32);
        masm.addq(gpr9, gpr2);
        masm.movl(gpr2, gpr9);
        masm.shrq(gpr9, 32);
        masm.addq(gpr10, gpr6);
        masm.addq(gpr10, gpr9);
        masm.movq(gpr6, gpr8);
        masm.imulq(gpr8, gpr4);
        masm.imulq(gpr6, gpr1);
        masm.movl(gpr9, gpr8);
        masm.shrq(gpr8, 32);
        masm.addq(gpr10, gpr9);
        masm.movl(gpr9, gpr10);
        masm.shrq(gpr10, 32);
        masm.addq(gpr6, gpr8);
        masm.addq(gpr6, gpr10);
        masm.movq(gpr8, gpr5);
        masm.imulq(gpr5, gpr4);
        masm.imulq(gpr8, gpr1);
        masm.shlq(gpr9, 32);
        masm.orq(gpr9, gpr2);
        masm.movl(gpr1, new AMD64Address(gpr3, 0));
        masm.movl(gpr10, gpr5);
        masm.shrq(gpr5, 32);
        masm.addq(gpr6, gpr10);
        masm.movl(gpr10, gpr6);
        masm.shrq(gpr6, 32);
        masm.addq(gpr8, gpr5);
        masm.addq(gpr8, gpr6);
        masm.imulq(gpr4, gpr1);
        // Recover the table byte offset (pointer minus table base), scale it by 8, and combine
        // it with the unbiased exponent of x; gpr5 keeps the sign bit of x (0x8000 of top word).
        masm.pextrw(gpr2, dest, 3);
        masm.leaq(gpr6, externalAddress(piInvTablePtr));
        masm.subq(gpr3, gpr6);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, 19);
        masm.movl(gpr5, 32768);
        masm.andl(gpr5, gpr2);
        masm.shrl(gpr2, 4);
        masm.andl(gpr2, 2047);
        masm.subl(gpr2, 1023);
        masm.subl(gpr3, gpr2);
        masm.addq(gpr8, gpr4);
        masm.movl(gpr4, gpr3);
        masm.addl(gpr4, 32);
        masm.cmpl(gpr3, 1);
        masm.jcc(ConditionFlag.Less, bb3);

        masm.negl(gpr3);
        masm.addl(gpr3, 29);
        masm.shll(gpr8);
        masm.movl(gpr6, gpr8);
        masm.andl(gpr8, 536870911);
        // Bit 28 (0x10000000) set: take the negating branch at bb4.
        masm.testl(gpr8, 268435456);
        masm.jcc(ConditionFlag.NotEqual, bb4);

        masm.shrl(gpr8);
        masm.movl(gpr2, 0);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);

        // bb5 and bb6 are bound at the same position.
        masm.bind(bb5);

        masm.bind(bb6);
        // Top word zero: shift the lower words up by 64 bits first (bb7).
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.Equal, bb7);

        // Normalize: locate the highest set bit of gpr8 and shift the multi-word value so that
        // it lands on bit 29; gpr4 tracks the shift applied.
        masm.bind(bb8);
        masm.bsrq(gpr10, gpr8);
        masm.movl(gpr3, 29);
        masm.subl(gpr3, gpr10);
        masm.jcc(ConditionFlag.LessEqual, bb9);

        masm.shlq(gpr8);
        masm.movq(gpr1, gpr9);
        masm.shlq(gpr9);
        masm.addl(gpr4, gpr3);
        masm.negl(gpr3);
        masm.addl(gpr3, 64);
        masm.shrq(gpr1);
        masm.shrq(gpr7);
        masm.orq(gpr8, gpr1);
        masm.orq(gpr9, gpr7);

        // Convert the two integer words to double and scale each by a power of two whose
        // exponent word is built in gpr4 and inserted with pinsrw; then multiply by the piFour
        // constants. (piFour presumably holds pi/4 split into a high and a low part, judging
        // by its name and the values noted beside the load - confirm.)
        masm.bind(bb10);
        masm.cvtsi2sdq(dest, gpr8);
        masm.shrq(gpr9, 1);
        masm.cvtsi2sdq(temp3, gpr9);
        masm.xorpd(temp4, temp4);
        masm.shll(gpr4, 4);
        masm.negl(gpr4);
        masm.addl(gpr4, 16368);
        masm.orl(gpr4, gpr5);
        masm.xorl(gpr4, gpr2);
        masm.pinsrw(temp4, gpr4, 3);
        masm.leaq(gpr2, externalAddress(piFourPtr));
        masm.movdqu(temp2, new AMD64Address(gpr2, 0)); // 0x40000000,
                                                       // 0x3fe921fb,
                                                       // 0x18469899,
                                                       // 0x3e64442d
        masm.xorpd(temp5, temp5);
        masm.subl(gpr4, 1008);
        masm.pinsrw(temp5, gpr4, 3);
        masm.mulsd(dest, temp4);
        masm.shll(gpr5, 16);
        masm.sarl(gpr5, 31);
        masm.mulsd(temp3, temp5);
        masm.movdqu(temp1, dest);
        masm.mulsd(dest, temp2);
        masm.pshufd(temp6, temp2, 0xE);
        masm.shrl(gpr6, 29);
        masm.addsd(temp1, temp3);
        masm.mulsd(temp3, temp2);
        masm.addl(gpr6, gpr5);
        masm.xorl(gpr6, gpr5);
        masm.mulsd(temp6, temp1);
        masm.movl(gpr1, gpr6);
        masm.addsd(temp6, temp3);
        masm.movdqu(temp2, dest);
        masm.addsd(dest, temp6);
        masm.subsd(temp2, dest);
        masm.addsd(temp6, temp2);

        // ---- Second copy of the main path, entered with the reduced argument in dest and its
        // low-order correction in temp6 ----
        // Differences from the first copy visible below: 64-bit conversions (cvttsd2siq /
        // cvtsi2sdq), gpr1 * 8 is added to the table index, temp6 is subtracted into the
        // correction term, and the sums are accumulated in temp1 before the final add.
        // NOTE(review): oneHalf is loaded with movdq here but with movdqu in the first copy -
        // confirm this difference is intentional.
        masm.bind(bb11);
        masm.movq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883,
                                                              // 0x40245f30
        masm.mulsd(temp1, dest);
        masm.movdq(temp5, externalAddress(oneHalfPtr)); // 0x00000000,
                                                        // 0x3fe00000,
                                                        // 0x00000000,
                                                        // 0x3fe00000
        masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000,
                                                         // 0x80000000
        masm.pand(temp4, dest);
        masm.por(temp5, temp4);
        masm.addpd(temp1, temp5);
        masm.cvttsd2siq(gpr4, temp1);
        masm.cvtsi2sdq(temp1, gpr4);
        masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000,
                                                     // 0x3fb921fb
        masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000,
                                                      // 0x3d90b461,
                                                      // 0x1a600000,
                                                      // 0x3d90b461
        masm.mulsd(temp3, temp1);
        masm.unpcklpd(temp1, temp1);
        masm.shll(gpr1, 3);
        masm.addl(gpr4, 1865232);
        masm.movdqu(temp4, dest);
        masm.addl(gpr4, gpr1);
        masm.andl(gpr4, 63);
        masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734,
                                                        // 0x3ec71de3,
                                                        // 0x1a01a01a,
                                                        // 0x3efa01a0
        masm.leaq(gpr1, externalAddress(cTablePtr));
        masm.shll(gpr4, 5);
        masm.addq(gpr1, gpr4);
        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
        masm.mulpd(temp2, temp1);
        masm.subsd(dest, temp3);
        masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073,
                                                       // 0x3b63198a
        masm.subsd(temp4, temp3);
        masm.unpcklpd(dest, dest);
        masm.movdqu(temp3, temp4);
        masm.subsd(temp4, temp2);
        masm.mulpd(temp5, dest);
        masm.pshufd(temp7, temp8, 0xE);
        masm.movdqu(temp9, temp7);
        masm.subpd(dest, temp2);
        masm.mulsd(temp7, temp4);
        masm.subsd(temp3, temp4);
        masm.mulpd(temp5, dest);
        masm.mulpd(dest, dest);
        masm.subsd(temp3, temp2);
        masm.movdqu(temp2, temp8);
        masm.subsd(temp1, temp3);
        masm.movdq(temp3, new AMD64Address(gpr1, 24));
        masm.addsd(temp2, temp3);
        masm.subsd(temp7, temp2);
        masm.subsd(temp1, temp6);
        masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111,
                                                       // 0x3f811111,
                                                       // 0x55555555,
                                                       // 0x3fa55555
        masm.mulsd(temp2, temp4);
        masm.mulpd(temp6, dest);
        masm.mulsd(temp3, temp4);
        masm.mulpd(temp2, dest);
        masm.mulpd(dest, dest);
        masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a,
                                                        // 0xbf2a01a0,
                                                        // 0x16c16c17,
                                                        // 0xbf56c16c
        masm.mulsd(temp4, temp8);
        masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555,
                                                      // 0xbfc55555,
                                                      // 0x00000000,
                                                      // 0xbfe00000
        masm.mulpd(temp5, dest);
        masm.movdqu(dest, temp3);
        masm.addsd(temp3, temp9);
        masm.mulpd(temp1, temp7);
        masm.movdqu(temp7, temp4);
        masm.addsd(temp4, temp3);
        masm.addpd(temp6, temp5);
        masm.subsd(temp9, temp3);
        masm.subsd(temp3, temp4);
        masm.addsd(temp1, new AMD64Address(gpr1, 16));
        masm.mulpd(temp6, temp2);
        masm.addsd(temp9, dest);
        masm.addsd(temp3, temp7);
        masm.addsd(temp1, temp9);
        masm.addsd(temp1, temp3);
        masm.addsd(temp1, temp6);
        masm.unpckhpd(temp6, temp6);
        masm.movdqu(dest, temp4);
        masm.addsd(temp1, temp6);
        masm.addsd(dest, temp1);
        masm.jmp(bb13);

        // Top word was zero: move the lower words up one position (gpr4 += 64) and retry, at
        // most twice. If the words are still zero, continue at bb11 with dest = temp6 = 0.0.
        masm.bind(bb7);
        masm.addl(gpr4, 64);
        masm.movq(gpr8, gpr9);
        masm.movq(gpr9, gpr7);
        masm.movl(gpr7, 0);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.NotEqual, bb8);

        masm.addl(gpr4, 64);
        masm.movq(gpr8, gpr9);
        masm.movq(gpr9, gpr7);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.NotEqual, bb8);

        masm.xorpd(dest, dest);
        masm.xorpd(temp6, temp6);
        masm.jmp(bb11);

        // Reached when 29 - bsr(gpr8) <= 0; the flags still come from that subl. Zero means no
        // shift is needed; otherwise shift right instead of left.
        masm.bind(bb9);
        masm.jcc(ConditionFlag.Equal, bb10);

        masm.negl(gpr3);
        masm.shrq(gpr9);
        masm.movq(gpr1, gpr8);
        masm.shrq(gpr8);
        masm.subl(gpr4, gpr3);
        masm.negl(gpr3);
        masm.addl(gpr3, 64);
        masm.shlq(gpr1);
        masm.orq(gpr9, gpr1);
        masm.jmp(bb10);

        // Taken from the large-argument code when gpr3 < 1.
        masm.bind(bb3);
        masm.negl(gpr3);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);
        masm.shlq(gpr8);
        masm.movq(gpr6, gpr8);
        masm.testl(gpr8, Integer.MIN_VALUE);
        masm.jcc(ConditionFlag.NotEqual, bb12);

        masm.shrl(gpr8);
        masm.movl(gpr2, 0);
        masm.shrq(gpr6, 3);
        masm.jmp(bb6);

        // Negating branch: replaces the three-word value gpr8:gpr9:gpr7 by (constant - value)
        // using a subq/sbbq/sbbq borrow chain, and sets gpr2 = 0x8000 (xor-ed into the exponent
        // word at bb10).
        masm.bind(bb4);
        masm.shrl(gpr8);
        masm.movl(gpr2, 536870912);
        masm.shrl(gpr2);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);
        masm.shlq(gpr2, 32);
        masm.addl(gpr6, 536870912);
        masm.movl(gpr3, 0);
        masm.movl(gpr10, 0);
        masm.subq(gpr3, gpr7);
        masm.sbbq(gpr10, gpr9);
        masm.sbbq(gpr2, gpr8);
        masm.movq(gpr7, gpr3);
        masm.movq(gpr9, gpr10);
        masm.movq(gpr8, gpr2);
        masm.movl(gpr2, 32768);
        masm.jmp(bb5);

        // Same negation as bb4, for the path coming from bb3.
        masm.bind(bb12);
        masm.shrl(gpr8);
        masm.movq(gpr2, 0x100000000L);
        masm.shrq(gpr2);
        masm.movl(gpr3, 0);
        masm.movl(gpr10, 0);
        masm.subq(gpr3, gpr7);
        masm.sbbq(gpr10, gpr9);
        masm.sbbq(gpr2, gpr8);
        masm.movq(gpr7, gpr3);
        masm.movq(gpr9, gpr10);
        masm.movq(gpr8, gpr2);
        masm.movl(gpr2, 32768);
        masm.shrq(gpr6, 3);
        masm.addl(gpr6, 536870912);
        masm.jmp(bb6);

        // Common exit: every path leaves its result in dest.
        masm.bind(bb13);
    }

    /*
     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
     * Source Code
     *
     * ALGORITHM DESCRIPTION - TAN() ---------------------
     *
     * Polynomials coefficients and other constants.
     *
     * Note that in this algorithm, there is a different polynomial for each breakpoint, so there
     * are 32 sets of polynomial coefficients as well as 32 instances of the other constants.
     *
     * The polynomial coefficients and constants are offset from the start of the main block as
     * follows:
     *
     * 0: c8 | c0 16: c9 | c1 32: c10 | c2 48: c11 | c3 64: c12 | c4 80: c13 | c5 96: c14 | c6 112:
     * c15 | c7 128: T_hi 136: T_lo 144: Sigma 152: T_hl 160: Tau 168: Mask 176: (end of block)
     *
     * The total table size is therefore 5632 bytes.
     *
     * Note that c0 and c1 are always zero. We could try storing other constants here, and just
     * loading the low part of the SIMD register in these cases, after ensuring the high part is
     * zero.
     *
     * The higher terms of the polynomial are computed in the *low* part of the SIMD register. This
     * is so we can overlap the multiplication by r^8 and the unpacking of the other part.
     *
     * The constants are: T_hi + T_lo = accurate constant term in power series Sigma + T_hl =
     * accurate coefficient of r in power series (Sigma=1 bit) Tau = multiplier for the reciprocal,
     * always -1 or 0
     *
     * The basic reconstruction formula using these constants is:
     *
     * High = tau * recip_hi + t_hi Med = (sgn * r + t_hl * r)_hi Low = (sgn * r + t_hl * r)_lo +
     * tau * recip_lo + T_lo + (T_hl + sigma) * c + pol
     *
     * where pol = c0 + c1 * r + c2 * r^2 + ...
+ c15 * r^15 2406 * 2407 * (c0 = c1 = 0, but using them keeps SIMD regularity) 2408 * 2409 * We then do a compensated sum High + Med, add the low parts together and then do the final 2410 * sum. 2411 * 2412 * Here recip_hi + recip_lo is an accurate reciprocal of the remainder modulo pi/2 2413 * 2414 * Special cases: tan(NaN) = quiet NaN, and raise invalid exception tan(INF) = NaN and raise 2415 * invalid exception tan(+/-0) = +/-0 2416 * 2417 */ 2418 2419 private static int[] oneHalfTan = { 2420 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000 2421 }; 2422 2423 private static int[] mulSixteen = { 2424 0x00000000, 0x40300000, 0x00000000, 0x3ff00000 2425 }; 2426 2427 private static int[] signMaskTan = { 2428 0x00000000, 0x80000000, 0x00000000, 0x80000000 2429 }; 2430 2431 private static int[] piThirtyTwoInvTan = { 2432 0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30 2433 }; 2434 2435 private static int[] pOneTan = { 2436 0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb 2437 }; 2438 2439 private static int[] pTwoTan = { 2440 0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23 2441 }; 2442 2443 private static int[] pThreeTan = { 2444 0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e 2445 }; 2446 2447 private static int[] cTableTan = { 2448 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x882c10fa, 2449 0x3f9664f4, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2450 0x00000000, 0x00000000, 0x55e6c23d, 0x3f8226e3, 0x55555555, 2451 0x3fd55555, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2452 0x0e157de0, 0x3f6d6d3d, 0x11111111, 0x3fc11111, 0x00000000, 2453 0x00000000, 0x00000000, 0x00000000, 0x452b75e3, 0x3f57da36, 2454 0x1ba1ba1c, 0x3faba1ba, 0x00000000, 0x00000000, 0x00000000, 2455 0x00000000, 0x00000000, 0x3ff00000, 0x00000000, 0x00000000, 2456 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b, 2457 0x3f953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea, 2458 0x00000000, 0x00000000, 0xda5b7511, 0x3f85ad63, 0xdc230b9b, 2459 0x3fb97558, 0x26cb3788, 0x3f881308, 
0x76fc4985, 0x3fd62ac9, 2460 0x77bb08ba, 0x3f757c85, 0xb6247521, 0x3fb1381e, 0x5922170c, 2461 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0x3f64e391, 2462 0x3e666320, 0x3fa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, 2463 0x3fafa8ae, 0x8c5b2da2, 0x3fb936bb, 0x4e88f7a5, 0x3c587d05, 2464 0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000, 2465 0x00000000, 0x00000000, 0x00000000, 0x5a279ea3, 0x3faa3407, 2466 0x00000000, 0x00000000, 0x432d65fa, 0x3fa70153, 0x00000000, 2467 0x00000000, 0x891a4602, 0x3f9d03ef, 0xd62ca5f8, 0x3fca77d9, 2468 0xb35f4628, 0x3f97a265, 0x433258fa, 0x3fd8cf51, 0xb58fd909, 2469 0x3f8f88e3, 0x01771cea, 0x3fc2b154, 0xf3562f8e, 0x3f888f57, 2470 0xc028a723, 0x3fc7370f, 0x20b7f9f0, 0x3f80f44c, 0x214368e9, 2471 0x3fb6dfaa, 0x28891863, 0x3f79b4b6, 0x172dbbf0, 0x3fb6cb8e, 2472 0xe0553158, 0x3fc975f5, 0x593fe814, 0x3c2ef5d3, 0x00000000, 2473 0x3ff00000, 0x03dec550, 0x3fa44203, 0x00000000, 0x00000000, 2474 0x00000000, 0x00000000, 0x9314533e, 0x3fbb8ec5, 0x00000000, 2475 0x00000000, 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 2476 0xdcb427fd, 0x3fb13950, 0xd87ab0bb, 0x3fd5335e, 0xce0ae8a5, 2477 0x3fabb382, 0x79143126, 0x3fddba41, 0x5f2b28d4, 0x3fa552f1, 2478 0x59f21a6d, 0x3fd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 2479 0x3fd0576c, 0x8f2c2950, 0x3f9a4898, 0xc0b3f22c, 0x3fc59462, 2480 0x1883a4b8, 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 2481 0x3fd36a08, 0x1dce993d, 0xbc6d704d, 0x00000000, 0x3ff00000, 2482 0x2b82ab63, 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 2483 0x00000000, 0x56f37042, 0x3fccfc56, 0x00000000, 0x00000000, 2484 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 0x3d0e7c5d, 2485 0x3fc50533, 0x9bed9b2e, 0x3fdf0ed9, 0x5fe7c47c, 0x3fc1f250, 2486 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0x3fbe5c71, 0x86362c20, 2487 0x3fda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 0x3fd911bd, 2488 0xb56658be, 0x3fb5e4c7, 0x93a2fd76, 0x3fd3c092, 0xda271794, 2489 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 0x3fda8279, 2490 0xb68c1467, 
0x3c708b2f, 0x00000000, 0x3ff00000, 0x980c4337, 2491 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2492 0xcc03e501, 0x3fdff10f, 0x00000000, 0x00000000, 0x44a4e845, 2493 0x3fddb63b, 0x00000000, 0x00000000, 0x3768ad9f, 0x3fdb72a4, 2494 0x3dd01cca, 0x3fe5fdb9, 0xa61d2811, 0x3fd972b2, 0x5645ad0b, 2495 0x3fe977f9, 0xd013b3ab, 0x3fd78ca3, 0xbf0bf914, 0x3fe4f192, 2496 0x4d53e730, 0x3fd5d060, 0x3f8b9000, 0x3fe49933, 0xe2b82f08, 2497 0x3fd4322a, 0x5936a835, 0x3fe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 2498 0xef478605, 0x3fe1659e, 0x190834ec, 0x3fe11ab7, 0xcdb625ea, 2499 0xbc8e564b, 0x00000000, 0x3ff00000, 0xb07217e3, 0x3fd248f1, 2500 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, 2501 0x3ff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, 2502 0x00000000, 0x00000000, 0xff691fa2, 0x3ff3972e, 0xe93463bd, 2503 0x3feeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, 2504 0xa04e8ea3, 0x3ff4541a, 0x386accd3, 0x3ff1369e, 0x222a66dd, 2505 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0x3ff5178f, 2506 0xddaa0031, 0x3ff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, 2507 0x3ff29311, 0x2ab7f990, 0x3fe561b8, 0x209c7df1, 0x3c87a8c5, 2508 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, 2509 0x00000000, 0x00000000, 0x00000000, 0xc7ab4d5a, 0x40085e24, 2510 0x00000000, 0x00000000, 0xe93ea75d, 0x400b963d, 0x00000000, 2511 0x00000000, 0x94a7f25a, 0x400f37e2, 0x4b6261cb, 0x3ff5f984, 2512 0x5a9dd812, 0x4011aab0, 0x74c30018, 0x3ffaf5a5, 0x7f2ce8e3, 2513 0x4013fe8b, 0xfe8e54fa, 0x3ffd7334, 0x670d618d, 0x4016a10c, 2514 0x4db97058, 0x4000e012, 0x24df44dd, 0x40199c5f, 0x697d6ece, 2515 0x4003006e, 0x83298b82, 0x401cfc4d, 0x19d490d6, 0x40058c19, 2516 0x2ae42850, 0x3fea4300, 0x118e20e6, 0xbc7a6db8, 0x00000000, 2517 0x40000000, 0xe33345b8, 0xbfd4e526, 0x00000000, 0x00000000, 2518 0x00000000, 0x00000000, 0x65965966, 0x40219659, 0x00000000, 2519 0x00000000, 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 2520 0x83cd3723, 0x402c8342, 0x00000000, 0x40000000, 0x55e6c23d, 
2521 0x403226e3, 0x55555555, 0x40055555, 0x34451939, 0x40371c96, 2522 0xaaaaaaab, 0x400aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 2523 0x40111111, 0xa738201f, 0x4042bbce, 0x05b05b06, 0x4015b05b, 2524 0x452b75e3, 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 2525 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 2526 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2527 0x00000000, 0x4f48b8d3, 0xbf33eaf9, 0x00000000, 0x00000000, 2528 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 0xd0258911, 2529 0xbf0abaf3, 0x23e49fe9, 0xbfab5a8c, 0x2d53222e, 0x3ef60d15, 2530 0x21169451, 0x3fa172b2, 0xbb254dbc, 0xbee1d3b5, 0xdbf93b8e, 2531 0xbf84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 0x3f743924, 2532 0x794a8297, 0xbeb7b7b9, 0xe015f797, 0xbf5d41f5, 0xe41a4a56, 2533 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 0xbfce49ce, 2534 0x8c743719, 0x3d1eb860, 0x00000000, 0x00000000, 0x1b4863cf, 2535 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 2536 0x535ad890, 0xbf2b9320, 0x00000000, 0x00000000, 0x018fdf1f, 2537 0x3f16d61d, 0x00000000, 0x00000000, 0x0359f1be, 0xbf0139e4, 2538 0xa4317c6d, 0xbfa67e17, 0x82672d0f, 0x3eebb405, 0x2f1b621e, 2539 0x3f9f455b, 0x51ccf238, 0xbed55317, 0xf437b9ac, 0xbf804bee, 2540 0xc791a2b5, 0x3ec0e993, 0x919a1db2, 0x3f7080c2, 0x336a5b0e, 2541 0xbeaa48a2, 0x0a268358, 0xbf55a443, 0xdfd978e4, 0x3e94b61f, 2542 0xd7767a58, 0x3f431806, 0x2aea0000, 0xbfc9bbe8, 0x7723ea61, 2543 0xbd3a2369, 0x00000000, 0x00000000, 0xdf7796ff, 0x3fd6e642, 2544 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, 2545 0xbf231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, 2546 0x00000000, 0x00000000, 0x790b4cbc, 0xbef66191, 0x848a46c6, 2547 0xbfa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, 2548 0xfdd299ef, 0xbec9dd1a, 0x3f8dbaaf, 0xbf793363, 0x309fc6ea, 2549 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0xbe9dae11, 2550 0x3e5c67b3, 0xbf4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, 2551 0x3f3d1eb1, 0x29cfc000, 0xbfc549ce, 
0xbf159358, 0xbd397b33, 2552 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, 2553 0x3ff00000, 0x00000000, 0xfffffff8, 0x7d98a556, 0xbf1a3958, 2554 0x00000000, 0x00000000, 0x9d88dc01, 0x3f0704c2, 0x00000000, 2555 0x00000000, 0x73742a2b, 0xbeed054a, 0x58844587, 0xbf9c2a13, 2556 0x55688a79, 0x3ed7a326, 0xee33f1d6, 0x3f9a48f4, 0xa8dc9888, 2557 0xbebf8939, 0xaad4b5b8, 0xbf72f746, 0x9102efa1, 0x3ea88f82, 2558 0xdabc29cf, 0x3f678228, 0x9289afb8, 0xbe90f456, 0x741fb4ed, 2559 0xbf46f3a3, 0xa97f6663, 0x3e79b4bf, 0xca89ff3f, 0x3f36db70, 2560 0xa8a2a000, 0xbfc0ee13, 0x3da24be1, 0xbd338b9f, 0x00000000, 2561 0x00000000, 0x11cd6c69, 0x3fd601fd, 0x00000000, 0x3ff00000, 2562 0x00000000, 0xfffffff8, 0x1a154b97, 0xbf116b01, 0x00000000, 2563 0x00000000, 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 2564 0xb93820c8, 0xbee264d4, 0xbb6cbb18, 0xbf94ab8c, 0x888d4d92, 2565 0x3ed0568b, 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0xbeb2f950, 2566 0x22cf9f74, 0xbf6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 2567 0x3f64aad7, 0x637b73af, 0xbe83487c, 0xe522591a, 0xbf3fc092, 2568 0xa158e8bc, 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 2569 0xbfb9477f, 0xc2c2d2bc, 0xbd135ef9, 0x00000000, 0x00000000, 2570 0xf2fdb123, 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 2571 0xfffffff8, 0xc41acb64, 0xbf05448d, 0x00000000, 0x00000000, 2572 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 0x9e42962d, 2573 0xbed5aea5, 0x2579f8ef, 0xbf8b2398, 0x288a1ed9, 0x3ec81441, 2574 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0xbea57cd3, 0x5766336f, 2575 0xbf617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 0x3f62c646, 2576 0x6b8fb29c, 0xbe74e3a3, 0xdc4c0409, 0xbf33f952, 0x9bffe365, 2577 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 0xbfb0cc62, 2578 0x016b907f, 0xbd119cbc, 0x00000000, 0x00000000, 0xe6b9d8fa, 2579 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 2580 0x5daf22a6, 0xbef429d7, 0x00000000, 0x00000000, 0x06bca545, 2581 0x3ef7a27d, 0x00000000, 0x00000000, 0x7211c19a, 0xbec41c3e, 2582 0x956ed53e, 
0xbf7ae3f4, 0xee750e72, 0x3ec3901b, 0x91d443f5, 2583 0x3f96f713, 0x36661e6c, 0xbe936e09, 0x506f9381, 0xbf5122e8, 2584 0xcb6dd43f, 0x3e9041b9, 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 2585 0xbe625a8a, 0xe5a0e9dc, 0xbf23499d, 0x110384dd, 0x3e5b1c2c, 2586 0x68d43db6, 0x3f2cb899, 0x6ecac000, 0xbfa0c414, 0xcd7dd58c, 2587 0x3d13500f, 0x00000000, 0x00000000, 0x85a2c8fb, 0x3fd55fe0, 2588 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0x00000000, 2589 0x00000000, 0x00000000, 0x00000000, 0x2bf70ebe, 0x3ef66a8f, 2590 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2591 0x00000000, 0xd644267f, 0x3ec22805, 0x16c16c17, 0x3f96c16c, 2592 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc4e09162, 2593 0x3e8d6db2, 0xbc011567, 0x3f61566a, 0x00000000, 0x00000000, 2594 0x00000000, 0x00000000, 0x1f79955c, 0x3e57da4e, 0x9334ef0b, 2595 0x3f2bbd77, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2596 0x00000000, 0x00000000, 0x55555555, 0x3fd55555, 0x00000000, 2597 0x3ff00000, 0x00000000, 0xfffffff8, 0x5daf22a6, 0x3ef429d7, 2598 0x00000000, 0x00000000, 0x06bca545, 0x3ef7a27d, 0x00000000, 2599 0x00000000, 0x7211c19a, 0x3ec41c3e, 0x956ed53e, 0x3f7ae3f4, 2600 0xee750e72, 0x3ec3901b, 0x91d443f5, 0x3f96f713, 0x36661e6c, 2601 0x3e936e09, 0x506f9381, 0x3f5122e8, 0xcb6dd43f, 0x3e9041b9, 2602 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 0x3e625a8a, 0xe5a0e9dc, 2603 0x3f23499d, 0x110384dd, 0x3e5b1c2c, 0x68d43db6, 0x3f2cb899, 2604 0x6ecac000, 0x3fa0c414, 0xcd7dd58c, 0xbd13500f, 0x00000000, 2605 0x00000000, 0x85a2c8fb, 0x3fd55fe0, 0x00000000, 0x3ff00000, 2606 0x00000000, 0xfffffff8, 0xc41acb64, 0x3f05448d, 0x00000000, 2607 0x00000000, 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 2608 0x9e42962d, 0x3ed5aea5, 0x2579f8ef, 0x3f8b2398, 0x288a1ed9, 2609 0x3ec81441, 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0x3ea57cd3, 2610 0x5766336f, 0x3f617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 2611 0x3f62c646, 0x6b8fb29c, 0x3e74e3a3, 0xdc4c0409, 0x3f33f952, 2612 0x9bffe365, 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 
2613 0x3fb0cc62, 0x016b907f, 0x3d119cbc, 0x00000000, 0x00000000, 2614 0xe6b9d8fa, 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 2615 0xfffffff8, 0x1a154b97, 0x3f116b01, 0x00000000, 0x00000000, 2616 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 0xb93820c8, 2617 0x3ee264d4, 0xbb6cbb18, 0x3f94ab8c, 0x888d4d92, 0x3ed0568b, 2618 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0x3eb2f950, 0x22cf9f74, 2619 0x3f6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 0x3f64aad7, 2620 0x637b73af, 0x3e83487c, 0xe522591a, 0x3f3fc092, 0xa158e8bc, 2621 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 0x3fb9477f, 2622 0xc2c2d2bc, 0x3d135ef9, 0x00000000, 0x00000000, 0xf2fdb123, 2623 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 2624 0x7d98a556, 0x3f1a3958, 0x00000000, 0x00000000, 0x9d88dc01, 2625 0x3f0704c2, 0x00000000, 0x00000000, 0x73742a2b, 0x3eed054a, 2626 0x58844587, 0x3f9c2a13, 0x55688a79, 0x3ed7a326, 0xee33f1d6, 2627 0x3f9a48f4, 0xa8dc9888, 0x3ebf8939, 0xaad4b5b8, 0x3f72f746, 2628 0x9102efa1, 0x3ea88f82, 0xdabc29cf, 0x3f678228, 0x9289afb8, 2629 0x3e90f456, 0x741fb4ed, 0x3f46f3a3, 0xa97f6663, 0x3e79b4bf, 2630 0xca89ff3f, 0x3f36db70, 0xa8a2a000, 0x3fc0ee13, 0x3da24be1, 2631 0x3d338b9f, 0x00000000, 0x00000000, 0x11cd6c69, 0x3fd601fd, 2632 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, 2633 0x3f231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, 2634 0x00000000, 0x00000000, 0x790b4cbc, 0x3ef66191, 0x848a46c6, 2635 0x3fa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, 2636 0xfdd299ef, 0x3ec9dd1a, 0x3f8dbaaf, 0x3f793363, 0x309fc6ea, 2637 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0x3e9dae11, 2638 0x3e5c67b3, 0x3f4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, 2639 0x3f3d1eb1, 0x29cfc000, 0x3fc549ce, 0xbf159358, 0x3d397b33, 2640 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, 2641 0x3ff00000, 0x00000000, 0xfffffff8, 0x535ad890, 0x3f2b9320, 2642 0x00000000, 0x00000000, 0x018fdf1f, 0x3f16d61d, 0x00000000, 2643 0x00000000, 0x0359f1be, 0x3f0139e4, 
0xa4317c6d, 0x3fa67e17, 2644 0x82672d0f, 0x3eebb405, 0x2f1b621e, 0x3f9f455b, 0x51ccf238, 2645 0x3ed55317, 0xf437b9ac, 0x3f804bee, 0xc791a2b5, 0x3ec0e993, 2646 0x919a1db2, 0x3f7080c2, 0x336a5b0e, 0x3eaa48a2, 0x0a268358, 2647 0x3f55a443, 0xdfd978e4, 0x3e94b61f, 0xd7767a58, 0x3f431806, 2648 0x2aea0000, 0x3fc9bbe8, 0x7723ea61, 0x3d3a2369, 0x00000000, 2649 0x00000000, 0xdf7796ff, 0x3fd6e642, 0x00000000, 0x3ff00000, 2650 0x00000000, 0xfffffff8, 0x4f48b8d3, 0x3f33eaf9, 0x00000000, 2651 0x00000000, 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 2652 0xd0258911, 0x3f0abaf3, 0x23e49fe9, 0x3fab5a8c, 0x2d53222e, 2653 0x3ef60d15, 0x21169451, 0x3fa172b2, 0xbb254dbc, 0x3ee1d3b5, 2654 0xdbf93b8e, 0x3f84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 2655 0x3f743924, 0x794a8297, 0x3eb7b7b9, 0xe015f797, 0x3f5d41f5, 2656 0xe41a4a56, 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 2657 0x3fce49ce, 0x8c743719, 0xbd1eb860, 0x00000000, 0x00000000, 2658 0x1b4863cf, 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 2659 0xfffffff8, 0x65965966, 0xc0219659, 0x00000000, 0x00000000, 2660 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 0x83cd3723, 2661 0xc02c8342, 0x00000000, 0xc0000000, 0x55e6c23d, 0x403226e3, 2662 0x55555555, 0x40055555, 0x34451939, 0xc0371c96, 0xaaaaaaab, 2663 0xc00aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 0x40111111, 2664 0xa738201f, 0xc042bbce, 0x05b05b06, 0xc015b05b, 0x452b75e3, 2665 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 0xbff00000, 2666 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x00000000, 2667 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2668 0xc7ab4d5a, 0xc0085e24, 0x00000000, 0x00000000, 0xe93ea75d, 2669 0x400b963d, 0x00000000, 0x00000000, 0x94a7f25a, 0xc00f37e2, 2670 0x4b6261cb, 0xbff5f984, 0x5a9dd812, 0x4011aab0, 0x74c30018, 2671 0x3ffaf5a5, 0x7f2ce8e3, 0xc013fe8b, 0xfe8e54fa, 0xbffd7334, 2672 0x670d618d, 0x4016a10c, 0x4db97058, 0x4000e012, 0x24df44dd, 2673 0xc0199c5f, 0x697d6ece, 0xc003006e, 0x83298b82, 0x401cfc4d, 2674 0x19d490d6, 
0x40058c19, 0x2ae42850, 0xbfea4300, 0x118e20e6, 2675 0x3c7a6db8, 0x00000000, 0x40000000, 0xe33345b8, 0xbfd4e526, 2676 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, 2677 0xbff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, 2678 0x00000000, 0x00000000, 0xff691fa2, 0xbff3972e, 0xe93463bd, 2679 0xbfeeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, 2680 0xa04e8ea3, 0xbff4541a, 0x386accd3, 0xbff1369e, 0x222a66dd, 2681 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0xbff5178f, 2682 0xddaa0031, 0xbff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, 2683 0x3ff29311, 0x2ab7f990, 0xbfe561b8, 0x209c7df1, 0xbc87a8c5, 2684 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, 2685 0x00000000, 0x00000000, 0x00000000, 0xcc03e501, 0xbfdff10f, 2686 0x00000000, 0x00000000, 0x44a4e845, 0x3fddb63b, 0x00000000, 2687 0x00000000, 0x3768ad9f, 0xbfdb72a4, 0x3dd01cca, 0xbfe5fdb9, 2688 0xa61d2811, 0x3fd972b2, 0x5645ad0b, 0x3fe977f9, 0xd013b3ab, 2689 0xbfd78ca3, 0xbf0bf914, 0xbfe4f192, 0x4d53e730, 0x3fd5d060, 2690 0x3f8b9000, 0x3fe49933, 0xe2b82f08, 0xbfd4322a, 0x5936a835, 2691 0xbfe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 0xef478605, 0x3fe1659e, 2692 0x190834ec, 0xbfe11ab7, 0xcdb625ea, 0x3c8e564b, 0x00000000, 2693 0x3ff00000, 0xb07217e3, 0x3fd248f1, 0x00000000, 0x00000000, 2694 0x00000000, 0x00000000, 0x56f37042, 0xbfccfc56, 0x00000000, 2695 0x00000000, 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 2696 0x3d0e7c5d, 0xbfc50533, 0x9bed9b2e, 0xbfdf0ed9, 0x5fe7c47c, 2697 0x3fc1f250, 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0xbfbe5c71, 2698 0x86362c20, 0xbfda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 2699 0x3fd911bd, 0xb56658be, 0xbfb5e4c7, 0x93a2fd76, 0xbfd3c092, 2700 0xda271794, 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 2701 0xbfda8279, 0xb68c1467, 0xbc708b2f, 0x00000000, 0x3ff00000, 2702 0x980c4337, 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 2703 0x00000000, 0x9314533e, 0xbfbb8ec5, 0x00000000, 0x00000000, 2704 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 0xdcb427fd, 
2705 0xbfb13950, 0xd87ab0bb, 0xbfd5335e, 0xce0ae8a5, 0x3fabb382, 2706 0x79143126, 0x3fddba41, 0x5f2b28d4, 0xbfa552f1, 0x59f21a6d, 2707 0xbfd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 0x3fd0576c, 2708 0x8f2c2950, 0xbf9a4898, 0xc0b3f22c, 0xbfc59462, 0x1883a4b8, 2709 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 0xbfd36a08, 2710 0x1dce993d, 0x3c6d704d, 0x00000000, 0x3ff00000, 0x2b82ab63, 2711 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2712 0x5a279ea3, 0xbfaa3407, 0x00000000, 0x00000000, 0x432d65fa, 2713 0x3fa70153, 0x00000000, 0x00000000, 0x891a4602, 0xbf9d03ef, 2714 0xd62ca5f8, 0xbfca77d9, 0xb35f4628, 0x3f97a265, 0x433258fa, 2715 0x3fd8cf51, 0xb58fd909, 0xbf8f88e3, 0x01771cea, 0xbfc2b154, 2716 0xf3562f8e, 0x3f888f57, 0xc028a723, 0x3fc7370f, 0x20b7f9f0, 2717 0xbf80f44c, 0x214368e9, 0xbfb6dfaa, 0x28891863, 0x3f79b4b6, 2718 0x172dbbf0, 0x3fb6cb8e, 0xe0553158, 0xbfc975f5, 0x593fe814, 2719 0xbc2ef5d3, 0x00000000, 0x3ff00000, 0x03dec550, 0x3fa44203, 2720 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b, 2721 0xbf953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea, 2722 0x00000000, 0x00000000, 0xda5b7511, 0xbf85ad63, 0xdc230b9b, 2723 0xbfb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9, 2724 0x77bb08ba, 0xbf757c85, 0xb6247521, 0xbfb1381e, 0x5922170c, 2725 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0xbf64e391, 2726 0x3e666320, 0xbfa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, 2727 0x3fafa8ae, 0x8c5b2da2, 0xbfb936bb, 0x4e88f7a5, 0xbc587d05, 2728 0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000, 2729 0x00000000, 0x00000000, 0x00000000 2730 }; 2731 2732 /* Mask that tanIntrinsic applies to temp5 with andpd before temp5 becomes the divisor of divsd. NOTE(review): with the low-word-first layout used by the inline constant comments this would clear the low 18 bits of the low lane and zero the high lane - confirm. */ private static int[] maskThirtyFiveTan = { 2733 0xfffc0000, 0xffffffff, 0x00000000, 0x00000000 2734 }; 2735 2736 /* qElevenTan, qNineTan, qSevenTan, qFiveTan and qThreeTan are consumed in that order by the mulsd/addsd chain on tanIntrinsic's small-argument path; each array is one double stored as two 32-bit words. */ private static int[] qElevenTan = { 2737 0xb8fe4d77, 0x3f82609a 2738 }; 2739 2740 private static int[] qNineTan = { 2741 0xbf847a43, 0x3f9664a0 2742 }; 2743 2744 private static int[] qSevenTan = { 2745 0x52c4c8ab, 0x3faba1ba 2746 }; 2747 2748 private static int[]
qFiveTan = { 2749 0x11092746, 0x3fc11111 2750 }; 2751 2752 private static int[] qThreeTan = { 2753 0x55555612, 0x3fd55555 2754 }; 2755 2756 /* Read by tanIntrinsic's bb1 path with 32-bit loads at byte offsets 0 to 24 from a base computed from the argument's exponent. NOTE(review): the words look like the hexadecimal expansion of 2/pi - confirm against the Intel LIBM source. */ private static int[] piInvTableTan = { 2757 0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1, 2758 0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561, 2759 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c, 2760 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41, 2761 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 2762 0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 2763 0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 2764 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab, 2765 0xf0cfbc21 2766 }; 2767 2768 /* Two doubles that tanIntrinsic's bb12 loads from byte offsets 0 and 8. */ private static int[] piFourTan = { 2769 0x00000000, 0x3fe921fb, 0x4611a626, 0x3e85110b 2770 }; 2771 2772 /* Scalar loaded into temp5 (movdq) on both table-driven evaluation paths of tanIntrinsic. */ private static int[] qqTwoTan = { 2773 0x676733af, 0x3d32e7b9 2774 }; 2775 2776 /* twoPowFiftyFiveTan and twoPowMFiftyFiveTan are the two scale factors used by tanIntrinsic's bb3: multiply by the first, add the argument, multiply by the second. */ private static int[] twoPowFiftyFiveTan = { 2777 0x00000000, 0x43600000 2778 }; 2779 2780 private static int[] twoPowMFiftyFiveTan = { 2781 0x00000000, 0x3c800000 2782 }; 2783 2784 /** Emits the AMD64 instruction sequence for the tan intrinsic: the double in {@code value} is copied to {@code dest} when their encodings differ, and every path ends by jumping to or falling into bb15 with the result in {@code dest}. The emitted code dispatches on the top 16 bits of the argument: a table-driven main path (cTableTan), a small-argument region (polynomial, bb3, bb2), a large-argument path (bb1) that first reduces the argument with integer arithmetic against piInvTableTan, and bb14 for an all-ones exponent field. @param dest XMM register that receives the result @param value XMM register holding the argument @param crb compilation result builder, handed to setCrb before any constant address is requested @param masm assembler that receives the instructions */ public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { 2785 ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16); 2786 ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16); 2787 ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16); 2788 ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16); 2789 ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16); 2790 ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16); 2791 ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16); 2792 ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16); 2793 ArrayDataPointerConstant maskThirtyFiveTanPtr
= new ArrayDataPointerConstant(maskThirtyFiveTan, 16); 2794 ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16); 2795 ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16); 2796 ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8); 2797 ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16); 2798 ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16); 2799 ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16); 2800 ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8); 2801 ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8); 2802 ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8); 2803 ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8); 2804 ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8); 2805 2806 Label bb0 = new Label(); 2807 Label bb1 = new Label(); 2808 Label bb2 = new Label(); 2809 Label bb3 = new Label(); 2810 Label bb5 = new Label(); 2811 Label bb6 = new Label(); 2812 Label bb8 = new Label(); 2813 Label bb9 = new Label(); 2814 Label bb10 = new Label(); 2815 Label bb11 = new Label(); 2816 Label bb12 = new Label(); 2817 Label bb13 = new Label(); 2818 Label bb14 = new Label(); 2819 Label bb15 = new Label(); 2820 2821 Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); 2822 Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); 2823 /* NOTE(review): gpr3 is the only GPR taken from rcxTemp; presumably because the one-operand shifts emitted below take their count from CL - confirm in AMD64Assembler. */ Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); 2824 Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); 2825 Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); 2826 Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD); 2827 Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD); 2828 Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD); 2829 Register
gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD); 2830 Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD); 2831 2832 Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); 2833 Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); 2834 Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); 2835 Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); 2836 Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); 2837 Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); 2838 Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); 2839 2840 setCrb(crb); 2841 if (dest.encoding != value.encoding) { 2842 masm.movdqu(dest, value); 2843 } 2844 2845 /* Dispatch on word 3 of the argument (its top 16 bits) with the sign bit masked off: after subtracting 16314, values 0 to 270 take the table-driven path that follows; everything else (unsigned Above) goes to bb0. */ masm.pextrw(gpr1, dest, 3); 2846 masm.andl(gpr1, 32767); 2847 masm.subl(gpr1, 16314); 2848 masm.cmpl(gpr1, 270); 2849 masm.jcc(ConditionFlag.Above, bb0); 2850 2851 masm.movdqu(temp5, externalAddress(oneHalfTanPtr)); // 0x00000000, 2852 // 0x3fe00000, 2853 // 0x00000000, 2854 // 0x3fe00000 2855 masm.movdqu(temp6, externalAddress(mulSixteenPtr)); // 0x00000000, 2856 // 0x40300000, 2857 // 0x00000000, 2858 // 0x3ff00000 2859 masm.unpcklpd(dest, dest); 2860 masm.movdqu(temp4, externalAddress(signMaskTanPtr)); // 0x00000000, 2861 // 0x80000000, 2862 // 0x00000000, 2863 // 0x80000000 2864 masm.andpd(temp4, dest); 2865 masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr)); // 0x6dc9c883, 2866 // 0x3fe45f30, 2867 // 0x6dc9c883, 2868 // 0x40245f30 2869 masm.mulpd(temp1, dest); 2870 masm.por(temp5, temp4); 2871 masm.addpd(temp1, temp5); 2872 masm.movdqu(temp7, temp1); 2873 masm.unpckhpd(temp7, temp7); 2874 masm.cvttsd2sil(gpr4, temp7); 2875 masm.cvttpd2dq(temp1, temp1); 2876 masm.cvtdq2pd(temp1, temp1); 2877 masm.mulpd(temp1, temp6); 2878 masm.movdqu(temp3, externalAddress(pOneTanPtr)); // 0x54444000, 2879 // 0x3fb921fb, 2880 // 0x54440000, 2881 // 0x3fb921fb 2882 masm.movdq(temp5, externalAddress(qqTwoTanPtr)); // 0x676733af, 2883 // 0x3d32e7b9 2884 masm.addq(gpr4, 469248); 2885 masm.movdqu(temp4,
externalAddress(pTwoTanPtr)); // 0x67674000, 2886 // 0xbd32e7b9, 2887 // 0x4c4c0000, 2888 // 0x3d468c23 2889 masm.mulpd(temp3, temp1); 2890 masm.andq(gpr4, 31); 2891 masm.mulsd(temp5, temp1); 2892 masm.movq(gpr3, gpr4); 2893 masm.mulpd(temp4, temp1); 2894 masm.shlq(gpr3, 1); 2895 masm.subpd(dest, temp3); 2896 masm.mulpd(temp1, externalAddress(pThreeTanPtr)); // 0x3707344a, 2897 // 0x3aa8a2e0, 2898 // 0x03707345, 2899 // 0x3ae98a2e 2900 masm.addq(gpr4, gpr3); 2901 masm.shlq(gpr3, 2); 2902 masm.addq(gpr4, gpr3); 2903 masm.addsd(temp5, dest); 2904 masm.movdqu(temp2, dest); 2905 masm.subpd(dest, temp4); 2906 masm.movdq(temp6, externalAddress(onePtr)); // 0x00000000, 2907 // 0x3ff00000 2908 masm.shlq(gpr4, 4); 2909 masm.leaq(gpr1, externalAddress(cTableTanPtr)); 2910 masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr)); // 0xfffc0000, 2911 // 0xffffffff, 2912 // 0x00000000, 2913 // 0x00000000 2914 masm.movdqu(temp3, dest); 2915 masm.addq(gpr1, gpr4); 2916 masm.subpd(temp2, dest); 2917 masm.unpckhpd(dest, dest); 2918 masm.divsd(temp6, temp5); 2919 masm.subpd(temp2, temp4); 2920 masm.movdqu(temp7, new AMD64Address(gpr1, 16)); 2921 masm.subsd(temp3, temp5); 2922 masm.mulpd(temp7, dest); 2923 masm.subpd(temp2, temp1); 2924 masm.movdqu(temp1, new AMD64Address(gpr1, 48)); 2925 masm.mulpd(temp1, dest); 2926 masm.movdqu(temp4, new AMD64Address(gpr1, 96)); 2927 masm.mulpd(temp4, dest); 2928 masm.addsd(temp2, temp3); 2929 masm.movdqu(temp3, dest); 2930 masm.mulpd(dest, dest); 2931 masm.addpd(temp7, new AMD64Address(gpr1, 0)); 2932 masm.addpd(temp1, new AMD64Address(gpr1, 32)); 2933 masm.mulpd(temp1, dest); 2934 masm.addpd(temp4, new AMD64Address(gpr1, 80)); 2935 masm.addpd(temp7, temp1); 2936 masm.movdqu(temp1, new AMD64Address(gpr1, 112)); 2937 masm.mulpd(temp1, dest); 2938 masm.mulpd(dest, dest); 2939 masm.addpd(temp4, temp1); 2940 masm.movdqu(temp1, new AMD64Address(gpr1, 64)); 2941 masm.mulpd(temp1, dest); 2942 masm.addpd(temp7, temp1); 2943 masm.movdqu(temp1, temp3);
2944 masm.mulpd(temp3, dest); 2945 masm.mulsd(dest, dest); 2946 masm.mulpd(temp1, new AMD64Address(gpr1, 144)); 2947 masm.mulpd(temp4, temp3); 2948 masm.movdqu(temp3, temp1); 2949 masm.addpd(temp7, temp4); 2950 masm.movdqu(temp4, temp1); 2951 masm.mulsd(dest, temp7); 2952 masm.unpckhpd(temp7, temp7); 2953 masm.addsd(dest, temp7); 2954 masm.unpckhpd(temp1, temp1); 2955 masm.addsd(temp3, temp1); 2956 masm.subsd(temp4, temp3); 2957 masm.addsd(temp1, temp4); 2958 masm.movdqu(temp4, temp2); 2959 masm.movdq(temp7, new AMD64Address(gpr1, 144)); 2960 masm.unpckhpd(temp2, temp2); 2961 masm.addsd(temp7, new AMD64Address(gpr1, 152)); 2962 masm.mulsd(temp7, temp2); 2963 masm.addsd(temp7, new AMD64Address(gpr1, 136)); 2964 masm.addsd(temp7, temp1); 2965 masm.addsd(dest, temp7); 2966 masm.movdq(temp7, externalAddress(onePtr)); // 0x00000000, 2967 // 0x3ff00000 2968 masm.mulsd(temp4, temp6); 2969 masm.movdq(temp2, new AMD64Address(gpr1, 168)); 2970 masm.andpd(temp2, temp6); 2971 masm.mulsd(temp5, temp2); 2972 masm.mulsd(temp6, new AMD64Address(gpr1, 160)); 2973 masm.subsd(temp7, temp5); 2974 masm.subsd(temp2, new AMD64Address(gpr1, 128)); 2975 masm.subsd(temp7, temp4); 2976 masm.mulsd(temp7, temp6); 2977 masm.movdqu(temp4, temp3); 2978 masm.subsd(temp3, temp2); 2979 masm.addsd(temp2, temp3); 2980 masm.subsd(temp4, temp2); 2981 masm.addsd(dest, temp4); 2982 masm.subsd(dest, temp7); 2983 masm.addsd(dest, temp3); 2984 masm.jmp(bb15); 2985 2986 /* bb0: the flags are still those of cmpl(gpr1, 270); signed Greater selects the large-argument path bb1, the remaining (negative) case is a small argument handled below. */ masm.bind(bb0); 2987 masm.jcc(ConditionFlag.Greater, bb1); 2988 2989 masm.pextrw(gpr1, dest, 3); 2990 masm.movl(gpr4, gpr1); 2991 masm.andl(gpr1, 32752); 2992 masm.jcc(ConditionFlag.Equal, bb2); 2993 2994 masm.andl(gpr4, 32767); 2995 masm.cmpl(gpr4, 15904); 2996 masm.jcc(ConditionFlag.Below, bb3); 2997 2998 /* Small-argument polynomial, x being the argument in dest: dest + x^3 * (qThree + x^2 * (qFive + x^2 * (qSeven + x^2 * (qNine + x^2 * qEleven)))). */ masm.movdqu(temp2, dest); 2999 masm.movdqu(temp3, dest); 3000 masm.movdq(temp1, externalAddress(qElevenTanPtr)); // 0xb8fe4d77, 3001 // 0x3f82609a 3002 masm.mulsd(temp2, dest); 3003 masm.mulsd(temp3, temp2); 3004 masm.mulsd(temp1, temp2);
3005 masm.addsd(temp1, externalAddress(qNineTanPtr)); // 0xbf847a43, 3006 // 0x3f9664a0 3007 masm.mulsd(temp1, temp2); 3008 masm.addsd(temp1, externalAddress(qSevenTanPtr)); // 0x52c4c8ab, 3009 // 0x3faba1ba 3010 masm.mulsd(temp1, temp2); 3011 masm.addsd(temp1, externalAddress(qFiveTanPtr)); // 0x11092746, 3012 // 0x3fc11111 3013 masm.mulsd(temp1, temp2); 3014 masm.addsd(temp1, externalAddress(qThreeTanPtr)); // 0x55555612, 3015 // 0x3fd55555 3016 masm.mulsd(temp1, temp3); 3017 masm.addsd(dest, temp1); 3018 masm.jmp(bb15); 3019 3020 /* bb3 (masked top word below 15904): computes (x * 2^55 + x) * 2^-55, reading the two constants as the doubles their inline words encode. */ masm.bind(bb3); 3021 masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr)); // 0x00000000, 3022 // 0x43600000 3023 masm.mulsd(temp3, dest); 3024 masm.addsd(dest, temp3); 3025 masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr)); // 0x00000000, 3026 // 0x3c800000 3027 masm.jmp(bb15); 3028 3029 /* bb14 (exponent field all ones, reached from bb1): produces 0.0 / 0.0 in dest. */ masm.bind(bb14); 3030 masm.xorpd(temp1, temp1); 3031 masm.xorpd(dest, dest); 3032 masm.divsd(dest, temp1); 3033 masm.jmp(bb15); 3034 3035 /* bb2 (exponent field zero): squares the argument into temp1 only; dest still holds the argument. */ masm.bind(bb2); 3036 masm.movdqu(temp1, dest); 3037 masm.mulsd(temp1, temp1); 3038 masm.jmp(bb15); 3039 3040 /* bb1: large-argument path. An all-ones exponent field (32752) goes to bb14; otherwise the argument's bits (split into gpr4 and gpr1) are multiplied word by word against piInvTableTan with imulq/addq. NOTE(review): presumably a Payne-Hanek style reduction - confirm against the Intel LIBM source. */ masm.bind(bb1); 3041 masm.pextrw(gpr3, dest, 3); 3042 masm.andl(gpr3, 32752); 3043 masm.cmpl(gpr3, 32752); 3044 masm.jcc(ConditionFlag.Equal, bb14); 3045 3046 masm.subl(gpr3, 16224); 3047 masm.shrl(gpr3, 7); 3048 masm.andl(gpr3, 65532); 3049 masm.leaq(gpr10, externalAddress(piInvTableTanPtr)); 3050 masm.addq(gpr3, gpr10); 3051 masm.movdq(gpr1, dest); 3052 masm.movl(gpr9, new AMD64Address(gpr3, 20)); 3053 masm.movl(gpr7, new AMD64Address(gpr3, 24)); 3054 masm.movl(gpr4, gpr1); 3055 masm.shrq(gpr1, 21); 3056 masm.orl(gpr1, Integer.MIN_VALUE); 3057 masm.shrl(gpr1, 11); 3058 masm.movl(gpr8, gpr9); 3059 masm.imulq(gpr9, gpr4); 3060 masm.imulq(gpr8, gpr1); 3061 masm.imulq(gpr7, gpr1); 3062 masm.movl(gpr5, new AMD64Address(gpr3, 16)); 3063 masm.movl(gpr6, new AMD64Address(gpr3, 12)); 3064 masm.movl(gpr10, gpr9); 3065 masm.shrq(gpr9, 32); 3066 masm.addq(gpr8, gpr9); 3067 masm.addq(gpr10, gpr7); 3068
masm.movl(gpr7, gpr10); 3069 masm.shrq(gpr10, 32); 3070 masm.addq(gpr8, gpr10); 3071 masm.movl(gpr9, gpr5); 3072 masm.imulq(gpr5, gpr4); 3073 masm.imulq(gpr9, gpr1); 3074 masm.movl(gpr10, gpr6); 3075 masm.imulq(gpr6, gpr4); 3076 masm.movl(gpr2, gpr5); 3077 masm.shrq(gpr5, 32); 3078 masm.addq(gpr8, gpr2); 3079 masm.movl(gpr2, gpr8); 3080 masm.shrq(gpr8, 32); 3081 masm.addq(gpr9, gpr5); 3082 masm.addq(gpr9, gpr8); 3083 masm.shlq(gpr2, 32); 3084 masm.orq(gpr7, gpr2); 3085 masm.imulq(gpr10, gpr1); 3086 masm.movl(gpr8, new AMD64Address(gpr3, 8)); 3087 masm.movl(gpr5, new AMD64Address(gpr3, 4)); 3088 masm.movl(gpr2, gpr6); 3089 masm.shrq(gpr6, 32); 3090 masm.addq(gpr9, gpr2); 3091 masm.movl(gpr2, gpr9); 3092 masm.shrq(gpr9, 32); 3093 masm.addq(gpr10, gpr6); 3094 masm.addq(gpr10, gpr9); 3095 masm.movq(gpr6, gpr8); 3096 masm.imulq(gpr8, gpr4); 3097 masm.imulq(gpr6, gpr1); 3098 masm.movl(gpr9, gpr8); 3099 masm.shrq(gpr8, 32); 3100 masm.addq(gpr10, gpr9); 3101 masm.movl(gpr9, gpr10); 3102 masm.shrq(gpr10, 32); 3103 masm.addq(gpr6, gpr8); 3104 masm.addq(gpr6, gpr10); 3105 masm.movq(gpr8, gpr5); 3106 masm.imulq(gpr5, gpr4); 3107 masm.imulq(gpr8, gpr1); 3108 masm.shlq(gpr9, 32); 3109 masm.orq(gpr9, gpr2); 3110 masm.movl(gpr1, new AMD64Address(gpr3, 0)); 3111 masm.movl(gpr10, gpr5); 3112 masm.shrq(gpr5, 32); 3113 masm.addq(gpr6, gpr10); 3114 masm.movl(gpr10, gpr6); 3115 masm.shrq(gpr6, 32); 3116 masm.addq(gpr8, gpr5); 3117 masm.addq(gpr8, gpr6); 3118 masm.imulq(gpr4, gpr1); 3119 masm.pextrw(gpr2, dest, 3); 3120 masm.leaq(gpr6, externalAddress(piInvTableTanPtr)); 3121 masm.subq(gpr3, gpr6); 3122 masm.addl(gpr3, gpr3); 3123 masm.addl(gpr3, gpr3); 3124 masm.addl(gpr3, gpr3); 3125 masm.addl(gpr3, 19); 3126 masm.movl(gpr5, 32768); 3127 masm.andl(gpr5, gpr2); 3128 masm.shrl(gpr2, 4); 3129 masm.andl(gpr2, 2047); 3130 masm.subl(gpr2, 1023); 3131 masm.subl(gpr3, gpr2); 3132 masm.addq(gpr8, gpr4); 3133 masm.movl(gpr4, gpr3); 3134 masm.addl(gpr4, 32); 3135 masm.cmpl(gpr3, 0); 3136
masm.jcc(ConditionFlag.Less, bb5); 3137 3138 masm.negl(gpr3); 3139 masm.addl(gpr3, 29); 3140 masm.shll(gpr8); 3141 masm.movl(gpr6, gpr8); 3142 masm.andl(gpr8, 1073741823); 3143 masm.testl(gpr8, 536870912); 3144 masm.jcc(ConditionFlag.NotEqual, bb6); 3145 3146 masm.shrl(gpr8); 3147 masm.movl(gpr2, 0); 3148 masm.shlq(gpr8, 32); 3149 masm.orq(gpr8, gpr10); 3150 3151 masm.bind(bb8); 3152 masm.cmpq(gpr8, 0); 3153 masm.jcc(ConditionFlag.Equal, bb9); 3154 3155 masm.bind(bb10); 3156 masm.bsrq(gpr10, gpr8); 3157 masm.movl(gpr3, 29); 3158 masm.subl(gpr3, gpr10); 3159 masm.jcc(ConditionFlag.LessEqual, bb11); 3160 3161 masm.shlq(gpr8); 3162 masm.movq(gpr1, gpr9); 3163 masm.shlq(gpr9); 3164 masm.addl(gpr4, gpr3); 3165 masm.negl(gpr3); 3166 masm.addl(gpr3, 64); 3167 masm.shrq(gpr1); 3168 masm.shrq(gpr7); 3169 masm.orq(gpr8, gpr1); 3170 masm.orq(gpr9, gpr7); 3171 3172 /* bb12: converts the reduced integer words in gpr8 and gpr9 back to doubles (cvtsi2sdq), scales them, and then runs a table-driven evaluation that mirrors the main path. */ masm.bind(bb12); 3173 masm.cvtsi2sdq(dest, gpr8); 3174 masm.shrq(gpr9, 1); 3175 masm.cvtsi2sdq(temp3, gpr9); 3176 masm.xorpd(temp4, temp4); 3177 masm.shll(gpr4, 4); 3178 masm.negl(gpr4); 3179 masm.addl(gpr4, 16368); 3180 masm.orl(gpr4, gpr5); 3181 masm.xorl(gpr4, gpr2); 3182 masm.pinsrw(temp4, gpr4, 3); 3183 masm.leaq(gpr1, externalAddress(piFourTanPtr)); 3184 masm.movdq(temp2, new AMD64Address(gpr1, 0)); // 0x00000000, 3185 // 0x3fe921fb, 3186 masm.movdq(temp7, new AMD64Address(gpr1, 8)); // 0x4611a626, 3187 // 0x3e85110b 3188 masm.xorpd(temp5, temp5); 3189 masm.subl(gpr4, 1008); 3190 masm.pinsrw(temp5, gpr4, 3); 3191 masm.mulsd(dest, temp4); 3192 masm.shll(gpr5, 16); 3193 masm.sarl(gpr5, 31); 3194 masm.mulsd(temp3, temp5); 3195 masm.movdqu(temp1, dest); 3196 masm.mulsd(dest, temp2); 3197 masm.shrl(gpr6, 30); 3198 masm.addsd(temp1, temp3); 3199 masm.mulsd(temp3, temp2); 3200 masm.addl(gpr6, gpr5); 3201 masm.xorl(gpr6, gpr5); 3202 masm.mulsd(temp7, temp1); 3203 masm.movl(gpr1, gpr6); 3204 masm.addsd(temp7, temp3); 3205 masm.movdqu(temp2, dest); 3206 masm.addsd(dest, temp7); 3207 masm.subsd(temp2, dest); 3208
masm.addsd(temp7, temp2); 3209 masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr)); // 0x6dc9c883, 3210 // 0x3fe45f30, 3211 // 0x6dc9c883, 3212 // 0x40245f30 3213 if (masm.supports(CPUFeature.SSE3)) { 3214 masm.movddup(dest, dest); 3215 } else { 3216 masm.movlhps(dest, dest); 3217 } 3218 masm.movdqu(temp4, externalAddress(signMaskTanPtr)); // 0x00000000, 3219 // 0x80000000, 3220 // 0x00000000, 3221 // 0x80000000 3222 masm.andpd(temp4, dest); 3223 masm.mulpd(temp1, dest); 3224 if (masm.supports(CPUFeature.SSE3)) { 3225 masm.movddup(temp7, temp7); 3226 } else { 3227 masm.movlhps(temp7, temp7); 3228 } 3229 masm.movdqu(temp5, externalAddress(oneHalfTanPtr)); // 0x00000000, 3230 // 0x3fe00000, 3231 // 0x00000000, 3232 // 0x3fe00000 3233 masm.movdqu(temp6, externalAddress(mulSixteenPtr)); // 0x00000000, 3234 // 0x40300000, 3235 // 0x00000000, 3236 // 0x3ff00000 3237 masm.por(temp5, temp4); 3238 masm.addpd(temp1, temp5); 3239 masm.movdqu(temp5, temp1); 3240 masm.unpckhpd(temp5, temp5); 3241 masm.cvttsd2sil(gpr4, temp5); 3242 masm.cvttpd2dq(temp1, temp1); 3243 masm.cvtdq2pd(temp1, temp1); 3244 masm.mulpd(temp1, temp6); 3245 masm.movdqu(temp3, externalAddress(pOneTanPtr)); // 0x54444000, 3246 // 0x3fb921fb, 3247 // 0x54440000, 3248 // 0x3fb921fb 3249 masm.movdq(temp5, externalAddress(qqTwoTanPtr)); // 0x676733af, 3250 // 0x3d32e7b9 3251 masm.shll(gpr1, 4); 3252 masm.addl(gpr4, 469248); 3253 masm.movdqu(temp4, externalAddress(pTwoTanPtr)); // 0x67674000, 3254 // 0xbd32e7b9, 3255 // 0x4c4c0000, 3256 // 0x3d468c23 3257 masm.mulpd(temp3, temp1); 3258 masm.addl(gpr4, gpr1); 3259 masm.andl(gpr4, 31); 3260 masm.mulsd(temp5, temp1); 3261 masm.movl(gpr3, gpr4); 3262 masm.mulpd(temp4, temp1); 3263 masm.shll(gpr3, 1); 3264 masm.subpd(dest, temp3); 3265 masm.mulpd(temp1, externalAddress(pThreeTanPtr)); // 0x3707344a, 3266 // 0x3aa8a2e0, 3267 // 0x03707345, 3268 // 0x3ae98a2e 3269 masm.addl(gpr4, gpr3); 3270 masm.shll(gpr3, 2); 3271 masm.addl(gpr4, gpr3); 3272 masm.addsd(temp5,
dest); 3273 masm.movdqu(temp2, dest); 3274 masm.subpd(dest, temp4); 3275 masm.movdq(temp6, externalAddress(onePtr)); // 0x00000000, 3276 // 0x3ff00000 3277 masm.shll(gpr4, 4); 3278 masm.leaq(gpr1, externalAddress(cTableTanPtr)); 3279 masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr)); // 0xfffc0000, 3280 // 0xffffffff, 3281 // 0x00000000, 3282 // 0x00000000 3283 masm.movdqu(temp3, dest); 3284 masm.addq(gpr1, gpr4); 3285 masm.subpd(temp2, dest); 3286 masm.unpckhpd(dest, dest); 3287 masm.divsd(temp6, temp5); 3288 masm.subpd(temp2, temp4); 3289 masm.subsd(temp3, temp5); 3290 masm.subpd(temp2, temp1); 3291 masm.movdqu(temp1, new AMD64Address(gpr1, 48)); 3292 masm.addpd(temp2, temp7); 3293 masm.movdqu(temp7, new AMD64Address(gpr1, 16)); 3294 masm.mulpd(temp7, dest); 3295 masm.movdqu(temp4, new AMD64Address(gpr1, 96)); 3296 masm.mulpd(temp1, dest); 3297 masm.mulpd(temp4, dest); 3298 masm.addsd(temp2, temp3); 3299 masm.movdqu(temp3, dest); 3300 masm.mulpd(dest, dest); 3301 masm.addpd(temp7, new AMD64Address(gpr1, 0)); 3302 masm.addpd(temp1, new AMD64Address(gpr1, 32)); 3303 masm.mulpd(temp1, dest); 3304 masm.addpd(temp4, new AMD64Address(gpr1, 80)); 3305 masm.addpd(temp7, temp1); 3306 masm.movdqu(temp1, new AMD64Address(gpr1, 112)); 3307 masm.mulpd(temp1, dest); 3308 masm.mulpd(dest, dest); 3309 masm.addpd(temp4, temp1); 3310 masm.movdqu(temp1, new AMD64Address(gpr1, 64)); 3311 masm.mulpd(temp1, dest); 3312 masm.addpd(temp7, temp1); 3313 masm.movdqu(temp1, temp3); 3314 masm.mulpd(temp3, dest); 3315 masm.mulsd(dest, dest); 3316 masm.mulpd(temp1, new AMD64Address(gpr1, 144)); 3317 masm.mulpd(temp4, temp3); 3318 masm.movdqu(temp3, temp1); 3319 masm.addpd(temp7, temp4); 3320 masm.movdqu(temp4, temp1); 3321 masm.mulsd(dest, temp7); 3322 masm.unpckhpd(temp7, temp7); 3323 masm.addsd(dest, temp7); 3324 masm.unpckhpd(temp1, temp1); 3325 masm.addsd(temp3, temp1); 3326 masm.subsd(temp4, temp3); 3327 masm.addsd(temp1, temp4); 3328 masm.movdqu(temp4, temp2); 3329
masm.movdq(temp7, new AMD64Address(gpr1, 144)); 3330 masm.unpckhpd(temp2, temp2); 3331 masm.addsd(temp7, new AMD64Address(gpr1, 152)); 3332 masm.mulsd(temp7, temp2); 3333 masm.addsd(temp7, new AMD64Address(gpr1, 136)); 3334 masm.addsd(temp7, temp1); 3335 masm.addsd(dest, temp7); 3336 masm.movdq(temp7, externalAddress(onePtr)); // 0x00000000, 3337 // 0x3ff00000 3338 masm.mulsd(temp4, temp6); 3339 masm.movdq(temp2, new AMD64Address(gpr1, 168)); 3340 masm.andpd(temp2, temp6); 3341 masm.mulsd(temp5, temp2); 3342 masm.mulsd(temp6, new AMD64Address(gpr1, 160)); 3343 masm.subsd(temp7, temp5); 3344 masm.subsd(temp2, new AMD64Address(gpr1, 128)); 3345 masm.subsd(temp7, temp4); 3346 masm.mulsd(temp7, temp6); 3347 masm.movdqu(temp4, temp3); 3348 masm.subsd(temp3, temp2); 3349 masm.addsd(temp2, temp3); 3350 masm.subsd(temp4, temp2); 3351 masm.addsd(dest, temp4); 3352 masm.subsd(dest, temp7); 3353 masm.addsd(dest, temp3); 3354 masm.jmp(bb15); 3355 3356 masm.bind(bb9); 3357 masm.addl(gpr4, 64); 3358 masm.movq(gpr8, gpr9); 3359 masm.movq(gpr9, gpr7); 3360 masm.movl(gpr7, 0); 3361 masm.cmpq(gpr8, 0); 3362 masm.jcc(ConditionFlag.NotEqual, bb10); 3363 3364 masm.addl(gpr4, 64); 3365 masm.movq(gpr8, gpr9); 3366 masm.movq(gpr9, gpr7); 3367 masm.cmpq(gpr8, 0); 3368 masm.jcc(ConditionFlag.NotEqual, bb10); 3369 3370 masm.jmp(bb12); 3371 3372 masm.bind(bb11); 3373 masm.jcc(ConditionFlag.Equal, bb12); 3374 3375 masm.negl(gpr3); 3376 masm.shrq(gpr9); 3377 masm.movq(gpr1, gpr8); 3378 masm.shrq(gpr8); 3379 masm.subl(gpr4, gpr3); 3380 masm.negl(gpr3); 3381 masm.addl(gpr3, 64); 3382 masm.shlq(gpr1); 3383 masm.orq(gpr9, gpr1); 3384 masm.jmp(bb12); 3385 3386 masm.bind(bb5); 3387 masm.notl(gpr3); 3388 masm.shlq(gpr8, 32); 3389 masm.orq(gpr8, gpr10); 3390 masm.shlq(gpr8); 3391 masm.movq(gpr6, gpr8); 3392 masm.testl(gpr8, Integer.MIN_VALUE); 3393 masm.jcc(ConditionFlag.NotEqual, bb13); 3394 3395 masm.shrl(gpr8); 3396 masm.movl(gpr2, 0); 3397 masm.shrq(gpr6, 2); 3398 masm.jmp(bb8); 3399 3400
masm.bind(bb6); 3401 masm.shrl(gpr8); 3402 masm.movl(gpr2, 1073741824); 3403 masm.shrl(gpr2); 3404 masm.shlq(gpr8, 32); 3405 masm.orq(gpr8, gpr10); 3406 masm.shlq(gpr2, 32); 3407 masm.addl(gpr6, 1073741824); 3408 masm.movl(gpr3, 0); 3409 masm.movl(gpr10, 0); 3410 masm.subq(gpr3, gpr7); 3411 masm.sbbq(gpr10, gpr9); 3412 masm.sbbq(gpr2, gpr8); 3413 masm.movq(gpr7, gpr3); 3414 masm.movq(gpr9, gpr10); 3415 masm.movq(gpr8, gpr2); 3416 masm.movl(gpr2, 32768); 3417 masm.jmp(bb8); 3418 3419 masm.bind(bb13); 3420 masm.shrl(gpr8); 3421 masm.movq(gpr2, 0x100000000L); 3422 masm.shrq(gpr2); 3423 masm.movl(gpr3, 0); 3424 masm.movl(gpr10, 0); 3425 masm.subq(gpr3, gpr7); 3426 masm.sbbq(gpr10, gpr9); 3427 masm.sbbq(gpr2, gpr8); 3428 masm.movq(gpr7, gpr3); 3429 masm.movq(gpr9, gpr10); 3430 masm.movq(gpr8, gpr2); 3431 masm.movl(gpr2, 32768); 3432 masm.shrq(gpr6, 2); 3433 masm.addl(gpr6, 1073741824); 3434 masm.jmp(bb8); 3435 3436 masm.bind(bb15); 3437 } 3438 3439 /* 3440 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) 3441 * Source Code 3442 * 3443 * ALGORITHM DESCRIPTION - EXP() --------------------- 3444 * 3445 * Description: Let K = 64 (table size). Then e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)), where x = 3446 * m*log(2)/K + y, y in [-log(2)/K..log(2)/K], m = n*K + j, m,n,j - signed integers, j in 3447 * [-K/2..K/2]. The values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]). 3448 * 3449 * P(y) is a minimax polynomial approximation of exp(x)-1 on the small interval 3450 * [-log(2)/K..log(2)/K] (the coefficients were calculated by Maple V). 3451 * 3452 * To avoid problems with arithmetic overflow and underflow, the value of 2^n is safely 3453 * computed as 2^n1 * 2^n2, where n1 in [-BIAS/2..BIAS/2] and BIAS is the value of the exponent bias.
3454 * 3455 * Special cases: exp(NaN) = NaN exp(+INF) = +INF exp(-INF) = 0 exp(x) = 1 for subnormals for 3456 * finite argument, only exp(0)=1 is exact For IEEE double if x > 709.782712893383973096 then 3457 * exp(x) overflow if x < -745.133219101941108420 then exp(x) underflow 3458 * 3459 */ 3460 3461 private static int[] cvExp = { 3462 0x652b82fe, 0x40571547, 0x652b82fe, 0x40571547, 0xfefa0000, 3463 0x3f862e42, 0xfefa0000, 0x3f862e42, 0xbc9e3b3a, 0x3d1cf79a, 3464 0xbc9e3b3a, 0x3d1cf79a, 0xfffffffe, 0x3fdfffff, 0xfffffffe, 3465 0x3fdfffff, 0xe3289860, 0x3f56c15c, 0x555b9e25, 0x3fa55555, 3466 0xc090cf0f, 0x3f811115, 0x55548ba1, 0x3fc55555 3467 }; 3468 3469 private static int[] shifterExp = { 3470 0x00000000, 0x43380000, 0x00000000, 0x43380000 3471 }; 3472 3473 private static int[] mMaskExp = { 3474 0xffffffc0, 0x00000000, 0xffffffc0, 0x00000000 3475 }; 3476 3477 private static int[] biasExp = { 3478 0x0000ffc0, 0x00000000, 0x0000ffc0, 0x00000000 3479 }; 3480 3481 private static int[] tblAddrExp = { 3482 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0e03754d, 3483 0x3cad7bbf, 0x3e778060, 0x00002c9a, 0x3567f613, 0x3c8cd252, 3484 0xd3158574, 0x000059b0, 0x61e6c861, 0x3c60f74e, 0x18759bc8, 3485 0x00008745, 0x5d837b6c, 0x3c979aa6, 0x6cf9890f, 0x0000b558, 3486 0x702f9cd1, 0x3c3ebe3d, 0x32d3d1a2, 0x0000e3ec, 0x1e63bcd8, 3487 0x3ca3516e, 0xd0125b50, 0x00011301, 0x26f0387b, 0x3ca4c554, 3488 0xaea92ddf, 0x0001429a, 0x62523fb6, 0x3ca95153, 0x3c7d517a, 3489 0x000172b8, 0x3f1353bf, 0x3c8b898c, 0xeb6fcb75, 0x0001a35b, 3490 0x3e3a2f5f, 0x3c9aecf7, 0x3168b9aa, 0x0001d487, 0x44a6c38d, 3491 0x3c8a6f41, 0x88628cd6, 0x0002063b, 0xe3a8a894, 0x3c968efd, 3492 0x6e756238, 0x0002387a, 0x981fe7f2, 0x3c80472b, 0x65e27cdd, 3493 0x00026b45, 0x6d09ab31, 0x3c82f7e1, 0xf51fdee1, 0x00029e9d, 3494 0x720c0ab3, 0x3c8b3782, 0xa6e4030b, 0x0002d285, 0x4db0abb6, 3495 0x3c834d75, 0x0a31b715, 0x000306fe, 0x5dd3f84a, 0x3c8fdd39, 3496 0xb26416ff, 0x00033c08, 0xcc187d29, 0x3ca12f8c, 0x373aa9ca, 3497 
0x000371a7, 0x738b5e8b, 0x3ca7d229, 0x34e59ff6, 0x0003a7db, 3498 0xa72a4c6d, 0x3c859f48, 0x4c123422, 0x0003dea6, 0x259d9205, 3499 0x3ca8b846, 0x21f72e29, 0x0004160a, 0x60c2ac12, 0x3c4363ed, 3500 0x6061892d, 0x00044e08, 0xdaa10379, 0x3c6ecce1, 0xb5c13cd0, 3501 0x000486a2, 0xbb7aafb0, 0x3c7690ce, 0xd5362a27, 0x0004bfda, 3502 0x9b282a09, 0x3ca083cc, 0x769d2ca6, 0x0004f9b2, 0xc1aae707, 3503 0x3ca509b0, 0x569d4f81, 0x0005342b, 0x18fdd78e, 0x3c933505, 3504 0x36b527da, 0x00056f47, 0xe21c5409, 0x3c9063e1, 0xdd485429, 3505 0x0005ab07, 0x2b64c035, 0x3c9432e6, 0x15ad2148, 0x0005e76f, 3506 0x99f08c0a, 0x3ca01284, 0xb03a5584, 0x0006247e, 0x0073dc06, 3507 0x3c99f087, 0x82552224, 0x00066238, 0x0da05571, 0x3c998d4d, 3508 0x667f3bcc, 0x0006a09e, 0x86ce4786, 0x3ca52bb9, 0x3c651a2e, 3509 0x0006dfb2, 0x206f0dab, 0x3ca32092, 0xe8ec5f73, 0x00071f75, 3510 0x8e17a7a6, 0x3ca06122, 0x564267c8, 0x00075feb, 0x461e9f86, 3511 0x3ca244ac, 0x73eb0186, 0x0007a114, 0xabd66c55, 0x3c65ebe1, 3512 0x36cf4e62, 0x0007e2f3, 0xbbff67d0, 0x3c96fe9f, 0x994cce12, 3513 0x00082589, 0x14c801df, 0x3c951f14, 0x9b4492ec, 0x000868d9, 3514 0xc1f0eab4, 0x3c8db72f, 0x422aa0db, 0x0008ace5, 0x59f35f44, 3515 0x3c7bf683, 0x99157736, 0x0008f1ae, 0x9c06283c, 0x3ca360ba, 3516 0xb0cdc5e4, 0x00093737, 0x20f962aa, 0x3c95e8d1, 0x9fde4e4f, 3517 0x00097d82, 0x2b91ce27, 0x3c71affc, 0x82a3f090, 0x0009c491, 3518 0x589a2ebd, 0x3c9b6d34, 0x7b5de564, 0x000a0c66, 0x9ab89880, 3519 0x3c95277c, 0xb23e255c, 0x000a5503, 0x6e735ab3, 0x3c846984, 3520 0x5579fdbf, 0x000a9e6b, 0x92cb3387, 0x3c8c1a77, 0x995ad3ad, 3521 0x000ae89f, 0xdc2d1d96, 0x3ca22466, 0xb84f15fa, 0x000b33a2, 3522 0xb19505ae, 0x3ca1112e, 0xf2fb5e46, 0x000b7f76, 0x0a5fddcd, 3523 0x3c74ffd7, 0x904bc1d2, 0x000bcc1e, 0x30af0cb3, 0x3c736eae, 3524 0xdd85529c, 0x000c199b, 0xd10959ac, 0x3c84e08f, 0x2e57d14b, 3525 0x000c67f1, 0x6c921968, 0x3c676b2c, 0xdcef9069, 0x000cb720, 3526 0x36df99b3, 0x3c937009, 0x4a07897b, 0x000d072d, 0xa63d07a7, 3527 0x3c74a385, 0xdcfba487, 0x000d5818, 0xd5c192ac, 
0x3c8e5a50, 3528 0x03db3285, 0x000da9e6, 0x1c4a9792, 0x3c98bb73, 0x337b9b5e, 3529 0x000dfc97, 0x603a88d3, 0x3c74b604, 0xe78b3ff6, 0x000e502e, 3530 0x92094926, 0x3c916f27, 0xa2a490d9, 0x000ea4af, 0x41aa2008, 3531 0x3c8ec3bc, 0xee615a27, 0x000efa1b, 0x31d185ee, 0x3c8a64a9, 3532 0x5b6e4540, 0x000f5076, 0x4d91cd9d, 0x3c77893b, 0x819e90d8, 3533 0x000fa7c1 3534 }; 3535 3536 private static int[] allOnesExp = { 3537 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff 3538 }; 3539 3540 private static int[] expBias = { 3541 0x00000000, 0x3ff00000, 0x00000000, 0x3ff00000 3542 }; 3543 3544 private static int[] xMaxExp = { 3545 0xffffffff, 0x7fefffff 3546 }; 3547 3548 private static int[] xMinExp = { 3549 0x00000000, 0x00100000 3550 }; 3551 3552 private static int[] infExp = { 3553 0x00000000, 0x7ff00000 3554 }; 3555 3556 private static int[] zeroExp = { 3557 0x00000000, 0x00000000 3558 }; 3559 3560 public void expIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { 3561 ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 16); 3562 ArrayDataPointerConstant cvExpPtr = new ArrayDataPointerConstant(cvExp, 16); 3563 ArrayDataPointerConstant shifterExpPtr = new ArrayDataPointerConstant(shifterExp, 8); 3564 ArrayDataPointerConstant mMaskExpPtr = new ArrayDataPointerConstant(mMaskExp, 16); 3565 ArrayDataPointerConstant biasExpPtr = new ArrayDataPointerConstant(biasExp, 16); 3566 ArrayDataPointerConstant tblAddrExpPtr = new ArrayDataPointerConstant(tblAddrExp, 16); 3567 ArrayDataPointerConstant expBiasPtr = new ArrayDataPointerConstant(expBias, 8); 3568 ArrayDataPointerConstant xMaxExpPtr = new ArrayDataPointerConstant(xMaxExp, 8); 3569 ArrayDataPointerConstant xMinExpPtr = new ArrayDataPointerConstant(xMinExp, 8); 3570 ArrayDataPointerConstant infExpPtr = new ArrayDataPointerConstant(infExp, 8); 3571 ArrayDataPointerConstant zeroExpPtr = new ArrayDataPointerConstant(zeroExp, 8); 3572 ArrayDataPointerConstant allOnesExpPtr = new 
ArrayDataPointerConstant(allOnesExp, 8); 3573 3574 Label bb0 = new Label(); 3575 Label bb1 = new Label(); 3576 Label bb2 = new Label(); 3577 Label bb3 = new Label(); 3578 Label bb4 = new Label(); 3579 Label bb5 = new Label(); 3580 Label bb7 = new Label(); 3581 Label bb8 = new Label(); 3582 Label bb9 = new Label(); 3583 Label bb10 = new Label(); 3584 Label bb11 = new Label(); 3585 Label bb12 = new Label(); 3586 Label bb14 = new Label(); 3587 3588 Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); 3589 Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); 3590 Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); 3591 Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); 3592 Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); 3593 3594 Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); 3595 Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); 3596 Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); 3597 Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); 3598 Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); 3599 Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); 3600 Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); 3601 Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); 3602 Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); 3603 Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE); 3604 3605 AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); 3606 3607 setCrb(crb); 3608 masm.movsd(stackSlot, value); 3609 if (dest.encoding != value.encoding) { 3610 masm.movdqu(dest, value); 3611 } 3612 3613 masm.movdqu(temp9, externalAddress(mMaskExpPtr)); // 0xffffffc0, 3614 // 0x00000000, 3615 // 0xffffffc0, 3616 // 0x00000000 3617 masm.movdqu(temp10, externalAddress(biasExpPtr)); // 0x0000ffc0, 3618 // 0x00000000, 3619 // 0x0000ffc0, 3620 // 0x00000000 3621 masm.unpcklpd(dest, dest); 3622 masm.leaq(gpr5, stackSlot); 3623 masm.leaq(gpr2, externalAddress(cvExpPtr)); 3624 masm.movdqu(temp1, new 
AMD64Address(gpr2, 0)); // 0x652b82fe, 3625 // 0x40571547, 3626 // 0x652b82fe, 3627 // 0x40571547 3628 masm.movdqu(temp6, externalAddress(shifterExpPtr)); // 0x00000000, 3629 // 0x43380000, 3630 // 0x00000000, 3631 // 0x43380000 3632 masm.movdqu(temp2, new AMD64Address(gpr2, 16)); // 0xfefa0000, 3633 // 0x3f862e42, 3634 // 0xfefa0000, 3635 // 0x3f862e42 3636 masm.movdqu(temp3, new AMD64Address(gpr2, 32)); // 0xbc9e3b3a, 3637 // 0x3d1cf79a, 3638 // 0xbc9e3b3a, 3639 // 0x3d1cf79a 3640 masm.pextrw(gpr1, dest, 3); 3641 masm.andl(gpr1, 32767); 3642 masm.movl(gpr4, 16527); 3643 masm.subl(gpr4, gpr1); 3644 masm.subl(gpr1, 15504); 3645 masm.orl(gpr4, gpr1); 3646 masm.cmpl(gpr4, Integer.MIN_VALUE); 3647 masm.jcc(ConditionFlag.AboveEqual, bb0); 3648 3649 masm.leaq(gpr4, externalAddress(tblAddrExpPtr)); 3650 masm.movdqu(temp8, new AMD64Address(gpr2, 48)); // 0xfffffffe, 3651 // 0x3fdfffff, 3652 // 0xfffffffe, 3653 // 0x3fdfffff 3654 masm.movdqu(temp4, new AMD64Address(gpr2, 64)); // 0xe3289860, 3655 // 0x3f56c15c, 3656 // 0x555b9e25, 3657 // 0x3fa55555 3658 masm.movdqu(temp5, new AMD64Address(gpr2, 80)); // 0xc090cf0f, 3659 // 0x3f811115, 3660 // 0x55548ba1, 3661 // 0x3fc55555 3662 masm.mulpd(temp1, dest); 3663 masm.addpd(temp1, temp6); 3664 masm.movapd(temp7, temp1); 3665 masm.movdl(gpr1, temp1); 3666 masm.pand(temp7, temp9); 3667 masm.subpd(temp1, temp6); 3668 masm.mulpd(temp2, temp1); 3669 masm.mulpd(temp3, temp1); 3670 masm.paddq(temp7, temp10); 3671 masm.subpd(dest, temp2); 3672 masm.movl(gpr3, gpr1); 3673 masm.andl(gpr3, 63); 3674 masm.shll(gpr3, 4); 3675 masm.movdqu(temp2, new AMD64Address(gpr3, gpr4, Scale.Times1, 0)); 3676 masm.sarl(gpr1, 6); 3677 masm.psllq(temp7, 46); 3678 masm.subpd(dest, temp3); 3679 masm.mulpd(temp4, dest); 3680 masm.movl(gpr4, gpr1); 3681 masm.movapd(temp6, dest); 3682 masm.movapd(temp1, dest); 3683 masm.mulpd(temp6, temp6); 3684 masm.mulpd(dest, temp6); 3685 masm.addpd(temp5, temp4); 3686 masm.mulsd(dest, temp6); 3687 masm.mulpd(temp6, temp8); 
3688 masm.addsd(temp1, temp2); 3689 masm.unpckhpd(temp2, temp2); 3690 masm.mulpd(dest, temp5); 3691 masm.addsd(temp1, dest); 3692 masm.por(temp2, temp7); 3693 masm.unpckhpd(dest, dest); 3694 masm.addsd(dest, temp1); 3695 masm.addsd(dest, temp6); 3696 masm.addl(gpr4, 894); 3697 masm.cmpl(gpr4, 1916); 3698 masm.jcc(ConditionFlag.Above, bb1); 3699 3700 masm.mulsd(dest, temp2); 3701 masm.addsd(dest, temp2); 3702 masm.jmp(bb14); 3703 3704 masm.bind(bb1); 3705 masm.movdqu(temp6, externalAddress(expBiasPtr)); // 0x00000000, 3706 // 0x3ff00000, 3707 // 0x00000000, 3708 // 0x3ff00000 3709 masm.xorpd(temp3, temp3); 3710 masm.movdqu(temp4, externalAddress(allOnesExpPtr)); // 0xffffffff, 3711 // 0xffffffff, 3712 // 0xffffffff, 3713 // 0xffffffff 3714 masm.movl(gpr4, -1022); 3715 masm.subl(gpr4, gpr1); 3716 masm.movdl(temp5, gpr4); 3717 masm.psllq(temp4, temp5); 3718 masm.movl(gpr3, gpr1); 3719 masm.sarl(gpr1, 1); 3720 masm.pinsrw(temp3, gpr1, 3); 3721 masm.psllq(temp3, 4); 3722 masm.psubd(temp2, temp3); 3723 masm.mulsd(dest, temp2); 3724 masm.cmpl(gpr4, 52); 3725 masm.jcc(ConditionFlag.Greater, bb2); 3726 3727 masm.pand(temp4, temp2); 3728 masm.paddd(temp3, temp6); 3729 masm.subsd(temp2, temp4); 3730 masm.addsd(dest, temp2); 3731 masm.cmpl(gpr3, 1023); 3732 masm.jcc(ConditionFlag.GreaterEqual, bb3); 3733 3734 masm.pextrw(gpr3, dest, 3); 3735 masm.andl(gpr3, 32768); 3736 masm.orl(gpr4, gpr3); 3737 masm.cmpl(gpr4, 0); 3738 masm.jcc(ConditionFlag.Equal, bb4); 3739 3740 masm.movapd(temp6, dest); 3741 masm.addsd(dest, temp4); 3742 masm.mulsd(dest, temp3); 3743 masm.pextrw(gpr3, dest, 3); 3744 masm.andl(gpr3, 32752); 3745 masm.cmpl(gpr3, 0); 3746 masm.jcc(ConditionFlag.Equal, bb5); 3747 3748 masm.jmp(bb14); 3749 3750 masm.bind(bb5); 3751 masm.mulsd(temp6, temp3); 3752 masm.mulsd(temp4, temp3); 3753 masm.movdqu(dest, temp6); 3754 masm.pxor(temp6, temp4); 3755 masm.psrad(temp6, 31); 3756 masm.pshufd(temp6, temp6, 85); 3757 masm.psllq(dest, 1); 3758 masm.psrlq(dest, 1); 3759 
masm.pxor(dest, temp6); 3760 masm.psrlq(temp6, 63); 3761 masm.paddq(dest, temp6); 3762 masm.paddq(dest, temp4); 3763 masm.jmp(bb14); 3764 3765 masm.bind(bb4); 3766 masm.addsd(dest, temp4); 3767 masm.mulsd(dest, temp3); 3768 masm.jmp(bb14); 3769 3770 masm.bind(bb3); 3771 masm.addsd(dest, temp4); 3772 masm.mulsd(dest, temp3); 3773 masm.pextrw(gpr3, dest, 3); 3774 masm.andl(gpr3, 32752); 3775 masm.cmpl(gpr3, 32752); 3776 masm.jcc(ConditionFlag.AboveEqual, bb7); 3777 3778 masm.jmp(bb14); 3779 3780 masm.bind(bb2); 3781 masm.paddd(temp3, temp6); 3782 masm.addpd(dest, temp2); 3783 masm.mulsd(dest, temp3); 3784 masm.jmp(bb14); 3785 3786 masm.bind(bb8); 3787 masm.movsd(dest, externalAddress(xMaxExpPtr)); // 0xffffffff, 3788 // 0x7fefffff 3789 masm.movsd(temp8, externalAddress(xMinExpPtr)); // 0x00000000, 3790 // 0x00100000 3791 masm.cmpl(gpr1, 2146435072); 3792 masm.jcc(ConditionFlag.AboveEqual, bb9); 3793 3794 masm.movl(gpr1, new AMD64Address(gpr5, 4)); 3795 masm.cmpl(gpr1, Integer.MIN_VALUE); 3796 masm.jcc(ConditionFlag.AboveEqual, bb10); 3797 3798 masm.mulsd(dest, dest); 3799 3800 masm.bind(bb7); 3801 masm.jmp(bb14); 3802 3803 masm.bind(bb10); 3804 masm.mulsd(dest, temp8); 3805 masm.jmp(bb14); 3806 3807 masm.bind(bb9); 3808 masm.movl(gpr4, stackSlot); 3809 masm.cmpl(gpr1, 2146435072); 3810 masm.jcc(ConditionFlag.Above, bb11); 3811 3812 masm.cmpl(gpr4, 0); 3813 masm.jcc(ConditionFlag.NotEqual, bb11); 3814 3815 masm.movl(gpr1, new AMD64Address(gpr5, 4)); 3816 masm.cmpl(gpr1, 2146435072); 3817 masm.jcc(ConditionFlag.NotEqual, bb12); 3818 3819 masm.movsd(dest, externalAddress(infExpPtr)); // 0x00000000, 3820 // 0x7ff00000 3821 masm.jmp(bb14); 3822 3823 masm.bind(bb12); 3824 masm.movsd(dest, externalAddress(zeroExpPtr)); // 0x00000000, 3825 // 0x00000000 3826 masm.jmp(bb14); 3827 3828 masm.bind(bb11); 3829 masm.movsd(dest, stackSlot); 3830 masm.addsd(dest, dest); 3831 masm.jmp(bb14); 3832 3833 masm.bind(bb0); 3834 masm.movl(gpr1, new AMD64Address(gpr5, 4)); 3835 
masm.andl(gpr1, 2147483647); 3836 masm.cmpl(gpr1, 1083179008); 3837 masm.jcc(ConditionFlag.AboveEqual, bb8); 3838 3839 masm.addsd(dest, externalAddress(onePtr)); // 0x00000000, 3840 // 0x3ff00000 3841 masm.bind(bb14); 3842 } 3843} 3844