//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64-isel"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

//===--------------------------------------------------------------------===//
/// AArch64 specific code to select AArch64 machine instructions for
/// SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;

  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm),
      Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
  }

  virtual const char *getPassName() const {
    return "AArch64 Instruction Selection";
  }

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

  /// Match a constant offset usable as a scaled uimm12 for a load/store of
  /// MemSize bytes: the value must be a multiple of MemSize with a scaled
  /// quotient fitting in 12 bits.
  template<unsigned MemSize>
  bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
    if (!CN || CN->getZExtValue() % MemSize != 0
        || CN->getZExtValue() / MemSize > 0xfff)
      return false;

    UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
    return true;
  }

  /// Template shim forwarding to the out-of-line implementation with the
  /// register width supplied as a runtime argument.
  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  /// Used for pre-lowered address-reference nodes, so we already know
  /// the fields match. This operand's job is simply to add an
  /// appropriate shift operand to the MOVZ/MOVK instruction.
  template<unsigned LogShift>
  bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
    Imm = N;
    Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
    return true;
  }

  bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                unsigned RegWidth);

  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    char ConstraintCode,
                                    std::vector<SDValue> &OutOps);

  bool SelectLogicalImm(SDValue N, SDValue &Imm);

  /// Template shim forwarding to the out-of-line implementation with the
  /// register width supplied as a runtime argument.
  template<unsigned RegWidth>
  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
    return SelectTSTBOperand(N, FixedPos, RegWidth);
  }

  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);

  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
                       unsigned Op64);

  /// Put the given constant into a pool and return a DAG which will give its
  /// address.
  SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);

  SDNode *TrySelectToMoveImm(SDNode *N);
  SDNode *LowerToFPLitPool(SDNode *Node);
  SDNode *SelectToLitPool(SDNode *N);

  SDNode* Select(SDNode*);
private:
  /// Get the opcode for table lookup instruction
  unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);

  /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
  /// IsExt is to indicate if the result will be extended with an argument.
  SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);

  /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *Opcode);

  /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *Opcodes);

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element that is returned
  /// unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
                      unsigned SubRegs[]);

  /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
  /// The opcode array specifies the instructions used for load.
  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                       const uint16_t *Opcodes);

  /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4.
  /// The opcode arrays specify the instructions used for load/store.
  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                          unsigned NumVecs, const uint16_t *Opcodes);

  SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
                               SDValue Operand);
};
}

/// Match a floating-point constant that is an exact power of two usable as
/// the fbits operand of an FCVT[SU] fixed-point conversion for a register of
/// RegWidth bits.
bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                              unsigned RegWidth) {
  const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
  if (!CN) return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
  bool IsExact;

  // fbits is between 1 and 64 in the worst-case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
  return true;
}

bool
AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
                                                  char ConstraintCode,
                                                  std::vector<SDValue> &OutOps) {
  switch (ConstraintCode) {
  default: llvm_unreachable("Unrecognised AArch64 memory constraint");
  case 'm':
    // FIXME: more freedom is actually permitted for 'm'. We can go
    // hunting for a base and an offset if we want. Of course, since
    // we don't really know how the operand is going to be used we're
    // probably restricted to the load/store pair's simm7 as an offset
    // range anyway.
  case 'Q':
    OutOps.push_back(Op);
  }

  return false;
}

/// Match the floating-point constant +0.0 (used to select FMOV/FCMP forms
/// that take a literal zero).
bool
AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
  ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
  if (!Imm || !Imm->getValueAPF().isPosZero())
    return false;

  // Doesn't actually carry any information, but keeps TableGen quiet.
  Dummy = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

/// Match a constant representable as an AArch64 logical immediate for the
/// width of N's type; the encoded bit-pattern is returned in Imm.
bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
  uint32_t Bits;
  uint32_t RegWidth = N.getValueType().getSizeInBits();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
    return false;

  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
  return true;
}

/// Try to materialize the constant Node in a single instruction (MOVZ, MOVN
/// or an ORR with a logical immediate from the zero register). Returns NULL
/// if no single-instruction form exists.
SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
  SDNode *ResNode;
  SDLoc dl(Node);
  EVT DestType = Node->getValueType(0);
  unsigned DestWidth = DestType.getSizeInBits();

  unsigned MOVOpcode;
  EVT MOVType;
  int UImm16, Shift;
  uint32_t LogicalBits;

  uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
  if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
  } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
  } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
    // use a 32-bit instruction: "movn w0, 0xedbc".
    MOVType = MVT::i32;
    MOVOpcode = AArch64::MOVNwii;
  } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
    MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
    uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;

    return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
                              CurDAG->getRegister(ZR, DestType),
                              CurDAG->getTargetConstant(LogicalBits, MVT::i32));
  } else {
    // Can't handle it in one instruction. There's scope for permitting two (or
    // more) instructions, but that'll need more thought.
    return NULL;
  }

  ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
                                   CurDAG->getTargetConstant(UImm16, MVT::i32),
                                   CurDAG->getTargetConstant(Shift, MVT::i32));

  // If a 32-bit instruction was used for a 64-bit destination, widen the
  // result with SUBREG_TO_REG (the upper 32 bits are implicitly zero).
  if (MOVType != DestType) {
    ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
                          MVT::i64, MVT::i32, MVT::Other,
                          CurDAG->getTargetConstant(0, MVT::i64),
                          SDValue(ResNode, 0),
                          CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
  }

  return ResNode;
}

SDValue
AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
                                                const Constant *CV) {
  EVT PtrVT = getTargetLowering()->getPointerTy();

  switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
  case CodeModel::Small: {
    // Small model: ADRP + LO12 addressing of the pool entry, wrapped in a
    // target-specific node until a real instruction is selected.
    unsigned Alignment =
      getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
    return CurDAG->getNode(
        AArch64ISD::WrapperSmall, DL, PtrVT,
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
        CurDAG->getConstant(Alignment, MVT::i32));
  }
  case CodeModel::Large: {
    // Large model: build the full 64-bit address with MOVZ + three MOVKs,
    // one 16-bit group (G3..G0) at a time.
    SDNode *LitAddr;
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVZxii, DL, PtrVT,
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
        CurDAG->getTargetConstant(3, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
        CurDAG->getTargetConstant(2, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
        CurDAG->getTargetConstant(1, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
        CurDAG->getTargetConstant(0, MVT::i32));
    return SDValue(LitAddr, 0);
  }
  default:
    llvm_unreachable("Only small and large code models supported now");
  }
}

/// Lower an integer constant to a load from the constant pool, narrowing
/// 64-bit values to a 32-bit pool entry with a zero/sign-extending load when
/// the value permits.
SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
  SDLoc DL(Node);
  uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
  int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
  EVT DestType = Node->getValueType(0);

  // Since we may end up loading a 64-bit constant from a 32-bit entry the
  // constant in the pool may have a different type to the eventual node.
  ISD::LoadExtType Extension;
  EVT MemType;

  assert((DestType == MVT::i64 || DestType == MVT::i32)
         && "Only expect integer constants at the moment");

  if (DestType == MVT::i32) {
    Extension = ISD::NON_EXTLOAD;
    MemType = MVT::i32;
  } else if (UnsignedVal <= UINT32_MAX) {
    Extension = ISD::ZEXTLOAD;
    MemType = MVT::i32;
  } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
    Extension = ISD::SEXTLOAD;
    MemType = MVT::i32;
  } else {
    Extension = ISD::NON_EXTLOAD;
    MemType = MVT::i64;
  }

  Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
                                                  MemType.getSizeInBits()),
                                  UnsignedVal);
  SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
  unsigned Alignment =
    getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());

  return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
                            PoolAddr,
                            MachinePointerInfo::getConstantPool(), MemType,
                            /* isVolatile = */ false,
                            /* isNonTemporal = */ false,
                            Alignment).getNode();
}

/// Lower a floating-point constant to an invariant load from the constant
/// pool.
SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
  SDLoc DL(Node);
  const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
  EVT DestType = Node->getValueType(0);

  unsigned Alignment =
    getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
  SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);

  return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
                         MachinePointerInfo::getConstantPool(),
                         /* isVolatile = */ false,
                         /* isNonTemporal = */ false,
                         /* isInvariant = */ true,
                         Alignment).getNode();
}

/// Match a single-bit mask for TBZ/TBNZ; the tested bit index is returned in
/// FixedPos.
bool
AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
                                       unsigned RegWidth) {
  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  uint64_t Val = CN->getZExtValue();

  if (!isPowerOf2_64(Val)) return false;

  unsigned TestedBit = Log2_64(Val);
  // Checks above should have guaranteed that we haven't lost information in
  // finding TestedBit, but it must still be in range.
  if (TestedBit >= RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
  return true;
}

SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
                                          unsigned Op16,unsigned Op32,
                                          unsigned Op64) {
  // Mostly direct translation to the given operations, except that we preserve
  // the AtomicOrdering for use later on.
  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
  EVT VT = AN->getMemoryVT();

  unsigned Op;
  if (VT == MVT::i8)
    Op = Op8;
  else if (VT == MVT::i16)
    Op = Op16;
  else if (VT == MVT::i32)
    Op = Op32;
  else if (VT == MVT::i64)
    Op = Op64;
  else
    llvm_unreachable("Unexpected atomic operation");

  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
      Ops.push_back(AN->getOperand(i));

  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
  Ops.push_back(AN->getOperand(0)); // Chain moves to the end

  return CurDAG->SelectNodeTo(Node, Op,
                              AN->getValueType(0), MVT::Other,
                              &Ops[0], Ops.size());
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
                                    AArch64::DTripleRegClassID,
                                    AArch64::DQuadRegClassID };
  static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
                                AArch64::dsub_2, AArch64::dsub_3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
                                    AArch64::QTripleRegClassID,
                                    AArch64::QQuadRegClassID };
  static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
                                AArch64::qsub_2, AArch64::qsub_3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         unsigned RegClassIDs[],
                                         unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0].getNode());

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}


// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
/// Map a post-indexed NEON load/store opcode with a fixed (immediate)
/// writeback increment to its register-increment counterpart. Opcodes not in
/// the table are returned unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  switch (Opc) {
  default: break;
  case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
  case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
  case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
  case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
  case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
  case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
  case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
  case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;

  case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
  case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
  case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
  case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
  case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
  case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
  case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;

  case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
  case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
  case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
  case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
  case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
  case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
  case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;

  case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
  case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
  case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
  case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
  case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
  case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
  case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;

  case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register;
  case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register;
  case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register;
  case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register;
  case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register;
  case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register;
  case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register;
  case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register;

  case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register;
  case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register;
  case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register;
  case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register;
  case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register;
  case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register;
  case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register;
  case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register;

  case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register;
  case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register;
  case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register;
  case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register;
  case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register;
  case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register;
  case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register;
  case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register;

  case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
  case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
  case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
  case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
  case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
  case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
  case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
  case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;

  case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
  case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
  case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
  case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
  case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
  case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
  case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;

  case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
  case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
  case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
  case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
  case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
  case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
  case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;

  case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
  case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
  case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
  case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
  case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
  case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
  case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;

  case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register;
  case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register;
  case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register;
  case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register;
  case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register;
  case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register;
  case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register;
  case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register;

  case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register;
  case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register;
  case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register;
  case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register;
  case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register;
  case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register;
  case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register;
  case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register;

  case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register;
  case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register;
  case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register;
  case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register;
  case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register;
  case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
  case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
  case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;

  // Post-index of duplicate loads
  case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
  case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
  case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
  case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
  case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
  case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
  case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
  case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;

  case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
  case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
  case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
  case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
  case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
  case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
  case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
  case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;

  case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
  case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
  case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
  case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
  case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
  case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
  case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
  case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;

  // Post-index of lane loads
  case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
  case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
  case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
  case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;

  case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
  case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
  case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
  case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;

  case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
  case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
  case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
  case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;

  // Post-index of lane stores
  case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
  case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
  case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
  case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;

  case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
  case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
  case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
  case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;

  case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
  case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
  case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
  case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
                                       unsigned NumVecs,
                                       const uint16_t *Opcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");

  EVT VT = N->getValueType(0);
  unsigned OpcodeIndex;
  bool is64BitVector = VT.is64BitVector();
  // Opcode tables are laid out as 4 x 64-bit entries followed by 4 x 128-bit
  // entries, indexed by element size (8/16/32/64 bits).
  switch (VT.getScalarType().getSizeInBits()) {
  case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
  case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
  case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
  case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
  default: llvm_unreachable("unhandled vector load type");
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  SmallVector<SDValue, 2> Ops;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }

  Ops.push_back(N->getOperand(0)); // Push back the Chain

  SmallVector<EVT, 3> ResTys;
  // Push back the type of return super register
  if (NumVecs == 1)
    ResTys.push_back(VT);
  else if (NumVecs == 3)
    ResTys.push_back(MVT::Untyped);
  else {
    EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
                                 is64BitVector ? NumVecs : NumVecs * 2);
    ResTys.push_back(ResTy);
  }

  if (isUpdating)
    ResTys.push_back(MVT::i64); // Type of the updated register
  ResTys.push_back(MVT::Other); // Type of the Chain
  SDLoc dl(N);
  SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1)
    return VLd;

  // If NumVecs > 1, the return result is a super register containing 2-4
  // consecutive vector registers.
  SDValue SuperReg = SDValue(VLd, 0);

  unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Update users of the Chain
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));

  return NULL;
}

SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
                                       unsigned NumVecs,
                                       const uint16_t *Opcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3;
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  unsigned OpcodeIndex;
  bool is64BitVector = VT.is64BitVector();
  // Same 8-entry table layout as SelectVLD: 64-bit entries first, then
  // 128-bit, indexed by element size.
  switch (VT.getScalarType().getSizeInBits()) {
  case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
  case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
  case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
  case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
  default: llvm_unreachable("unhandled vector store type");
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  SmallVector<EVT, 2> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i64);
  ResTys.push_back(MVT::Other); // Type for the Chain

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }

  // Combine the 1-4 source vectors into a single vector-list source operand.
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                               N->op_begin() + Vec0Idx + NumVecs);
  SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
  Ops.push_back(SrcReg);

  // Push back the Chain
  Ops.push_back(N->getOperand(0));

  // Transfer memoperands.
  SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

  return VSt;
}

/// Wrap Operand in a SUBREG_TO_REG node placing it at subregister index
/// SRIdx of a wider register of type VT.
SDValue
AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
                                          SDValue Operand) {
  SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
                        VT, VTD, MVT::Other,
                        CurDAG->getTargetConstant(0, MVT::i64),
                        Operand,
                        CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
  return SDValue(Reg, 0);
}

SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
                                          unsigned NumVecs,
                                          const uint16_t *Opcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
  SDLoc dl(N);

  EVT VT = N->getValueType(0);
  unsigned OpcodeIndex;
  bool is64BitVector = VT.is64BitVector();
  // Same 8-entry table layout as SelectVLD/SelectVST.
  switch (VT.getScalarType().getSizeInBits()) {
  case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
  case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
  case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
  case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
  default: llvm_unreachable("unhandled vector duplicate lane load type");
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  SDValue SuperReg;
  SmallVector<SDValue, 6> Ops;
  Ops.push_back(N->getOperand(1)); // Push back the Memory Address
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }
  Ops.push_back(N->getOperand(0)); // Push back the Chain

  SmallVector<EVT, 3> ResTys;
  // Push back the type of return super register
  if (NumVecs == 3)
    ResTys.push_back(MVT::Untyped);
  else {
    EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
                                 is64BitVector ? NumVecs : NumVecs * 2);
    ResTys.push_back(ResTy);
  }
  if (isUpdating)
    ResTys.push_back(MVT::i64); // Type of the updated register
  ResTys.push_back(MVT::Other); // Type of the Chain
  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);

  SuperReg = SDValue(VLdDup, 0);
  unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
  // Update uses of each registers in super register
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Update uses of the Chain
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  return NULL;
}

// We only have 128-bit vector type of load/store lane instructions.
// If it is 64-bit vector, we also select it to the 128-bit instructions.
// Just use SUBREG_TO_REG to adapt the input to 128-bit vector and
// EXTRACT_SUBREG to get the 64-bit vector from the 128-bit vector output.

/// Select a NEON per-lane load/store node (LD2-4/ST2-4 "lane" forms),
/// optionally with post-increment writeback. 64-bit vector operands are
/// widened to 128 bits first (see comment above).
///
/// \param IsLoad     true for the LDxLN forms, false for STxLN.
/// \param Opcodes    table of 4 machine opcodes indexed by element size
///                   (8/16/32/64 bits); only 128-bit forms exist.
/// \returns the machine node for stores, or NULL for loads (whose results
///          are rewired via ReplaceUses).
SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
                                             bool isUpdating, unsigned NumVecs,
                                             const uint16_t *Opcodes) {
  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3;

  SDValue Chain = N->getOperand(0);
  // Lane number is the constant operand following the vector list.
  unsigned Lane =
      cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  EVT VT64; // 64-bit Vector Type

  if (is64BitVector) {
    // Remember the narrow type and double the element count so VT names the
    // 128-bit register the instruction actually operates on.
    VT64 = VT;
    VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
                          VT.getVectorNumElements() * 2);
  }

  unsigned OpcodeIndex;
  switch (VT.getScalarType().getSizeInBits()) {
  case 8: OpcodeIndex = 0; break;
  case 16: OpcodeIndex = 1; break;
  case 32: OpcodeIndex = 2; break;
  case 64: OpcodeIndex = 3; break;
  default: llvm_unreachable("unhandled vector lane load/store type");
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  SmallVector<EVT, 3> ResTys;
  if (IsLoad) {
    // Push back the type of return super register
    if (NumVecs == 3)
      ResTys.push_back(MVT::Untyped);
    else {
      EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
                                   is64BitVector ? NumVecs : NumVecs * 2);
      ResTys.push_back(ResTy);
    }
  }
  if (isUpdating)
    ResTys.push_back(MVT::i64); // Type of the updated register
  ResTys.push_back(MVT::Other); // Type of Chain
  SmallVector<SDValue, 5> Ops;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }

  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                               N->op_begin() + Vec0Idx + NumVecs);
  // Widen any 64-bit inputs so the Q-register tuple below is well-formed.
  if (is64BitVector)
    for (unsigned i = 0; i < Regs.size(); i++)
      Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
  SDValue SuperReg = createQTuple(Regs);

  Ops.push_back(SuperReg); // Source Reg
  SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
  Ops.push_back(LaneValue);
  Ops.push_back(Chain); // Push back the Chain

  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
  if (!IsLoad)
    return VLdLn;

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  unsigned Sub0 = AArch64::qsub_0;
  // Update uses of each register in the super register; for a 64-bit
  // original type a second EXTRACT_SUBREG narrows the Q result back down.
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
    SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
    if (is64BitVector) {
      SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
    }
    ReplaceUses(SDValue(N, Vec), SUB0);
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  return NULL;
}

/// Return the TBL/TBX machine opcode for a table-lookup with \p NumOfVec
/// table registers (1-4).
///
/// \param IsExt   true selects TBX (extension: out-of-range indices keep the
///                destination element), false selects TBL.
/// \param Is64Bit true selects the 8b (64-bit result) form, false the 16b.
unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
                                        unsigned NumOfVec) {
  assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range");

  unsigned Opc = 0;
  switch (NumOfVec) {
  default:
    break;
  case 1:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
    else
      Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
    break;
  case 2:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
    else
      Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
    break;
  case 3:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
    else
      Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
    break;
  case 4:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
    else
      Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
    break;
  }

  return Opc;
}

/// Select a vtbl/vtbx table-lookup intrinsic into a TBL/TBX machine node.
///
/// \param NumVecs number of table vectors (1-4).
/// \param IsExt   true for vtbx (which carries an extra "fallback" operand
///                at index 1, shifting the table vectors to index 2).
SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
                                        bool IsExt) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  // Check the element of look up table is 64-bit or not
  unsigned Vec0Idx = IsExt ? 2 : 1;
  assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
         "The element of lookup table for vtbl and vtbx must be 128-bit");

  // Check the return value type is 64-bit or not
  EVT ResVT = N->getValueType(0);
  bool is64BitRes = ResVT.is64BitVector();

  // Create new SDValue for vector list
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                               N->op_begin() + Vec0Idx + NumVecs);
  SDValue TblReg = createQTuple(Regs);
  unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);

  SmallVector<SDValue, 3> Ops;
  if (IsExt)
    Ops.push_back(N->getOperand(1)); // TBX fallback value
  Ops.push_back(TblReg);
  Ops.push_back(N->getOperand(Vec0Idx + NumVecs)); // Index vector
  return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
}

/// Main instruction-selection entry point: handle the opcodes that need
/// custom C++ selection (atomics, constants, NEON multi-vector load/store
/// intrinsics, table lookups), then fall back to the TableGen-generated
/// SelectCode for everything else.
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
  // Dump information about the Node being selected
  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");

  if (Node->isMachineOpcode()) {
    // Already selected; nothing to do.
    DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return NULL;
  }

  switch (Node->getOpcode()) {
  // Atomic RMW/compare-and-swap nodes: SelectAtomic picks the pseudo
  // matching the access width from the four size-specific candidates.
  case ISD::ATOMIC_LOAD_ADD:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_ADD_I8,
                        AArch64::ATOMIC_LOAD_ADD_I16,
                        AArch64::ATOMIC_LOAD_ADD_I32,
                        AArch64::ATOMIC_LOAD_ADD_I64);
  case ISD::ATOMIC_LOAD_SUB:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_SUB_I8,
                        AArch64::ATOMIC_LOAD_SUB_I16,
                        AArch64::ATOMIC_LOAD_SUB_I32,
                        AArch64::ATOMIC_LOAD_SUB_I64);
  case ISD::ATOMIC_LOAD_AND:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_AND_I8,
                        AArch64::ATOMIC_LOAD_AND_I16,
                        AArch64::ATOMIC_LOAD_AND_I32,
                        AArch64::ATOMIC_LOAD_AND_I64);
  case ISD::ATOMIC_LOAD_OR:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_OR_I8,
                        AArch64::ATOMIC_LOAD_OR_I16,
                        AArch64::ATOMIC_LOAD_OR_I32,
                        AArch64::ATOMIC_LOAD_OR_I64);
  case ISD::ATOMIC_LOAD_XOR:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_XOR_I8,
                        AArch64::ATOMIC_LOAD_XOR_I16,
                        AArch64::ATOMIC_LOAD_XOR_I32,
                        AArch64::ATOMIC_LOAD_XOR_I64);
  case ISD::ATOMIC_LOAD_NAND:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_NAND_I8,
                        AArch64::ATOMIC_LOAD_NAND_I16,
                        AArch64::ATOMIC_LOAD_NAND_I32,
                        AArch64::ATOMIC_LOAD_NAND_I64);
  case ISD::ATOMIC_LOAD_MIN:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_MIN_I8,
                        AArch64::ATOMIC_LOAD_MIN_I16,
                        AArch64::ATOMIC_LOAD_MIN_I32,
                        AArch64::ATOMIC_LOAD_MIN_I64);
  case ISD::ATOMIC_LOAD_MAX:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_MAX_I8,
                        AArch64::ATOMIC_LOAD_MAX_I16,
                        AArch64::ATOMIC_LOAD_MAX_I32,
                        AArch64::ATOMIC_LOAD_MAX_I64);
  case ISD::ATOMIC_LOAD_UMIN:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_UMIN_I8,
                        AArch64::ATOMIC_LOAD_UMIN_I16,
                        AArch64::ATOMIC_LOAD_UMIN_I32,
                        AArch64::ATOMIC_LOAD_UMIN_I64);
  case ISD::ATOMIC_LOAD_UMAX:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_UMAX_I8,
                        AArch64::ATOMIC_LOAD_UMAX_I16,
                        AArch64::ATOMIC_LOAD_UMAX_I32,
                        AArch64::ATOMIC_LOAD_UMAX_I64);
  case ISD::ATOMIC_SWAP:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_SWAP_I8,
                        AArch64::ATOMIC_SWAP_I16,
                        AArch64::ATOMIC_SWAP_I32,
                        AArch64::ATOMIC_SWAP_I64);
  case ISD::ATOMIC_CMP_SWAP:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_CMP_SWAP_I8,
                        AArch64::ATOMIC_CMP_SWAP_I16,
                        AArch64::ATOMIC_CMP_SWAP_I32,
                        AArch64::ATOMIC_CMP_SWAP_I64);
  case ISD::FrameIndex: {
    // Materialise a frame index as "ADD xD, <fi>, #0".
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    EVT PtrTy = getTargetLowering()->getPointerTy();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
    return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
                                TFI, CurDAG->getTargetConstant(0, PtrTy));
  }
  case ISD::ConstantPool: {
    // Constant pools are fine, just create a Target entry.
    ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
    const Constant *C = CN->getConstVal();
    SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));

    ReplaceUses(SDValue(Node, 0), CP);
    return NULL;
  }
  case ISD::Constant: {
    SDNode *ResNode = 0;
    if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
      // XZR and WZR are probably even better than an actual move: most of the
      // time they can be folded into another instruction with *no* cost.

      EVT Ty = Node->getValueType(0);
      assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
      uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
      ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                       SDLoc(Node),
                                       Register, Ty).getNode();
    }

    // Next best option is a move-immediate, see if we can do that.
    if (!ResNode) {
      ResNode = TrySelectToMoveImm(Node);
    }

    if (ResNode)
      return ResNode;

    // If even that fails we fall back to a lit-pool entry at the moment. Future
    // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
    ResNode = SelectToLitPool(Node);
    assert(ResNode && "We need *some* way to materialise a constant");

    // We want to continue selection at this point since the litpool access
    // generated used generic nodes for simplicity.
    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
    Node = ResNode;
    break;
  }
  case ISD::ConstantFP: {
    if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
      // FMOV will take care of it from TableGen
      break;
    }

    SDNode *ResNode = LowerToFPLitPool(Node);
    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));

    // We want to continue selection at this point since the litpool access
    // generated used generic nodes for simplicity.
    Node = ResNode;
    break;
  }
  // Post-increment NEON structure loads. Each table is {8B,4H,2S,1D,
  // 16B,8H,4S,2D}; the 1D multi-structure slots fall back to the LD1xN
  // forms because LD2/3/4 have no 1D variant.
  case AArch64ISD::NEON_LD1_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed,
      AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed,
      AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
      AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
    };
    return SelectVLD(Node, true, 1, Opcodes);
  }
  case AArch64ISD::NEON_LD2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed,
      AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
      AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
      AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
    };
    return SelectVLD(Node, true, 2, Opcodes);
  }
  case AArch64ISD::NEON_LD3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed,
      AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
      AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
      AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
    };
    return SelectVLD(Node, true, 3, Opcodes);
  }
  case AArch64ISD::NEON_LD4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed,
      AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
      AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
      AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
    };
    return SelectVLD(Node, true, 4, Opcodes);
  }
  case AArch64ISD::NEON_LD1x2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed,
      AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
      AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed,
      AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed
    };
    return SelectVLD(Node, true, 2, Opcodes);
  }
  case AArch64ISD::NEON_LD1x3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed,
      AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
      AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed,
      AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed
    };
    return SelectVLD(Node, true, 3, Opcodes);
  }
  case AArch64ISD::NEON_LD1x4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed,
      AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
      AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed,
      AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed
    };
    return SelectVLD(Node, true, 4, Opcodes);
  }
  // Post-increment NEON structure stores (same table layout as the loads).
  case AArch64ISD::NEON_ST1_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed,
      AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed,
      AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
      AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
    };
    return SelectVST(Node, true, 1, Opcodes);
  }
  case AArch64ISD::NEON_ST2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed,
      AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
      AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
      AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
    };
    return SelectVST(Node, true, 2, Opcodes);
  }
  case AArch64ISD::NEON_ST3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed,
      AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
      AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
      AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
    };
    return SelectVST(Node, true, 3, Opcodes);
  }
  case AArch64ISD::NEON_ST4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed,
      AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
      AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
      AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
    };
    return SelectVST(Node, true, 4, Opcodes);
  }
  // Load-and-duplicate (LDnR) forms, plain and post-increment.
  case AArch64ISD::NEON_LD2DUP: {
    static const uint16_t Opcodes[] = {
      AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S,
      AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H,
      AArch64::LD2R_4S, AArch64::LD2R_2D
    };
    return SelectVLDDup(Node, false, 2, Opcodes);
  }
  case AArch64ISD::NEON_LD3DUP: {
    static const uint16_t Opcodes[] = {
      AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S,
      AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H,
      AArch64::LD3R_4S, AArch64::LD3R_2D
    };
    return SelectVLDDup(Node, false, 3, Opcodes);
  }
  case AArch64ISD::NEON_LD4DUP: {
    static const uint16_t Opcodes[] = {
      AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S,
      AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H,
      AArch64::LD4R_4S, AArch64::LD4R_2D
    };
    return SelectVLDDup(Node, false, 4, Opcodes);
  }
  case AArch64ISD::NEON_LD2DUP_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed,
      AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed,
      AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed,
      AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed
    };
    return SelectVLDDup(Node, true, 2, Opcodes);
  }
  case AArch64ISD::NEON_LD3DUP_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed,
      AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed,
      AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed,
      AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed
    };
    return SelectVLDDup(Node, true, 3, Opcodes);
  }
  case AArch64ISD::NEON_LD4DUP_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed,
      AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed,
      AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed,
      AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed
    };
    return SelectVLDDup(Node, true, 4, Opcodes);
  }
  // Post-increment per-lane loads/stores; tables hold the 4 element sizes.
  case AArch64ISD::NEON_LD2LN_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed,
      AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed
    };
    return SelectVLDSTLane(Node, true, true, 2, Opcodes);
  }
  case AArch64ISD::NEON_LD3LN_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed,
      AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed
    };
    return SelectVLDSTLane(Node, true, true, 3, Opcodes);
  }
  case AArch64ISD::NEON_LD4LN_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed,
      AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed
    };
    return SelectVLDSTLane(Node, true, true, 4, Opcodes);
  }
  case AArch64ISD::NEON_ST2LN_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
      AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
    };
    return SelectVLDSTLane(Node, false, true, 2, Opcodes);
  }
  case AArch64ISD::NEON_ST3LN_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
      AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
    };
    return SelectVLDSTLane(Node, false, true, 3, Opcodes);
  }
  case AArch64ISD::NEON_ST4LN_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
      AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
    };
    return SelectVLDSTLane(Node, false, true, 4, Opcodes);
  }
  case AArch64ISD::NEON_ST1x2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed,
      AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
      AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
      AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed
    };
    return SelectVST(Node, true, 2, Opcodes);
  }
  case AArch64ISD::NEON_ST1x3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed,
      AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
      AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
      AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed
    };
    return SelectVST(Node, true, 3, Opcodes);
  }
  case AArch64ISD::NEON_ST1x4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed,
      AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
      AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
      AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed
    };
    return SelectVST(Node, true, 4, Opcodes);
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    bool IsExt = false;
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_neon_vtbx1:
      IsExt = true;
      // Fall through: vtbxN shares selection with vtblN, just with IsExt set.
    case Intrinsic::aarch64_neon_vtbl1:
      return SelectVTBL(Node, 1, IsExt);
    case Intrinsic::aarch64_neon_vtbx2:
      IsExt = true;
      // Fall through.
    case Intrinsic::aarch64_neon_vtbl2:
      return SelectVTBL(Node, 2, IsExt);
    case Intrinsic::aarch64_neon_vtbx3:
      IsExt = true;
      // Fall through.
    case Intrinsic::aarch64_neon_vtbl3:
      return SelectVTBL(Node, 3, IsExt);
    case Intrinsic::aarch64_neon_vtbx4:
      IsExt = true;
      // Fall through.
    case Intrinsic::aarch64_neon_vtbl4:
      return SelectVTBL(Node, 4, IsExt);
    }
    break;
  }
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    // Chained intrinsics: the intrinsic ID is operand 1 (operand 0 is the
    // Chain). Non-writeback NEON structure loads/stores live here.
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t Opcodes[] = {
        AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
        AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
      };
      return SelectVLD(Node, false, 1, Opcodes);
    }
    case Intrinsic::arm_neon_vld2: {
      static const uint16_t Opcodes[] = {
        AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
        AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
      };
      return SelectVLD(Node, false, 2, Opcodes);
    }
    case Intrinsic::arm_neon_vld3: {
      static const uint16_t Opcodes[] = {
        AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
        AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
      };
      return SelectVLD(Node, false, 3, Opcodes);
    }
    case Intrinsic::arm_neon_vld4: {
      static const uint16_t Opcodes[] = {
        AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
        AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
      };
      return SelectVLD(Node, false, 4, Opcodes);
    }
    case Intrinsic::aarch64_neon_vld1x2: {
      static const uint16_t Opcodes[] = {
        AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
        AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
        AArch64::LD1x2_4S, AArch64::LD1x2_2D
      };
      return SelectVLD(Node, false, 2, Opcodes);
    }
    case Intrinsic::aarch64_neon_vld1x3: {
      static const uint16_t Opcodes[] = {
        AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
        AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
        AArch64::LD1x3_4S, AArch64::LD1x3_2D
      };
      return SelectVLD(Node, false, 3, Opcodes);
    }
    case Intrinsic::aarch64_neon_vld1x4: {
      static const uint16_t Opcodes[] = {
        AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
        AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
        AArch64::LD1x4_4S, AArch64::LD1x4_2D
      };
      return SelectVLD(Node, false, 4, Opcodes);
    }
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t Opcodes[] = {
        AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
        AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
      };
      return SelectVST(Node, false, 1, Opcodes);
    }
    case Intrinsic::arm_neon_vst2: {
      static const uint16_t Opcodes[] = {
        AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
        AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
      };
      return SelectVST(Node, false, 2, Opcodes);
    }
    case Intrinsic::arm_neon_vst3: {
      static const uint16_t Opcodes[] = {
        AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
        AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
      };
      return SelectVST(Node, false, 3, Opcodes);
    }
    case Intrinsic::arm_neon_vst4: {
      static const uint16_t Opcodes[] = {
        AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
        AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
      };
      return SelectVST(Node, false, 4, Opcodes);
    }
    case Intrinsic::aarch64_neon_vst1x2: {
      static const uint16_t Opcodes[] = {
        AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
        AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
        AArch64::ST1x2_4S, AArch64::ST1x2_2D
      };
      return SelectVST(Node, false, 2, Opcodes);
    }
    case Intrinsic::aarch64_neon_vst1x3: {
      static const uint16_t Opcodes[] = {
        AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
        AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
        AArch64::ST1x3_4S, AArch64::ST1x3_2D
      };
      return SelectVST(Node, false, 3, Opcodes);
    }
    case Intrinsic::aarch64_neon_vst1x4: {
      static const uint16_t Opcodes[] = {
        AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S,
        AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
        AArch64::ST1x4_4S, AArch64::ST1x4_2D
      };
      return SelectVST(Node, false, 4, Opcodes);
    }
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t Opcodes[] = {
        AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D
      };
      return SelectVLDSTLane(Node, true, false, 2, Opcodes);
    }
    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t Opcodes[] = {
        AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D
      };
      return SelectVLDSTLane(Node, true, false, 3, Opcodes);
    }
    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t Opcodes[] = {
        AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D
      };
      return SelectVLDSTLane(Node, true, false, 4, Opcodes);
    }
    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t Opcodes[] = {
        AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D
      };
      return SelectVLDSTLane(Node, false, false, 2, Opcodes);
    }
    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t Opcodes[] = {
        AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D
      };
      return SelectVLDSTLane(Node, false, false, 3, Opcodes);
    }
    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t Opcodes[] = {
        AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D
      };
      return SelectVLDSTLane(Node, false, false, 4, Opcodes);
    }
    } // End of switch IntNo
    break;
  } // End of case ISD::INTRINSIC_VOID and ISD::INTRINSIC_W_CHAIN
  default:
    break; // Let generic code handle it
  }

  // Anything not returned above (including nodes the custom cases rewrote
  // into generic nodes) goes through the TableGen-generated matcher.
  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << "\n");

  return ResNode;
}

/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
/// instruction scheduling.
FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}