//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that SystemZ uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H

#include "SystemZ.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"

namespace llvm {
namespace SystemZISD {
// SystemZ-specific SelectionDAG node types.  NOTE: enumerator order is
// significant — ATOMIC_SWAPW below is anchored to
// ISD::FIRST_TARGET_MEMORY_OPCODE, so everything from that point on is
// treated as a memory opcode.  Do not reorder casually.
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  // Return with a flag operand.  Operand 0 is the chain operand.
  RET_FLAG,

  // Calls a function.  Operand 0 is the chain operand and operand 1
  // is the target address.  The arguments start at operand 2.
  // There is an optional glue operand at the end.
  CALL,
  SIBCALL,

  // TLS calls.  Like regular calls, except operand 1 is the TLS symbol.
  // (The call target is implicitly __tls_get_offset.)
  TLS_GDCALL,
  TLS_LDCALL,

  // Wraps a TargetGlobalAddress that should be loaded using PC-relative
  // accesses (LARL).  Operand 0 is the address.
  PCREL_WRAPPER,

  // Used in cases where an offset is applied to a TargetGlobalAddress.
  // Operand 0 is the full TargetGlobalAddress and operand 1 is a
  // PCREL_WRAPPER for an anchor point.  This is used so that we can
  // cheaply refer to either the full address or the anchor point
  // as a register base.
  PCREL_OFFSET,

  // Integer absolute.
  IABS,

  // Integer comparisons.  There are three operands: the two values
  // to compare, and an integer of type SystemZICMP.
  ICMP,

  // Floating-point comparisons.  The two operands are the values to compare.
  FCMP,

  // Test under mask.  The first operand is ANDed with the second operand
  // and the condition codes are set on the result.  The third operand is
  // a boolean that is true if the condition codes need to distinguish
  // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
  // register forms do but the memory forms don't).
  TM,

  // Branches if a condition is true.  Operand 0 is the chain operand;
  // operand 1 is the 4-bit condition-code mask, with bit N in
  // big-endian order meaning "branch if CC=N"; operand 2 is the
  // target block and operand 3 is the flag operand.
  BR_CCMASK,

  // Selects between operand 0 and operand 1.  Operand 2 is the
  // mask of condition-code values for which operand 0 should be
  // chosen over operand 1; it has the same form as BR_CCMASK.
  // Operand 3 is the flag operand.
  SELECT_CCMASK,

  // Evaluates to the gap between the stack pointer and the
  // base of the dynamically-allocatable area.
  ADJDYNALLOC,

  // Extracts the value of a 32-bit access register.  Operand 0 is
  // the number of the register.
  EXTRACT_ACCESS,

  // Count number of bits set in operand 0 per byte.
  POPCNT,

  // Wrappers around the ISD opcodes of the same name.  The output and
  // first input operands are GR128s.  The trailing numbers are the
  // widths of the second operand in bits.
  UMUL_LOHI64,
  SDIVREM32,
  SDIVREM64,
  UDIVREM32,
  UDIVREM64,

  // Use a series of MVCs to copy bytes from one memory location to another.
  // The operands are:
  // - the target address
  // - the source address
  // - the constant length
  //
  // This isn't a memory opcode because we'd need to attach two
  // MachineMemOperands rather than one.
  MVC,

  // Like MVC, but implemented as a loop that handles X*256 bytes
  // followed by straight-line code to handle the rest (if any).
  // The value of X is passed as an additional operand.
  MVC_LOOP,

  // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
  NC,
  NC_LOOP,
  OC,
  OC_LOOP,
  XC,
  XC_LOOP,

  // Use CLC to compare two blocks of memory, with the same comments
  // as for MVC and MVC_LOOP.
  CLC,
  CLC_LOOP,

  // Use an MVST-based sequence to implement stpcpy().
  STPCPY,

  // Use a CLST-based sequence to implement strcmp().  The two input operands
  // are the addresses of the strings to compare.
  STRCMP,

  // Use an SRST-based sequence to search a block of memory.  The first
  // operand is the end address, the second is the start, and the third
  // is the character to search for.  CC is set to 1 on success and 2
  // on failure.
  SEARCH_STRING,

  // Store the CC value in bits 29 and 28 of an integer.
  IPM,

  // Perform a serialization operation.  (BCR 15,0 or BCR 14,0.)
  SERIALIZE,

  // Transaction begin.  The first operand is the chain, the second
  // the TDB pointer, and the third the immediate control field.
  // Returns chain and glue.
  TBEGIN,
  TBEGIN_NOFLOAT,

  // Transaction end.  Just the chain operand.  Returns chain and glue.
  TEND,

  // Create a vector constant by filling byte N of the result with bit
  // 15-N of the single operand.
  BYTE_MASK,

  // Create a vector constant by replicating an element-sized RISBG-style mask.
  // The first operand specifies the starting set bit and the second operand
  // specifies the ending set bit.  Both operands count from the MSB of the
  // element.
  ROTATE_MASK,

  // Replicate a GPR scalar value into all elements of a vector.
  REPLICATE,

  // Create a vector from two i64 GPRs.
  JOIN_DWORDS,

  // Replicate one element of a vector into all elements.  The first operand
  // is the vector and the second is the index of the element to replicate.
  SPLAT,

  // Interleave elements from the high half of operand 0 and the high half
  // of operand 1.
  MERGE_HIGH,

  // Likewise for the low halves.
  MERGE_LOW,

  // Concatenate the vectors in the first two operands, shift them left
  // by the third operand, and take the first half of the result.
  SHL_DOUBLE,

  // Take one element of the first v2i64 operand and the one element of
  // the second v2i64 operand and concatenate them to form a v2i64 result.
  // The third operand is a 4-bit value of the form 0A0B, where A and B
  // are the element selectors for the first operand and second operands
  // respectively.
  PERMUTE_DWORDS,

  // Perform a general vector permute on vector operands 0 and 1.
  // Each byte of operand 2 controls the corresponding byte of the result,
  // in the same way as a byte-level VECTOR_SHUFFLE mask.
  PERMUTE,

  // Pack vector operands 0 and 1 into a single vector with half-sized elements.
  PACK,

  // Likewise, but saturate the result and set CC.  PACKS_CC does signed
  // saturation and PACKLS_CC does unsigned saturation.
  PACKS_CC,
  PACKLS_CC,

  // Unpack the first half of vector operand 0 into double-sized elements.
  // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
  UNPACK_HIGH,
  UNPACKL_HIGH,

  // Likewise for the second half.
  UNPACK_LOW,
  UNPACKL_LOW,

  // Shift each element of vector operand 0 by the number of bits specified
  // by scalar operand 1.
  VSHL_BY_SCALAR,
  VSRL_BY_SCALAR,
  VSRA_BY_SCALAR,

  // For each element of the output type, sum across all sub-elements of
  // operand 0 belonging to the corresponding element, and add in the
  // rightmost sub-element of the corresponding element of operand 1.
  VSUM,

  // Compare integer vector operands 0 and 1 to produce the usual 0/-1
  // vector result.  VICMPE is for equality, VICMPH for "signed greater than"
  // and VICMPHL for "unsigned greater than".
  VICMPE,
  VICMPH,
  VICMPHL,

  // Likewise, but also set the condition codes on the result.
  VICMPES,
  VICMPHS,
  VICMPHLS,

  // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
  // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
  // greater than" and VFCMPHE for "ordered and greater than or equal to".
  VFCMPE,
  VFCMPH,
  VFCMPHE,

  // Likewise, but also set the condition codes on the result.
  VFCMPES,
  VFCMPHS,
  VFCMPHES,

  // Test floating-point data class for vectors.
  VFTCI,

  // Extend the even f32 elements of vector operand 0 to produce a vector
  // of f64 elements.
  VEXTEND,

  // Round the f64 elements of vector operand 0 to f32s and store them in the
  // even elements of the result.
  VROUND,

  // AND the two vector operands together and set CC based on the result.
  VTM,

  // String operations that set CC as a side-effect.
  VFAE_CC,
  VFAEZ_CC,
  VFEE_CC,
  VFEEZ_CC,
  VFENE_CC,
  VFENEZ_CC,
  VISTR_CC,
  VSTRC_CC,
  VSTRCZ_CC,

  // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
  // ATOMIC_LOAD_<op>.
  //
  // Operand 0: the address of the containing 32-bit-aligned field
  // Operand 1: the second operand of <op>, in the high bits of an i32
  //            for everything except ATOMIC_SWAPW
  // Operand 2: how many bits to rotate the i32 left to bring the first
  //            operand into the high bits
  // Operand 3: the negative of operand 2, for rotating the other way
  // Operand 4: the width of the field in bits (8 or 16)
  ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE,
  ATOMIC_LOADW_ADD,
  ATOMIC_LOADW_SUB,
  ATOMIC_LOADW_AND,
  ATOMIC_LOADW_OR,
  ATOMIC_LOADW_XOR,
  ATOMIC_LOADW_NAND,
  ATOMIC_LOADW_MIN,
  ATOMIC_LOADW_MAX,
  ATOMIC_LOADW_UMIN,
  ATOMIC_LOADW_UMAX,

  // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
  //
  // Operand 0: the address of the containing 32-bit-aligned field
  // Operand 1: the compare value, in the low bits of an i32
  // Operand 2: the swap value, in the low bits of an i32
  // Operand 3: how many bits to rotate the i32 left to bring the first
  //            operand into the high bits
  // Operand 4: the negative of operand 3, for rotating the other way
  // Operand 5: the width of the field in bits (8 or 16)
  ATOMIC_CMP_SWAPW,

  // Prefetch from the second operand using the 4-bit control code in
  // the first operand.  The code is 1 for a load prefetch and 2 for
  // a store prefetch.
  PREFETCH
};

// Return true if OPCODE is some kind of PC-relative address.
inline bool isPCREL(unsigned Opcode) {
  return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
}
} // end namespace SystemZISD

namespace SystemZICMP {
// Describes whether an integer comparison needs to be signed or unsigned,
// or whether either type is OK.
enum {
  Any,
  UnsignedOnly,
  SignedOnly
};
} // end namespace SystemZICMP

class SystemZSubtarget;
class SystemZTargetMachine;

// SystemZ implementation of TargetLowering: describes which operations and
// types are legal on SystemZ and how LLVM IR constructs (calls, returns,
// atomics, vectors, inline asm, ...) are lowered to SelectionDAG nodes.
class SystemZTargetLowering : public TargetLowering {
public:
  explicit SystemZTargetLowering(const TargetMachine &TM,
                                 const SystemZSubtarget &STI);

  // Override TargetLowering.
  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
    return MVT::i32;
  }
  MVT getVectorIdxTy(const DataLayout &DL) const override {
    // Only the lower 12 bits of an element index are used, so we don't
    // want to clobber the upper 32 bits of a GPR unnecessarily.
    return MVT::i32;
  }
  TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
    const override {
    // Widen subvectors to the full width rather than promoting integer
    // elements.  This is better because:
    //
    // (a) it means that we can handle the ABI for passing and returning
    //     sub-128 vectors without having to handle them as legal types.
    //
    // (b) we don't have instructions to extend on load and truncate on store,
    //     so promoting the integers is less efficient.
    //
    // (c) there are no multiplication instructions for the widest integer
    //     type (v2i64).
    if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
      return TypeWidenVector;
    return TargetLoweringBase::getPreferredVectorAction(VT);
  }
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                         EVT) const override;
  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS) const override;
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
                                      unsigned Align,
                                      bool *Fast) const override;
  bool isTruncateFree(Type *, Type *) const override;
  bool isTruncateFree(EVT, EVT) const override;
  const char *getTargetNodeName(unsigned Opcode) const override;
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;
  TargetLowering::ConstraintType
  getConstraintType(StringRef Constraint) const override;
  TargetLowering::ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;
  void LowerAsmOperandForConstraint(SDValue Op,
                                    std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  // Map the single-letter SystemZ memory constraints (Q, R, S, T) to the
  // generic inline-asm constraint codes; defer everything else to the base.
  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode.size() == 1) {
      switch(ConstraintCode[0]) {
      default:
        break;
      case 'Q':
        return InlineAsm::Constraint_Q;
      case 'R':
        return InlineAsm::Constraint_R;
      case 'S':
        return InlineAsm::Constraint_S;
      case 'T':
        return InlineAsm::Constraint_T;
      }
    }
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  unsigned
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    return SystemZ::R6D;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  unsigned
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    return SystemZ::R7D;
  }

  MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const
    override;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  bool allowTruncateForTailCall(Type *, Type *) const override;
  bool mayBeEmittedAsTailCall(CallInst *CI) const override;
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               SDLoc DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      SDLoc DL, SelectionDAG &DAG) const override;
  SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
                                      SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

private:
  const SystemZSubtarget &Subtarget;

  // Implement LowerOperation for individual opcodes.
  SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
                             SelectionDAG &DAG) const;
  SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
                            SelectionDAG &DAG, unsigned Opcode,
                            SDValue GOTOffset) const;
  SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(BlockAddressSDNode *Node,
                            SelectionDAG &DAG) const;
  SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
                              unsigned Opcode) const;
  SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
                                 unsigned UnpackHigh) const;
  SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;

  // Helpers for PerformDAGCombine.
  SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
                         unsigned Index, DAGCombinerInfo &DCI,
                         bool Force) const;
  SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
                                 DAGCombinerInfo &DCI) const;

  // If the last instruction before MBBI in MBB was some form of COMPARE,
  // try to replace it with a COMPARE AND BRANCH just before MBBI.
  // CCMask and Target are the BRC-like operands for the branch.
  // Return true if the change was made.
  bool convertPrevCompareToBranch(MachineBasicBlock *MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  unsigned CCMask,
                                  MachineBasicBlock *Target) const;

  // Implement EmitInstrWithCustomInserter for individual operation types.
  MachineBasicBlock *emitSelect(MachineInstr *MI,
                                MachineBasicBlock *BB) const;
  MachineBasicBlock *emitCondStore(MachineInstr *MI,
                                   MachineBasicBlock *BB,
                                   unsigned StoreOpcode, unsigned STOCOpcode,
                                   bool Invert) const;
  MachineBasicBlock *emitExt128(MachineInstr *MI,
                                MachineBasicBlock *MBB,
                                bool ClearEven, unsigned SubReg) const;
  MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI,
                                          MachineBasicBlock *BB,
                                          unsigned BinOpcode, unsigned BitSize,
                                          bool Invert = false) const;
  MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI,
                                          MachineBasicBlock *MBB,
                                          unsigned CompareOpcode,
                                          unsigned KeepOldMask,
                                          unsigned BitSize) const;
  MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
                                        MachineBasicBlock *BB) const;
  MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
                                       MachineBasicBlock *BB,
                                       unsigned Opcode) const;
  MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
                                       MachineBasicBlock *BB,
                                       unsigned Opcode) const;
  MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode,
                                          bool NoFloat) const;
  MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI,
                                         MachineBasicBlock *MBB,
                                         unsigned Opcode) const;

};
} // end namespace llvm

#endif