1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PPCISelLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "PPCISelLowering.h" 15#include "MCTargetDesc/PPCPredicates.h" 16#include "PPCCallingConv.h" 17#include "PPCMachineFunctionInfo.h" 18#include "PPCPerfectShuffle.h" 19#include "PPCTargetMachine.h" 20#include "PPCTargetObjectFile.h" 21#include "llvm/ADT/STLExtras.h" 22#include "llvm/ADT/StringSwitch.h" 23#include "llvm/ADT/Triple.h" 24#include "llvm/CodeGen/CallingConvLower.h" 25#include "llvm/CodeGen/MachineFrameInfo.h" 26#include "llvm/CodeGen/MachineFunction.h" 27#include "llvm/CodeGen/MachineInstrBuilder.h" 28#include "llvm/CodeGen/MachineLoopInfo.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/SelectionDAG.h" 31#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 32#include "llvm/IR/CallingConv.h" 33#include "llvm/IR/Constants.h" 34#include "llvm/IR/DerivedTypes.h" 35#include "llvm/IR/Function.h" 36#include "llvm/IR/Intrinsics.h" 37#include "llvm/Support/CommandLine.h" 38#include "llvm/Support/ErrorHandling.h" 39#include "llvm/Support/MathExtras.h" 40#include "llvm/Support/raw_ostream.h" 41#include "llvm/Target/TargetOptions.h" 42using namespace llvm; 43 44// FIXME: Remove this once soft-float is supported. 
45static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic", 46cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden); 47 48static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", 49cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); 50 51static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", 52cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); 53 54static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", 55cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); 56 57// FIXME: Remove this once the bug has been fixed! 58extern cl::opt<bool> ANDIGlueBug; 59 60PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM) 61 : TargetLowering(TM), 62 Subtarget(*TM.getSubtargetImpl()) { 63 // Use _setjmp/_longjmp instead of setjmp/longjmp. 64 setUseUnderscoreSetJmp(true); 65 setUseUnderscoreLongJmp(true); 66 67 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all 68 // arguments are at least 4/8 bytes aligned. 69 bool isPPC64 = Subtarget.isPPC64(); 70 setMinStackArgumentAlignment(isPPC64 ? 8:4); 71 72 // Set up the register classes. 73 addRegisterClass(MVT::i32, &PPC::GPRCRegClass); 74 addRegisterClass(MVT::f32, &PPC::F4RCRegClass); 75 addRegisterClass(MVT::f64, &PPC::F8RCRegClass); 76 77 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD 78 for (MVT VT : MVT::integer_valuetypes()) { 79 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 80 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); 81 } 82 83 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 84 85 // PowerPC has pre-inc load and store's. 
86 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); 87 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); 88 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); 89 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); 90 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); 91 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); 92 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); 93 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); 94 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); 95 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); 96 97 if (Subtarget.useCRBits()) { 98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 99 100 if (isPPC64 || Subtarget.hasFPCVT()) { 101 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); 102 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, 103 isPPC64 ? MVT::i64 : MVT::i32); 104 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); 105 AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, 106 isPPC64 ? MVT::i64 : MVT::i32); 107 } else { 108 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); 109 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); 110 } 111 112 // PowerPC does not support direct load / store of condition registers 113 setOperationAction(ISD::LOAD, MVT::i1, Custom); 114 setOperationAction(ISD::STORE, MVT::i1, Custom); 115 116 // FIXME: Remove this once the ANDI glue bug is fixed: 117 if (ANDIGlueBug) 118 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); 119 120 for (MVT VT : MVT::integer_valuetypes()) { 121 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 122 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 123 setTruncStoreAction(VT, MVT::i1, Expand); 124 } 125 126 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); 127 } 128 129 // This is used in the ppcf128->int sequence. Note it has different semantics 130 // from FP_ROUND: that rounds to nearest, this rounds to zero. 
131 setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); 132 133 // We do not currently implement these libm ops for PowerPC. 134 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); 135 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); 136 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); 137 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); 138 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); 139 setOperationAction(ISD::FREM, MVT::ppcf128, Expand); 140 141 // PowerPC has no SREM/UREM instructions 142 setOperationAction(ISD::SREM, MVT::i32, Expand); 143 setOperationAction(ISD::UREM, MVT::i32, Expand); 144 setOperationAction(ISD::SREM, MVT::i64, Expand); 145 setOperationAction(ISD::UREM, MVT::i64, Expand); 146 147 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 148 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 149 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 150 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 151 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 152 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 153 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 154 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 155 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 156 157 // We don't support sin/cos/sqrt/fmod/pow 158 setOperationAction(ISD::FSIN , MVT::f64, Expand); 159 setOperationAction(ISD::FCOS , MVT::f64, Expand); 160 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 161 setOperationAction(ISD::FREM , MVT::f64, Expand); 162 setOperationAction(ISD::FPOW , MVT::f64, Expand); 163 setOperationAction(ISD::FMA , MVT::f64, Legal); 164 setOperationAction(ISD::FSIN , MVT::f32, Expand); 165 setOperationAction(ISD::FCOS , MVT::f32, Expand); 166 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 167 setOperationAction(ISD::FREM , MVT::f32, Expand); 168 setOperationAction(ISD::FPOW , MVT::f32, Expand); 169 setOperationAction(ISD::FMA , MVT::f32, Legal); 170 171 
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 172 173 // If we're enabling GP optimizations, use hardware square root 174 if (!Subtarget.hasFSQRT() && 175 !(TM.Options.UnsafeFPMath && 176 Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) 177 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 178 179 if (!Subtarget.hasFSQRT() && 180 !(TM.Options.UnsafeFPMath && 181 Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) 182 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 183 184 if (Subtarget.hasFCPSGN()) { 185 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); 186 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); 187 } else { 188 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 189 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 190 } 191 192 if (Subtarget.hasFPRND()) { 193 setOperationAction(ISD::FFLOOR, MVT::f64, Legal); 194 setOperationAction(ISD::FCEIL, MVT::f64, Legal); 195 setOperationAction(ISD::FTRUNC, MVT::f64, Legal); 196 setOperationAction(ISD::FROUND, MVT::f64, Legal); 197 198 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 199 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 200 setOperationAction(ISD::FTRUNC, MVT::f32, Legal); 201 setOperationAction(ISD::FROUND, MVT::f32, Legal); 202 } 203 204 // PowerPC does not have BSWAP, CTPOP or CTTZ 205 setOperationAction(ISD::BSWAP, MVT::i32 , Expand); 206 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 207 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); 208 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); 209 setOperationAction(ISD::BSWAP, MVT::i64 , Expand); 210 setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 211 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); 212 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); 213 214 if (Subtarget.hasPOPCNTD()) { 215 setOperationAction(ISD::CTPOP, MVT::i32 , Legal); 216 setOperationAction(ISD::CTPOP, MVT::i64 , Legal); 217 } else { 218 setOperationAction(ISD::CTPOP, MVT::i32 , Expand); 219 
setOperationAction(ISD::CTPOP, MVT::i64 , Expand); 220 } 221 222 // PowerPC does not have ROTR 223 setOperationAction(ISD::ROTR, MVT::i32 , Expand); 224 setOperationAction(ISD::ROTR, MVT::i64 , Expand); 225 226 if (!Subtarget.useCRBits()) { 227 // PowerPC does not have Select 228 setOperationAction(ISD::SELECT, MVT::i32, Expand); 229 setOperationAction(ISD::SELECT, MVT::i64, Expand); 230 setOperationAction(ISD::SELECT, MVT::f32, Expand); 231 setOperationAction(ISD::SELECT, MVT::f64, Expand); 232 } 233 234 // PowerPC wants to turn select_cc of FP into fsel when possible. 235 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 236 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 237 238 // PowerPC wants to optimize integer setcc a bit 239 if (!Subtarget.useCRBits()) 240 setOperationAction(ISD::SETCC, MVT::i32, Custom); 241 242 // PowerPC does not have BRCOND which requires SetCC 243 if (!Subtarget.useCRBits()) 244 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 245 246 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 247 248 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 249 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 250 251 // PowerPC does not have [U|S]INT_TO_FP 252 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 253 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 254 255 setOperationAction(ISD::BITCAST, MVT::f32, Expand); 256 setOperationAction(ISD::BITCAST, MVT::i32, Expand); 257 setOperationAction(ISD::BITCAST, MVT::i64, Expand); 258 setOperationAction(ISD::BITCAST, MVT::f64, Expand); 259 260 // We cannot sextinreg(i1). Expand to shifts. 261 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 262 263 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support 264 // SjLj exception handling but a light-weight setjmp/longjmp replacement to 265 // support continuation, user-level threading, and etc.. 
As a result, no 266 // other SjLj exception interfaces are implemented and please don't build 267 // your own exception handling based on them. 268 // LLVM/Clang supports zero-cost DWARF exception handling. 269 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 270 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 271 272 // We want to legalize GlobalAddress and ConstantPool nodes into the 273 // appropriate instructions to materialize the address. 274 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 275 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 276 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 277 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 278 setOperationAction(ISD::JumpTable, MVT::i32, Custom); 279 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 280 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 281 setOperationAction(ISD::BlockAddress, MVT::i64, Custom); 282 setOperationAction(ISD::ConstantPool, MVT::i64, Custom); 283 setOperationAction(ISD::JumpTable, MVT::i64, Custom); 284 285 // TRAP is legal. 286 setOperationAction(ISD::TRAP, MVT::Other, Legal); 287 288 // TRAMPOLINE is custom lowered. 289 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); 290 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); 291 292 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 293 setOperationAction(ISD::VASTART , MVT::Other, Custom); 294 295 if (Subtarget.isSVR4ABI()) { 296 if (isPPC64) { 297 // VAARG always uses double-word chunks, so promote anything smaller. 
298 setOperationAction(ISD::VAARG, MVT::i1, Promote); 299 AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); 300 setOperationAction(ISD::VAARG, MVT::i8, Promote); 301 AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); 302 setOperationAction(ISD::VAARG, MVT::i16, Promote); 303 AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); 304 setOperationAction(ISD::VAARG, MVT::i32, Promote); 305 AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); 306 setOperationAction(ISD::VAARG, MVT::Other, Expand); 307 } else { 308 // VAARG is custom lowered with the 32-bit SVR4 ABI. 309 setOperationAction(ISD::VAARG, MVT::Other, Custom); 310 setOperationAction(ISD::VAARG, MVT::i64, Custom); 311 } 312 } else 313 setOperationAction(ISD::VAARG, MVT::Other, Expand); 314 315 if (Subtarget.isSVR4ABI() && !isPPC64) 316 // VACOPY is custom lowered with the 32-bit SVR4 ABI. 317 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 318 else 319 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 320 321 // Use the default implementation. 322 setOperationAction(ISD::VAEND , MVT::Other, Expand); 323 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 324 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 325 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 326 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 327 328 // We want to custom lower some of our intrinsics. 329 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 330 331 // To handle counter-based loop conditions. 332 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); 333 334 // Comparisons that require checking two conditions. 
335 setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 336 setCondCodeAction(ISD::SETULT, MVT::f64, Expand); 337 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 338 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 339 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 340 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); 341 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 342 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 343 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); 344 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); 345 setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 346 setCondCodeAction(ISD::SETONE, MVT::f64, Expand); 347 348 if (Subtarget.has64BitSupport()) { 349 // They also have instructions for converting between i64 and fp. 350 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 351 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 352 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 353 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 354 // This is just the low 32 bits of a (signed) fp->i64 conversion. 355 // We cannot do this with Promote because i64 is not a legal type. 356 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 357 358 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) 359 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 360 } else { 361 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 362 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 363 } 364 365 // With the instructions enabled under FPCVT, we can do everything. 
366 if (Subtarget.hasFPCVT()) { 367 if (Subtarget.has64BitSupport()) { 368 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 369 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 370 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 371 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 372 } 373 374 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 375 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 376 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 377 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 378 } 379 380 if (Subtarget.use64BitRegs()) { 381 // 64-bit PowerPC implementations can support i64 types directly 382 addRegisterClass(MVT::i64, &PPC::G8RCRegClass); 383 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 384 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 385 // 64-bit PowerPC wants to expand i128 shifts itself. 386 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); 387 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); 388 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); 389 } else { 390 // 32-bit PowerPC wants to expand i64 shifts itself. 391 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 392 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 393 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 394 } 395 396 if (Subtarget.hasAltivec()) { 397 // First set operation action for all vector types to expand. Then we 398 // will selectively turn on ones that can be effectively codegen'd. 399 for (MVT VT : MVT::vector_valuetypes()) { 400 // add/sub are legal for all supported vector VT's. 401 setOperationAction(ISD::ADD , VT, Legal); 402 setOperationAction(ISD::SUB , VT, Legal); 403 404 // We promote all shuffles to v16i8. 405 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); 406 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); 407 408 // We promote all non-typed operations to v4i32. 
409 setOperationAction(ISD::AND , VT, Promote); 410 AddPromotedToType (ISD::AND , VT, MVT::v4i32); 411 setOperationAction(ISD::OR , VT, Promote); 412 AddPromotedToType (ISD::OR , VT, MVT::v4i32); 413 setOperationAction(ISD::XOR , VT, Promote); 414 AddPromotedToType (ISD::XOR , VT, MVT::v4i32); 415 setOperationAction(ISD::LOAD , VT, Promote); 416 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); 417 setOperationAction(ISD::SELECT, VT, Promote); 418 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); 419 setOperationAction(ISD::STORE, VT, Promote); 420 AddPromotedToType (ISD::STORE, VT, MVT::v4i32); 421 422 // No other operations are legal. 423 setOperationAction(ISD::MUL , VT, Expand); 424 setOperationAction(ISD::SDIV, VT, Expand); 425 setOperationAction(ISD::SREM, VT, Expand); 426 setOperationAction(ISD::UDIV, VT, Expand); 427 setOperationAction(ISD::UREM, VT, Expand); 428 setOperationAction(ISD::FDIV, VT, Expand); 429 setOperationAction(ISD::FREM, VT, Expand); 430 setOperationAction(ISD::FNEG, VT, Expand); 431 setOperationAction(ISD::FSQRT, VT, Expand); 432 setOperationAction(ISD::FLOG, VT, Expand); 433 setOperationAction(ISD::FLOG10, VT, Expand); 434 setOperationAction(ISD::FLOG2, VT, Expand); 435 setOperationAction(ISD::FEXP, VT, Expand); 436 setOperationAction(ISD::FEXP2, VT, Expand); 437 setOperationAction(ISD::FSIN, VT, Expand); 438 setOperationAction(ISD::FCOS, VT, Expand); 439 setOperationAction(ISD::FABS, VT, Expand); 440 setOperationAction(ISD::FPOWI, VT, Expand); 441 setOperationAction(ISD::FFLOOR, VT, Expand); 442 setOperationAction(ISD::FCEIL, VT, Expand); 443 setOperationAction(ISD::FTRUNC, VT, Expand); 444 setOperationAction(ISD::FRINT, VT, Expand); 445 setOperationAction(ISD::FNEARBYINT, VT, Expand); 446 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); 447 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 448 setOperationAction(ISD::BUILD_VECTOR, VT, Expand); 449 setOperationAction(ISD::MULHU, VT, Expand); 450 setOperationAction(ISD::MULHS, 
VT, Expand); 451 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 452 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 453 setOperationAction(ISD::UDIVREM, VT, Expand); 454 setOperationAction(ISD::SDIVREM, VT, Expand); 455 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); 456 setOperationAction(ISD::FPOW, VT, Expand); 457 setOperationAction(ISD::BSWAP, VT, Expand); 458 setOperationAction(ISD::CTPOP, VT, Expand); 459 setOperationAction(ISD::CTLZ, VT, Expand); 460 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); 461 setOperationAction(ISD::CTTZ, VT, Expand); 462 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); 463 setOperationAction(ISD::VSELECT, VT, Expand); 464 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 465 466 for (MVT InnerVT : MVT::vector_valuetypes()) { 467 setTruncStoreAction(VT, InnerVT, Expand); 468 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 469 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 470 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 471 } 472 } 473 474 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 475 // with merges, splats, etc. 476 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 477 478 setOperationAction(ISD::AND , MVT::v4i32, Legal); 479 setOperationAction(ISD::OR , MVT::v4i32, Legal); 480 setOperationAction(ISD::XOR , MVT::v4i32, Legal); 481 setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 482 setOperationAction(ISD::SELECT, MVT::v4i32, 483 Subtarget.useCRBits() ? 
Legal : Expand); 484 setOperationAction(ISD::STORE , MVT::v4i32, Legal); 485 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 486 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 487 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 488 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 489 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 490 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 491 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 492 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 493 494 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); 495 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); 496 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); 497 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); 498 499 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 500 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 501 502 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { 503 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 504 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 505 } 506 507 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 508 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 509 setOperationAction(ISD::MUL, MVT::v16i8, Custom); 510 511 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 512 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 513 514 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 515 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 516 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 517 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 518 519 // Altivec does not contain unordered floating-point compare instructions 520 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); 521 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); 522 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); 523 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); 524 525 if (Subtarget.hasVSX()) { 526 setOperationAction(ISD::SCALAR_TO_VECTOR, 
MVT::v2f64, Legal); 527 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 528 529 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 530 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 531 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 532 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 533 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 534 535 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 536 537 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 538 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 539 540 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 541 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 542 543 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); 544 setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); 545 setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); 546 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 547 setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); 548 549 // Share the Altivec comparison restrictions. 550 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); 551 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); 552 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); 553 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); 554 555 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 556 setOperationAction(ISD::STORE, MVT::v2f64, Legal); 557 558 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); 559 560 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); 561 562 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); 563 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); 564 565 // VSX v2i64 only supports non-arithmetic operations. 
566 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 567 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 568 569 setOperationAction(ISD::SHL, MVT::v2i64, Expand); 570 setOperationAction(ISD::SRA, MVT::v2i64, Expand); 571 setOperationAction(ISD::SRL, MVT::v2i64, Expand); 572 573 setOperationAction(ISD::SETCC, MVT::v2i64, Custom); 574 575 setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 576 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); 577 setOperationAction(ISD::STORE, MVT::v2i64, Promote); 578 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); 579 580 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); 581 582 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 583 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 584 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 585 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 586 587 // Vector operation legalization checks the result type of 588 // SIGN_EXTEND_INREG, overall legalization checks the inner type. 589 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); 590 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); 591 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); 592 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); 593 594 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); 595 } 596 } 597 598 if (Subtarget.has64BitSupport()) 599 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 600 601 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); 602 603 if (!isPPC64) { 604 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); 605 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); 606 } 607 608 setBooleanContents(ZeroOrOneBooleanContent); 609 // Altivec instructions set fields to all zeros or all ones. 610 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 611 612 if (!isPPC64) { 613 // These libcalls are not available in 32-bit. 
614 setLibcallName(RTLIB::SHL_I128, nullptr); 615 setLibcallName(RTLIB::SRL_I128, nullptr); 616 setLibcallName(RTLIB::SRA_I128, nullptr); 617 } 618 619 if (isPPC64) { 620 setStackPointerRegisterToSaveRestore(PPC::X1); 621 setExceptionPointerRegister(PPC::X3); 622 setExceptionSelectorRegister(PPC::X4); 623 } else { 624 setStackPointerRegisterToSaveRestore(PPC::R1); 625 setExceptionPointerRegister(PPC::R3); 626 setExceptionSelectorRegister(PPC::R4); 627 } 628 629 // We have target-specific dag combine patterns for the following nodes: 630 setTargetDAGCombine(ISD::SINT_TO_FP); 631 if (Subtarget.hasFPCVT()) 632 setTargetDAGCombine(ISD::UINT_TO_FP); 633 setTargetDAGCombine(ISD::LOAD); 634 setTargetDAGCombine(ISD::STORE); 635 setTargetDAGCombine(ISD::BR_CC); 636 if (Subtarget.useCRBits()) 637 setTargetDAGCombine(ISD::BRCOND); 638 setTargetDAGCombine(ISD::BSWAP); 639 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 640 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 641 setTargetDAGCombine(ISD::INTRINSIC_VOID); 642 643 setTargetDAGCombine(ISD::SIGN_EXTEND); 644 setTargetDAGCombine(ISD::ZERO_EXTEND); 645 setTargetDAGCombine(ISD::ANY_EXTEND); 646 647 if (Subtarget.useCRBits()) { 648 setTargetDAGCombine(ISD::TRUNCATE); 649 setTargetDAGCombine(ISD::SETCC); 650 setTargetDAGCombine(ISD::SELECT_CC); 651 } 652 653 // Use reciprocal estimates. 654 if (TM.Options.UnsafeFPMath) { 655 setTargetDAGCombine(ISD::FDIV); 656 setTargetDAGCombine(ISD::FSQRT); 657 } 658 659 // Darwin long double math library functions have $LDBL128 appended. 
660 if (Subtarget.isDarwin()) { 661 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); 662 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); 663 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); 664 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); 665 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); 666 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); 667 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); 668 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); 669 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); 670 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); 671 } 672 673 // With 32 condition bits, we don't need to sink (and duplicate) compares 674 // aggressively in CodeGenPrep. 675 if (Subtarget.useCRBits()) 676 setHasMultipleConditionRegisters(); 677 678 setMinFunctionAlignment(2); 679 if (Subtarget.isDarwin()) 680 setPrefFunctionAlignment(4); 681 682 switch (Subtarget.getDarwinDirective()) { 683 default: break; 684 case PPC::DIR_970: 685 case PPC::DIR_A2: 686 case PPC::DIR_E500mc: 687 case PPC::DIR_E5500: 688 case PPC::DIR_PWR4: 689 case PPC::DIR_PWR5: 690 case PPC::DIR_PWR5X: 691 case PPC::DIR_PWR6: 692 case PPC::DIR_PWR6X: 693 case PPC::DIR_PWR7: 694 case PPC::DIR_PWR8: 695 setPrefFunctionAlignment(4); 696 setPrefLoopAlignment(4); 697 break; 698 } 699 700 setInsertFencesForAtomic(true); 701 702 if (Subtarget.enableMachineScheduler()) 703 setSchedulingPreference(Sched::Source); 704 else 705 setSchedulingPreference(Sched::Hybrid); 706 707 computeRegisterProperties(); 708 709 // The Freescale cores do better with aggressive inlining of memcpy and 710 // friends. GCC uses same threshold of 128 bytes (= 32 word stores). 
711 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || 712 Subtarget.getDarwinDirective() == PPC::DIR_E5500) { 713 MaxStoresPerMemset = 32; 714 MaxStoresPerMemsetOptSize = 16; 715 MaxStoresPerMemcpy = 32; 716 MaxStoresPerMemcpyOptSize = 8; 717 MaxStoresPerMemmove = 32; 718 MaxStoresPerMemmoveOptSize = 8; 719 } 720} 721 722/// getMaxByValAlign - Helper for getByValTypeAlignment to determine 723/// the desired ByVal argument alignment. 724static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, 725 unsigned MaxMaxAlign) { 726 if (MaxAlign == MaxMaxAlign) 727 return; 728 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { 729 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) 730 MaxAlign = 32; 731 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) 732 MaxAlign = 16; 733 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 734 unsigned EltAlign = 0; 735 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); 736 if (EltAlign > MaxAlign) 737 MaxAlign = EltAlign; 738 } else if (StructType *STy = dyn_cast<StructType>(Ty)) { 739 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 740 unsigned EltAlign = 0; 741 getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); 742 if (EltAlign > MaxAlign) 743 MaxAlign = EltAlign; 744 if (MaxAlign == MaxMaxAlign) 745 break; 746 } 747 } 748} 749 750/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 751/// function arguments in the caller parameter area. 752unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { 753 // Darwin passes everything on 4 byte boundary. 754 if (Subtarget.isDarwin()) 755 return 4; 756 757 // 16byte and wider vectors are passed on 16byte boundary. 758 // The rest is 8 on PPC64 and 4 on PPC32 boundary. 759 unsigned Align = Subtarget.isPPC64() ? 8 : 4; 760 if (Subtarget.hasAltivec() || Subtarget.hasQPX()) 761 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 
32 : 16); 762 return Align; 763} 764 765const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 766 switch (Opcode) { 767 default: return nullptr; 768 case PPCISD::FSEL: return "PPCISD::FSEL"; 769 case PPCISD::FCFID: return "PPCISD::FCFID"; 770 case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; 771 case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; 772 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; 773 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 774 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 775 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; 776 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; 777 case PPCISD::FRE: return "PPCISD::FRE"; 778 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; 779 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 780 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 781 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 782 case PPCISD::VPERM: return "PPCISD::VPERM"; 783 case PPCISD::CMPB: return "PPCISD::CMPB"; 784 case PPCISD::Hi: return "PPCISD::Hi"; 785 case PPCISD::Lo: return "PPCISD::Lo"; 786 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; 787 case PPCISD::LOAD: return "PPCISD::LOAD"; 788 case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC"; 789 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 790 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 791 case PPCISD::SRL: return "PPCISD::SRL"; 792 case PPCISD::SRA: return "PPCISD::SRA"; 793 case PPCISD::SHL: return "PPCISD::SHL"; 794 case PPCISD::CALL: return "PPCISD::CALL"; 795 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; 796 case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; 797 case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; 798 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 799 case PPCISD::BCTRL: return "PPCISD::BCTRL"; 800 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; 801 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 802 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; 803 case PPCISD::EH_SJLJ_SETJMP: return 
"PPCISD::EH_SJLJ_SETJMP"; 804 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; 805 case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; 806 case PPCISD::VCMP: return "PPCISD::VCMP"; 807 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 808 case PPCISD::LBRX: return "PPCISD::LBRX"; 809 case PPCISD::STBRX: return "PPCISD::STBRX"; 810 case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; 811 case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; 812 case PPCISD::LARX: return "PPCISD::LARX"; 813 case PPCISD::STCX: return "PPCISD::STCX"; 814 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 815 case PPCISD::BDNZ: return "PPCISD::BDNZ"; 816 case PPCISD::BDZ: return "PPCISD::BDZ"; 817 case PPCISD::MFFS: return "PPCISD::MFFS"; 818 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; 819 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; 820 case PPCISD::CR6SET: return "PPCISD::CR6SET"; 821 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; 822 case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; 823 case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; 824 case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; 825 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; 826 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; 827 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; 828 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; 829 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; 830 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; 831 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; 832 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; 833 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; 834 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 835 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 836 case PPCISD::SC: return "PPCISD::SC"; 837 } 838} 839 840EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { 841 if (!VT.isVector()) 842 return Subtarget.useCRBits() ? 
MVT::i1 : MVT::i32; 843 return VT.changeVectorElementTypeToInteger(); 844} 845 846bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { 847 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); 848 return true; 849} 850 851//===----------------------------------------------------------------------===// 852// Node matching predicates, for use by the tblgen matching code. 853//===----------------------------------------------------------------------===// 854 855/// isFloatingPointZero - Return true if this is 0.0 or -0.0. 856static bool isFloatingPointZero(SDValue Op) { 857 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 858 return CFP->getValueAPF().isZero(); 859 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 860 // Maybe this has already been legalized into the constant pool? 861 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 862 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 863 return CFP->getValueAPF().isZero(); 864 } 865 return false; 866} 867 868/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 869/// true if Op is undef or if it matches the specified value. 870static bool isConstantOrUndef(int Op, int Val) { 871 return Op < 0 || Op == Val; 872} 873 874/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 875/// VPKUHUM instruction. 876/// The ShuffleKind distinguishes between big-endian operations with 877/// two different inputs (0), either-endian operations with two identical 878/// inputs (1), and little-endian operantion with two different inputs (2). 879/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
880bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 881 SelectionDAG &DAG) { 882 bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian(); 883 if (ShuffleKind == 0) { 884 if (IsLE) 885 return false; 886 for (unsigned i = 0; i != 16; ++i) 887 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 888 return false; 889 } else if (ShuffleKind == 2) { 890 if (!IsLE) 891 return false; 892 for (unsigned i = 0; i != 16; ++i) 893 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 894 return false; 895 } else if (ShuffleKind == 1) { 896 unsigned j = IsLE ? 0 : 1; 897 for (unsigned i = 0; i != 8; ++i) 898 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 899 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 900 return false; 901 } 902 return true; 903} 904 905/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 906/// VPKUWUM instruction. 907/// The ShuffleKind distinguishes between big-endian operations with 908/// two different inputs (0), either-endian operations with two identical 909/// inputs (1), and little-endian operantion with two different inputs (2). 910/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 911bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 912 SelectionDAG &DAG) { 913 bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian(); 914 if (ShuffleKind == 0) { 915 if (IsLE) 916 return false; 917 for (unsigned i = 0; i != 16; i += 2) 918 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 919 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 920 return false; 921 } else if (ShuffleKind == 2) { 922 if (!IsLE) 923 return false; 924 for (unsigned i = 0; i != 16; i += 2) 925 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 926 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) 927 return false; 928 } else if (ShuffleKind == 1) { 929 unsigned j = IsLE ? 
0 : 2; 930 for (unsigned i = 0; i != 8; i += 2) 931 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 932 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 933 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 934 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) 935 return false; 936 } 937 return true; 938} 939 940/// isVMerge - Common function, used to match vmrg* shuffles. 941/// 942static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, 943 unsigned LHSStart, unsigned RHSStart) { 944 if (N->getValueType(0) != MVT::v16i8) 945 return false; 946 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 947 "Unsupported merge size!"); 948 949 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 950 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 951 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), 952 LHSStart+j+i*UnitSize) || 953 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), 954 RHSStart+j+i*UnitSize)) 955 return false; 956 } 957 return true; 958} 959 960/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 961/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). 962/// The ShuffleKind distinguishes between big-endian merges with two 963/// different inputs (0), either-endian merges with two identical inputs (1), 964/// and little-endian merges with two different inputs (2). For the latter, 965/// the input operands are swapped (see PPCInstrAltivec.td). 
966bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 967 unsigned ShuffleKind, SelectionDAG &DAG) { 968 if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) { 969 if (ShuffleKind == 1) // unary 970 return isVMerge(N, UnitSize, 0, 0); 971 else if (ShuffleKind == 2) // swapped 972 return isVMerge(N, UnitSize, 0, 16); 973 else 974 return false; 975 } else { 976 if (ShuffleKind == 1) // unary 977 return isVMerge(N, UnitSize, 8, 8); 978 else if (ShuffleKind == 0) // normal 979 return isVMerge(N, UnitSize, 8, 24); 980 else 981 return false; 982 } 983} 984 985/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 986/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). 987/// The ShuffleKind distinguishes between big-endian merges with two 988/// different inputs (0), either-endian merges with two identical inputs (1), 989/// and little-endian merges with two different inputs (2). For the latter, 990/// the input operands are swapped (see PPCInstrAltivec.td). 991bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 992 unsigned ShuffleKind, SelectionDAG &DAG) { 993 if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) { 994 if (ShuffleKind == 1) // unary 995 return isVMerge(N, UnitSize, 8, 8); 996 else if (ShuffleKind == 2) // swapped 997 return isVMerge(N, UnitSize, 8, 24); 998 else 999 return false; 1000 } else { 1001 if (ShuffleKind == 1) // unary 1002 return isVMerge(N, UnitSize, 0, 0); 1003 else if (ShuffleKind == 0) // normal 1004 return isVMerge(N, UnitSize, 0, 16); 1005 else 1006 return false; 1007 } 1008} 1009 1010 1011/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 1012/// amount, otherwise return -1. 1013/// The ShuffleKind distinguishes between big-endian operations with two 1014/// different inputs (0), either-endian operations with two identical inputs 1015/// (1), and little-endian operations with two different inputs (2). 
For the 1016/// latter, the input operands are swapped (see PPCInstrAltivec.td). 1017int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, 1018 SelectionDAG &DAG) { 1019 if (N->getValueType(0) != MVT::v16i8) 1020 return -1; 1021 1022 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1023 1024 // Find the first non-undef value in the shuffle mask. 1025 unsigned i; 1026 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) 1027 /*search*/; 1028 1029 if (i == 16) return -1; // all undef. 1030 1031 // Otherwise, check to see if the rest of the elements are consecutively 1032 // numbered from this value. 1033 unsigned ShiftAmt = SVOp->getMaskElt(i); 1034 if (ShiftAmt < i) return -1; 1035 1036 ShiftAmt -= i; 1037 bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()-> 1038 isLittleEndian(); 1039 1040 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { 1041 // Check the rest of the elements to see if they are consecutive. 1042 for (++i; i != 16; ++i) 1043 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1044 return -1; 1045 } else if (ShuffleKind == 1) { 1046 // Check the rest of the elements to see if they are consecutive. 1047 for (++i; i != 16; ++i) 1048 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 1049 return -1; 1050 } else 1051 return -1; 1052 1053 if (ShuffleKind == 2 && isLE) 1054 ShiftAmt = 16 - ShiftAmt; 1055 1056 return ShiftAmt; 1057} 1058 1059/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 1060/// specifies a splat of a single element that is suitable for input to 1061/// VSPLTB/VSPLTH/VSPLTW. 1062bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 1063 assert(N->getValueType(0) == MVT::v16i8 && 1064 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 1065 1066 // This is a splat operation if each element of the permute is the same, and 1067 // if the value doesn't reference the second vector. 
1068 unsigned ElementBase = N->getMaskElt(0); 1069 1070 // FIXME: Handle UNDEF elements too! 1071 if (ElementBase >= 16) 1072 return false; 1073 1074 // Check that the indices are consecutive, in the case of a multi-byte element 1075 // splatted with a v16i8 mask. 1076 for (unsigned i = 1; i != EltSize; ++i) 1077 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 1078 return false; 1079 1080 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 1081 if (N->getMaskElt(i) < 0) continue; 1082 for (unsigned j = 0; j != EltSize; ++j) 1083 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 1084 return false; 1085 } 1086 return true; 1087} 1088 1089/// isAllNegativeZeroVector - Returns true if all elements of build_vector 1090/// are -0.0. 1091bool PPC::isAllNegativeZeroVector(SDNode *N) { 1092 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); 1093 1094 APInt APVal, APUndef; 1095 unsigned BitSize; 1096 bool HasAnyUndefs; 1097 1098 if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) 1099 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 1100 return CFP->getValueAPF().isNegZero(); 1101 1102 return false; 1103} 1104 1105/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 1106/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 1107unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 1108 SelectionDAG &DAG) { 1109 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1110 assert(isSplatShuffleMask(SVOp, EltSize)); 1111 if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) 1112 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 1113 else 1114 return SVOp->getMaskElt(0) / EltSize; 1115} 1116 1117/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 1118/// by using a vspltis[bhw] instruction of the specified element size, return 1119/// the constant being splatted. 
The ByteSize field indicates the number of 1120/// bytes of each element [124] -> [bhw]. 1121SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 1122 SDValue OpVal(nullptr, 0); 1123 1124 // If ByteSize of the splat is bigger than the element size of the 1125 // build_vector, then we have a case where we are checking for a splat where 1126 // multiple elements of the buildvector are folded together into a single 1127 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 1128 unsigned EltSize = 16/N->getNumOperands(); 1129 if (EltSize < ByteSize) { 1130 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 1131 SDValue UniquedVals[4]; 1132 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 1133 1134 // See if all of the elements in the buildvector agree across. 1135 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1136 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1137 // If the element isn't a constant, bail fully out. 1138 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 1139 1140 1141 if (!UniquedVals[i&(Multiple-1)].getNode()) 1142 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 1143 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 1144 return SDValue(); // no match. 1145 } 1146 1147 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 1148 // either constant or undef values that are identical for each chunk. See 1149 // if these chunks can form into a larger vspltis*. 1150 1151 // Check to see if all of the leading entries are either 0 or -1. If 1152 // neither, then this won't fit into the immediate field. 1153 bool LeadingZero = true; 1154 bool LeadingOnes = true; 1155 for (unsigned i = 0; i != Multiple-1; ++i) { 1156 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 
1157 1158 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 1159 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 1160 } 1161 // Finally, check the least significant entry. 1162 if (LeadingZero) { 1163 if (!UniquedVals[Multiple-1].getNode()) 1164 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 1165 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1166 if (Val < 16) 1167 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 1168 } 1169 if (LeadingOnes) { 1170 if (!UniquedVals[Multiple-1].getNode()) 1171 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 1172 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1173 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1174 return DAG.getTargetConstant(Val, MVT::i32); 1175 } 1176 1177 return SDValue(); 1178 } 1179 1180 // Check to see if this buildvec has a single non-undef value in its elements. 1181 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1182 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1183 if (!OpVal.getNode()) 1184 OpVal = N->getOperand(i); 1185 else if (OpVal != N->getOperand(i)) 1186 return SDValue(); 1187 } 1188 1189 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 1190 1191 unsigned ValSizeInBytes = EltSize; 1192 uint64_t Value = 0; 1193 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1194 Value = CN->getZExtValue(); 1195 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1196 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1197 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1198 } 1199 1200 // If the splat value is larger than the element value, then we can never do 1201 // this splat. The only case that we could fit the replicated bits into our 1202 // immediate field for would be zero, and we prefer to use vxor for it. 
1203 if (ValSizeInBytes < ByteSize) return SDValue(); 1204 1205 // If the element value is larger than the splat value, cut it in half and 1206 // check to see if the two halves are equal. Continue doing this until we 1207 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 1208 while (ValSizeInBytes > ByteSize) { 1209 ValSizeInBytes >>= 1; 1210 1211 // If the top half equals the bottom half, we're still ok. 1212 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 1213 (Value & ((1 << (8*ValSizeInBytes))-1))) 1214 return SDValue(); 1215 } 1216 1217 // Properly sign extend the value. 1218 int MaskVal = SignExtend32(Value, ByteSize * 8); 1219 1220 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 1221 if (MaskVal == 0) return SDValue(); 1222 1223 // Finally, if this value fits in a 5 bit sext field, return it 1224 if (SignExtend32<5>(MaskVal) == MaskVal) 1225 return DAG.getTargetConstant(MaskVal, MVT::i32); 1226 return SDValue(); 1227} 1228 1229//===----------------------------------------------------------------------===// 1230// Addressing Mode Selection 1231//===----------------------------------------------------------------------===// 1232 1233/// isIntS16Immediate - This method tests to see if the node is either a 32-bit 1234/// or 64-bit immediate, and if the value can be accurately represented as a 1235/// sign extension from a 16-bit value. If so, this returns true and the 1236/// immediate. 
1237static bool isIntS16Immediate(SDNode *N, short &Imm) { 1238 if (!isa<ConstantSDNode>(N)) 1239 return false; 1240 1241 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 1242 if (N->getValueType(0) == MVT::i32) 1243 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 1244 else 1245 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 1246} 1247static bool isIntS16Immediate(SDValue Op, short &Imm) { 1248 return isIntS16Immediate(Op.getNode(), Imm); 1249} 1250 1251 1252/// SelectAddressRegReg - Given the specified addressed, check to see if it 1253/// can be represented as an indexed [r+r] operation. Returns false if it 1254/// can be more efficiently represented with [r+imm]. 1255bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 1256 SDValue &Index, 1257 SelectionDAG &DAG) const { 1258 short imm = 0; 1259 if (N.getOpcode() == ISD::ADD) { 1260 if (isIntS16Immediate(N.getOperand(1), imm)) 1261 return false; // r+i 1262 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 1263 return false; // r+i 1264 1265 Base = N.getOperand(0); 1266 Index = N.getOperand(1); 1267 return true; 1268 } else if (N.getOpcode() == ISD::OR) { 1269 if (isIntS16Immediate(N.getOperand(1), imm)) 1270 return false; // r+i can fold it if we can. 1271 1272 // If this is an or of disjoint bitfields, we can codegen this as an add 1273 // (for better address arithmetic) if the LHS and RHS of the OR are provably 1274 // disjoint. 1275 APInt LHSKnownZero, LHSKnownOne; 1276 APInt RHSKnownZero, RHSKnownOne; 1277 DAG.computeKnownBits(N.getOperand(0), 1278 LHSKnownZero, LHSKnownOne); 1279 1280 if (LHSKnownZero.getBoolValue()) { 1281 DAG.computeKnownBits(N.getOperand(1), 1282 RHSKnownZero, RHSKnownOne); 1283 // If all of the bits are known zero on the LHS or RHS, the add won't 1284 // carry. 
1285 if (~(LHSKnownZero | RHSKnownZero) == 0) { 1286 Base = N.getOperand(0); 1287 Index = N.getOperand(1); 1288 return true; 1289 } 1290 } 1291 } 1292 1293 return false; 1294} 1295 1296// If we happen to be doing an i64 load or store into a stack slot that has 1297// less than a 4-byte alignment, then the frame-index elimination may need to 1298// use an indexed load or store instruction (because the offset may not be a 1299// multiple of 4). The extra register needed to hold the offset comes from the 1300// register scavenger, and it is possible that the scavenger will need to use 1301// an emergency spill slot. As a result, we need to make sure that a spill slot 1302// is allocated when doing an i64 load/store into a less-than-4-byte-aligned 1303// stack slot. 1304static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 1305 // FIXME: This does not handle the LWA case. 1306 if (VT != MVT::i64) 1307 return; 1308 1309 // NOTE: We'll exclude negative FIs here, which come from argument 1310 // lowering, because there are no known test cases triggering this problem 1311 // using packed structures (or similar). We can remove this exclusion if 1312 // we find such a test case. The reason why this is so test-case driven is 1313 // because this entire 'fixup' is only to prevent crashes (from the 1314 // register scavenger) on not-really-valid inputs. For example, if we have: 1315 // %a = alloca i1 1316 // %b = bitcast i1* %a to i64* 1317 // store i64* a, i64 b 1318 // then the store should really be marked as 'align 1', but is not. If it 1319 // were marked as 'align 1' then the indexed form would have been 1320 // instruction-selected initially, and the problem this 'fixup' is preventing 1321 // won't happen regardless. 
1322 if (FrameIdx < 0) 1323 return; 1324 1325 MachineFunction &MF = DAG.getMachineFunction(); 1326 MachineFrameInfo *MFI = MF.getFrameInfo(); 1327 1328 unsigned Align = MFI->getObjectAlignment(FrameIdx); 1329 if (Align >= 4) 1330 return; 1331 1332 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1333 FuncInfo->setHasNonRISpills(); 1334} 1335 1336/// Returns true if the address N can be represented by a base register plus 1337/// a signed 16-bit displacement [r+imm], and if it is not better 1338/// represented as reg+reg. If Aligned is true, only accept displacements 1339/// suitable for STD and friends, i.e. multiples of 4. 1340bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 1341 SDValue &Base, 1342 SelectionDAG &DAG, 1343 bool Aligned) const { 1344 // FIXME dl should come from parent load or store, not from address 1345 SDLoc dl(N); 1346 // If this can be more profitably realized as r+r, fail. 1347 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1348 return false; 1349 1350 if (N.getOpcode() == ISD::ADD) { 1351 short imm = 0; 1352 if (isIntS16Immediate(N.getOperand(1), imm) && 1353 (!Aligned || (imm & 3) == 0)) { 1354 Disp = DAG.getTargetConstant(imm, N.getValueType()); 1355 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1356 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1357 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1358 } else { 1359 Base = N.getOperand(0); 1360 } 1361 return true; // [r+i] 1362 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1363 // Match LOAD (ADD (X, Lo(G))). 1364 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1365 && "Cannot handle constant offsets yet!"); 1366 Disp = N.getOperand(1).getOperand(0); // The global address. 
1367 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1368 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 1369 Disp.getOpcode() == ISD::TargetConstantPool || 1370 Disp.getOpcode() == ISD::TargetJumpTable); 1371 Base = N.getOperand(0); 1372 return true; // [&g+r] 1373 } 1374 } else if (N.getOpcode() == ISD::OR) { 1375 short imm = 0; 1376 if (isIntS16Immediate(N.getOperand(1), imm) && 1377 (!Aligned || (imm & 3) == 0)) { 1378 // If this is an or of disjoint bitfields, we can codegen this as an add 1379 // (for better address arithmetic) if the LHS and RHS of the OR are 1380 // provably disjoint. 1381 APInt LHSKnownZero, LHSKnownOne; 1382 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1383 1384 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1385 // If all of the bits are known zero on the LHS or RHS, the add won't 1386 // carry. 1387 if (FrameIndexSDNode *FI = 1388 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1389 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1390 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1391 } else { 1392 Base = N.getOperand(0); 1393 } 1394 Disp = DAG.getTargetConstant(imm, N.getValueType()); 1395 return true; 1396 } 1397 } 1398 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1399 // Loading from a constant address. 1400 1401 // If this address fits entirely in a 16-bit sext immediate field, codegen 1402 // this as "d, 0" 1403 short Imm; 1404 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { 1405 Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); 1406 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1407 CN->getValueType(0)); 1408 return true; 1409 } 1410 1411 // Handle 32-bit sext immediates with LIS + addr mode. 
1412 if ((CN->getValueType(0) == MVT::i32 || 1413 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 1414 (!Aligned || (CN->getZExtValue() & 3) == 0)) { 1415 int Addr = (int)CN->getZExtValue(); 1416 1417 // Otherwise, break this down into an LIS + disp. 1418 Disp = DAG.getTargetConstant((short)Addr, MVT::i32); 1419 1420 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); 1421 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; 1422 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 1423 return true; 1424 } 1425 } 1426 1427 Disp = DAG.getTargetConstant(0, getPointerTy()); 1428 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { 1429 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1430 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1431 } else 1432 Base = N; 1433 return true; // [r+0] 1434} 1435 1436/// SelectAddressRegRegOnly - Given the specified addressed, force it to be 1437/// represented as an indexed [r+r] operation. 1438bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 1439 SDValue &Index, 1440 SelectionDAG &DAG) const { 1441 // Check to see if we can easily represent this as an [r+r] address. This 1442 // will fail if it thinks that the address is more profitably represented as 1443 // reg+imm, e.g. where imm = 0. 1444 if (SelectAddressRegReg(N, Base, Index, DAG)) 1445 return true; 1446 1447 // If the operand is an addition, always emit this as [r+r], since this is 1448 // better (for code size, and execution, as the memop does the add for free) 1449 // than emitting an explicit add. 1450 if (N.getOpcode() == ISD::ADD) { 1451 Base = N.getOperand(0); 1452 Index = N.getOperand(1); 1453 return true; 1454 } 1455 1456 // Otherwise, do it the hard way, using R0 as the base register. 1457 Base = DAG.getRegister(Subtarget.isPPC64() ? 
PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
///
/// Only LoadSDNode and StoreSDNode inputs are considered; every other node
/// kind, every vector memory type, and every query made while
/// -disable-ppc-preinc is set yields false.  On success AM is always set to
/// ISD::PRE_INC.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  // Collect the address, memory type and alignment from the load or store.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  // First try the register + register form.
  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    // (A RegisterSDNode base is swapped as well.)
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // Otherwise fall back to the register + immediate form.
  // NOTE(review): Offset is passed before Base here, unlike the
  // SelectAddressRegReg call above -- presumably the helper takes the
  // displacement first; confirm against its declaration.

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
///
/// HiOpFlags/LoOpFlags start as MO_HA/MO_LO.  MO_PIC_FLAG is or'ed in under
/// the PIC relocation model.  When GV is non-null and the subtarget reports
/// that it has a lazy resolver stub, MO_NLP_FLAG is or'ed in too, together
/// with MO_NLP_HIDDEN_FLAG if GV has hidden visibility.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                               unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model.
  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;

  if (isPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr, make
  // sure that instruction lowering adds it.
  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }

  return isPIC;
}

/// LowerLabelRef - Build the address of a label from its two target nodes:
/// PPCISD::Hi(HiPart, 0) + PPCISD::Lo(LoPart, 0), with the
/// PPCISD::GlobalBaseReg node added to the high part first when isPIC is set.
/// The pointer type is taken from HiPart.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  SDLoc DL(HiPart);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

/// LowerConstantPool - Lower a ConstantPool node.  Three shapes are produced:
/// a TOC_ENTRY relative to X2 on 64-bit SVR4, a TOC_ENTRY relative to the
/// global base register for PIC on (32-bit) SVR4, and otherwise the hi/lo
/// pair built by LowerLabelRef.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);

  if (isPIC && Subtarget.isSVR4ABI()) {
    // NOTE(review): PPCII::MO_PIC_FLAG sits in the argument position that
    // the Hi/Lo calls below fill with 0 (their flag comes one position
    // later).  Confirm against the getTargetConstantPool declaration whether
    // the flag was meant to be passed as the target flag instead.
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
                                           PPCII::MO_PIC_FLAG);
    SDLoc DL(CP);
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
                       DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
  }

  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}

/// LowerJumpTable - Lower a JumpTable node, using the same three shapes as
/// LowerConstantPool: TOC_ENTRY off X2 (64-bit SVR4), TOC_ENTRY off the
/// global base register (PIC on SVR4), or a hi/lo pair via LowerLabelRef.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);

  if (isPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    // The base-register node takes its location from GA, while the TOC_ENTRY
    // itself takes it from JT.
    SDLoc DL(GA);
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
                       DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
  }

  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
}

/// LowerBlockAddress - Lower a BlockAddress node: TOC_ENTRY off X2 on 64-bit
/// SVR4, otherwise a hi/lo pair via LowerLabelRef.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  // NOTE(review): the 64-bit path above forwards BASDN->getOffset(), whereas
  // these two nodes are created with an offset of 0 -- confirm that a
  // non-zero offset cannot reach this path.
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}

// Generate a call to __tls_get_addr for the given GOT entry Op.
std::pair<SDValue,SDValue>
PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
                                SelectionDAG &DAG) const {

  // Op is the sole argument, typed as the pointer-sized integer type, which
  // is also the declared return type of the call.
  Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Op;
  Entry.Ty = IntPtrTy;
  Args.push_back(Entry);

  // The call is chained to the entry node of the DAG.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
    .setCallee(CallingConv::C, IntPtrTy,
               DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
               std::move(Args), 0);

  // Callers in this file read .first as the call's result and .second as its
  // output chain.
  return LowerCallTo(CLI);
}

/// LowerGlobalTLSAddress - Lower a thread-local GlobalAddress node.  The
/// node sequence depends on the TLS model the target machine selects for the
/// global: local-exec, initial-exec, general-dynamic or local-dynamic.  The
/// two dynamic models go through lowerTLSCall (__tls_get_addr).
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {

  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy();
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    // Lo(tprel_lo(GV), Hi(tprel_ha(GV), thread-pointer register)), where the
    // register is X13 on 64-bit and R2 on 32-bit.
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                     is64bit ? MVT::i64 : MVT::i32);
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    // Load the thread-pointer offset from the GOT and combine it via ADD_TLS.
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    // Form the address of the GOT entry and hand it to __tls_get_addr; the
    // call's result is the variable's address.
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                             PPCII::MO_TLSGD);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                   GOTReg, TGA);
    } else {
      // On 32-bit the GOT pointer node depends on the module's PIC level.
      if (picLevel == PICLevel::Small)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
                                   GOTPtr, TGA);
    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
    return CallResult.first;
  }

  if (Model == TLSModel::LocalDynamic) {
    // As for general-dynamic, but the call's result is then adjusted by the
    // ADDIS_DTPREL_HA / ADDI_DTPREL_L pair for this variable.
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                             PPCII::MO_TLSLD);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::Small)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
                                   GOTPtr, TGA);
    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
    SDValue TLSAddr = CallResult.first;
    SDValue Chain = CallResult.second;
    // The call's output chain is an operand here, tying this node to the call.
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
                                      Chain, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}

/// LowerGlobalAddress - Lower a GlobalAddress node: TOC_ENTRY off X2 on
/// 64-bit SVR4, TOC_ENTRY off the global base register for PIC on SVR4,
/// otherwise a hi/lo pair via LowerLabelRef followed by an extra load when
/// the reference goes through a non-lazy pointer.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);

  if (isPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
                       DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
  }

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
                      false, false, false, 0);
  return Ptr;
}

/// LowerSETCC - Custom lowering for SETCC.  Returns a replacement node, Op
/// itself when the node is to be kept as is, or an empty SDValue when this
/// routine declines to handle the comparison.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      // Operands narrower than i32 are zero-extended to i32 first.
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      // ctlz(x) equals the bit width only when x == 0, so shifting right by
      // log2(width) leaves 1 exactly for a zero operand.
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDValue();
}

/// LowerVAARG - Expand va_arg for PPC32 (asserted).  Reads the gpr/fpr index
/// bytes and the two area pointers out of the va_list, picks between the
/// register save area and the overflow area, writes the updated index and
/// overflow pointer back, and returns the load of the argument value.
/// The offsets used (0, 1, 4, 8) match the va_list layout documented in
/// LowerVASTART.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                      const PPCSubtarget &Subtarget) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, false, 0);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, false, 0);
  InChain = FprIndex.getValue(1);

  // reg_save_area pointer lives at offset 8, overflow_arg_area at offset 4.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, MVT::i32));

  // areas
  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
                                    MachinePointerInfo(), false, false,
                                    false, 0);
  InChain = RegSaveArea.getValue(1);

  // CC is true while the relevant index is still below 8, i.e. the argument
  // comes from the register save area; otherwise the overflow area is used.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
                                                   MVT::i32));

  // Write the bumped index back to the gpr byte or the fpr byte.
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV),
                              MVT::i8, false, false, 0);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // Advance overflow_area by 4 (integer) or 8 (floating point); the select
  // below keeps the old value while CC is true (index < 8).
  // NOTE(review): VT.isInteger() also holds for i64, so an i64 taken from
  // the overflow area advances the pointer by only 4 -- confirm whether
  // that is intended.
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
                              OverflowAreaPtr,
                              MachinePointerInfo(),
                              MVT::i32, false, false, 0);

  // The argument load is chained after both write-backs.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
                     false, false, false, 0);
}

/// LowerVACOPY - Expand va_copy for PPC32 (asserted) as a 12-byte memcpy
/// between the two va_list objects.
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
                                       const PPCSubtarget &Subtarget) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  return DAG.getMemcpy(Op.getOperand(0), Op,
                       Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, MVT::i32), 8, false, true,
                       MachinePointerInfo(), MachinePointerInfo());
}

/// LowerADJUST_TRAMPOLINE - No adjustment is performed: the node's first
/// operand is returned unchanged.
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  return Op.getOperand(0);
}

/// LowerINIT_TRAMPOLINE - Lower trampoline initialisation to a call to
/// __trampoline_setup(Trmp, TrampSize, FPtr, Nest) and return the call's
/// output chain.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy =
    DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
                                                             *DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  // Every argument is passed with the pointer-sized integer type.
  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain)
    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
               DAG.getExternalSymbol("__trampoline_setup", PtrVT),
               std::move(Args), 0);

  // The callee is declared void, so only the chain result is used.
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}

/// LowerVASTART - Expand va_start.  On Darwin and on PPC64 this is a single
/// store of the VarArgsFrameIndex address.  For the 32-bit SVR4 ABI the four
/// fields of the va_list struct described below are stored one after the
/// other, each store chained to the previous one.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
                                        const PPCSubtarget &Subtarget) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  SDLoc dl(Op);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV),
                        false, false, 0);
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];


  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);


  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Distance between the two pointer fields: one pointer.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  // Distance from the fpr byte (offset 1) to the first pointer field.
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  // Distance from the gpr byte to the fpr byte.
  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
                                         Op.getOperand(1),
                                         MachinePointerInfo(SV),
                                         MVT::i8, false, false, 0);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                      MachinePointerInfo(SV, nextOffset), MVT::i8,
                      false, false, 0);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore =
    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                 MachinePointerInfo(SV, nextOffset),
                 false, false, 0);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset),
                      false, false, 0);

}

#include "PPCGenCallingConv.inc"

// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
}

/// CC_PPC32_SVR4_Custom_Dummy - Custom calling-convention hook that does
/// nothing and always returns true.
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  return true;
}

/// CC_PPC32_SVR4_Custom_AlignArgRegs - Custom calling-convention hook that
/// burns one GPR (R3..R10) when necessary so that the next free argument
/// register sits at an even index of ArgRegs.  Never assigns the argument
/// itself.
bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                             MVT &LocVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  const unsigned NumArgRegs = array_lengthof(ArgRegs);

  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);

  // Skip one register if the first unallocated register has an even register
  // number and there are still argument registers available which have not been
  // allocated yet. RegNum is actually an index into ArgRegs, which means we
  // need to skip a register if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
    State.AllocateReg(ArgRegs[RegNum]);
  }

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an odd register number and does not actually
  // allocate a register for the current argument.
  return false;
}

/// CC_PPC32_SVR4_Custom_AlignFPArgRegs - Custom calling-convention hook that
/// burns F8 when it is the only floating-point argument register left.
/// Never assigns the argument itself.
bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                               MVT &LocVT,
                                               CCValAssign::LocInfo &LocInfo,
                                               ISD::ArgFlagsTy &ArgFlags,
                                               CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };

  const unsigned NumArgRegs = array_lengthof(ArgRegs);

  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);

  // If there is only one Floating-point register left we need to put both f64
  // values of a split ppc_fp128 value on the stack.
  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
    State.AllocateReg(ArgRegs[RegNum]);
  }

  // Always return false here, as this function only makes sure that the two f64
  // values a ppc_fp128 value is split into are both passed in registers or both
  // passed on the stack and does not actually allocate a register for the
  // current argument.
  return false;
}

/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.  The returned array holds the 13 registers F1..F13.
static const MCPhysReg *GetFPR() {
  static const MCPhysReg FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };

  return FPR;
}

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
2236static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2237 unsigned PtrByteSize) { 2238 unsigned ArgSize = ArgVT.getStoreSize(); 2239 if (Flags.isByVal()) 2240 ArgSize = Flags.getByValSize(); 2241 2242 // Round up to multiples of the pointer size, except for array members, 2243 // which are always packed. 2244 if (!Flags.isInConsecutiveRegs()) 2245 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2246 2247 return ArgSize; 2248} 2249 2250/// CalculateStackSlotAlignment - Calculates the alignment of this argument 2251/// on the stack. 2252static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2253 ISD::ArgFlagsTy Flags, 2254 unsigned PtrByteSize) { 2255 unsigned Align = PtrByteSize; 2256 2257 // Altivec parameters are padded to a 16 byte boundary. 2258 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2259 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2260 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) 2261 Align = 16; 2262 2263 // ByVal parameters are aligned as requested. 2264 if (Flags.isByVal()) { 2265 unsigned BVAlign = Flags.getByValAlign(); 2266 if (BVAlign > PtrByteSize) { 2267 if (BVAlign % PtrByteSize != 0) 2268 llvm_unreachable( 2269 "ByVal alignment is not a multiple of the pointer size"); 2270 2271 Align = BVAlign; 2272 } 2273 } 2274 2275 // Array members are always packed to their original alignment. 2276 if (Flags.isInConsecutiveRegs()) { 2277 // If the array member was split into multiple registers, the first 2278 // needs to be aligned to the size of the full type. (Except for 2279 // ppcf128, which is only aligned as its f64 components.) 2280 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2281 Align = OrigVT.getStoreSize(); 2282 else 2283 Align = ArgVT.getStoreSize(); 2284 } 2285 2286 return Align; 2287} 2288 2289/// CalculateStackSlotUsed - Return whether this argument will use its 2290/// stack slot (instead of being passed in registers). 
ArgOffset, 2291/// AvailableFPRs, and AvailableVRs must hold the current argument 2292/// position, and will be updated to account for this argument. 2293static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2294 ISD::ArgFlagsTy Flags, 2295 unsigned PtrByteSize, 2296 unsigned LinkageSize, 2297 unsigned ParamAreaSize, 2298 unsigned &ArgOffset, 2299 unsigned &AvailableFPRs, 2300 unsigned &AvailableVRs) { 2301 bool UseMemory = false; 2302 2303 // Respect alignment of argument on the stack. 2304 unsigned Align = 2305 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2306 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2307 // If there's no space left in the argument save area, we must 2308 // use memory (this check also catches zero-sized arguments). 2309 if (ArgOffset >= LinkageSize + ParamAreaSize) 2310 UseMemory = true; 2311 2312 // Allocate argument on the stack. 2313 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2314 if (Flags.isInConsecutiveRegsLast()) 2315 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2316 // If we overran the argument save area, we must use memory 2317 // (this check catches arguments passed partially in memory) 2318 if (ArgOffset > LinkageSize + ParamAreaSize) 2319 UseMemory = true; 2320 2321 // However, if the argument is actually passed in an FPR or a VR, 2322 // we don't use memory after all. 2323 if (!Flags.isByVal()) { 2324 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) 2325 if (AvailableFPRs > 0) { 2326 --AvailableFPRs; 2327 return false; 2328 } 2329 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2330 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2331 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) 2332 if (AvailableVRs > 0) { 2333 --AvailableVRs; 2334 return false; 2335 } 2336 } 2337 2338 return UseMemory; 2339} 2340 2341/// EnsureStackAlignment - Round stack frame size up from NumBytes to 2342/// ensure minimum alignment required for target. 
static unsigned EnsureStackAlignment(const TargetMachine &Target,
                                     unsigned NumBytes) {
  unsigned TargetAlign =
      Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
  // Round up with a mask; this is only a correct round-up when TargetAlign
  // is a power of two.
  unsigned AlignMask = TargetAlign - 1;
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  return NumBytes;
}

/// LowerFormalArguments - Dispatch to the ABI-specific implementation:
/// 64-bit SVR4, 32-bit SVR4, or Darwin for every non-SVR4 subtarget.  All
/// arguments are forwarded unchanged.
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
    else
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
  } else {
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                       dl, DAG, InVals);
  }
}

SDValue
PPCTargetLowering::LowerFormalArguments_32SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Pick the register class matching the argument's value type.
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VSHRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual ones.
      // An i1 argument is copied out as i32 and truncated afterwards.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);

      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      unsigned ArgSize = VA.getLocVT().getStoreSize();
      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                      isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                   MachinePointerInfo(),
                                   false, false, false, 0));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    // With -disable-ppc-float-in-variadic no FP argument register is counted.
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
    if (DisablePPCFloatInVariadic)
      NumFPArgRegs = 0;

    // Record the index of the first unallocated GPR/FPR; LowerVASTART stores
    // these two values into the gpr/fpr bytes of the va_list.
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
                                                          NumGPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
                                                          NumFPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
2541 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 2542 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; 2543 2544 FuncInfo->setVarArgsStackOffset( 2545 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2546 CCInfo.getNextStackOffset(), true)); 2547 2548 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 2549 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2550 2551 // The fixed integer arguments of a variadic function are stored to the 2552 // VarArgsFrameIndex on the stack so that they may be loaded by deferencing 2553 // the result of va_next. 2554 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 2555 // Get an existing live-in vreg, or add a new one. 2556 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 2557 if (!VReg) 2558 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 2559 2560 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2561 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2562 MachinePointerInfo(), false, false, 0); 2563 MemOps.push_back(Store); 2564 // Increment the address by four for the next argument to store 2565 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2566 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2567 } 2568 2569 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 2570 // is set. 2571 // The double arguments are stored to the VarArgsFrameIndex 2572 // on the stack. 2573 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 2574 // Get an existing live-in vreg, or add a new one. 
2575 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 2576 if (!VReg) 2577 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 2578 2579 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 2580 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2581 MachinePointerInfo(), false, false, 0); 2582 MemOps.push_back(Store); 2583 // Increment the address by eight for the next argument to store 2584 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, 2585 PtrVT); 2586 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2587 } 2588 } 2589 2590 if (!MemOps.empty()) 2591 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 2592 2593 return Chain; 2594} 2595 2596// PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2597// value to MVT::i64 and then truncate to the correct register size. 2598SDValue 2599PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, 2600 SelectionDAG &DAG, SDValue ArgVal, 2601 SDLoc dl) const { 2602 if (Flags.isSExt()) 2603 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 2604 DAG.getValueType(ObjectVT)); 2605 else if (Flags.isZExt()) 2606 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 2607 DAG.getValueType(ObjectVT)); 2608 2609 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); 2610} 2611 2612SDValue 2613PPCTargetLowering::LowerFormalArguments_64SVR4( 2614 SDValue Chain, 2615 CallingConv::ID CallConv, bool isVarArg, 2616 const SmallVectorImpl<ISD::InputArg> 2617 &Ins, 2618 SDLoc dl, SelectionDAG &DAG, 2619 SmallVectorImpl<SDValue> &InVals) const { 2620 // TODO: add description of PPC stack frame format, or at least some docs. 
2621 // 2622 bool isELFv2ABI = Subtarget.isELFv2ABI(); 2623 bool isLittleEndian = Subtarget.isLittleEndian(); 2624 MachineFunction &MF = DAG.getMachineFunction(); 2625 MachineFrameInfo *MFI = MF.getFrameInfo(); 2626 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2627 2628 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2629 // Potential tail calls could cause overwriting of argument stack slots. 2630 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2631 (CallConv == CallingConv::Fast)); 2632 unsigned PtrByteSize = 8; 2633 2634 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, 2635 isELFv2ABI); 2636 2637 static const MCPhysReg GPR[] = { 2638 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2639 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2640 }; 2641 2642 static const MCPhysReg *FPR = GetFPR(); 2643 2644 static const MCPhysReg VR[] = { 2645 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2646 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2647 }; 2648 static const MCPhysReg VSRH[] = { 2649 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 2650 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 2651 }; 2652 2653 const unsigned Num_GPR_Regs = array_lengthof(GPR); 2654 const unsigned Num_FPR_Regs = 13; 2655 const unsigned Num_VR_Regs = array_lengthof(VR); 2656 2657 // Do a first pass over the arguments to determine whether the ABI 2658 // guarantees that our caller has allocated the parameter save area 2659 // on its stack frame. In the ELFv1 ABI, this is always the case; 2660 // in the ELFv2 ABI, it is true if this is a vararg function or if 2661 // any parameter is located in a stack slot. 
2662 2663 bool HasParameterArea = !isELFv2ABI || isVarArg; 2664 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; 2665 unsigned NumBytes = LinkageSize; 2666 unsigned AvailableFPRs = Num_FPR_Regs; 2667 unsigned AvailableVRs = Num_VR_Regs; 2668 for (unsigned i = 0, e = Ins.size(); i != e; ++i) 2669 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, 2670 PtrByteSize, LinkageSize, ParamAreaSize, 2671 NumBytes, AvailableFPRs, AvailableVRs)) 2672 HasParameterArea = true; 2673 2674 // Add DAG nodes to load the arguments or copy them out of registers. On 2675 // entry to a function on PPC, the arguments start after the linkage area, 2676 // although the first ones are often in registers. 2677 2678 unsigned ArgOffset = LinkageSize; 2679 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; 2680 SmallVector<SDValue, 8> MemOps; 2681 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2682 unsigned CurArgIdx = 0; 2683 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2684 SDValue ArgVal; 2685 bool needsLoad = false; 2686 EVT ObjectVT = Ins[ArgNo].VT; 2687 EVT OrigVT = Ins[ArgNo].ArgVT; 2688 unsigned ObjSize = ObjectVT.getStoreSize(); 2689 unsigned ArgSize = ObjSize; 2690 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
| 1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PPCISelLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "PPCISelLowering.h" 15#include "MCTargetDesc/PPCPredicates.h" 16#include "PPCCallingConv.h" 17#include "PPCMachineFunctionInfo.h" 18#include "PPCPerfectShuffle.h" 19#include "PPCTargetMachine.h" 20#include "PPCTargetObjectFile.h" 21#include "llvm/ADT/STLExtras.h" 22#include "llvm/ADT/StringSwitch.h" 23#include "llvm/ADT/Triple.h" 24#include "llvm/CodeGen/CallingConvLower.h" 25#include "llvm/CodeGen/MachineFrameInfo.h" 26#include "llvm/CodeGen/MachineFunction.h" 27#include "llvm/CodeGen/MachineInstrBuilder.h" 28#include "llvm/CodeGen/MachineLoopInfo.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/SelectionDAG.h" 31#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 32#include "llvm/IR/CallingConv.h" 33#include "llvm/IR/Constants.h" 34#include "llvm/IR/DerivedTypes.h" 35#include "llvm/IR/Function.h" 36#include "llvm/IR/Intrinsics.h" 37#include "llvm/Support/CommandLine.h" 38#include "llvm/Support/ErrorHandling.h" 39#include "llvm/Support/MathExtras.h" 40#include "llvm/Support/raw_ostream.h" 41#include "llvm/Target/TargetOptions.h" 42using namespace llvm; 43 44// FIXME: Remove this once soft-float is supported. 
45static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic", 46cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden); 47 48static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", 49cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); 50 51static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", 52cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); 53 54static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", 55cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); 56 57// FIXME: Remove this once the bug has been fixed! 58extern cl::opt<bool> ANDIGlueBug; 59 60PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM) 61 : TargetLowering(TM), 62 Subtarget(*TM.getSubtargetImpl()) { 63 // Use _setjmp/_longjmp instead of setjmp/longjmp. 64 setUseUnderscoreSetJmp(true); 65 setUseUnderscoreLongJmp(true); 66 67 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all 68 // arguments are at least 4/8 bytes aligned. 69 bool isPPC64 = Subtarget.isPPC64(); 70 setMinStackArgumentAlignment(isPPC64 ? 8:4); 71 72 // Set up the register classes. 73 addRegisterClass(MVT::i32, &PPC::GPRCRegClass); 74 addRegisterClass(MVT::f32, &PPC::F4RCRegClass); 75 addRegisterClass(MVT::f64, &PPC::F8RCRegClass); 76 77 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD 78 for (MVT VT : MVT::integer_valuetypes()) { 79 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 80 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); 81 } 82 83 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 84 85 // PowerPC has pre-inc load and store's. 
86 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); 87 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); 88 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); 89 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); 90 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); 91 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); 92 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); 93 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); 94 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); 95 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); 96 97 if (Subtarget.useCRBits()) { 98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 99 100 if (isPPC64 || Subtarget.hasFPCVT()) { 101 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); 102 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, 103 isPPC64 ? MVT::i64 : MVT::i32); 104 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); 105 AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, 106 isPPC64 ? MVT::i64 : MVT::i32); 107 } else { 108 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); 109 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); 110 } 111 112 // PowerPC does not support direct load / store of condition registers 113 setOperationAction(ISD::LOAD, MVT::i1, Custom); 114 setOperationAction(ISD::STORE, MVT::i1, Custom); 115 116 // FIXME: Remove this once the ANDI glue bug is fixed: 117 if (ANDIGlueBug) 118 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); 119 120 for (MVT VT : MVT::integer_valuetypes()) { 121 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 122 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 123 setTruncStoreAction(VT, MVT::i1, Expand); 124 } 125 126 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); 127 } 128 129 // This is used in the ppcf128->int sequence. Note it has different semantics 130 // from FP_ROUND: that rounds to nearest, this rounds to zero. 
131 setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); 132 133 // We do not currently implement these libm ops for PowerPC. 134 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); 135 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); 136 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); 137 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); 138 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); 139 setOperationAction(ISD::FREM, MVT::ppcf128, Expand); 140 141 // PowerPC has no SREM/UREM instructions 142 setOperationAction(ISD::SREM, MVT::i32, Expand); 143 setOperationAction(ISD::UREM, MVT::i32, Expand); 144 setOperationAction(ISD::SREM, MVT::i64, Expand); 145 setOperationAction(ISD::UREM, MVT::i64, Expand); 146 147 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 148 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 149 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 150 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 151 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 152 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 153 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 154 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 155 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 156 157 // We don't support sin/cos/sqrt/fmod/pow 158 setOperationAction(ISD::FSIN , MVT::f64, Expand); 159 setOperationAction(ISD::FCOS , MVT::f64, Expand); 160 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 161 setOperationAction(ISD::FREM , MVT::f64, Expand); 162 setOperationAction(ISD::FPOW , MVT::f64, Expand); 163 setOperationAction(ISD::FMA , MVT::f64, Legal); 164 setOperationAction(ISD::FSIN , MVT::f32, Expand); 165 setOperationAction(ISD::FCOS , MVT::f32, Expand); 166 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 167 setOperationAction(ISD::FREM , MVT::f32, Expand); 168 setOperationAction(ISD::FPOW , MVT::f32, Expand); 169 setOperationAction(ISD::FMA , MVT::f32, Legal); 170 171 
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 172 173 // If we're enabling GP optimizations, use hardware square root 174 if (!Subtarget.hasFSQRT() && 175 !(TM.Options.UnsafeFPMath && 176 Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) 177 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 178 179 if (!Subtarget.hasFSQRT() && 180 !(TM.Options.UnsafeFPMath && 181 Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) 182 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 183 184 if (Subtarget.hasFCPSGN()) { 185 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); 186 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); 187 } else { 188 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 189 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 190 } 191 192 if (Subtarget.hasFPRND()) { 193 setOperationAction(ISD::FFLOOR, MVT::f64, Legal); 194 setOperationAction(ISD::FCEIL, MVT::f64, Legal); 195 setOperationAction(ISD::FTRUNC, MVT::f64, Legal); 196 setOperationAction(ISD::FROUND, MVT::f64, Legal); 197 198 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 199 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 200 setOperationAction(ISD::FTRUNC, MVT::f32, Legal); 201 setOperationAction(ISD::FROUND, MVT::f32, Legal); 202 } 203 204 // PowerPC does not have BSWAP, CTPOP or CTTZ 205 setOperationAction(ISD::BSWAP, MVT::i32 , Expand); 206 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 207 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); 208 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); 209 setOperationAction(ISD::BSWAP, MVT::i64 , Expand); 210 setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 211 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); 212 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); 213 214 if (Subtarget.hasPOPCNTD()) { 215 setOperationAction(ISD::CTPOP, MVT::i32 , Legal); 216 setOperationAction(ISD::CTPOP, MVT::i64 , Legal); 217 } else { 218 setOperationAction(ISD::CTPOP, MVT::i32 , Expand); 219 
setOperationAction(ISD::CTPOP, MVT::i64 , Expand); 220 } 221 222 // PowerPC does not have ROTR 223 setOperationAction(ISD::ROTR, MVT::i32 , Expand); 224 setOperationAction(ISD::ROTR, MVT::i64 , Expand); 225 226 if (!Subtarget.useCRBits()) { 227 // PowerPC does not have Select 228 setOperationAction(ISD::SELECT, MVT::i32, Expand); 229 setOperationAction(ISD::SELECT, MVT::i64, Expand); 230 setOperationAction(ISD::SELECT, MVT::f32, Expand); 231 setOperationAction(ISD::SELECT, MVT::f64, Expand); 232 } 233 234 // PowerPC wants to turn select_cc of FP into fsel when possible. 235 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 236 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 237 238 // PowerPC wants to optimize integer setcc a bit 239 if (!Subtarget.useCRBits()) 240 setOperationAction(ISD::SETCC, MVT::i32, Custom); 241 242 // PowerPC does not have BRCOND which requires SetCC 243 if (!Subtarget.useCRBits()) 244 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 245 246 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 247 248 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 249 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 250 251 // PowerPC does not have [U|S]INT_TO_FP 252 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 253 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 254 255 setOperationAction(ISD::BITCAST, MVT::f32, Expand); 256 setOperationAction(ISD::BITCAST, MVT::i32, Expand); 257 setOperationAction(ISD::BITCAST, MVT::i64, Expand); 258 setOperationAction(ISD::BITCAST, MVT::f64, Expand); 259 260 // We cannot sextinreg(i1). Expand to shifts. 261 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 262 263 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support 264 // SjLj exception handling but a light-weight setjmp/longjmp replacement to 265 // support continuation, user-level threading, and etc.. 
As a result, no 266 // other SjLj exception interfaces are implemented and please don't build 267 // your own exception handling based on them. 268 // LLVM/Clang supports zero-cost DWARF exception handling. 269 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 270 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 271 272 // We want to legalize GlobalAddress and ConstantPool nodes into the 273 // appropriate instructions to materialize the address. 274 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 275 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 276 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 277 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 278 setOperationAction(ISD::JumpTable, MVT::i32, Custom); 279 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 280 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 281 setOperationAction(ISD::BlockAddress, MVT::i64, Custom); 282 setOperationAction(ISD::ConstantPool, MVT::i64, Custom); 283 setOperationAction(ISD::JumpTable, MVT::i64, Custom); 284 285 // TRAP is legal. 286 setOperationAction(ISD::TRAP, MVT::Other, Legal); 287 288 // TRAMPOLINE is custom lowered. 289 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); 290 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); 291 292 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 293 setOperationAction(ISD::VASTART , MVT::Other, Custom); 294 295 if (Subtarget.isSVR4ABI()) { 296 if (isPPC64) { 297 // VAARG always uses double-word chunks, so promote anything smaller. 
298 setOperationAction(ISD::VAARG, MVT::i1, Promote); 299 AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); 300 setOperationAction(ISD::VAARG, MVT::i8, Promote); 301 AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); 302 setOperationAction(ISD::VAARG, MVT::i16, Promote); 303 AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); 304 setOperationAction(ISD::VAARG, MVT::i32, Promote); 305 AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); 306 setOperationAction(ISD::VAARG, MVT::Other, Expand); 307 } else { 308 // VAARG is custom lowered with the 32-bit SVR4 ABI. 309 setOperationAction(ISD::VAARG, MVT::Other, Custom); 310 setOperationAction(ISD::VAARG, MVT::i64, Custom); 311 } 312 } else 313 setOperationAction(ISD::VAARG, MVT::Other, Expand); 314 315 if (Subtarget.isSVR4ABI() && !isPPC64) 316 // VACOPY is custom lowered with the 32-bit SVR4 ABI. 317 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 318 else 319 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 320 321 // Use the default implementation. 322 setOperationAction(ISD::VAEND , MVT::Other, Expand); 323 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 324 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 325 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 326 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 327 328 // We want to custom lower some of our intrinsics. 329 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 330 331 // To handle counter-based loop conditions. 332 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); 333 334 // Comparisons that require checking two conditions. 
335 setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 336 setCondCodeAction(ISD::SETULT, MVT::f64, Expand); 337 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 338 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 339 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 340 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); 341 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 342 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 343 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); 344 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); 345 setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 346 setCondCodeAction(ISD::SETONE, MVT::f64, Expand); 347 348 if (Subtarget.has64BitSupport()) { 349 // They also have instructions for converting between i64 and fp. 350 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 351 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 352 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 353 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 354 // This is just the low 32 bits of a (signed) fp->i64 conversion. 355 // We cannot do this with Promote because i64 is not a legal type. 356 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 357 358 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) 359 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 360 } else { 361 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 362 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 363 } 364 365 // With the instructions enabled under FPCVT, we can do everything. 
366 if (Subtarget.hasFPCVT()) { 367 if (Subtarget.has64BitSupport()) { 368 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 369 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 370 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 371 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 372 } 373 374 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 375 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 376 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 377 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 378 } 379 380 if (Subtarget.use64BitRegs()) { 381 // 64-bit PowerPC implementations can support i64 types directly 382 addRegisterClass(MVT::i64, &PPC::G8RCRegClass); 383 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 384 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 385 // 64-bit PowerPC wants to expand i128 shifts itself. 386 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); 387 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); 388 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); 389 } else { 390 // 32-bit PowerPC wants to expand i64 shifts itself. 391 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 392 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 393 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 394 } 395 396 if (Subtarget.hasAltivec()) { 397 // First set operation action for all vector types to expand. Then we 398 // will selectively turn on ones that can be effectively codegen'd. 399 for (MVT VT : MVT::vector_valuetypes()) { 400 // add/sub are legal for all supported vector VT's. 401 setOperationAction(ISD::ADD , VT, Legal); 402 setOperationAction(ISD::SUB , VT, Legal); 403 404 // We promote all shuffles to v16i8. 405 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); 406 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); 407 408 // We promote all non-typed operations to v4i32. 
409 setOperationAction(ISD::AND , VT, Promote); 410 AddPromotedToType (ISD::AND , VT, MVT::v4i32); 411 setOperationAction(ISD::OR , VT, Promote); 412 AddPromotedToType (ISD::OR , VT, MVT::v4i32); 413 setOperationAction(ISD::XOR , VT, Promote); 414 AddPromotedToType (ISD::XOR , VT, MVT::v4i32); 415 setOperationAction(ISD::LOAD , VT, Promote); 416 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); 417 setOperationAction(ISD::SELECT, VT, Promote); 418 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); 419 setOperationAction(ISD::STORE, VT, Promote); 420 AddPromotedToType (ISD::STORE, VT, MVT::v4i32); 421 422 // No other operations are legal. 423 setOperationAction(ISD::MUL , VT, Expand); 424 setOperationAction(ISD::SDIV, VT, Expand); 425 setOperationAction(ISD::SREM, VT, Expand); 426 setOperationAction(ISD::UDIV, VT, Expand); 427 setOperationAction(ISD::UREM, VT, Expand); 428 setOperationAction(ISD::FDIV, VT, Expand); 429 setOperationAction(ISD::FREM, VT, Expand); 430 setOperationAction(ISD::FNEG, VT, Expand); 431 setOperationAction(ISD::FSQRT, VT, Expand); 432 setOperationAction(ISD::FLOG, VT, Expand); 433 setOperationAction(ISD::FLOG10, VT, Expand); 434 setOperationAction(ISD::FLOG2, VT, Expand); 435 setOperationAction(ISD::FEXP, VT, Expand); 436 setOperationAction(ISD::FEXP2, VT, Expand); 437 setOperationAction(ISD::FSIN, VT, Expand); 438 setOperationAction(ISD::FCOS, VT, Expand); 439 setOperationAction(ISD::FABS, VT, Expand); 440 setOperationAction(ISD::FPOWI, VT, Expand); 441 setOperationAction(ISD::FFLOOR, VT, Expand); 442 setOperationAction(ISD::FCEIL, VT, Expand); 443 setOperationAction(ISD::FTRUNC, VT, Expand); 444 setOperationAction(ISD::FRINT, VT, Expand); 445 setOperationAction(ISD::FNEARBYINT, VT, Expand); 446 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); 447 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 448 setOperationAction(ISD::BUILD_VECTOR, VT, Expand); 449 setOperationAction(ISD::MULHU, VT, Expand); 450 setOperationAction(ISD::MULHS, 
VT, Expand); 451 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 452 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 453 setOperationAction(ISD::UDIVREM, VT, Expand); 454 setOperationAction(ISD::SDIVREM, VT, Expand); 455 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); 456 setOperationAction(ISD::FPOW, VT, Expand); 457 setOperationAction(ISD::BSWAP, VT, Expand); 458 setOperationAction(ISD::CTPOP, VT, Expand); 459 setOperationAction(ISD::CTLZ, VT, Expand); 460 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); 461 setOperationAction(ISD::CTTZ, VT, Expand); 462 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); 463 setOperationAction(ISD::VSELECT, VT, Expand); 464 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 465 466 for (MVT InnerVT : MVT::vector_valuetypes()) { 467 setTruncStoreAction(VT, InnerVT, Expand); 468 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 469 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 470 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 471 } 472 } 473 474 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 475 // with merges, splats, etc. 476 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 477 478 setOperationAction(ISD::AND , MVT::v4i32, Legal); 479 setOperationAction(ISD::OR , MVT::v4i32, Legal); 480 setOperationAction(ISD::XOR , MVT::v4i32, Legal); 481 setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 482 setOperationAction(ISD::SELECT, MVT::v4i32, 483 Subtarget.useCRBits() ? 
Legal : Expand); 484 setOperationAction(ISD::STORE , MVT::v4i32, Legal); 485 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 486 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 487 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 488 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 489 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 490 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 491 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 492 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 493 494 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); 495 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); 496 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); 497 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); 498 499 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 500 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 501 502 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { 503 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 504 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 505 } 506 507 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 508 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 509 setOperationAction(ISD::MUL, MVT::v16i8, Custom); 510 511 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 512 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 513 514 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 515 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 516 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 517 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 518 519 // Altivec does not contain unordered floating-point compare instructions 520 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); 521 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); 522 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); 523 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); 524 525 if (Subtarget.hasVSX()) { 526 setOperationAction(ISD::SCALAR_TO_VECTOR, 
MVT::v2f64, Legal); 527 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 528 529 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 530 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 531 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 532 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 533 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 534 535 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 536 537 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 538 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 539 540 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 541 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 542 543 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); 544 setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); 545 setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); 546 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 547 setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); 548 549 // Share the Altivec comparison restrictions. 550 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); 551 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); 552 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); 553 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); 554 555 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 556 setOperationAction(ISD::STORE, MVT::v2f64, Legal); 557 558 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); 559 560 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); 561 562 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); 563 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); 564 565 // VSX v2i64 only supports non-arithmetic operations. 
566 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 567 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 568 569 setOperationAction(ISD::SHL, MVT::v2i64, Expand); 570 setOperationAction(ISD::SRA, MVT::v2i64, Expand); 571 setOperationAction(ISD::SRL, MVT::v2i64, Expand); 572 573 setOperationAction(ISD::SETCC, MVT::v2i64, Custom); 574 575 setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 576 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); 577 setOperationAction(ISD::STORE, MVT::v2i64, Promote); 578 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); 579 580 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); 581 582 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 583 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 584 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 585 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 586 587 // Vector operation legalization checks the result type of 588 // SIGN_EXTEND_INREG, overall legalization checks the inner type. 589 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); 590 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); 591 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); 592 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); 593 594 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); 595 } 596 } 597 598 if (Subtarget.has64BitSupport()) 599 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 600 601 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); 602 603 if (!isPPC64) { 604 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); 605 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); 606 } 607 608 setBooleanContents(ZeroOrOneBooleanContent); 609 // Altivec instructions set fields to all zeros or all ones. 610 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 611 612 if (!isPPC64) { 613 // These libcalls are not available in 32-bit. 
614 setLibcallName(RTLIB::SHL_I128, nullptr); 615 setLibcallName(RTLIB::SRL_I128, nullptr); 616 setLibcallName(RTLIB::SRA_I128, nullptr); 617 } 618 619 if (isPPC64) { 620 setStackPointerRegisterToSaveRestore(PPC::X1); 621 setExceptionPointerRegister(PPC::X3); 622 setExceptionSelectorRegister(PPC::X4); 623 } else { 624 setStackPointerRegisterToSaveRestore(PPC::R1); 625 setExceptionPointerRegister(PPC::R3); 626 setExceptionSelectorRegister(PPC::R4); 627 } 628 629 // We have target-specific dag combine patterns for the following nodes: 630 setTargetDAGCombine(ISD::SINT_TO_FP); 631 if (Subtarget.hasFPCVT()) 632 setTargetDAGCombine(ISD::UINT_TO_FP); 633 setTargetDAGCombine(ISD::LOAD); 634 setTargetDAGCombine(ISD::STORE); 635 setTargetDAGCombine(ISD::BR_CC); 636 if (Subtarget.useCRBits()) 637 setTargetDAGCombine(ISD::BRCOND); 638 setTargetDAGCombine(ISD::BSWAP); 639 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 640 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 641 setTargetDAGCombine(ISD::INTRINSIC_VOID); 642 643 setTargetDAGCombine(ISD::SIGN_EXTEND); 644 setTargetDAGCombine(ISD::ZERO_EXTEND); 645 setTargetDAGCombine(ISD::ANY_EXTEND); 646 647 if (Subtarget.useCRBits()) { 648 setTargetDAGCombine(ISD::TRUNCATE); 649 setTargetDAGCombine(ISD::SETCC); 650 setTargetDAGCombine(ISD::SELECT_CC); 651 } 652 653 // Use reciprocal estimates. 654 if (TM.Options.UnsafeFPMath) { 655 setTargetDAGCombine(ISD::FDIV); 656 setTargetDAGCombine(ISD::FSQRT); 657 } 658 659 // Darwin long double math library functions have $LDBL128 appended. 
660 if (Subtarget.isDarwin()) { 661 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); 662 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); 663 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); 664 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); 665 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); 666 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); 667 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); 668 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); 669 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); 670 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); 671 } 672 673 // With 32 condition bits, we don't need to sink (and duplicate) compares 674 // aggressively in CodeGenPrep. 675 if (Subtarget.useCRBits()) 676 setHasMultipleConditionRegisters(); 677 678 setMinFunctionAlignment(2); 679 if (Subtarget.isDarwin()) 680 setPrefFunctionAlignment(4); 681 682 switch (Subtarget.getDarwinDirective()) { 683 default: break; 684 case PPC::DIR_970: 685 case PPC::DIR_A2: 686 case PPC::DIR_E500mc: 687 case PPC::DIR_E5500: 688 case PPC::DIR_PWR4: 689 case PPC::DIR_PWR5: 690 case PPC::DIR_PWR5X: 691 case PPC::DIR_PWR6: 692 case PPC::DIR_PWR6X: 693 case PPC::DIR_PWR7: 694 case PPC::DIR_PWR8: 695 setPrefFunctionAlignment(4); 696 setPrefLoopAlignment(4); 697 break; 698 } 699 700 setInsertFencesForAtomic(true); 701 702 if (Subtarget.enableMachineScheduler()) 703 setSchedulingPreference(Sched::Source); 704 else 705 setSchedulingPreference(Sched::Hybrid); 706 707 computeRegisterProperties(); 708 709 // The Freescale cores do better with aggressive inlining of memcpy and 710 // friends. GCC uses same threshold of 128 bytes (= 32 word stores). 
711 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || 712 Subtarget.getDarwinDirective() == PPC::DIR_E5500) { 713 MaxStoresPerMemset = 32; 714 MaxStoresPerMemsetOptSize = 16; 715 MaxStoresPerMemcpy = 32; 716 MaxStoresPerMemcpyOptSize = 8; 717 MaxStoresPerMemmove = 32; 718 MaxStoresPerMemmoveOptSize = 8; 719 } 720} 721 722/// getMaxByValAlign - Helper for getByValTypeAlignment to determine 723/// the desired ByVal argument alignment. 724static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, 725 unsigned MaxMaxAlign) { 726 if (MaxAlign == MaxMaxAlign) 727 return; 728 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { 729 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) 730 MaxAlign = 32; 731 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) 732 MaxAlign = 16; 733 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 734 unsigned EltAlign = 0; 735 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); 736 if (EltAlign > MaxAlign) 737 MaxAlign = EltAlign; 738 } else if (StructType *STy = dyn_cast<StructType>(Ty)) { 739 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 740 unsigned EltAlign = 0; 741 getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); 742 if (EltAlign > MaxAlign) 743 MaxAlign = EltAlign; 744 if (MaxAlign == MaxMaxAlign) 745 break; 746 } 747 } 748} 749 750/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 751/// function arguments in the caller parameter area. 752unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { 753 // Darwin passes everything on 4 byte boundary. 754 if (Subtarget.isDarwin()) 755 return 4; 756 757 // 16byte and wider vectors are passed on 16byte boundary. 758 // The rest is 8 on PPC64 and 4 on PPC32 boundary. 759 unsigned Align = Subtarget.isPPC64() ? 8 : 4; 760 if (Subtarget.hasAltivec() || Subtarget.hasQPX()) 761 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 
32 : 16); 762 return Align; 763} 764 765const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 766 switch (Opcode) { 767 default: return nullptr; 768 case PPCISD::FSEL: return "PPCISD::FSEL"; 769 case PPCISD::FCFID: return "PPCISD::FCFID"; 770 case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; 771 case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; 772 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; 773 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 774 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 775 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; 776 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; 777 case PPCISD::FRE: return "PPCISD::FRE"; 778 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; 779 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 780 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 781 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 782 case PPCISD::VPERM: return "PPCISD::VPERM"; 783 case PPCISD::CMPB: return "PPCISD::CMPB"; 784 case PPCISD::Hi: return "PPCISD::Hi"; 785 case PPCISD::Lo: return "PPCISD::Lo"; 786 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; 787 case PPCISD::LOAD: return "PPCISD::LOAD"; 788 case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC"; 789 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 790 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 791 case PPCISD::SRL: return "PPCISD::SRL"; 792 case PPCISD::SRA: return "PPCISD::SRA"; 793 case PPCISD::SHL: return "PPCISD::SHL"; 794 case PPCISD::CALL: return "PPCISD::CALL"; 795 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; 796 case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; 797 case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; 798 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 799 case PPCISD::BCTRL: return "PPCISD::BCTRL"; 800 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; 801 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 802 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; 803 case PPCISD::EH_SJLJ_SETJMP: return 
"PPCISD::EH_SJLJ_SETJMP"; 804 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; 805 case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; 806 case PPCISD::VCMP: return "PPCISD::VCMP"; 807 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 808 case PPCISD::LBRX: return "PPCISD::LBRX"; 809 case PPCISD::STBRX: return "PPCISD::STBRX"; 810 case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; 811 case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; 812 case PPCISD::LARX: return "PPCISD::LARX"; 813 case PPCISD::STCX: return "PPCISD::STCX"; 814 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 815 case PPCISD::BDNZ: return "PPCISD::BDNZ"; 816 case PPCISD::BDZ: return "PPCISD::BDZ"; 817 case PPCISD::MFFS: return "PPCISD::MFFS"; 818 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; 819 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; 820 case PPCISD::CR6SET: return "PPCISD::CR6SET"; 821 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; 822 case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; 823 case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; 824 case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; 825 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; 826 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; 827 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; 828 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; 829 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; 830 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; 831 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; 832 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; 833 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; 834 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 835 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 836 case PPCISD::SC: return "PPCISD::SC"; 837 } 838} 839 840EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { 841 if (!VT.isVector()) 842 return Subtarget.useCRBits() ? 
MVT::i1 : MVT::i32; 843 return VT.changeVectorElementTypeToInteger(); 844} 845 846bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { 847 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); 848 return true; 849} 850 851//===----------------------------------------------------------------------===// 852// Node matching predicates, for use by the tblgen matching code. 853//===----------------------------------------------------------------------===// 854 855/// isFloatingPointZero - Return true if this is 0.0 or -0.0. 856static bool isFloatingPointZero(SDValue Op) { 857 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 858 return CFP->getValueAPF().isZero(); 859 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 860 // Maybe this has already been legalized into the constant pool? 861 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 862 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 863 return CFP->getValueAPF().isZero(); 864 } 865 return false; 866} 867 868/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 869/// true if Op is undef or if it matches the specified value. 870static bool isConstantOrUndef(int Op, int Val) { 871 return Op < 0 || Op == Val; 872} 873 874/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 875/// VPKUHUM instruction. 876/// The ShuffleKind distinguishes between big-endian operations with 877/// two different inputs (0), either-endian operations with two identical 878/// inputs (1), and little-endian operantion with two different inputs (2). 879/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
880bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 881 SelectionDAG &DAG) { 882 bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian(); 883 if (ShuffleKind == 0) { 884 if (IsLE) 885 return false; 886 for (unsigned i = 0; i != 16; ++i) 887 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 888 return false; 889 } else if (ShuffleKind == 2) { 890 if (!IsLE) 891 return false; 892 for (unsigned i = 0; i != 16; ++i) 893 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 894 return false; 895 } else if (ShuffleKind == 1) { 896 unsigned j = IsLE ? 0 : 1; 897 for (unsigned i = 0; i != 8; ++i) 898 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 899 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 900 return false; 901 } 902 return true; 903} 904 905/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 906/// VPKUWUM instruction. 907/// The ShuffleKind distinguishes between big-endian operations with 908/// two different inputs (0), either-endian operations with two identical 909/// inputs (1), and little-endian operantion with two different inputs (2). 910/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 911bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 912 SelectionDAG &DAG) { 913 bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian(); 914 if (ShuffleKind == 0) { 915 if (IsLE) 916 return false; 917 for (unsigned i = 0; i != 16; i += 2) 918 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 919 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 920 return false; 921 } else if (ShuffleKind == 2) { 922 if (!IsLE) 923 return false; 924 for (unsigned i = 0; i != 16; i += 2) 925 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 926 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) 927 return false; 928 } else if (ShuffleKind == 1) { 929 unsigned j = IsLE ? 
0 : 2; 930 for (unsigned i = 0; i != 8; i += 2) 931 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 932 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 933 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 934 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) 935 return false; 936 } 937 return true; 938} 939 940/// isVMerge - Common function, used to match vmrg* shuffles. 941/// 942static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, 943 unsigned LHSStart, unsigned RHSStart) { 944 if (N->getValueType(0) != MVT::v16i8) 945 return false; 946 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 947 "Unsupported merge size!"); 948 949 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 950 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 951 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), 952 LHSStart+j+i*UnitSize) || 953 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), 954 RHSStart+j+i*UnitSize)) 955 return false; 956 } 957 return true; 958} 959 960/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 961/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). 962/// The ShuffleKind distinguishes between big-endian merges with two 963/// different inputs (0), either-endian merges with two identical inputs (1), 964/// and little-endian merges with two different inputs (2). For the latter, 965/// the input operands are swapped (see PPCInstrAltivec.td). 
966bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 967 unsigned ShuffleKind, SelectionDAG &DAG) { 968 if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) { 969 if (ShuffleKind == 1) // unary 970 return isVMerge(N, UnitSize, 0, 0); 971 else if (ShuffleKind == 2) // swapped 972 return isVMerge(N, UnitSize, 0, 16); 973 else 974 return false; 975 } else { 976 if (ShuffleKind == 1) // unary 977 return isVMerge(N, UnitSize, 8, 8); 978 else if (ShuffleKind == 0) // normal 979 return isVMerge(N, UnitSize, 8, 24); 980 else 981 return false; 982 } 983} 984 985/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 986/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). 987/// The ShuffleKind distinguishes between big-endian merges with two 988/// different inputs (0), either-endian merges with two identical inputs (1), 989/// and little-endian merges with two different inputs (2). For the latter, 990/// the input operands are swapped (see PPCInstrAltivec.td). 991bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 992 unsigned ShuffleKind, SelectionDAG &DAG) { 993 if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) { 994 if (ShuffleKind == 1) // unary 995 return isVMerge(N, UnitSize, 8, 8); 996 else if (ShuffleKind == 2) // swapped 997 return isVMerge(N, UnitSize, 8, 24); 998 else 999 return false; 1000 } else { 1001 if (ShuffleKind == 1) // unary 1002 return isVMerge(N, UnitSize, 0, 0); 1003 else if (ShuffleKind == 0) // normal 1004 return isVMerge(N, UnitSize, 0, 16); 1005 else 1006 return false; 1007 } 1008} 1009 1010 1011/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 1012/// amount, otherwise return -1. 1013/// The ShuffleKind distinguishes between big-endian operations with two 1014/// different inputs (0), either-endian operations with two identical inputs 1015/// (1), and little-endian operations with two different inputs (2). 
For the 1016/// latter, the input operands are swapped (see PPCInstrAltivec.td). 1017int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, 1018 SelectionDAG &DAG) { 1019 if (N->getValueType(0) != MVT::v16i8) 1020 return -1; 1021 1022 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1023 1024 // Find the first non-undef value in the shuffle mask. 1025 unsigned i; 1026 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) 1027 /*search*/; 1028 1029 if (i == 16) return -1; // all undef. 1030 1031 // Otherwise, check to see if the rest of the elements are consecutively 1032 // numbered from this value. 1033 unsigned ShiftAmt = SVOp->getMaskElt(i); 1034 if (ShiftAmt < i) return -1; 1035 1036 ShiftAmt -= i; 1037 bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()-> 1038 isLittleEndian(); 1039 1040 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { 1041 // Check the rest of the elements to see if they are consecutive. 1042 for (++i; i != 16; ++i) 1043 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1044 return -1; 1045 } else if (ShuffleKind == 1) { 1046 // Check the rest of the elements to see if they are consecutive. 1047 for (++i; i != 16; ++i) 1048 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 1049 return -1; 1050 } else 1051 return -1; 1052 1053 if (ShuffleKind == 2 && isLE) 1054 ShiftAmt = 16 - ShiftAmt; 1055 1056 return ShiftAmt; 1057} 1058 1059/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 1060/// specifies a splat of a single element that is suitable for input to 1061/// VSPLTB/VSPLTH/VSPLTW. 1062bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 1063 assert(N->getValueType(0) == MVT::v16i8 && 1064 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 1065 1066 // This is a splat operation if each element of the permute is the same, and 1067 // if the value doesn't reference the second vector. 
1068 unsigned ElementBase = N->getMaskElt(0); 1069 1070 // FIXME: Handle UNDEF elements too! 1071 if (ElementBase >= 16) 1072 return false; 1073 1074 // Check that the indices are consecutive, in the case of a multi-byte element 1075 // splatted with a v16i8 mask. 1076 for (unsigned i = 1; i != EltSize; ++i) 1077 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 1078 return false; 1079 1080 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 1081 if (N->getMaskElt(i) < 0) continue; 1082 for (unsigned j = 0; j != EltSize; ++j) 1083 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 1084 return false; 1085 } 1086 return true; 1087} 1088 1089/// isAllNegativeZeroVector - Returns true if all elements of build_vector 1090/// are -0.0. 1091bool PPC::isAllNegativeZeroVector(SDNode *N) { 1092 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); 1093 1094 APInt APVal, APUndef; 1095 unsigned BitSize; 1096 bool HasAnyUndefs; 1097 1098 if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) 1099 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 1100 return CFP->getValueAPF().isNegZero(); 1101 1102 return false; 1103} 1104 1105/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 1106/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 1107unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 1108 SelectionDAG &DAG) { 1109 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1110 assert(isSplatShuffleMask(SVOp, EltSize)); 1111 if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) 1112 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 1113 else 1114 return SVOp->getMaskElt(0) / EltSize; 1115} 1116 1117/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 1118/// by using a vspltis[bhw] instruction of the specified element size, return 1119/// the constant being splatted. 
The ByteSize field indicates the number of 1120/// bytes of each element [124] -> [bhw]. 1121SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 1122 SDValue OpVal(nullptr, 0); 1123 1124 // If ByteSize of the splat is bigger than the element size of the 1125 // build_vector, then we have a case where we are checking for a splat where 1126 // multiple elements of the buildvector are folded together into a single 1127 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 1128 unsigned EltSize = 16/N->getNumOperands(); 1129 if (EltSize < ByteSize) { 1130 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 1131 SDValue UniquedVals[4]; 1132 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 1133 1134 // See if all of the elements in the buildvector agree across. 1135 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1136 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1137 // If the element isn't a constant, bail fully out. 1138 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 1139 1140 1141 if (!UniquedVals[i&(Multiple-1)].getNode()) 1142 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 1143 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 1144 return SDValue(); // no match. 1145 } 1146 1147 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 1148 // either constant or undef values that are identical for each chunk. See 1149 // if these chunks can form into a larger vspltis*. 1150 1151 // Check to see if all of the leading entries are either 0 or -1. If 1152 // neither, then this won't fit into the immediate field. 1153 bool LeadingZero = true; 1154 bool LeadingOnes = true; 1155 for (unsigned i = 0; i != Multiple-1; ++i) { 1156 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 
1157 1158 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 1159 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 1160 } 1161 // Finally, check the least significant entry. 1162 if (LeadingZero) { 1163 if (!UniquedVals[Multiple-1].getNode()) 1164 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 1165 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1166 if (Val < 16) 1167 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 1168 } 1169 if (LeadingOnes) { 1170 if (!UniquedVals[Multiple-1].getNode()) 1171 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 1172 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1173 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1174 return DAG.getTargetConstant(Val, MVT::i32); 1175 } 1176 1177 return SDValue(); 1178 } 1179 1180 // Check to see if this buildvec has a single non-undef value in its elements. 1181 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1182 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1183 if (!OpVal.getNode()) 1184 OpVal = N->getOperand(i); 1185 else if (OpVal != N->getOperand(i)) 1186 return SDValue(); 1187 } 1188 1189 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 1190 1191 unsigned ValSizeInBytes = EltSize; 1192 uint64_t Value = 0; 1193 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1194 Value = CN->getZExtValue(); 1195 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1196 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1197 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1198 } 1199 1200 // If the splat value is larger than the element value, then we can never do 1201 // this splat. The only case that we could fit the replicated bits into our 1202 // immediate field for would be zero, and we prefer to use vxor for it. 
1203 if (ValSizeInBytes < ByteSize) return SDValue(); 1204 1205 // If the element value is larger than the splat value, cut it in half and 1206 // check to see if the two halves are equal. Continue doing this until we 1207 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 1208 while (ValSizeInBytes > ByteSize) { 1209 ValSizeInBytes >>= 1; 1210 1211 // If the top half equals the bottom half, we're still ok. 1212 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 1213 (Value & ((1 << (8*ValSizeInBytes))-1))) 1214 return SDValue(); 1215 } 1216 1217 // Properly sign extend the value. 1218 int MaskVal = SignExtend32(Value, ByteSize * 8); 1219 1220 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 1221 if (MaskVal == 0) return SDValue(); 1222 1223 // Finally, if this value fits in a 5 bit sext field, return it 1224 if (SignExtend32<5>(MaskVal) == MaskVal) 1225 return DAG.getTargetConstant(MaskVal, MVT::i32); 1226 return SDValue(); 1227} 1228 1229//===----------------------------------------------------------------------===// 1230// Addressing Mode Selection 1231//===----------------------------------------------------------------------===// 1232 1233/// isIntS16Immediate - This method tests to see if the node is either a 32-bit 1234/// or 64-bit immediate, and if the value can be accurately represented as a 1235/// sign extension from a 16-bit value. If so, this returns true and the 1236/// immediate. 
1237static bool isIntS16Immediate(SDNode *N, short &Imm) { 1238 if (!isa<ConstantSDNode>(N)) 1239 return false; 1240 1241 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 1242 if (N->getValueType(0) == MVT::i32) 1243 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 1244 else 1245 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 1246} 1247static bool isIntS16Immediate(SDValue Op, short &Imm) { 1248 return isIntS16Immediate(Op.getNode(), Imm); 1249} 1250 1251 1252/// SelectAddressRegReg - Given the specified addressed, check to see if it 1253/// can be represented as an indexed [r+r] operation. Returns false if it 1254/// can be more efficiently represented with [r+imm]. 1255bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 1256 SDValue &Index, 1257 SelectionDAG &DAG) const { 1258 short imm = 0; 1259 if (N.getOpcode() == ISD::ADD) { 1260 if (isIntS16Immediate(N.getOperand(1), imm)) 1261 return false; // r+i 1262 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 1263 return false; // r+i 1264 1265 Base = N.getOperand(0); 1266 Index = N.getOperand(1); 1267 return true; 1268 } else if (N.getOpcode() == ISD::OR) { 1269 if (isIntS16Immediate(N.getOperand(1), imm)) 1270 return false; // r+i can fold it if we can. 1271 1272 // If this is an or of disjoint bitfields, we can codegen this as an add 1273 // (for better address arithmetic) if the LHS and RHS of the OR are provably 1274 // disjoint. 1275 APInt LHSKnownZero, LHSKnownOne; 1276 APInt RHSKnownZero, RHSKnownOne; 1277 DAG.computeKnownBits(N.getOperand(0), 1278 LHSKnownZero, LHSKnownOne); 1279 1280 if (LHSKnownZero.getBoolValue()) { 1281 DAG.computeKnownBits(N.getOperand(1), 1282 RHSKnownZero, RHSKnownOne); 1283 // If all of the bits are known zero on the LHS or RHS, the add won't 1284 // carry. 
1285 if (~(LHSKnownZero | RHSKnownZero) == 0) { 1286 Base = N.getOperand(0); 1287 Index = N.getOperand(1); 1288 return true; 1289 } 1290 } 1291 } 1292 1293 return false; 1294} 1295 1296// If we happen to be doing an i64 load or store into a stack slot that has 1297// less than a 4-byte alignment, then the frame-index elimination may need to 1298// use an indexed load or store instruction (because the offset may not be a 1299// multiple of 4). The extra register needed to hold the offset comes from the 1300// register scavenger, and it is possible that the scavenger will need to use 1301// an emergency spill slot. As a result, we need to make sure that a spill slot 1302// is allocated when doing an i64 load/store into a less-than-4-byte-aligned 1303// stack slot. 1304static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 1305 // FIXME: This does not handle the LWA case. 1306 if (VT != MVT::i64) 1307 return; 1308 1309 // NOTE: We'll exclude negative FIs here, which come from argument 1310 // lowering, because there are no known test cases triggering this problem 1311 // using packed structures (or similar). We can remove this exclusion if 1312 // we find such a test case. The reason why this is so test-case driven is 1313 // because this entire 'fixup' is only to prevent crashes (from the 1314 // register scavenger) on not-really-valid inputs. For example, if we have: 1315 // %a = alloca i1 1316 // %b = bitcast i1* %a to i64* 1317 // store i64* a, i64 b 1318 // then the store should really be marked as 'align 1', but is not. If it 1319 // were marked as 'align 1' then the indexed form would have been 1320 // instruction-selected initially, and the problem this 'fixup' is preventing 1321 // won't happen regardless. 
1322 if (FrameIdx < 0) 1323 return; 1324 1325 MachineFunction &MF = DAG.getMachineFunction(); 1326 MachineFrameInfo *MFI = MF.getFrameInfo(); 1327 1328 unsigned Align = MFI->getObjectAlignment(FrameIdx); 1329 if (Align >= 4) 1330 return; 1331 1332 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1333 FuncInfo->setHasNonRISpills(); 1334} 1335 1336/// Returns true if the address N can be represented by a base register plus 1337/// a signed 16-bit displacement [r+imm], and if it is not better 1338/// represented as reg+reg. If Aligned is true, only accept displacements 1339/// suitable for STD and friends, i.e. multiples of 4. 1340bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 1341 SDValue &Base, 1342 SelectionDAG &DAG, 1343 bool Aligned) const { 1344 // FIXME dl should come from parent load or store, not from address 1345 SDLoc dl(N); 1346 // If this can be more profitably realized as r+r, fail. 1347 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1348 return false; 1349 1350 if (N.getOpcode() == ISD::ADD) { 1351 short imm = 0; 1352 if (isIntS16Immediate(N.getOperand(1), imm) && 1353 (!Aligned || (imm & 3) == 0)) { 1354 Disp = DAG.getTargetConstant(imm, N.getValueType()); 1355 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1356 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1357 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1358 } else { 1359 Base = N.getOperand(0); 1360 } 1361 return true; // [r+i] 1362 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1363 // Match LOAD (ADD (X, Lo(G))). 1364 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1365 && "Cannot handle constant offsets yet!"); 1366 Disp = N.getOperand(1).getOperand(0); // The global address. 
1367 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1368 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 1369 Disp.getOpcode() == ISD::TargetConstantPool || 1370 Disp.getOpcode() == ISD::TargetJumpTable); 1371 Base = N.getOperand(0); 1372 return true; // [&g+r] 1373 } 1374 } else if (N.getOpcode() == ISD::OR) { 1375 short imm = 0; 1376 if (isIntS16Immediate(N.getOperand(1), imm) && 1377 (!Aligned || (imm & 3) == 0)) { 1378 // If this is an or of disjoint bitfields, we can codegen this as an add 1379 // (for better address arithmetic) if the LHS and RHS of the OR are 1380 // provably disjoint. 1381 APInt LHSKnownZero, LHSKnownOne; 1382 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1383 1384 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1385 // If all of the bits are known zero on the LHS or RHS, the add won't 1386 // carry. 1387 if (FrameIndexSDNode *FI = 1388 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1389 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1390 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1391 } else { 1392 Base = N.getOperand(0); 1393 } 1394 Disp = DAG.getTargetConstant(imm, N.getValueType()); 1395 return true; 1396 } 1397 } 1398 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1399 // Loading from a constant address. 1400 1401 // If this address fits entirely in a 16-bit sext immediate field, codegen 1402 // this as "d, 0" 1403 short Imm; 1404 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { 1405 Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); 1406 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1407 CN->getValueType(0)); 1408 return true; 1409 } 1410 1411 // Handle 32-bit sext immediates with LIS + addr mode. 
1412 if ((CN->getValueType(0) == MVT::i32 || 1413 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 1414 (!Aligned || (CN->getZExtValue() & 3) == 0)) { 1415 int Addr = (int)CN->getZExtValue(); 1416 1417 // Otherwise, break this down into an LIS + disp. 1418 Disp = DAG.getTargetConstant((short)Addr, MVT::i32); 1419 1420 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); 1421 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; 1422 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 1423 return true; 1424 } 1425 } 1426 1427 Disp = DAG.getTargetConstant(0, getPointerTy()); 1428 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { 1429 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1430 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1431 } else 1432 Base = N; 1433 return true; // [r+0] 1434} 1435 1436/// SelectAddressRegRegOnly - Given the specified addressed, force it to be 1437/// represented as an indexed [r+r] operation. 1438bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 1439 SDValue &Index, 1440 SelectionDAG &DAG) const { 1441 // Check to see if we can easily represent this as an [r+r] address. This 1442 // will fail if it thinks that the address is more profitably represented as 1443 // reg+imm, e.g. where imm = 0. 1444 if (SelectAddressRegReg(N, Base, Index, DAG)) 1445 return true; 1446 1447 // If the operand is an addition, always emit this as [r+r], since this is 1448 // better (for code size, and execution, as the memop does the add for free) 1449 // than emitting an explicit add. 1450 if (N.getOpcode() == ISD::ADD) { 1451 Base = N.getOperand(0); 1452 Index = N.getOperand(1); 1453 return true; 1454 } 1455 1456 // Otherwise, do it the hard way, using R0 as the base register. 1457 Base = DAG.getRegister(Subtarget.isPPC64() ? 
PPC::ZERO8 : PPC::ZERO, 1458 N.getValueType()); 1459 Index = N; 1460 return true; 1461} 1462 1463/// getPreIndexedAddressParts - returns true by value, base pointer and 1464/// offset pointer and addressing mode by reference if the node's address 1465/// can be legally represented as pre-indexed load / store address. 1466bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 1467 SDValue &Offset, 1468 ISD::MemIndexedMode &AM, 1469 SelectionDAG &DAG) const { 1470 if (DisablePPCPreinc) return false; 1471 1472 bool isLoad = true; 1473 SDValue Ptr; 1474 EVT VT; 1475 unsigned Alignment; 1476 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1477 Ptr = LD->getBasePtr(); 1478 VT = LD->getMemoryVT(); 1479 Alignment = LD->getAlignment(); 1480 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 1481 Ptr = ST->getBasePtr(); 1482 VT = ST->getMemoryVT(); 1483 Alignment = ST->getAlignment(); 1484 isLoad = false; 1485 } else 1486 return false; 1487 1488 // PowerPC doesn't have preinc load/store instructions for vectors. 1489 if (VT.isVector()) 1490 return false; 1491 1492 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { 1493 1494 // Common code will reject creating a pre-inc form if the base pointer 1495 // is a frame index, or if N is a store and the base pointer is either 1496 // the same as or a predecessor of the value being stored. Check for 1497 // those situations here, and try with swapped Base/Offset instead. 1498 bool Swap = false; 1499 1500 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 1501 Swap = true; 1502 else if (!isLoad) { 1503 SDValue Val = cast<StoreSDNode>(N)->getValue(); 1504 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 1505 Swap = true; 1506 } 1507 1508 if (Swap) 1509 std::swap(Base, Offset); 1510 1511 AM = ISD::PRE_INC; 1512 return true; 1513 } 1514 1515 // LDU/STU can only handle immediates that are a multiple of 4. 
1516 if (VT != MVT::i64) { 1517 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) 1518 return false; 1519 } else { 1520 // LDU/STU need an address with at least 4-byte alignment. 1521 if (Alignment < 4) 1522 return false; 1523 1524 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 1525 return false; 1526 } 1527 1528 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1529 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 1530 // sext i32 to i64 when addr mode is r+i. 1531 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 1532 LD->getExtensionType() == ISD::SEXTLOAD && 1533 isa<ConstantSDNode>(Offset)) 1534 return false; 1535 } 1536 1537 AM = ISD::PRE_INC; 1538 return true; 1539} 1540 1541//===----------------------------------------------------------------------===// 1542// LowerOperation implementation 1543//===----------------------------------------------------------------------===// 1544 1545/// GetLabelAccessInfo - Return true if we should reference labels using a 1546/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. 1547static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, 1548 unsigned &LoOpFlags, 1549 const GlobalValue *GV = nullptr) { 1550 HiOpFlags = PPCII::MO_HA; 1551 LoOpFlags = PPCII::MO_LO; 1552 1553 // Don't use the pic base if not in PIC relocation model. 1554 bool isPIC = TM.getRelocationModel() == Reloc::PIC_; 1555 1556 if (isPIC) { 1557 HiOpFlags |= PPCII::MO_PIC_FLAG; 1558 LoOpFlags |= PPCII::MO_PIC_FLAG; 1559 } 1560 1561 // If this is a reference to a global value that requires a non-lazy-ptr, make 1562 // sure that instruction lowering adds it. 
1563 if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) { 1564 HiOpFlags |= PPCII::MO_NLP_FLAG; 1565 LoOpFlags |= PPCII::MO_NLP_FLAG; 1566 1567 if (GV->hasHiddenVisibility()) { 1568 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1569 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1570 } 1571 } 1572 1573 return isPIC; 1574} 1575 1576static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 1577 SelectionDAG &DAG) { 1578 EVT PtrVT = HiPart.getValueType(); 1579 SDValue Zero = DAG.getConstant(0, PtrVT); 1580 SDLoc DL(HiPart); 1581 1582 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 1583 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 1584 1585 // With PIC, the first instruction is actually "GR+hi(&G)". 1586 if (isPIC) 1587 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 1588 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 1589 1590 // Generate non-pic code that has direct accesses to the constant pool. 1591 // The address of the global is just (hi(&g)+lo(&g)). 1592 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 1593} 1594 1595SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 1596 SelectionDAG &DAG) const { 1597 EVT PtrVT = Op.getValueType(); 1598 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1599 const Constant *C = CP->getConstVal(); 1600 1601 // 64-bit SVR4 ABI code is always position-independent. 1602 // The actual address of the GlobalValue is stored in the TOC. 
1603 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1604 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 1605 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, 1606 DAG.getRegister(PPC::X2, MVT::i64)); 1607 } 1608 1609 unsigned MOHiFlag, MOLoFlag; 1610 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1611 1612 if (isPIC && Subtarget.isSVR4ABI()) { 1613 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 1614 PPCII::MO_PIC_FLAG); 1615 SDLoc DL(CP); 1616 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, 1617 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); 1618 } 1619 1620 SDValue CPIHi = 1621 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 1622 SDValue CPILo = 1623 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 1624 return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); 1625} 1626 1627SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 1628 EVT PtrVT = Op.getValueType(); 1629 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1630 1631 // 64-bit SVR4 ABI code is always position-independent. 1632 // The actual address of the GlobalValue is stored in the TOC. 
1633 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1634 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 1635 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, 1636 DAG.getRegister(PPC::X2, MVT::i64)); 1637 } 1638 1639 unsigned MOHiFlag, MOLoFlag; 1640 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1641 1642 if (isPIC && Subtarget.isSVR4ABI()) { 1643 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 1644 PPCII::MO_PIC_FLAG); 1645 SDLoc DL(GA); 1646 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA, 1647 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); 1648 } 1649 1650 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 1651 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 1652 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); 1653} 1654 1655SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 1656 SelectionDAG &DAG) const { 1657 EVT PtrVT = Op.getValueType(); 1658 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 1659 const BlockAddress *BA = BASDN->getBlockAddress(); 1660 1661 // 64-bit SVR4 ABI code is always position-independent. 1662 // The actual BlockAddress is stored in the TOC. 1663 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1664 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 1665 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA, 1666 DAG.getRegister(PPC::X2, MVT::i64)); 1667 } 1668 1669 unsigned MOHiFlag, MOLoFlag; 1670 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1671 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 1672 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 1673 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); 1674} 1675 1676// Generate a call to __tls_get_addr for the given GOT entry Op. 
1677std::pair<SDValue,SDValue> 1678PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl, 1679 SelectionDAG &DAG) const { 1680 1681 Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); 1682 TargetLowering::ArgListTy Args; 1683 TargetLowering::ArgListEntry Entry; 1684 Entry.Node = Op; 1685 Entry.Ty = IntPtrTy; 1686 Args.push_back(Entry); 1687 1688 TargetLowering::CallLoweringInfo CLI(DAG); 1689 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) 1690 .setCallee(CallingConv::C, IntPtrTy, 1691 DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()), 1692 std::move(Args), 0); 1693 1694 return LowerCallTo(CLI); 1695} 1696 1697SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 1698 SelectionDAG &DAG) const { 1699 1700 // FIXME: TLS addresses currently use medium model code sequences, 1701 // which is the most useful form. Eventually support for small and 1702 // large models could be added if users need it, at the cost of 1703 // additional complexity. 1704 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1705 SDLoc dl(GA); 1706 const GlobalValue *GV = GA->getGlobal(); 1707 EVT PtrVT = getPointerTy(); 1708 bool is64bit = Subtarget.isPPC64(); 1709 const Module *M = DAG.getMachineFunction().getFunction()->getParent(); 1710 PICLevel::Level picLevel = M->getPICLevel(); 1711 1712 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 1713 1714 if (Model == TLSModel::LocalExec) { 1715 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1716 PPCII::MO_TPREL_HA); 1717 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1718 PPCII::MO_TPREL_LO); 1719 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 1720 is64bit ? 
MVT::i64 : MVT::i32); 1721 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 1722 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 1723 } 1724 1725 if (Model == TLSModel::InitialExec) { 1726 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1727 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1728 PPCII::MO_TLS); 1729 SDValue GOTPtr; 1730 if (is64bit) { 1731 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1732 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 1733 PtrVT, GOTReg, TGA); 1734 } else 1735 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 1736 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 1737 PtrVT, TGA, GOTPtr); 1738 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 1739 } 1740 1741 if (Model == TLSModel::GeneralDynamic) { 1742 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1743 PPCII::MO_TLSGD); 1744 SDValue GOTPtr; 1745 if (is64bit) { 1746 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1747 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 1748 GOTReg, TGA); 1749 } else { 1750 if (picLevel == PICLevel::Small) 1751 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 1752 else 1753 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 1754 } 1755 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, 1756 GOTPtr, TGA); 1757 std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG); 1758 return CallResult.first; 1759 } 1760 1761 if (Model == TLSModel::LocalDynamic) { 1762 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1763 PPCII::MO_TLSLD); 1764 SDValue GOTPtr; 1765 if (is64bit) { 1766 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1767 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 1768 GOTReg, TGA); 1769 } else { 1770 if (picLevel == PICLevel::Small) 1771 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 1772 else 1773 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 1774 } 
1775 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, 1776 GOTPtr, TGA); 1777 std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG); 1778 SDValue TLSAddr = CallResult.first; 1779 SDValue Chain = CallResult.second; 1780 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, 1781 Chain, TLSAddr, TGA); 1782 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 1783 } 1784 1785 llvm_unreachable("Unknown TLS model!"); 1786} 1787 1788SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 1789 SelectionDAG &DAG) const { 1790 EVT PtrVT = Op.getValueType(); 1791 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 1792 SDLoc DL(GSDN); 1793 const GlobalValue *GV = GSDN->getGlobal(); 1794 1795 // 64-bit SVR4 ABI code is always position-independent. 1796 // The actual address of the GlobalValue is stored in the TOC. 1797 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1798 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 1799 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, 1800 DAG.getRegister(PPC::X2, MVT::i64)); 1801 } 1802 1803 unsigned MOHiFlag, MOLoFlag; 1804 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV); 1805 1806 if (isPIC && Subtarget.isSVR4ABI()) { 1807 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 1808 GSDN->getOffset(), 1809 PPCII::MO_PIC_FLAG); 1810 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, 1811 DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32)); 1812 } 1813 1814 SDValue GAHi = 1815 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 1816 SDValue GALo = 1817 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 1818 1819 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); 1820 1821 // If the global reference is actually to a non-lazy-pointer, we have to do an 1822 // extra load to get the address of the global. 
1823 if (MOHiFlag & PPCII::MO_NLP_FLAG) 1824 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), 1825 false, false, false, 0); 1826 return Ptr; 1827} 1828 1829SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 1830 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 1831 SDLoc dl(Op); 1832 1833 if (Op.getValueType() == MVT::v2i64) { 1834 // When the operands themselves are v2i64 values, we need to do something 1835 // special because VSX has no underlying comparison operations for these. 1836 if (Op.getOperand(0).getValueType() == MVT::v2i64) { 1837 // Equality can be handled by casting to the legal type for Altivec 1838 // comparisons, everything else needs to be expanded. 1839 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 1840 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 1841 DAG.getSetCC(dl, MVT::v4i32, 1842 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 1843 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 1844 CC)); 1845 } 1846 1847 return SDValue(); 1848 } 1849 1850 // We handle most of these in the usual way. 1851 return Op; 1852 } 1853 1854 // If we're comparing for equality to zero, expose the fact that this is 1855 // implented as a ctlz/srl pair on ppc, so that the dag combiner can 1856 // fold the new nodes. 
1857 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1858 if (C->isNullValue() && CC == ISD::SETEQ) { 1859 EVT VT = Op.getOperand(0).getValueType(); 1860 SDValue Zext = Op.getOperand(0); 1861 if (VT.bitsLT(MVT::i32)) { 1862 VT = MVT::i32; 1863 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); 1864 } 1865 unsigned Log2b = Log2_32(VT.getSizeInBits()); 1866 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); 1867 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, 1868 DAG.getConstant(Log2b, MVT::i32)); 1869 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); 1870 } 1871 // Leave comparisons against 0 and -1 alone for now, since they're usually 1872 // optimized. FIXME: revisit this when we can custom lower all setcc 1873 // optimizations. 1874 if (C->isAllOnesValue() || C->isNullValue()) 1875 return SDValue(); 1876 } 1877 1878 // If we have an integer seteq/setne, turn it into a compare against zero 1879 // by xor'ing the rhs with the lhs, which is faster than setting a 1880 // condition register, reading it back out, and masking the correct bit. The 1881 // normal approach here uses sub to do this instead of xor. Using xor exposes 1882 // the result to other bit-twiddling opportunities. 
1883 EVT LHSVT = Op.getOperand(0).getValueType(); 1884 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 1885 EVT VT = Op.getValueType(); 1886 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 1887 Op.getOperand(1)); 1888 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); 1889 } 1890 return SDValue(); 1891} 1892 1893SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, 1894 const PPCSubtarget &Subtarget) const { 1895 SDNode *Node = Op.getNode(); 1896 EVT VT = Node->getValueType(0); 1897 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1898 SDValue InChain = Node->getOperand(0); 1899 SDValue VAListPtr = Node->getOperand(1); 1900 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 1901 SDLoc dl(Node); 1902 1903 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 1904 1905 // gpr_index 1906 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1907 VAListPtr, MachinePointerInfo(SV), MVT::i8, 1908 false, false, false, 0); 1909 InChain = GprIndex.getValue(1); 1910 1911 if (VT == MVT::i64) { 1912 // Check if GprIndex is even 1913 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 1914 DAG.getConstant(1, MVT::i32)); 1915 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 1916 DAG.getConstant(0, MVT::i32), ISD::SETNE); 1917 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 1918 DAG.getConstant(1, MVT::i32)); 1919 // Align GprIndex to be even if it isn't 1920 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 1921 GprIndex); 1922 } 1923 1924 // fpr index is 1 byte after gpr 1925 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1926 DAG.getConstant(1, MVT::i32)); 1927 1928 // fpr 1929 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1930 FprPtr, MachinePointerInfo(SV), MVT::i8, 1931 false, false, false, 0); 1932 InChain = FprIndex.getValue(1); 1933 1934 SDValue 
RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1935 DAG.getConstant(8, MVT::i32)); 1936 1937 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1938 DAG.getConstant(4, MVT::i32)); 1939 1940 // areas 1941 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, 1942 MachinePointerInfo(), false, false, 1943 false, 0); 1944 InChain = OverflowArea.getValue(1); 1945 1946 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, 1947 MachinePointerInfo(), false, false, 1948 false, 0); 1949 InChain = RegSaveArea.getValue(1); 1950 1951 // select overflow_area if index > 8 1952 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 1953 DAG.getConstant(8, MVT::i32), ISD::SETLT); 1954 1955 // adjustment constant gpr_index * 4/8 1956 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 1957 VT.isInteger() ? GprIndex : FprIndex, 1958 DAG.getConstant(VT.isInteger() ? 4 : 8, 1959 MVT::i32)); 1960 1961 // OurReg = RegSaveArea + RegConstant 1962 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 1963 RegConstant); 1964 1965 // Floating types are 32 bytes into RegSaveArea 1966 if (VT.isFloatingPoint()) 1967 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 1968 DAG.getConstant(32, MVT::i32)); 1969 1970 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 1971 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 1972 VT.isInteger() ? GprIndex : FprIndex, 1973 DAG.getConstant(VT == MVT::i64 ? 2 : 1, 1974 MVT::i32)); 1975 1976 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 1977 VT.isInteger() ? 
VAListPtr : FprPtr, 1978 MachinePointerInfo(SV), 1979 MVT::i8, false, false, 0); 1980 1981 // determine if we should load from reg_save_area or overflow_area 1982 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 1983 1984 // increase overflow_area by 4/8 if gpr/fpr > 8 1985 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 1986 DAG.getConstant(VT.isInteger() ? 4 : 8, 1987 MVT::i32)); 1988 1989 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 1990 OverflowAreaPlusN); 1991 1992 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 1993 OverflowAreaPtr, 1994 MachinePointerInfo(), 1995 MVT::i32, false, false, 0); 1996 1997 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 1998 false, false, false, 0); 1999} 2000 2001SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, 2002 const PPCSubtarget &Subtarget) const { 2003 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 2004 2005 // We have to copy the entire va_list struct: 2006 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 2007 return DAG.getMemcpy(Op.getOperand(0), Op, 2008 Op.getOperand(1), Op.getOperand(2), 2009 DAG.getConstant(12, MVT::i32), 8, false, true, 2010 MachinePointerInfo(), MachinePointerInfo()); 2011} 2012 2013SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 2014 SelectionDAG &DAG) const { 2015 return Op.getOperand(0); 2016} 2017 2018SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 2019 SelectionDAG &DAG) const { 2020 SDValue Chain = Op.getOperand(0); 2021 SDValue Trmp = Op.getOperand(1); // trampoline 2022 SDValue FPtr = Op.getOperand(2); // nested function 2023 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 2024 SDLoc dl(Op); 2025 2026 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2027 bool isPPC64 = (PtrVT == MVT::i64); 2028 Type *IntPtrTy = 2029 DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( 2030 
*DAG.getContext()); 2031 2032 TargetLowering::ArgListTy Args; 2033 TargetLowering::ArgListEntry Entry; 2034 2035 Entry.Ty = IntPtrTy; 2036 Entry.Node = Trmp; Args.push_back(Entry); 2037 2038 // TrampSize == (isPPC64 ? 48 : 40); 2039 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, 2040 isPPC64 ? MVT::i64 : MVT::i32); 2041 Args.push_back(Entry); 2042 2043 Entry.Node = FPtr; Args.push_back(Entry); 2044 Entry.Node = Nest; Args.push_back(Entry); 2045 2046 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 2047 TargetLowering::CallLoweringInfo CLI(DAG); 2048 CLI.setDebugLoc(dl).setChain(Chain) 2049 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 2050 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 2051 std::move(Args), 0); 2052 2053 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2054 return CallResult.second; 2055} 2056 2057SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 2058 const PPCSubtarget &Subtarget) const { 2059 MachineFunction &MF = DAG.getMachineFunction(); 2060 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2061 2062 SDLoc dl(Op); 2063 2064 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 2065 // vastart just stores the address of the VarArgsFrameIndex slot into the 2066 // memory location argument. 2067 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2068 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2069 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2070 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2071 MachinePointerInfo(SV), 2072 false, false, 0); 2073 } 2074 2075 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 2076 // We suppose the given va_list is already allocated. 2077 // 2078 // typedef struct { 2079 // char gpr; /* index into the array of 8 GPRs 2080 // * stored in the register save area 2081 // * gpr=0 corresponds to r3, 2082 // * gpr=1 to r4, etc. 
2083 // */ 2084 // char fpr; /* index into the array of 8 FPRs 2085 // * stored in the register save area 2086 // * fpr=0 corresponds to f1, 2087 // * fpr=1 to f2, etc. 2088 // */ 2089 // char *overflow_arg_area; 2090 // /* location on stack that holds 2091 // * the next overflow argument 2092 // */ 2093 // char *reg_save_area; 2094 // /* where r3:r10 and f1:f8 (if saved) 2095 // * are stored 2096 // */ 2097 // } va_list[1]; 2098 2099 2100 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); 2101 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); 2102 2103 2104 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2105 2106 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 2107 PtrVT); 2108 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2109 PtrVT); 2110 2111 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 2112 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); 2113 2114 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 2115 SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); 2116 2117 uint64_t FPROffset = 1; 2118 SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); 2119 2120 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2121 2122 // Store first byte : number of int regs 2123 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 2124 Op.getOperand(1), 2125 MachinePointerInfo(SV), 2126 MVT::i8, false, false, 0); 2127 uint64_t nextOffset = FPROffset; 2128 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 2129 ConstFPROffset); 2130 2131 // Store second byte : number of float regs 2132 SDValue secondStore = 2133 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 2134 MachinePointerInfo(SV, nextOffset), MVT::i8, 2135 false, false, 0); 2136 nextOffset += StackOffset; 2137 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 2138 2139 // Store second word : arguments given 
on stack 2140 SDValue thirdStore = 2141 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 2142 MachinePointerInfo(SV, nextOffset), 2143 false, false, 0); 2144 nextOffset += FrameOffset; 2145 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 2146 2147 // Store third word : arguments given in registers 2148 return DAG.getStore(thirdStore, dl, FR, nextPtr, 2149 MachinePointerInfo(SV, nextOffset), 2150 false, false, 0); 2151 2152} 2153 2154#include "PPCGenCallingConv.inc" 2155 2156// Function whose sole purpose is to kill compiler warnings 2157// stemming from unused functions included from PPCGenCallingConv.inc. 2158CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 2159 return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 2160} 2161 2162bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 2163 CCValAssign::LocInfo &LocInfo, 2164 ISD::ArgFlagsTy &ArgFlags, 2165 CCState &State) { 2166 return true; 2167} 2168 2169bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 2170 MVT &LocVT, 2171 CCValAssign::LocInfo &LocInfo, 2172 ISD::ArgFlagsTy &ArgFlags, 2173 CCState &State) { 2174 static const MCPhysReg ArgRegs[] = { 2175 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2176 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2177 }; 2178 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2179 2180 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 2181 2182 // Skip one register if the first unallocated register has an even register 2183 // number and there are still argument registers available which have not been 2184 // allocated yet. RegNum is actually an index into ArgRegs, which means we 2185 // need to skip a register if RegNum is odd. 
2186 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 2187 State.AllocateReg(ArgRegs[RegNum]); 2188 } 2189 2190 // Always return false here, as this function only makes sure that the first 2191 // unallocated register has an odd register number and does not actually 2192 // allocate a register for the current argument. 2193 return false; 2194} 2195 2196bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 2197 MVT &LocVT, 2198 CCValAssign::LocInfo &LocInfo, 2199 ISD::ArgFlagsTy &ArgFlags, 2200 CCState &State) { 2201 static const MCPhysReg ArgRegs[] = { 2202 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2203 PPC::F8 2204 }; 2205 2206 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2207 2208 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 2209 2210 // If there is only one Floating-point register left we need to put both f64 2211 // values of a split ppc_fp128 value on the stack. 2212 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 2213 State.AllocateReg(ArgRegs[RegNum]); 2214 } 2215 2216 // Always return false here, as this function only makes sure that the two f64 2217 // values a ppc_fp128 value is split into are both passed in registers or both 2218 // passed on the stack and does not actually allocate a register for the 2219 // current argument. 2220 return false; 2221} 2222 2223/// GetFPR - Get the set of FP registers that should be allocated for arguments, 2224/// on Darwin. 2225static const MCPhysReg *GetFPR() { 2226 static const MCPhysReg FPR[] = { 2227 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2228 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 2229 }; 2230 2231 return FPR; 2232} 2233 2234/// CalculateStackSlotSize - Calculates the size reserved for this argument on 2235/// the stack. 
2236static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2237 unsigned PtrByteSize) { 2238 unsigned ArgSize = ArgVT.getStoreSize(); 2239 if (Flags.isByVal()) 2240 ArgSize = Flags.getByValSize(); 2241 2242 // Round up to multiples of the pointer size, except for array members, 2243 // which are always packed. 2244 if (!Flags.isInConsecutiveRegs()) 2245 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2246 2247 return ArgSize; 2248} 2249 2250/// CalculateStackSlotAlignment - Calculates the alignment of this argument 2251/// on the stack. 2252static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2253 ISD::ArgFlagsTy Flags, 2254 unsigned PtrByteSize) { 2255 unsigned Align = PtrByteSize; 2256 2257 // Altivec parameters are padded to a 16 byte boundary. 2258 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2259 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2260 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) 2261 Align = 16; 2262 2263 // ByVal parameters are aligned as requested. 2264 if (Flags.isByVal()) { 2265 unsigned BVAlign = Flags.getByValAlign(); 2266 if (BVAlign > PtrByteSize) { 2267 if (BVAlign % PtrByteSize != 0) 2268 llvm_unreachable( 2269 "ByVal alignment is not a multiple of the pointer size"); 2270 2271 Align = BVAlign; 2272 } 2273 } 2274 2275 // Array members are always packed to their original alignment. 2276 if (Flags.isInConsecutiveRegs()) { 2277 // If the array member was split into multiple registers, the first 2278 // needs to be aligned to the size of the full type. (Except for 2279 // ppcf128, which is only aligned as its f64 components.) 2280 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2281 Align = OrigVT.getStoreSize(); 2282 else 2283 Align = ArgVT.getStoreSize(); 2284 } 2285 2286 return Align; 2287} 2288 2289/// CalculateStackSlotUsed - Return whether this argument will use its 2290/// stack slot (instead of being passed in registers). 
ArgOffset, 2291/// AvailableFPRs, and AvailableVRs must hold the current argument 2292/// position, and will be updated to account for this argument. 2293static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2294 ISD::ArgFlagsTy Flags, 2295 unsigned PtrByteSize, 2296 unsigned LinkageSize, 2297 unsigned ParamAreaSize, 2298 unsigned &ArgOffset, 2299 unsigned &AvailableFPRs, 2300 unsigned &AvailableVRs) { 2301 bool UseMemory = false; 2302 2303 // Respect alignment of argument on the stack. 2304 unsigned Align = 2305 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2306 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2307 // If there's no space left in the argument save area, we must 2308 // use memory (this check also catches zero-sized arguments). 2309 if (ArgOffset >= LinkageSize + ParamAreaSize) 2310 UseMemory = true; 2311 2312 // Allocate argument on the stack. 2313 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2314 if (Flags.isInConsecutiveRegsLast()) 2315 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2316 // If we overran the argument save area, we must use memory 2317 // (this check catches arguments passed partially in memory) 2318 if (ArgOffset > LinkageSize + ParamAreaSize) 2319 UseMemory = true; 2320 2321 // However, if the argument is actually passed in an FPR or a VR, 2322 // we don't use memory after all. 2323 if (!Flags.isByVal()) { 2324 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) 2325 if (AvailableFPRs > 0) { 2326 --AvailableFPRs; 2327 return false; 2328 } 2329 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2330 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2331 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) 2332 if (AvailableVRs > 0) { 2333 --AvailableVRs; 2334 return false; 2335 } 2336 } 2337 2338 return UseMemory; 2339} 2340 2341/// EnsureStackAlignment - Round stack frame size up from NumBytes to 2342/// ensure minimum alignment required for target. 
static unsigned EnsureStackAlignment(const TargetMachine &Target,
                                     unsigned NumBytes) {
  // The required alignment comes from the target's frame lowering.
  unsigned TargetAlign =
      Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
  // Round up with mask arithmetic.
  // NOTE(review): this only rounds correctly when TargetAlign is a power of
  // two -- presumably always the case for a stack alignment; confirm.
  unsigned AlignMask = TargetAlign - 1;
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  return NumBytes;
}

/// LowerFormalArguments - Dispatch the lowering of incoming arguments to the
/// ABI-specific implementation: 64-bit SVR4, 32-bit SVR4, or Darwin when the
/// subtarget does not use the SVR4 ABI.  All parameters are forwarded
/// unchanged and the callee's result is returned as is.
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
    else
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
  } else {
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                       dl, DAG, InVals);
  }
}

/// LowerFormalArguments_32SVR4 - Lower the incoming arguments of a function
/// that uses the 32-bit SVR4 ABI.
///
/// Argument locations are assigned with CC_PPC32_SVR4.  For every assigned
/// location one value is appended to InVals: a copy from a live-in register
/// for register locations, or a load from a fixed stack object for memory
/// locations.  A second pass with CC_PPC32_SVR4_ByVal determines how much
/// stack the by-value aggregates need; the aligned total is recorded as the
/// function's minimum reserved area.  For variadic functions the argument
/// registers are additionally spilled to a register save area and the
/// information needed to expand llvm.va_start is recorded in PPCFunctionInfo.
///
/// Returns the incoming Chain, or a TokenFactor over the register save area
/// stores when any were created.
SDValue
PPCTargetLowering::LowerFormalArguments_32SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Potential tail calls could cause overwriting of argument stack slots.
  // The argument slots are therefore only immutable unless guaranteed tail
  // call optimization is enabled and this is a fastcc function.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);

  // Materialize one value per assigned location, in location order.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Pick the register class of the live-in virtual register from the
      // value type.  f64 uses the VSX scalar class when VSX is available.
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VSHRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual ones.
      // An i1 argument is read out of its GPR as an i32 ...
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);

      // ... and truncated back to i1 afterwards.
      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // The fixed object is sized by the location type and placed at the
      // offset the calling convention assigned to it.
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                      isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                   MachinePointerInfo(),
                                   false, false, false, 0));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // Stores that spill argument registers to the register save area.
  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    // With -disable-ppc-float-in-variadic no FP register is considered at
    // all: none is counted, none gets room in the save area, none is stored.
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
    if (DisablePPCFloatInVariadic)
      NumFPArgRegs = 0;

    // Record how many GPRs/FPRs the fixed arguments consumed, i.e. the index
    // of the first register of each kind that is still unallocated.
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
                                                          NumGPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
                                                          NumFPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    // Fixed object at the first stack offset not used by the fixed arguments
    // assigned in the first pass.
    FuncInfo->setVarArgsStackOffset(
      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                             CCInfo.getNextStackOffset(), true));

    // The register save area itself: Depth bytes, 8-byte aligned.
    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    // Note that every GPR in GPArgRegs is stored, not only the unallocated
    // ones.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      // The store is chained on the chain result of the register copy.
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    // FIN continues where the GPR loop stopped, so the FP registers follow
    // the GPRs in the save area.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all register save area stores together so that they become part of
  // the returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
//
// ArgVal is the i64 value read from the register.  When Flags says the
// argument was sign- or zero-extended, an AssertSext/AssertZext node records
// that it was extended from ObjectVT; the result is ArgVal truncated to
// ObjectVT.
SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                                     SelectionDAG &DAG, SDValue ArgVal,
                                     SDLoc dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
2621 // 2622 bool isELFv2ABI = Subtarget.isELFv2ABI(); 2623 bool isLittleEndian = Subtarget.isLittleEndian(); 2624 MachineFunction &MF = DAG.getMachineFunction(); 2625 MachineFrameInfo *MFI = MF.getFrameInfo(); 2626 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2627 2628 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2629 // Potential tail calls could cause overwriting of argument stack slots. 2630 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2631 (CallConv == CallingConv::Fast)); 2632 unsigned PtrByteSize = 8; 2633 2634 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, 2635 isELFv2ABI); 2636 2637 static const MCPhysReg GPR[] = { 2638 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2639 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2640 }; 2641 2642 static const MCPhysReg *FPR = GetFPR(); 2643 2644 static const MCPhysReg VR[] = { 2645 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2646 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2647 }; 2648 static const MCPhysReg VSRH[] = { 2649 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 2650 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 2651 }; 2652 2653 const unsigned Num_GPR_Regs = array_lengthof(GPR); 2654 const unsigned Num_FPR_Regs = 13; 2655 const unsigned Num_VR_Regs = array_lengthof(VR); 2656 2657 // Do a first pass over the arguments to determine whether the ABI 2658 // guarantees that our caller has allocated the parameter save area 2659 // on its stack frame. In the ELFv1 ABI, this is always the case; 2660 // in the ELFv2 ABI, it is true if this is a vararg function or if 2661 // any parameter is located in a stack slot. 
2662 2663 bool HasParameterArea = !isELFv2ABI || isVarArg; 2664 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; 2665 unsigned NumBytes = LinkageSize; 2666 unsigned AvailableFPRs = Num_FPR_Regs; 2667 unsigned AvailableVRs = Num_VR_Regs; 2668 for (unsigned i = 0, e = Ins.size(); i != e; ++i) 2669 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, 2670 PtrByteSize, LinkageSize, ParamAreaSize, 2671 NumBytes, AvailableFPRs, AvailableVRs)) 2672 HasParameterArea = true; 2673 2674 // Add DAG nodes to load the arguments or copy them out of registers. On 2675 // entry to a function on PPC, the arguments start after the linkage area, 2676 // although the first ones are often in registers. 2677 2678 unsigned ArgOffset = LinkageSize; 2679 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; 2680 SmallVector<SDValue, 8> MemOps; 2681 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2682 unsigned CurArgIdx = 0; 2683 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2684 SDValue ArgVal; 2685 bool needsLoad = false; 2686 EVT ObjectVT = Ins[ArgNo].VT; 2687 EVT OrigVT = Ins[ArgNo].ArgVT; 2688 unsigned ObjSize = ObjectVT.getStoreSize(); 2689 unsigned ArgSize = ObjSize; 2690 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
|
3090 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3091 ObjSize = Flags.getByValSize(); 3092 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3093 // Objects of size 1 and 2 are right justified, everything else is 3094 // left justified. This means the memory address is adjusted forwards. 3095 if (ObjSize==1 || ObjSize==2) { 3096 CurArgOffset = CurArgOffset + (4 - ObjSize); 3097 } 3098 // The value of the object is its address. 3099 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); 3100 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3101 InVals.push_back(FIN); 3102 if (ObjSize==1 || ObjSize==2) { 3103 if (GPR_idx != Num_GPR_Regs) { 3104 unsigned VReg; 3105 if (isPPC64) 3106 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3107 else 3108 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3109 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3110 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 3111 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 3112 MachinePointerInfo(FuncArg), 3113 ObjType, false, false, 0); 3114 MemOps.push_back(Store); 3115 ++GPR_idx; 3116 } 3117 3118 ArgOffset += PtrByteSize; 3119 3120 continue; 3121 } 3122 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3123 // Store whatever pieces of the object are in registers 3124 // to memory. ArgOffset will be the address of the beginning 3125 // of the object. 
3126 if (GPR_idx != Num_GPR_Regs) { 3127 unsigned VReg; 3128 if (isPPC64) 3129 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3130 else 3131 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3132 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 3133 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3134 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3135 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3136 MachinePointerInfo(FuncArg, j), 3137 false, false, 0); 3138 MemOps.push_back(Store); 3139 ++GPR_idx; 3140 ArgOffset += PtrByteSize; 3141 } else { 3142 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 3143 break; 3144 } 3145 } 3146 continue; 3147 } 3148 3149 switch (ObjectVT.getSimpleVT().SimpleTy) { 3150 default: llvm_unreachable("Unhandled argument type!"); 3151 case MVT::i1: 3152 case MVT::i32: 3153 if (!isPPC64) { 3154 if (GPR_idx != Num_GPR_Regs) { 3155 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3156 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 3157 3158 if (ObjectVT == MVT::i1) 3159 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 3160 3161 ++GPR_idx; 3162 } else { 3163 needsLoad = true; 3164 ArgSize = PtrByteSize; 3165 } 3166 // All int arguments reserve stack space in the Darwin ABI. 3167 ArgOffset += PtrByteSize; 3168 break; 3169 } 3170 // FALLTHROUGH 3171 case MVT::i64: // PPC64 3172 if (GPR_idx != Num_GPR_Regs) { 3173 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3174 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3175 3176 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3177 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3178 // value to MVT::i64 and then truncate to the correct register size. 3179 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3180 3181 ++GPR_idx; 3182 } else { 3183 needsLoad = true; 3184 ArgSize = PtrByteSize; 3185 } 3186 // All int arguments reserve stack space in the Darwin ABI. 
3187 ArgOffset += 8; 3188 break; 3189 3190 case MVT::f32: 3191 case MVT::f64: 3192 // Every 4 bytes of argument space consumes one of the GPRs available for 3193 // argument passing. 3194 if (GPR_idx != Num_GPR_Regs) { 3195 ++GPR_idx; 3196 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 3197 ++GPR_idx; 3198 } 3199 if (FPR_idx != Num_FPR_Regs) { 3200 unsigned VReg; 3201 3202 if (ObjectVT == MVT::f32) 3203 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 3204 else 3205 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 3206 3207 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3208 ++FPR_idx; 3209 } else { 3210 needsLoad = true; 3211 } 3212 3213 // All FP arguments reserve stack space in the Darwin ABI. 3214 ArgOffset += isPPC64 ? 8 : ObjSize; 3215 break; 3216 case MVT::v4f32: 3217 case MVT::v4i32: 3218 case MVT::v8i16: 3219 case MVT::v16i8: 3220 // Note that vector arguments in registers don't reserve stack space, 3221 // except in varargs functions. 3222 if (VR_idx != Num_VR_Regs) { 3223 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3224 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3225 if (isVarArg) { 3226 while ((ArgOffset % 16) != 0) { 3227 ArgOffset += PtrByteSize; 3228 if (GPR_idx != Num_GPR_Regs) 3229 GPR_idx++; 3230 } 3231 ArgOffset += 16; 3232 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 3233 } 3234 ++VR_idx; 3235 } else { 3236 if (!isVarArg && !isPPC64) { 3237 // Vectors go after all the nonvectors. 3238 CurArgOffset = VecArgOffset; 3239 VecArgOffset += 16; 3240 } else { 3241 // Vectors are aligned. 3242 ArgOffset = ((ArgOffset+15)/16)*16; 3243 CurArgOffset = ArgOffset; 3244 ArgOffset += 16; 3245 } 3246 needsLoad = true; 3247 } 3248 break; 3249 } 3250 3251 // We need to load the argument to a virtual register if we determined above 3252 // that we ran out of physical registers of the appropriate type. 
3253 if (needsLoad) { 3254 int FI = MFI->CreateFixedObject(ObjSize, 3255 CurArgOffset + (ArgSize - ObjSize), 3256 isImmutable); 3257 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3258 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 3259 false, false, false, 0); 3260 } 3261 3262 InVals.push_back(ArgVal); 3263 } 3264 3265 // Allow for Altivec parameters at the end, if needed. 3266 if (nAltivecParamsAtEnd) { 3267 MinReservedArea = ((MinReservedArea+15)/16)*16; 3268 MinReservedArea += 16*nAltivecParamsAtEnd; 3269 } 3270 3271 // Area that is at least reserved in the caller of this function. 3272 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); 3273 3274 // Set the size that is at least reserved in caller of this function. Tail 3275 // call optimized functions' reserved stack space needs to be aligned so that 3276 // taking the difference between two stack areas will result in an aligned 3277 // stack. 3278 MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); 3279 FuncInfo->setMinReservedArea(MinReservedArea); 3280 3281 // If the function takes variable number of arguments, make a frame index for 3282 // the start of the first vararg value... for expansion of llvm.va_start. 3283 if (isVarArg) { 3284 int Depth = ArgOffset; 3285 3286 FuncInfo->setVarArgsFrameIndex( 3287 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 3288 Depth, true)); 3289 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3290 3291 // If this function is vararg, store any remaining integer argument regs 3292 // to their spots on the stack so that they may be loaded by deferencing the 3293 // result of va_next. 
3294 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 3295 unsigned VReg; 3296 3297 if (isPPC64) 3298 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3299 else 3300 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3301 3302 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3303 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3304 MachinePointerInfo(), false, false, 0); 3305 MemOps.push_back(Store); 3306 // Increment the address by four for the next argument to store 3307 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 3308 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3309 } 3310 } 3311 3312 if (!MemOps.empty()) 3313 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3314 3315 return Chain; 3316} 3317 3318/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 3319/// adjusted to accommodate the arguments for the tailcall. 3320static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 3321 unsigned ParamSize) { 3322 3323 if (!isTailCall) return 0; 3324 3325 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 3326 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 3327 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 3328 // Remember only if the new adjustement is bigger. 3329 if (SPDiff < FI->getTailCallSPDelta()) 3330 FI->setTailCallSPDelta(SPDiff); 3331 3332 return SPDiff; 3333} 3334 3335/// IsEligibleForTailCallOptimization - Check whether the call is eligible 3336/// for tail call optimization. Targets which want to do tail call 3337/// optimization should implement this function. 
3338bool 3339PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 3340 CallingConv::ID CalleeCC, 3341 bool isVarArg, 3342 const SmallVectorImpl<ISD::InputArg> &Ins, 3343 SelectionDAG& DAG) const { 3344 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 3345 return false; 3346 3347 // Variable argument functions are not supported. 3348 if (isVarArg) 3349 return false; 3350 3351 MachineFunction &MF = DAG.getMachineFunction(); 3352 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 3353 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 3354 // Functions containing by val parameters are not supported. 3355 for (unsigned i = 0; i != Ins.size(); i++) { 3356 ISD::ArgFlagsTy Flags = Ins[i].Flags; 3357 if (Flags.isByVal()) return false; 3358 } 3359 3360 // Non-PIC/GOT tail calls are supported. 3361 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 3362 return true; 3363 3364 // At the moment we can only do local tail calls (in same module, hidden 3365 // or protected) if we are generating PIC. 3366 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3367 return G->getGlobal()->hasHiddenVisibility() 3368 || G->getGlobal()->hasProtectedVisibility(); 3369 } 3370 3371 return false; 3372} 3373 3374/// isCallCompatibleAddress - Return the immediate to use if the specified 3375/// 32-bit value is representable in the immediate field of a BxA instruction. 3376static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 3377 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 3378 if (!C) return nullptr; 3379 3380 int Addr = C->getZExtValue(); 3381 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 3382 SignExtend32<26>(Addr) != Addr) 3383 return nullptr; // Top 6 bits have to be sext of immediate. 
3384 3385 return DAG.getConstant((int)C->getZExtValue() >> 2, 3386 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 3387} 3388 3389namespace { 3390 3391struct TailCallArgumentInfo { 3392 SDValue Arg; 3393 SDValue FrameIdxOp; 3394 int FrameIdx; 3395 3396 TailCallArgumentInfo() : FrameIdx(0) {} 3397}; 3398 3399} 3400 3401/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 3402static void 3403StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 3404 SDValue Chain, 3405 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 3406 SmallVectorImpl<SDValue> &MemOpChains, 3407 SDLoc dl) { 3408 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 3409 SDValue Arg = TailCallArgs[i].Arg; 3410 SDValue FIN = TailCallArgs[i].FrameIdxOp; 3411 int FI = TailCallArgs[i].FrameIdx; 3412 // Store relative to framepointer. 3413 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 3414 MachinePointerInfo::getFixedStack(FI), 3415 false, false, 0)); 3416 } 3417} 3418 3419/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 3420/// the appropriate stack slot for the tail call optimized function call. 3421static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 3422 MachineFunction &MF, 3423 SDValue Chain, 3424 SDValue OldRetAddr, 3425 SDValue OldFP, 3426 int SPDiff, 3427 bool isPPC64, 3428 bool isDarwinABI, 3429 SDLoc dl) { 3430 if (SPDiff) { 3431 // Calculate the new stack slot for the return address. 3432 int SlotSize = isPPC64 ? 8 : 4; 3433 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, 3434 isDarwinABI); 3435 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 3436 NewRetAddrLoc, true); 3437 EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; 3438 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 3439 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 3440 MachinePointerInfo::getFixedStack(NewRetAddr), 3441 false, false, 0); 3442 3443 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 3444 // slot as the FP is never overwritten. 3445 if (isDarwinABI) { 3446 int NewFPLoc = 3447 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); 3448 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 3449 true); 3450 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 3451 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 3452 MachinePointerInfo::getFixedStack(NewFPIdx), 3453 false, false, 0); 3454 } 3455 } 3456 return Chain; 3457} 3458 3459/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 3460/// the position of the argument. 3461static void 3462CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 3463 SDValue Arg, int SPDiff, unsigned ArgOffset, 3464 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 3465 int Offset = ArgOffset + SPDiff; 3466 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 3467 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 3468 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 3469 SDValue FIN = DAG.getFrameIndex(FI, VT); 3470 TailCallArgumentInfo Info; 3471 Info.Arg = Arg; 3472 Info.FrameIdxOp = FIN; 3473 Info.FrameIdx = FI; 3474 TailCallArguments.push_back(Info); 3475} 3476 3477/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 3478/// stack slot. Returns the chain as result and the loaded frame pointers in 3479/// LROpOut/FPOpout. Used when tail calling. 
3480SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 3481 int SPDiff, 3482 SDValue Chain, 3483 SDValue &LROpOut, 3484 SDValue &FPOpOut, 3485 bool isDarwinABI, 3486 SDLoc dl) const { 3487 if (SPDiff) { 3488 // Load the LR and FP stack slot for later adjusting. 3489 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 3490 LROpOut = getReturnAddrFrameIndex(DAG); 3491 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 3492 false, false, false, 0); 3493 Chain = SDValue(LROpOut.getNode(), 1); 3494 3495 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 3496 // slot as the FP is never overwritten. 3497 if (isDarwinABI) { 3498 FPOpOut = getFramePointerFrameIndex(DAG); 3499 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 3500 false, false, false, 0); 3501 Chain = SDValue(FPOpOut.getNode(), 1); 3502 } 3503 } 3504 return Chain; 3505} 3506 3507/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 3508/// by "Src" to address "Dst" of size "Size". Alignment information is 3509/// specified by the specific parameter attribute. The copy will be passed as 3510/// a byval function parameter. 3511/// Sometimes what we are copying is the end of a larger object, the part that 3512/// does not fit in registers. 3513static SDValue 3514CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 3515 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 3516 SDLoc dl) { 3517 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 3518 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 3519 false, false, MachinePointerInfo(), 3520 MachinePointerInfo()); 3521} 3522 3523/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 3524/// tail calls. 
3525static void 3526LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 3527 SDValue Arg, SDValue PtrOff, int SPDiff, 3528 unsigned ArgOffset, bool isPPC64, bool isTailCall, 3529 bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 3530 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, 3531 SDLoc dl) { 3532 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3533 if (!isTailCall) { 3534 if (isVector) { 3535 SDValue StackPtr; 3536 if (isPPC64) 3537 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3538 else 3539 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3540 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3541 DAG.getConstant(ArgOffset, PtrVT)); 3542 } 3543 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3544 MachinePointerInfo(), false, false, 0)); 3545 // Calculate and remember argument location. 3546 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 3547 TailCallArguments); 3548} 3549 3550static 3551void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 3552 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 3553 SDValue LROp, SDValue FPOp, bool isDarwinABI, 3554 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 3555 MachineFunction &MF = DAG.getMachineFunction(); 3556 3557 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 3558 // might overwrite each other in case of tail call optimization. 3559 SmallVector<SDValue, 8> MemOpChains2; 3560 // Do not flag preceding copytoreg stuff together with the following stuff. 3561 InFlag = SDValue(); 3562 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 3563 MemOpChains2, dl); 3564 if (!MemOpChains2.empty()) 3565 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 3566 3567 // Store the return address to the appropriate stack slot. 
3568 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 3569 isPPC64, isDarwinABI, dl); 3570 3571 // Emit callseq_end just before tailcall node. 3572 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3573 DAG.getIntPtrConstant(0, true), InFlag, dl); 3574 InFlag = Chain.getValue(1); 3575} 3576 3577// Is this global address that of a function that can be called by name? (as 3578// opposed to something that must hold a descriptor for an indirect call). 3579static bool isFunctionGlobalAddress(SDValue Callee) { 3580 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3581 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 3582 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 3583 return false; 3584 3585 return G->getGlobal()->getType()->getElementType()->isFunctionTy(); 3586 } 3587 3588 return false; 3589} 3590 3591static 3592unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 3593 SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, 3594 bool IsPatchPoint, 3595 SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, 3596 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 3597 const PPCSubtarget &Subtarget) { 3598 3599 bool isPPC64 = Subtarget.isPPC64(); 3600 bool isSVR4ABI = Subtarget.isSVR4ABI(); 3601 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3602 3603 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3604 NodeTys.push_back(MVT::Other); // Returns a chain 3605 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 3606 3607 unsigned CallOpc = PPCISD::CALL; 3608 3609 bool needIndirectCall = true; 3610 if (!isSVR4ABI || !isPPC64) 3611 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 3612 // If this is an absolute destination address, use the munged value. 
3613 Callee = SDValue(Dest, 0); 3614 needIndirectCall = false; 3615 } 3616 3617 if (isFunctionGlobalAddress(Callee)) { 3618 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 3619 // A call to a TLS address is actually an indirect call to a 3620 // thread-specific pointer. 3621 unsigned OpFlags = 0; 3622 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 3623 (Subtarget.getTargetTriple().isMacOSX() && 3624 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 3625 (G->getGlobal()->isDeclaration() || 3626 G->getGlobal()->isWeakForLinker())) || 3627 (Subtarget.isTargetELF() && !isPPC64 && 3628 !G->getGlobal()->hasLocalLinkage() && 3629 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3630 // PC-relative references to external symbols should go through $stub, 3631 // unless we're building with the leopard linker or later, which 3632 // automatically synthesizes these stubs. 3633 OpFlags = PPCII::MO_PLT_OR_STUB; 3634 } 3635 3636 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 3637 // every direct call is) turn it into a TargetGlobalAddress / 3638 // TargetExternalSymbol node so that legalize doesn't hack it. 3639 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 3640 Callee.getValueType(), 0, OpFlags); 3641 needIndirectCall = false; 3642 } 3643 3644 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3645 unsigned char OpFlags = 0; 3646 3647 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 3648 (Subtarget.getTargetTriple().isMacOSX() && 3649 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) || 3650 (Subtarget.isTargetELF() && !isPPC64 && 3651 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3652 // PC-relative references to external symbols should go through $stub, 3653 // unless we're building with the leopard linker or later, which 3654 // automatically synthesizes these stubs. 
3655 OpFlags = PPCII::MO_PLT_OR_STUB; 3656 } 3657 3658 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 3659 OpFlags); 3660 needIndirectCall = false; 3661 } 3662 3663 if (IsPatchPoint) { 3664 // We'll form an invalid direct call when lowering a patchpoint; the full 3665 // sequence for an indirect call is complicated, and many of the 3666 // instructions introduced might have side effects (and, thus, can't be 3667 // removed later). The call itself will be removed as soon as the 3668 // argument/return lowering is complete, so the fact that it has the wrong 3669 // kind of operands should not really matter. 3670 needIndirectCall = false; 3671 } 3672 3673 if (needIndirectCall) { 3674 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 3675 // to do the call, we can't use PPCISD::CALL. 3676 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 3677 3678 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 3679 // Function pointers in the 64-bit SVR4 ABI do not point to the function 3680 // entry point, but to the function descriptor (the function entry point 3681 // address is part of the function descriptor though). 3682 // The function descriptor is a three doubleword structure with the 3683 // following fields: function entry point, TOC base address and 3684 // environment pointer. 3685 // Thus for a call through a function pointer, the following actions need 3686 // to be performed: 3687 // 1. Save the TOC of the caller in the TOC save area of its stack 3688 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 3689 // 2. Load the address of the function entry point from the function 3690 // descriptor. 3691 // 3. Load the TOC of the callee from the function descriptor into r2. 3692 // 4. Load the environment pointer from the function descriptor into 3693 // r11. 3694 // 5. Branch to the function entry point address. 3695 // 6. 
On return of the callee, the TOC of the caller needs to be 3696 // restored (this is done in FinishCall()). 3697 // 3698 // All those operations are flagged together to ensure that no other 3699 // operations can be scheduled in between. E.g. without flagging the 3700 // operations together, a TOC access in the caller could be scheduled 3701 // between the load of the callee TOC and the branch to the callee, which 3702 // results in the TOC access going through the TOC of the callee instead 3703 // of going through the TOC of the caller, which leads to incorrect code. 3704 3705 // Load the address of the function entry point from the function 3706 // descriptor. 3707 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); 3708 SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, 3709 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 3710 Chain = LoadFuncPtr.getValue(1); 3711 InFlag = LoadFuncPtr.getValue(2); 3712 3713 // Load environment pointer into r11. 3714 // Offset of the environment pointer within the function descriptor. 3715 SDValue PtrOff = DAG.getIntPtrConstant(16); 3716 3717 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 3718 SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, 3719 InFlag); 3720 Chain = LoadEnvPtr.getValue(1); 3721 InFlag = LoadEnvPtr.getValue(2); 3722 3723 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 3724 InFlag); 3725 Chain = EnvVal.getValue(0); 3726 InFlag = EnvVal.getValue(1); 3727 3728 // Load TOC of the callee into r2. We are using a target-specific load 3729 // with r2 hard coded, because the result of a target-independent load 3730 // would never go directly into r2, since r2 is a reserved register (which 3731 // prevents the register allocator from allocating it), resulting in an 3732 // additional register being allocated and an unnecessary move instruction 3733 // being generated. 
3734 VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3735 SDValue TOCOff = DAG.getIntPtrConstant(8); 3736 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 3737 SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, 3738 AddTOC, InFlag); 3739 Chain = LoadTOCPtr.getValue(0); 3740 InFlag = LoadTOCPtr.getValue(1); 3741 3742 MTCTROps[0] = Chain; 3743 MTCTROps[1] = LoadFuncPtr; 3744 MTCTROps[2] = InFlag; 3745 } 3746 3747 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 3748 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 3749 InFlag = Chain.getValue(1); 3750 3751 NodeTys.clear(); 3752 NodeTys.push_back(MVT::Other); 3753 NodeTys.push_back(MVT::Glue); 3754 Ops.push_back(Chain); 3755 CallOpc = PPCISD::BCTRL; 3756 Callee.setNode(nullptr); 3757 // Add use of X11 (holding environment pointer) 3758 if (isSVR4ABI && isPPC64 && !isELFv2ABI) 3759 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 3760 // Add CTR register as callee so a bctr can be emitted later. 3761 if (isTailCall) 3762 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 3763 } 3764 3765 // If this is a direct call, pass the chain and the callee. 3766 if (Callee.getNode()) { 3767 Ops.push_back(Chain); 3768 Ops.push_back(Callee); 3769 3770 // If this is a call to __tls_get_addr, find the symbol whose address 3771 // is to be taken and add it to the list. This will be used to 3772 // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld). 3773 // We find the symbol by walking the chain to the CopyFromReg, walking 3774 // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and 3775 // pulling the symbol from that node. 
3776 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 3777 if (!strcmp(S->getSymbol(), "__tls_get_addr")) { 3778 assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); 3779 SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); 3780 SDValue TGTAddr = AddI->getOperand(1); 3781 assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && 3782 "Didn't find target global TLS address where we expected one"); 3783 Ops.push_back(TGTAddr); 3784 CallOpc = PPCISD::CALL_TLS; 3785 } 3786 } 3787 // If this is a tail call add stack pointer delta. 3788 if (isTailCall) 3789 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 3790 3791 // Add argument registers to the end of the list so that they are known live 3792 // into the call. 3793 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 3794 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 3795 RegsToPass[i].second.getValueType())); 3796 3797 // Direct calls in the ELFv2 ABI need the TOC register live into the call. 3798 if (Callee.getNode() && isELFv2ABI && !IsPatchPoint) 3799 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 3800 3801 return CallOpc; 3802} 3803 3804static 3805bool isLocalCall(const SDValue &Callee) 3806{ 3807 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3808 return !G->getGlobal()->isDeclaration() && 3809 !G->getGlobal()->isWeakForLinker(); 3810 return false; 3811} 3812 3813SDValue 3814PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 3815 CallingConv::ID CallConv, bool isVarArg, 3816 const SmallVectorImpl<ISD::InputArg> &Ins, 3817 SDLoc dl, SelectionDAG &DAG, 3818 SmallVectorImpl<SDValue> &InVals) const { 3819 3820 SmallVector<CCValAssign, 16> RVLocs; 3821 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 3822 *DAG.getContext()); 3823 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 3824 3825 // Copy all of the result registers out of their specified physreg. 
3826 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 3827 CCValAssign &VA = RVLocs[i]; 3828 assert(VA.isRegLoc() && "Can only return in registers!"); 3829 3830 SDValue Val = DAG.getCopyFromReg(Chain, dl, 3831 VA.getLocReg(), VA.getLocVT(), InFlag); 3832 Chain = Val.getValue(1); 3833 InFlag = Val.getValue(2); 3834 3835 switch (VA.getLocInfo()) { 3836 default: llvm_unreachable("Unknown loc info!"); 3837 case CCValAssign::Full: break; 3838 case CCValAssign::AExt: 3839 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3840 break; 3841 case CCValAssign::ZExt: 3842 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 3843 DAG.getValueType(VA.getValVT())); 3844 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3845 break; 3846 case CCValAssign::SExt: 3847 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 3848 DAG.getValueType(VA.getValVT())); 3849 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3850 break; 3851 } 3852 3853 InVals.push_back(Val); 3854 } 3855 3856 return Chain; 3857} 3858 3859SDValue 3860PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, 3861 bool isTailCall, bool isVarArg, bool IsPatchPoint, 3862 SelectionDAG &DAG, 3863 SmallVector<std::pair<unsigned, SDValue>, 8> 3864 &RegsToPass, 3865 SDValue InFlag, SDValue Chain, 3866 SDValue &Callee, 3867 int SPDiff, unsigned NumBytes, 3868 const SmallVectorImpl<ISD::InputArg> &Ins, 3869 SmallVectorImpl<SDValue> &InVals) const { 3870 3871 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3872 std::vector<EVT> NodeTys; 3873 SmallVector<SDValue, 8> Ops; 3874 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, 3875 isTailCall, IsPatchPoint, RegsToPass, Ops, 3876 NodeTys, Subtarget); 3877 3878 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 3879 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 3880 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 3881 3882 // When performing tail call optimization the callee pops its 
arguments off 3883 // the stack. Account for this here so these bytes can be pushed back on in 3884 // PPCFrameLowering::eliminateCallFramePseudoInstr. 3885 int BytesCalleePops = 3886 (CallConv == CallingConv::Fast && 3887 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 3888 3889 // Add a register mask operand representing the call-preserved registers. 3890 const TargetRegisterInfo *TRI = 3891 getTargetMachine().getSubtargetImpl()->getRegisterInfo(); 3892 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 3893 assert(Mask && "Missing call preserved mask for calling convention"); 3894 Ops.push_back(DAG.getRegisterMask(Mask)); 3895 3896 if (InFlag.getNode()) 3897 Ops.push_back(InFlag); 3898 3899 // Emit tail call. 3900 if (isTailCall) { 3901 assert(((Callee.getOpcode() == ISD::Register && 3902 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 3903 Callee.getOpcode() == ISD::TargetExternalSymbol || 3904 Callee.getOpcode() == ISD::TargetGlobalAddress || 3905 isa<ConstantSDNode>(Callee)) && 3906 "Expecting an global address, external symbol, absolute value or register"); 3907 3908 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); 3909 } 3910 3911 // Add a NOP immediately after the branch instruction when using the 64-bit 3912 // SVR4 ABI. At link time, if caller and callee are in a different module and 3913 // thus have a different TOC, the call will be replaced with a call to a stub 3914 // function which saves the current TOC, loads the TOC of the callee and 3915 // branches to the callee. The NOP will be replaced with a load instruction 3916 // which restores the TOC of the caller from the TOC save slot of the current 3917 // stack frame. If caller and callee belong to the same module (and have the 3918 // same TOC), the NOP will remain unchanged. 
3919 3920 if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && 3921 !IsPatchPoint) { 3922 if (CallOpc == PPCISD::BCTRL) { 3923 // This is a call through a function pointer. 3924 // Restore the caller TOC from the save area into R2. 3925 // See PrepareCall() for more information about calls through function 3926 // pointers in the 64-bit SVR4 ABI. 3927 // We are using a target-specific load with r2 hard coded, because the 3928 // result of a target-independent load would never go directly into r2, 3929 // since r2 is a reserved register (which prevents the register allocator 3930 // from allocating it), resulting in an additional register being 3931 // allocated and an unnecessary move instruction being generated. 3932 CallOpc = PPCISD::BCTRL_LOAD_TOC; 3933 3934 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3935 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); 3936 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); 3937 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); 3938 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); 3939 3940 // The address needs to go after the chain input but before the flag (or 3941 // any other variadic arguments). 3942 Ops.insert(std::next(Ops.begin()), AddTOC); 3943 } else if ((CallOpc == PPCISD::CALL) && 3944 (!isLocalCall(Callee) || 3945 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3946 // Otherwise insert NOP for non-local calls. 3947 CallOpc = PPCISD::CALL_NOP; 3948 } else if (CallOpc == PPCISD::CALL_TLS) 3949 // For 64-bit SVR4, TLS calls are always non-local. 
3950 CallOpc = PPCISD::CALL_NOP_TLS; 3951 } 3952 3953 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 3954 InFlag = Chain.getValue(1); 3955 3956 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3957 DAG.getIntPtrConstant(BytesCalleePops, true), 3958 InFlag, dl); 3959 if (!Ins.empty()) 3960 InFlag = Chain.getValue(1); 3961 3962 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 3963 Ins, dl, DAG, InVals); 3964} 3965 3966SDValue 3967PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 3968 SmallVectorImpl<SDValue> &InVals) const { 3969 SelectionDAG &DAG = CLI.DAG; 3970 SDLoc &dl = CLI.DL; 3971 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 3972 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 3973 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 3974 SDValue Chain = CLI.Chain; 3975 SDValue Callee = CLI.Callee; 3976 bool &isTailCall = CLI.IsTailCall; 3977 CallingConv::ID CallConv = CLI.CallConv; 3978 bool isVarArg = CLI.IsVarArg; 3979 bool IsPatchPoint = CLI.IsPatchPoint; 3980 3981 if (isTailCall) 3982 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 3983 Ins, DAG); 3984 3985 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) 3986 report_fatal_error("failed to perform tail call elimination on a call " 3987 "site marked musttail"); 3988 3989 if (Subtarget.isSVR4ABI()) { 3990 if (Subtarget.isPPC64()) 3991 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 3992 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 3993 dl, DAG, InVals); 3994 else 3995 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 3996 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 3997 dl, DAG, InVals); 3998 } 3999 4000 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 4001 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 4002 dl, DAG, InVals); 4003} 4004 4005SDValue 4006PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, 4007 CallingConv::ID CallConv, bool isVarArg, 4008 bool isTailCall, bool 
IsPatchPoint, 4009 const SmallVectorImpl<ISD::OutputArg> &Outs, 4010 const SmallVectorImpl<SDValue> &OutVals, 4011 const SmallVectorImpl<ISD::InputArg> &Ins, 4012 SDLoc dl, SelectionDAG &DAG, 4013 SmallVectorImpl<SDValue> &InVals) const { 4014 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 4015 // of the 32-bit SVR4 ABI stack frame layout. 4016 4017 assert((CallConv == CallingConv::C || 4018 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 4019 4020 unsigned PtrByteSize = 4; 4021 4022 MachineFunction &MF = DAG.getMachineFunction(); 4023 4024 // Mark this function as potentially containing a function that contains a 4025 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4026 // and restoring the callers stack pointer in this functions epilog. This is 4027 // done because by tail calling the called function might overwrite the value 4028 // in this function's (MF) stack pointer stack slot 0(SP). 4029 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4030 CallConv == CallingConv::Fast) 4031 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4032 4033 // Count how many bytes are to be pushed on the stack, including the linkage 4034 // area, parameter list area and the part of the local variable space which 4035 // contains copies of aggregates which are passed by value. 4036 4037 // Assign locations to all of the outgoing arguments. 4038 SmallVector<CCValAssign, 16> ArgLocs; 4039 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 4040 *DAG.getContext()); 4041 4042 // Reserve space for the linkage area on the stack. 4043 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false), 4044 PtrByteSize); 4045 4046 if (isVarArg) { 4047 // Handle fixed and variable vector arguments differently. 4048 // Fixed vector arguments go into registers as long as registers are 4049 // available. Variable vector arguments always go into memory. 
4050 unsigned NumArgs = Outs.size(); 4051 4052 for (unsigned i = 0; i != NumArgs; ++i) { 4053 MVT ArgVT = Outs[i].VT; 4054 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4055 bool Result; 4056 4057 if (Outs[i].IsFixed) { 4058 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 4059 CCInfo); 4060 } else { 4061 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 4062 ArgFlags, CCInfo); 4063 } 4064 4065 if (Result) { 4066#ifndef NDEBUG 4067 errs() << "Call operand #" << i << " has unhandled type " 4068 << EVT(ArgVT).getEVTString() << "\n"; 4069#endif 4070 llvm_unreachable(nullptr); 4071 } 4072 } 4073 } else { 4074 // All arguments are treated the same. 4075 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 4076 } 4077 4078 // Assign locations to all of the outgoing aggregate by value arguments. 4079 SmallVector<CCValAssign, 16> ByValArgLocs; 4080 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4081 ByValArgLocs, *DAG.getContext()); 4082 4083 // Reserve stack space for the allocations in CCInfo. 4084 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4085 4086 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4087 4088 // Size of the linkage area, parameter list area and the part of the local 4089 // space variable where copies of aggregates which are passed by value are 4090 // stored. 4091 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4092 4093 // Calculate by how many bytes the stack has to be adjusted in case of tail 4094 // call optimization. 4095 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4096 4097 // Adjust the stack pointer for the new arguments... 4098 // These operations are automatically eliminated by the prolog/epilog pass 4099 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4100 dl); 4101 SDValue CallSeqStart = Chain; 4102 4103 // Load the return address and frame pointer so it can be moved somewhere else 4104 // later. 
4105 SDValue LROp, FPOp; 4106 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 4107 dl); 4108 4109 // Set up a copy of the stack pointer for use loading and storing any 4110 // arguments that may not fit in the registers available for argument 4111 // passing. 4112 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4113 4114 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4115 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4116 SmallVector<SDValue, 8> MemOpChains; 4117 4118 bool seenFloatArg = false; 4119 // Walk the register/memloc assignments, inserting copies/loads. 4120 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4121 i != e; 4122 ++i) { 4123 CCValAssign &VA = ArgLocs[i]; 4124 SDValue Arg = OutVals[i]; 4125 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4126 4127 if (Flags.isByVal()) { 4128 // Argument is an aggregate which is passed by value, thus we need to 4129 // create a copy of it in the local variable space of the current stack 4130 // frame (which is the stack frame of the caller) and pass the address of 4131 // this copy to the callee. 4132 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4133 CCValAssign &ByValVA = ByValArgLocs[j++]; 4134 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4135 4136 // Memory reserved in the local variable space of the callers stack frame. 4137 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4138 4139 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 4140 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 4141 4142 // Create a copy of the argument in the local area of the current 4143 // stack frame. 4144 SDValue MemcpyCall = 4145 CreateCopyOfByValArgument(Arg, PtrOff, 4146 CallSeqStart.getNode()->getOperand(0), 4147 Flags, DAG, dl); 4148 4149 // This must go outside the CALLSEQ_START..END. 
4150 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4151 CallSeqStart.getNode()->getOperand(1), 4152 SDLoc(MemcpyCall)); 4153 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4154 NewCallSeqStart.getNode()); 4155 Chain = CallSeqStart = NewCallSeqStart; 4156 4157 // Pass the address of the aggregate copy on the stack either in a 4158 // physical register or in the parameter list area of the current stack 4159 // frame to the callee. 4160 Arg = PtrOff; 4161 } 4162 4163 if (VA.isRegLoc()) { 4164 if (Arg.getValueType() == MVT::i1) 4165 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4166 4167 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4168 // Put argument in a physical register. 4169 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4170 } else { 4171 // Put argument in the parameter list area of the current stack frame. 4172 assert(VA.isMemLoc()); 4173 unsigned LocMemOffset = VA.getLocMemOffset(); 4174 4175 if (!isTailCall) { 4176 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 4177 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 4178 4179 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 4180 MachinePointerInfo(), 4181 false, false, 0)); 4182 } else { 4183 // Calculate and remember argument location. 4184 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4185 TailCallArguments); 4186 } 4187 } 4188 } 4189 4190 if (!MemOpChains.empty()) 4191 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4192 4193 // Build a sequence of copy-to-reg nodes chained together with token chain 4194 // and flag operands which copy the outgoing args into the appropriate regs. 
4195 SDValue InFlag; 4196 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4197 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4198 RegsToPass[i].second, InFlag); 4199 InFlag = Chain.getValue(1); 4200 } 4201 4202 // Set CR bit 6 to true if this is a vararg call with floating args passed in 4203 // registers. 4204 if (isVarArg) { 4205 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 4206 SDValue Ops[] = { Chain, InFlag }; 4207 4208 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 4209 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); 4210 4211 InFlag = Chain.getValue(1); 4212 } 4213 4214 if (isTailCall) 4215 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, 4216 false, TailCallArguments); 4217 4218 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 4219 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4220 Ins, InVals); 4221} 4222 4223// Copy an argument into memory, being careful to do this outside the 4224// call sequence for the call to which the argument belongs. 4225SDValue 4226PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, 4227 SDValue CallSeqStart, 4228 ISD::ArgFlagsTy Flags, 4229 SelectionDAG &DAG, 4230 SDLoc dl) const { 4231 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 4232 CallSeqStart.getNode()->getOperand(0), 4233 Flags, DAG, dl); 4234 // The MEMCPY must go outside the CALLSEQ_START..END. 
4235 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4236 CallSeqStart.getNode()->getOperand(1), 4237 SDLoc(MemcpyCall)); 4238 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4239 NewCallSeqStart.getNode()); 4240 return NewCallSeqStart; 4241} 4242 4243SDValue 4244PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, 4245 CallingConv::ID CallConv, bool isVarArg, 4246 bool isTailCall, bool IsPatchPoint, 4247 const SmallVectorImpl<ISD::OutputArg> &Outs, 4248 const SmallVectorImpl<SDValue> &OutVals, 4249 const SmallVectorImpl<ISD::InputArg> &Ins, 4250 SDLoc dl, SelectionDAG &DAG, 4251 SmallVectorImpl<SDValue> &InVals) const { 4252 4253 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4254 bool isLittleEndian = Subtarget.isLittleEndian(); 4255 unsigned NumOps = Outs.size(); 4256 4257 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4258 unsigned PtrByteSize = 8; 4259 4260 MachineFunction &MF = DAG.getMachineFunction(); 4261 4262 // Mark this function as potentially containing a function that contains a 4263 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4264 // and restoring the callers stack pointer in this functions epilog. This is 4265 // done because by tail calling the called function might overwrite the value 4266 // in this function's (MF) stack pointer stack slot 0(SP). 4267 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4268 CallConv == CallingConv::Fast) 4269 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4270 4271 // Count how many bytes are to be pushed on the stack, including the linkage 4272 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 4273 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 4274 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 4275 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, 4276 isELFv2ABI); 4277 unsigned NumBytes = LinkageSize; 4278 4279 // Add up all the space actually used. 
4280 for (unsigned i = 0; i != NumOps; ++i) { 4281 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4282 EVT ArgVT = Outs[i].VT; 4283 EVT OrigVT = Outs[i].ArgVT; 4284 4285 /* Respect alignment of argument on the stack. */ 4286 unsigned Align = 4287 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4288 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 4289 4290 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 4291 if (Flags.isInConsecutiveRegsLast()) 4292 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4293 } 4294 4295 unsigned NumBytesActuallyUsed = NumBytes; 4296 4297 // The prolog code of the callee may store up to 8 GPR argument registers to 4298 // the stack, allowing va_start to index over them in memory if its varargs. 4299 // Because we cannot tell if this is needed on the caller side, we have to 4300 // conservatively assume that it is needed. As such, make sure we have at 4301 // least enough stack space for the caller to store the 8 GPRs. 4302 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. 4303 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 4304 4305 // Tail call needs the stack to be aligned. 4306 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4307 CallConv == CallingConv::Fast) 4308 NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); 4309 4310 // Calculate by how many bytes the stack has to be adjusted in case of tail 4311 // call optimization. 4312 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4313 4314 // To protect arguments on the stack from being clobbered in a tail call, 4315 // force all the loads to happen before doing any other lowering. 4316 if (isTailCall) 4317 Chain = DAG.getStackArgumentTokenFactor(Chain); 4318 4319 // Adjust the stack pointer for the new arguments... 
4320 // These operations are automatically eliminated by the prolog/epilog pass 4321 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4322 dl); 4323 SDValue CallSeqStart = Chain; 4324 4325 // Load the return address and frame pointer so it can be move somewhere else 4326 // later. 4327 SDValue LROp, FPOp; 4328 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4329 dl); 4330 4331 // Set up a copy of the stack pointer for use loading and storing any 4332 // arguments that may not fit in the registers available for argument 4333 // passing. 4334 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4335 4336 // Figure out which arguments are going to go in registers, and which in 4337 // memory. Also, if this is a vararg function, floating point operations 4338 // must be stored to our stack, and loaded into integer regs as well, if 4339 // any integer regs are available for argument passing. 4340 unsigned ArgOffset = LinkageSize; 4341 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; 4342 4343 static const MCPhysReg GPR[] = { 4344 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4345 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4346 }; 4347 static const MCPhysReg *FPR = GetFPR(); 4348 4349 static const MCPhysReg VR[] = { 4350 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4351 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4352 }; 4353 static const MCPhysReg VSRH[] = { 4354 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 4355 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 4356 }; 4357 4358 const unsigned NumGPRs = array_lengthof(GPR); 4359 const unsigned NumFPRs = 13; 4360 const unsigned NumVRs = array_lengthof(VR); 4361 4362 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4363 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4364 4365 SmallVector<SDValue, 8> MemOpChains; 4366 for (unsigned i = 0; i != NumOps; ++i) { 4367 SDValue Arg = OutVals[i]; 4368 ISD::ArgFlagsTy 
Flags = Outs[i].Flags; 4369 EVT ArgVT = Outs[i].VT; 4370 EVT OrigVT = Outs[i].ArgVT; 4371 4372 /* Respect alignment of argument on the stack. */ 4373 unsigned Align = 4374 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4375 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 4376 4377 /* Compute GPR index associated with argument offset. */ 4378 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 4379 GPR_idx = std::min(GPR_idx, NumGPRs); 4380 4381 // PtrOff will be used to store the current argument to the stack if a 4382 // register cannot be found for it. 4383 SDValue PtrOff; 4384 4385 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4386 4387 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4388 4389 // Promote integers to 64-bit values. 4390 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 4391 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4392 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4393 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4394 } 4395 4396 // FIXME memcpy is used way more than necessary. Correctness first. 4397 // Note: "by value" is code for passing a structure by value, not 4398 // basic types. 4399 if (Flags.isByVal()) { 4400 // Note: Size includes alignment padding, so 4401 // struct x { short a; char b; } 4402 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 4403 // These are the proper values we need for right-justifying the 4404 // aggregate in a parameter register. 4405 unsigned Size = Flags.getByValSize(); 4406 4407 // An empty aggregate parameter takes up no storage and no 4408 // registers. 4409 if (Size == 0) 4410 continue; 4411 4412 // All aggregates smaller than 8 bytes must be passed right-justified. 4413 if (Size==1 || Size==2 || Size==4) { 4414 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? 
MVT::i16 : MVT::i32); 4415 if (GPR_idx != NumGPRs) { 4416 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4417 MachinePointerInfo(), VT, 4418 false, false, false, 0); 4419 MemOpChains.push_back(Load.getValue(1)); 4420 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); 4421 4422 ArgOffset += PtrByteSize; 4423 continue; 4424 } 4425 } 4426 4427 if (GPR_idx == NumGPRs && Size < 8) { 4428 SDValue AddPtr = PtrOff; 4429 if (!isLittleEndian) { 4430 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4431 PtrOff.getValueType()); 4432 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4433 } 4434 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4435 CallSeqStart, 4436 Flags, DAG, dl); 4437 ArgOffset += PtrByteSize; 4438 continue; 4439 } 4440 // Copy entire object into memory. There are cases where gcc-generated 4441 // code assumes it is there, even if it could be put entirely into 4442 // registers. (This is not what the doc says.) 4443 4444 // FIXME: The above statement is likely due to a misunderstanding of the 4445 // documents. All arguments must be copied into the parameter area BY 4446 // THE CALLEE in the event that the callee takes the address of any 4447 // formal argument. That has not yet been implemented. However, it is 4448 // reasonable to use the stack area as a staging area for the register 4449 // load. 4450 4451 // Skip this for small aggregates, as we will use the same slot for a 4452 // right-justified copy, below. 4453 if (Size >= 8) 4454 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4455 CallSeqStart, 4456 Flags, DAG, dl); 4457 4458 // When a register is available, pass a small aggregate right-justified. 4459 if (Size < 8 && GPR_idx != NumGPRs) { 4460 // The easiest way to get this right-justified in a register 4461 // is to copy the structure into the rightmost portion of a 4462 // local variable slot, then load the whole slot into the 4463 // register. 
4464 // FIXME: The memcpy seems to produce pretty awful code for 4465 // small aggregates, particularly for packed ones. 4466 // FIXME: It would be preferable to use the slot in the 4467 // parameter save area instead of a new local variable. 4468 SDValue AddPtr = PtrOff; 4469 if (!isLittleEndian) { 4470 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); 4471 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4472 } 4473 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4474 CallSeqStart, 4475 Flags, DAG, dl); 4476 4477 // Load the slot into the register. 4478 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 4479 MachinePointerInfo(), 4480 false, false, false, 0); 4481 MemOpChains.push_back(Load.getValue(1)); 4482 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); 4483 4484 // Done with this argument. 4485 ArgOffset += PtrByteSize; 4486 continue; 4487 } 4488 4489 // For aggregates larger than PtrByteSize, copy the pieces of the 4490 // object that fit into registers from the parameter save area. 4491 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4492 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4493 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4494 if (GPR_idx != NumGPRs) { 4495 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4496 MachinePointerInfo(), 4497 false, false, false, 0); 4498 MemOpChains.push_back(Load.getValue(1)); 4499 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4500 ArgOffset += PtrByteSize; 4501 } else { 4502 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4503 break; 4504 } 4505 } 4506 continue; 4507 } 4508 4509 switch (Arg.getSimpleValueType().SimpleTy) { 4510 default: llvm_unreachable("Unexpected ValueType for argument!"); 4511 case MVT::i1: 4512 case MVT::i32: 4513 case MVT::i64: 4514 // These can be scalar arguments or elements of an integer array type 4515 // passed directly. 
Clang may use those instead of "byval" aggregate 4516 // types to avoid forcing arguments to memory unnecessarily. 4517 if (GPR_idx != NumGPRs) { 4518 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg)); 4519 } else { 4520 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4521 true, isTailCall, false, MemOpChains, 4522 TailCallArguments, dl); 4523 } 4524 ArgOffset += PtrByteSize; 4525 break; 4526 case MVT::f32: 4527 case MVT::f64: { 4528 // These can be scalar arguments or elements of a float array type 4529 // passed directly. The latter are used to implement ELFv2 homogenous 4530 // float aggregates. 4531 4532 // Named arguments go into FPRs first, and once they overflow, the 4533 // remaining arguments go into GPRs and then the parameter save area. 4534 // Unnamed arguments for vararg functions always go to GPRs and 4535 // then the parameter save area. For now, put all arguments to vararg 4536 // routines always in both locations (FPR *and* GPR or stack slot). 4537 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 4538 4539 // First load the argument into the next available FPR. 4540 if (FPR_idx != NumFPRs) 4541 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4542 4543 // Next, load the argument into GPR or stack slot if needed. 4544 if (!NeedGPROrStack) 4545 ; 4546 else if (GPR_idx != NumGPRs) { 4547 // In the non-vararg case, this can only ever happen in the 4548 // presence of f32 array types, since otherwise we never run 4549 // out of FPRs before running out of GPRs. 4550 SDValue ArgVal; 4551 4552 // Double values are always passed in a single GPR. 4553 if (Arg.getValueType() != MVT::f32) { 4554 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 4555 4556 // Non-array float values are extended and passed in a GPR. 
4557 } else if (!Flags.isInConsecutiveRegs()) { 4558 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4559 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4560 4561 // If we have an array of floats, we collect every odd element 4562 // together with its predecessor into one GPR. 4563 } else if (ArgOffset % PtrByteSize != 0) { 4564 SDValue Lo, Hi; 4565 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 4566 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4567 if (!isLittleEndian) 4568 std::swap(Lo, Hi); 4569 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4570 4571 // The final element, if even, goes into the first half of a GPR. 4572 } else if (Flags.isInConsecutiveRegsLast()) { 4573 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4574 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4575 if (!isLittleEndian) 4576 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 4577 DAG.getConstant(32, MVT::i32)); 4578 4579 // Non-final even elements are skipped; they will be handled 4580 // together the with subsequent argument on the next go-around. 4581 } else 4582 ArgVal = SDValue(); 4583 4584 if (ArgVal.getNode()) 4585 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal)); 4586 } else { 4587 // Single-precision floating-point values are mapped to the 4588 // second (rightmost) word of the stack doubleword. 4589 if (Arg.getValueType() == MVT::f32 && 4590 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 4591 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4592 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4593 } 4594 4595 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4596 true, isTailCall, false, MemOpChains, 4597 TailCallArguments, dl); 4598 } 4599 // When passing an array of floats, the array occupies consecutive 4600 // space in the argument area; only round up to the next doubleword 4601 // at the end of the array. 
Otherwise, each float takes 8 bytes. 4602 ArgOffset += (Arg.getValueType() == MVT::f32 && 4603 Flags.isInConsecutiveRegs()) ? 4 : 8; 4604 if (Flags.isInConsecutiveRegsLast()) 4605 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4606 break; 4607 } 4608 case MVT::v4f32: 4609 case MVT::v4i32: 4610 case MVT::v8i16: 4611 case MVT::v16i8: 4612 case MVT::v2f64: 4613 case MVT::v2i64: 4614 // These can be scalar arguments or elements of a vector array type 4615 // passed directly. The latter are used to implement ELFv2 homogenous 4616 // vector aggregates. 4617 4618 // For a varargs call, named arguments go into VRs or on the stack as 4619 // usual; unnamed arguments always go to the stack or the corresponding 4620 // GPRs when within range. For now, we always put the value in both 4621 // locations (or even all three). 4622 if (isVarArg) { 4623 // We could elide this store in the case where the object fits 4624 // entirely in R registers. Maybe later. 4625 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4626 MachinePointerInfo(), false, false, 0); 4627 MemOpChains.push_back(Store); 4628 if (VR_idx != NumVRs) { 4629 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4630 MachinePointerInfo(), 4631 false, false, false, 0); 4632 MemOpChains.push_back(Load.getValue(1)); 4633 4634 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 4635 Arg.getSimpleValueType() == MVT::v2i64) ? 
4636 VSRH[VR_idx] : VR[VR_idx]; 4637 ++VR_idx; 4638 4639 RegsToPass.push_back(std::make_pair(VReg, Load)); 4640 } 4641 ArgOffset += 16; 4642 for (unsigned i=0; i<16; i+=PtrByteSize) { 4643 if (GPR_idx == NumGPRs) 4644 break; 4645 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4646 DAG.getConstant(i, PtrVT)); 4647 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4648 false, false, false, 0); 4649 MemOpChains.push_back(Load.getValue(1)); 4650 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4651 } 4652 break; 4653 } 4654 4655 // Non-varargs Altivec params go into VRs or on the stack. 4656 if (VR_idx != NumVRs) { 4657 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 4658 Arg.getSimpleValueType() == MVT::v2i64) ? 4659 VSRH[VR_idx] : VR[VR_idx]; 4660 ++VR_idx; 4661 4662 RegsToPass.push_back(std::make_pair(VReg, Arg)); 4663 } else { 4664 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4665 true, isTailCall, true, MemOpChains, 4666 TailCallArguments, dl); 4667 } 4668 ArgOffset += 16; 4669 break; 4670 } 4671 } 4672 4673 assert(NumBytesActuallyUsed == ArgOffset); 4674 (void)NumBytesActuallyUsed; 4675 4676 if (!MemOpChains.empty()) 4677 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4678 4679 // Check if this is an indirect call (MTCTR/BCTRL). 4680 // See PrepareCall() for more information about calls through function 4681 // pointers in the 64-bit SVR4 ABI. 4682 if (!isTailCall && !IsPatchPoint && 4683 !isFunctionGlobalAddress(Callee) && 4684 !isa<ExternalSymbolSDNode>(Callee)) { 4685 // Load r2 into a virtual register and store it to the TOC save area. 4686 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 4687 // TOC save area offset. 
4688 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); 4689 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); 4690 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4691 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), 4692 false, false, 0); 4693 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 4694 // This does not mean the MTCTR instruction must use R12; it's easier 4695 // to model this as an extra parameter, so do that. 4696 if (isELFv2ABI && !IsPatchPoint) 4697 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 4698 } 4699 4700 // Build a sequence of copy-to-reg nodes chained together with token chain 4701 // and flag operands which copy the outgoing args into the appropriate regs. 4702 SDValue InFlag; 4703 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4704 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4705 RegsToPass[i].second, InFlag); 4706 InFlag = Chain.getValue(1); 4707 } 4708 4709 if (isTailCall) 4710 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 4711 FPOp, true, TailCallArguments); 4712 4713 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 4714 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4715 Ins, InVals); 4716} 4717 4718SDValue 4719PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 4720 CallingConv::ID CallConv, bool isVarArg, 4721 bool isTailCall, bool IsPatchPoint, 4722 const SmallVectorImpl<ISD::OutputArg> &Outs, 4723 const SmallVectorImpl<SDValue> &OutVals, 4724 const SmallVectorImpl<ISD::InputArg> &Ins, 4725 SDLoc dl, SelectionDAG &DAG, 4726 SmallVectorImpl<SDValue> &InVals) const { 4727 4728 unsigned NumOps = Outs.size(); 4729 4730 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4731 bool isPPC64 = PtrVT == MVT::i64; 4732 unsigned PtrByteSize = isPPC64 ? 
8 : 4; 4733 4734 MachineFunction &MF = DAG.getMachineFunction(); 4735 4736 // Mark this function as potentially containing a function that contains a 4737 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4738 // and restoring the callers stack pointer in this functions epilog. This is 4739 // done because by tail calling the called function might overwrite the value 4740 // in this function's (MF) stack pointer stack slot 0(SP). 4741 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4742 CallConv == CallingConv::Fast) 4743 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4744 4745 // Count how many bytes are to be pushed on the stack, including the linkage 4746 // area, and parameter passing area. We start with 24/48 bytes, which is 4747 // prereserved space for [SP][CR][LR][3 x unused]. 4748 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true, 4749 false); 4750 unsigned NumBytes = LinkageSize; 4751 4752 // Add up all the space actually used. 4753 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 4754 // they all go in registers, but we must reserve stack space for them for 4755 // possible use by the caller. In varargs or 64-bit calls, parameters are 4756 // assigned stack space in order, with padding so Altivec parameters are 4757 // 16-byte aligned. 4758 unsigned nAltivecParamsAtEnd = 0; 4759 for (unsigned i = 0; i != NumOps; ++i) { 4760 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4761 EVT ArgVT = Outs[i].VT; 4762 // Varargs Altivec parameters are padded to a 16 byte boundary. 4763 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 4764 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 4765 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 4766 if (!isVarArg && !isPPC64) { 4767 // Non-varargs Altivec parameters go after all the non-Altivec 4768 // parameters; handle those later so we know how much padding we need. 
4769 nAltivecParamsAtEnd++; 4770 continue; 4771 } 4772 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 4773 NumBytes = ((NumBytes+15)/16)*16; 4774 } 4775 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 4776 } 4777 4778 // Allow for Altivec parameters at the end, if needed. 4779 if (nAltivecParamsAtEnd) { 4780 NumBytes = ((NumBytes+15)/16)*16; 4781 NumBytes += 16*nAltivecParamsAtEnd; 4782 } 4783 4784 // The prolog code of the callee may store up to 8 GPR argument registers to 4785 // the stack, allowing va_start to index over them in memory if its varargs. 4786 // Because we cannot tell if this is needed on the caller side, we have to 4787 // conservatively assume that it is needed. As such, make sure we have at 4788 // least enough stack space for the caller to store the 8 GPRs. 4789 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 4790 4791 // Tail call needs the stack to be aligned. 4792 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4793 CallConv == CallingConv::Fast) 4794 NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); 4795 4796 // Calculate by how many bytes the stack has to be adjusted in case of tail 4797 // call optimization. 4798 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4799 4800 // To protect arguments on the stack from being clobbered in a tail call, 4801 // force all the loads to happen before doing any other lowering. 4802 if (isTailCall) 4803 Chain = DAG.getStackArgumentTokenFactor(Chain); 4804 4805 // Adjust the stack pointer for the new arguments... 4806 // These operations are automatically eliminated by the prolog/epilog pass 4807 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4808 dl); 4809 SDValue CallSeqStart = Chain; 4810 4811 // Load the return address and frame pointer so it can be move somewhere else 4812 // later. 
4813 SDValue LROp, FPOp; 4814 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4815 dl); 4816 4817 // Set up a copy of the stack pointer for use loading and storing any 4818 // arguments that may not fit in the registers available for argument 4819 // passing. 4820 SDValue StackPtr; 4821 if (isPPC64) 4822 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4823 else 4824 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4825 4826 // Figure out which arguments are going to go in registers, and which in 4827 // memory. Also, if this is a vararg function, floating point operations 4828 // must be stored to our stack, and loaded into integer regs as well, if 4829 // any integer regs are available for argument passing. 4830 unsigned ArgOffset = LinkageSize; 4831 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4832 4833 static const MCPhysReg GPR_32[] = { // 32-bit registers. 4834 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4835 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4836 }; 4837 static const MCPhysReg GPR_64[] = { // 64-bit registers. 4838 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4839 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4840 }; 4841 static const MCPhysReg *FPR = GetFPR(); 4842 4843 static const MCPhysReg VR[] = { 4844 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4845 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4846 }; 4847 const unsigned NumGPRs = array_lengthof(GPR_32); 4848 const unsigned NumFPRs = 13; 4849 const unsigned NumVRs = array_lengthof(VR); 4850 4851 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 4852 4853 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4854 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4855 4856 SmallVector<SDValue, 8> MemOpChains; 4857 for (unsigned i = 0; i != NumOps; ++i) { 4858 SDValue Arg = OutVals[i]; 4859 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4860 4861 // PtrOff will be used to store the current argument to the stack if a 4862 // register cannot be found for it. 
4863 SDValue PtrOff; 4864 4865 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4866 4867 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4868 4869 // On PPC64, promote integers to 64-bit values. 4870 if (isPPC64 && Arg.getValueType() == MVT::i32) { 4871 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4872 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4873 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4874 } 4875 4876 // FIXME memcpy is used way more than necessary. Correctness first. 4877 // Note: "by value" is code for passing a structure by value, not 4878 // basic types. 4879 if (Flags.isByVal()) { 4880 unsigned Size = Flags.getByValSize(); 4881 // Very small objects are passed right-justified. Everything else is 4882 // passed left-justified. 4883 if (Size==1 || Size==2) { 4884 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 4885 if (GPR_idx != NumGPRs) { 4886 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4887 MachinePointerInfo(), VT, 4888 false, false, false, 0); 4889 MemOpChains.push_back(Load.getValue(1)); 4890 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4891 4892 ArgOffset += PtrByteSize; 4893 } else { 4894 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4895 PtrOff.getValueType()); 4896 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4897 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4898 CallSeqStart, 4899 Flags, DAG, dl); 4900 ArgOffset += PtrByteSize; 4901 } 4902 continue; 4903 } 4904 // Copy entire object into memory. There are cases where gcc-generated 4905 // code assumes it is there, even if it could be put entirely into 4906 // registers. (This is not what the doc says.) 
4907 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4908 CallSeqStart, 4909 Flags, DAG, dl); 4910 4911 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 4912 // copy the pieces of the object that fit into registers from the 4913 // parameter save area. 4914 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4915 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4916 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4917 if (GPR_idx != NumGPRs) { 4918 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4919 MachinePointerInfo(), 4920 false, false, false, 0); 4921 MemOpChains.push_back(Load.getValue(1)); 4922 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4923 ArgOffset += PtrByteSize; 4924 } else { 4925 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4926 break; 4927 } 4928 } 4929 continue; 4930 } 4931 4932 switch (Arg.getSimpleValueType().SimpleTy) { 4933 default: llvm_unreachable("Unexpected ValueType for argument!"); 4934 case MVT::i1: 4935 case MVT::i32: 4936 case MVT::i64: 4937 if (GPR_idx != NumGPRs) { 4938 if (Arg.getValueType() == MVT::i1) 4939 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 4940 4941 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4942 } else { 4943 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4944 isPPC64, isTailCall, false, MemOpChains, 4945 TailCallArguments, dl); 4946 } 4947 ArgOffset += PtrByteSize; 4948 break; 4949 case MVT::f32: 4950 case MVT::f64: 4951 if (FPR_idx != NumFPRs) { 4952 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4953 4954 if (isVarArg) { 4955 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4956 MachinePointerInfo(), false, false, 0); 4957 MemOpChains.push_back(Store); 4958 4959 // Float varargs are always shadowed in available integer registers 4960 if (GPR_idx != NumGPRs) { 4961 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4962 MachinePointerInfo(), false, false, 4963 
false, 0); 4964 MemOpChains.push_back(Load.getValue(1)); 4965 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4966 } 4967 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 4968 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4969 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4970 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4971 MachinePointerInfo(), 4972 false, false, false, 0); 4973 MemOpChains.push_back(Load.getValue(1)); 4974 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4975 } 4976 } else { 4977 // If we have any FPRs remaining, we may also have GPRs remaining. 4978 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 4979 // GPRs. 4980 if (GPR_idx != NumGPRs) 4981 ++GPR_idx; 4982 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 4983 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 4984 ++GPR_idx; 4985 } 4986 } else 4987 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4988 isPPC64, isTailCall, false, MemOpChains, 4989 TailCallArguments, dl); 4990 if (isPPC64) 4991 ArgOffset += 8; 4992 else 4993 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 4994 break; 4995 case MVT::v4f32: 4996 case MVT::v4i32: 4997 case MVT::v8i16: 4998 case MVT::v16i8: 4999 if (isVarArg) { 5000 // These go aligned on the stack, or in the corresponding R registers 5001 // when within range. The Darwin PPC ABI doc claims they also go in 5002 // V registers; in fact gcc does this only for arguments that are 5003 // prototyped, not for those that match the ... We do it for all 5004 // arguments, seems to work. 5005 while (ArgOffset % 16 !=0) { 5006 ArgOffset += PtrByteSize; 5007 if (GPR_idx != NumGPRs) 5008 GPR_idx++; 5009 } 5010 // We could elide this store in the case where the object fits 5011 // entirely in R registers. Maybe later. 
5012 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 5013 DAG.getConstant(ArgOffset, PtrVT)); 5014 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5015 MachinePointerInfo(), false, false, 0); 5016 MemOpChains.push_back(Store); 5017 if (VR_idx != NumVRs) { 5018 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 5019 MachinePointerInfo(), 5020 false, false, false, 0); 5021 MemOpChains.push_back(Load.getValue(1)); 5022 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5023 } 5024 ArgOffset += 16; 5025 for (unsigned i=0; i<16; i+=PtrByteSize) { 5026 if (GPR_idx == NumGPRs) 5027 break; 5028 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5029 DAG.getConstant(i, PtrVT)); 5030 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 5031 false, false, false, 0); 5032 MemOpChains.push_back(Load.getValue(1)); 5033 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5034 } 5035 break; 5036 } 5037 5038 // Non-varargs Altivec params generally go in registers, but have 5039 // stack space allocated at the end. 5040 if (VR_idx != NumVRs) { 5041 // Doesn't have GPR space allocated. 5042 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5043 } else if (nAltivecParamsAtEnd==0) { 5044 // We are emitting Altivec params in order. 5045 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5046 isPPC64, isTailCall, true, MemOpChains, 5047 TailCallArguments, dl); 5048 ArgOffset += 16; 5049 } 5050 break; 5051 } 5052 } 5053 // If all Altivec parameters fit in registers, as they usually do, 5054 // they get stack space following the non-Altivec parameters. We 5055 // don't track this here because nobody below needs it. 5056 // If there are more Altivec parameters than fit in registers emit 5057 // the stores here. 5058 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 5059 unsigned j = 0; 5060 // Offset is aligned; skip 1st 12 params which go in V registers. 
5061 ArgOffset = ((ArgOffset+15)/16)*16; 5062 ArgOffset += 12*16; 5063 for (unsigned i = 0; i != NumOps; ++i) { 5064 SDValue Arg = OutVals[i]; 5065 EVT ArgType = Outs[i].VT; 5066 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 5067 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 5068 if (++j > NumVRs) { 5069 SDValue PtrOff; 5070 // We are emitting Altivec params in order. 5071 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5072 isPPC64, isTailCall, true, MemOpChains, 5073 TailCallArguments, dl); 5074 ArgOffset += 16; 5075 } 5076 } 5077 } 5078 } 5079 5080 if (!MemOpChains.empty()) 5081 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5082 5083 // On Darwin, R12 must contain the address of an indirect callee. This does 5084 // not mean the MTCTR instruction must use R12; it's easier to model this as 5085 // an extra parameter, so do that. 5086 if (!isTailCall && 5087 !isFunctionGlobalAddress(Callee) && 5088 !isa<ExternalSymbolSDNode>(Callee) && 5089 !isBLACompatibleAddress(Callee, DAG)) 5090 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 5091 PPC::R12), Callee)); 5092 5093 // Build a sequence of copy-to-reg nodes chained together with token chain 5094 // and flag operands which copy the outgoing args into the appropriate regs. 
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
                    FPOp, true, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                    Ins, InVals);
}

/// CanLowerReturn - Run the RetCC_PPC return convention over Outs and report
/// whether CCState::CheckReturn accepts every value.
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_PPC);
}

/// LowerReturn - Assign each outgoing value a location with RetCC_PPC, extend
/// it as that location requires, copy it into its return register (the copies
/// are glued together through Flag) and emit a PPCISD::RET_FLAG node whose
/// operands are the chain, the used registers and, if present, the glue.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               SDLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Flag;
  // Operand 0 is a placeholder for the chain; it is overwritten below once
  // all register copies have been threaded onto it.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    // Widen the value to the location type the calling convention picked.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

/// LowerSTACKRESTORE - Lower a STACKRESTORE node (operand 0 = chain,
/// operand 1 = saved stack pointer).  The word currently addressed by the
/// stack pointer is loaded, the stack pointer register is set to the saved
/// value, and the loaded word is stored back at the new stack pointer.
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                      const PPCSubtarget &Subtarget) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
                                   MachinePointerInfo(),
                                   false, false, false, 0);

  // Restore the stack pointer.  The copy is chained after the load so the
  // link word is read before SP changes.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
                      false, false, 0);
}



/// getReturnAddrFrameIndex - Return a frame-index node (of pointer type) for
/// the return address save slot.  The fixed stack object is created on first
/// use and its index is cached in PPCFunctionInfo.
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet (a value of 0
  // is treated as "not yet created").
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

/// getFramePointerFrameIndex - Return a frame-index node (of pointer type)
/// for the frame pointer save slot.  The fixed stack object is created on
/// first use and its index is cached in PPCFunctionInfo.
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet (a value of 0 is
  // treated as "not yet created").
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
                                                           isDarwinABI);

    // Allocate the frame index for frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

/// LowerDYNAMIC_STACKALLOC - Lower a DYNAMIC_STACKALLOC node (operand 0 =
/// chain, operand 1 = size) to a PPCISD::DYNALLOC node that takes the chain,
/// the negated size and the frame pointer save slot, and produces a pointer
/// plus a chain.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                         SelectionDAG &DAG,
                                         const PPCSubtarget &Subtarget) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                  DAG.getConstant(0, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

/// lowerEH_SJLJ_SETJMP - Re-emit the node as PPCISD::EH_SJLJ_SETJMP with the
/// same two operands, producing an i32 result and a chain.
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

/// lowerEH_SJLJ_LONGJMP - Re-emit the node as PPCISD::EH_SJLJ_LONGJMP with
/// the same two operands, producing only a chain.
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

/// LowerLOAD - Custom-lower an i1 load.  Returns the merged pair
/// (i1 value, chain of the replacement load).
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load 8 bits with an extending load (ISD::EXTLOAD) into a
  // pointer-sized integer, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
                                 BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  // Result 1 of the new load is its output chain.
  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

/// LowerSTORE - Custom-lower a store of an i1 value into an 8-bit truncating
/// store that reuses the original chain, base pointer and memory operand.
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to a pointer-sized integer, then use a truncating
  // store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

/// LowerTRUNCATE - Custom-lower a truncate to i1 into a
/// PPCISD::ANDIo_1_GT_BIT node on the truncate's operand.
// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
///
/// Operands of Op: 0 = LHS, 1 = RHS, 2 = true value, 3 = false value,
/// 4 = condition code.  Op itself is returned unchanged whenever no fsel
/// form applies.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP?  Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      // FALLTHROUGH: SETNE is SETEQ with the selected values exchanged.
    case ISD::SETEQ:
      // Equality needs two fsels: one testing LHS >= 0 and one testing
      // -LHS >= 0.
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // FALLTHROUGH
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
      // FALLTHROUGH: setle is implemented below as fsel on the negated LHS.
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case (also reached when the zero-RHS switch above did not handle
  // CC): compare by feeding the sign of a subtraction to fsel.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    // FALLTHROUGH: SETNE is SETEQ with the selected values exchanged.
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    // LHS < RHS: fsel on (LHS - RHS) with the selected values exchanged.
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    // LHS > RHS: fsel on (RHS - LHS) with the selected values exchanged.
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

/// LowerFP_TO_INTForReuse - Emit the conversion for an FP_TO_SINT/FP_TO_UINT
/// node and store the converted value to a fresh stack temporary.  On return
/// RLI.Chain, RLI.Ptr and RLI.MPI describe the store's chain and the address
/// from which the integer result can be loaded; the other RLI fields are not
/// written here.
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               SDLoc dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  // Pick the conversion node from the result width and signedness.  The
  // converted integer is carried in an f64-typed value.
  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT, an unsigned i32 conversion uses the 64-bit signed
    // conversion (FCTIDZ).
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
                        (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
                                                PPCISD::FCTIDZ),
                      dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  // i32Stack: the result is i32, STFIWX is available, and a 32-bit
  // conversion node was chosen above, so a 4-byte slot is enough.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
                         MPI, false, false, 0);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}

/// LowerFP_TO_INT - Lower FP_TO_SINT/FP_TO_UINT by converting through a
/// stack slot (see LowerFP_TO_INTForReuse) and loading the integer result
/// back from it.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          SDLoc dl) const {
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                     false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                     RLI.Ranges);
}

// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
5515bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 5516 ReuseLoadInfo &RLI, 5517 SelectionDAG &DAG, 5518 ISD::LoadExtType ET) const { 5519 SDLoc dl(Op); 5520 if (ET == ISD::NON_EXTLOAD && 5521 (Op.getOpcode() == ISD::FP_TO_UINT || 5522 Op.getOpcode() == ISD::FP_TO_SINT) && 5523 isOperationLegalOrCustom(Op.getOpcode(), 5524 Op.getOperand(0).getValueType())) { 5525 5526 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 5527 return true; 5528 } 5529 5530 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 5531 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 5532 LD->isNonTemporal()) 5533 return false; 5534 if (LD->getMemoryVT() != MemVT) 5535 return false; 5536 5537 RLI.Ptr = LD->getBasePtr(); 5538 if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) { 5539 assert(LD->getAddressingMode() == ISD::PRE_INC && 5540 "Non-pre-inc AM on PPC?"); 5541 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 5542 LD->getOffset()); 5543 } 5544 5545 RLI.Chain = LD->getChain(); 5546 RLI.MPI = LD->getPointerInfo(); 5547 RLI.IsInvariant = LD->isInvariant(); 5548 RLI.Alignment = LD->getAlignment(); 5549 RLI.AAInfo = LD->getAAInfo(); 5550 RLI.Ranges = LD->getRanges(); 5551 5552 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 5553 return true; 5554} 5555 5556// Given the head of the old chain, ResChain, insert a token factor containing 5557// it and NewResChain, and make users of ResChain now be users of that token 5558// factor. 
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  // Nothing to splice if there is no old chain result.
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  // The token factor is first built with an UNDEF placeholder instead of
  // ResChain, and only receives its real operands after the replacement
  // below -- presumably so that the replacement does not rewrite the token
  // factor's own use of ResChain.
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

/// LowerINT_TO_FP - Custom-lower SINT_TO_FP/UINT_TO_FP to f32 or f64.  The
/// integer bits are moved into a floating-point value (through a reused load,
/// a stack slot, or a bitcast) and then converted with an FCFID-family node.
/// Returns an empty SDValue for any other result type.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // An i1 source is simply a select between 1.0 and 0.0.
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, Op.getValueType()),
                       DAG.getConstantFP(0.0, Op.getValueType()));

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
                   (Op.getOpcode() == ISD::UINT_TO_FP ?
                    PPCISD::FCFIDUS : PPCISD::FCFIDS) :
                   (Op.getOpcode() == ISD::UINT_TO_FP ?
                    PPCISD::FCFIDU : PPCISD::FCFID);
  MVT      FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
                   MVT::f32 : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      // (SINT >> 53) is 0 or -1 exactly when the top 11 bits are sign-bit
      // copies; adding 1 maps those to 1 or 0, so "unsigned > 1" selects
      // every other input.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    // Get the 64 integer bits into an f64 value, trying in order: reuse a
    // 64-bit load address, reuse a sign-/zero-extending 32-bit load address,
    // spill the i32 source of an explicit extend to a stack slot, or fall
    // back to a plain bitcast.
    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                         false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                         RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The i64 value is an explicit extension of an i32: store the i32 to a
      // 4-byte stack slot and let LFIWAX/LFIWZX perform the extension.
      MachineFrameInfo *FrameInfo = MF.getFrameInfo();
      EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
        DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                     MachinePointerInfo::getFixedStack(FrameIdx),
                     false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    // Without FPCVT the conversion produced an f64; round it for f32 results.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // Load the 32-bit value straight into an FP register with LFIWAX/LFIWZX,
    // either from the address of an existing load or from a new stack slot.
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                                   MachinePointerInfo::getFixedStack(FrameIdx),
                                   false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
                                 MachinePointerInfo::getFixedStack(FrameIdx),
                                 false, false, 0);

    // Load the value as a double.
    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
                     MachinePointerInfo::getFixedStack(FrameIdx),
                     false, false, false, 0);
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}

/// LowerFLT_ROUNDS_ - Lower FLT_ROUNDS_ by reading the FP control word with
/// MFFS, moving it to an integer through a stack slot, and remapping its two
/// low bits to the FLT_ROUNDS encoding.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  // Despite its name, Chain is the f64 result of the MFFS node; it is used
  // as the value operand of the store below.
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
                               StackSlot, MachinePointerInfo(), false, false,0);

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
                            false, false, false, 0);

  // Transform as necessary
  // CWD1 = CWD & 3
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, MVT::i32));
  // CWD2 = ((CWD ^ 3) & 3) >> 1, i.e. (~CWD & 3) >> 1
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, MVT::i32)),
                            DAG.getConstant(3, MVT::i32)),
                DAG.getConstant(1, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // NOTE(review): RetVal is i32, yet TRUNCATE is chosen only when VT is
  // narrower than 16 bits; a 16-bit VT would take the ZERO_EXTEND path.
  // Confirm the threshold is intended.
  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}

/// LowerSHL_PARTS - Lower SHL_PARTS, a left shift of a two-part value
/// (operand 0 = Lo, operand 1 = Hi) by operand 2.  Returns the merged pair
/// { OutLo, OutHi }.
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  // OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
  // OutLo = Lo << Amt
  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

/// LowerSRL_PARTS - Lower SRL_PARTS, a logical right shift of a two-part
/// value (operand 0 = Lo, operand 1 = Hi) by operand 2.  Returns the merged
/// pair { OutLo, OutHi }.
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  // OutLo = (Lo >> Amt) | (Hi << (BitWidth - Amt)) | (Hi >> (Amt - BitWidth))
  // OutHi = Hi >> Amt
  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

/// LowerSRA_PARTS - Lower SRA_PARTS, an arithmetic right shift of a two-part
/// value (operand 0 = Lo, operand 1 = Hi) by operand 2.  Returns the merged
/// pair { OutLo, OutHi }.
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  // When Amt - BitWidth <= 0 the low part is the combined value Tmp4;
  // otherwise it is Hi arithmetically shifted by the excess (Tmp6).
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
///
/// If VT is MVT::Other, the canonical vector type for SplatSize is used as
/// the result type instead.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                             SelectionDAG &DAG, SDLoc dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  // Indexed by SplatSize-1; the table has no vector type for a SplatSize of
  // 3 (that entry is MVT::Other).
  static const EVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDValue Elt = DAG.getConstant(Val, MVT::i32);
  SmallVector<SDValue, 8> Ops;
  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.  The result type defaults to the operand's type
/// when DestVT is MVT::Other.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
                                SelectionDAG &DAG, SDLoc dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.  The result type defaults to the type of LHS when
/// DestVT is MVT::Other.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, SDLoc dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.  The result type defaults to the type of Op0 when
/// DestVT is MVT::Other.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG,
                                SDLoc dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}


/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
                           EVT VT, SelectionDAG &DAG, SDLoc dl) {
  // Force LHS/RHS to be the right type.
6008 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 6009 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 6010 6011 int Ops[16]; 6012 for (unsigned i = 0; i != 16; ++i) 6013 Ops[i] = i + Amt; 6014 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 6015 return DAG.getNode(ISD::BITCAST, dl, VT, T); 6016} 6017 6018// If this is a case we can't handle, return null and let the default 6019// expansion code take care of it. If we CAN select this case, and if it 6020// selects to a single instruction, return Op. Otherwise, if we can codegen 6021// this case more efficiently than a constant pool load, lower it to the 6022// sequence of ops that should be used. 6023SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 6024 SelectionDAG &DAG) const { 6025 SDLoc dl(Op); 6026 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 6027 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 6028 6029 // Check if this is a splat of a constant value. 6030 APInt APSplatBits, APSplatUndef; 6031 unsigned SplatBitSize; 6032 bool HasAnyUndefs; 6033 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 6034 HasAnyUndefs, 0, true) || SplatBitSize > 32) 6035 return SDValue(); 6036 6037 unsigned SplatBits = APSplatBits.getZExtValue(); 6038 unsigned SplatUndef = APSplatUndef.getZExtValue(); 6039 unsigned SplatSize = SplatBitSize / 8; 6040 6041 // First, handle single instruction cases. 6042 6043 // All zeros? 6044 if (SplatBits == 0) { 6045 // Canonicalize all zero vectors to be v4i32. 6046 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 6047 SDValue Z = DAG.getConstant(0, MVT::i32); 6048 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 6049 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 6050 } 6051 return Op; 6052 } 6053 6054 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 
6055 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 6056 (32-SplatBitSize)); 6057 if (SextVal >= -16 && SextVal <= 15) 6058 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 6059 6060 6061 // Two instruction sequences. 6062 6063 // If this value is in the range [-32,30] and is even, use: 6064 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 6065 // If this value is in the range [17,31] and is odd, use: 6066 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 6067 // If this value is in the range [-31,-17] and is odd, use: 6068 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 6069 // Note the last two are three-instruction sequences. 6070 if (SextVal >= -32 && SextVal <= 31) { 6071 // To avoid having these optimizations undone by constant folding, 6072 // we convert to a pseudo that will be expanded later into one of 6073 // the above forms. 6074 SDValue Elt = DAG.getConstant(SextVal, MVT::i32); 6075 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 6076 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 6077 SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32); 6078 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 6079 if (VT == Op.getValueType()) 6080 return RetVal; 6081 else 6082 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 6083 } 6084 6085 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 6086 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 6087 // for fneg/fabs. 6088 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 6089 // Make -1 and vspltisw -1: 6090 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 6091 6092 // Make the VSLW intrinsic, computing 0x8000_0000. 6093 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 6094 OnesV, DAG, dl); 6095 6096 // xor by OnesV to invert it. 
6097 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 6098 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6099 } 6100 6101 // The remaining cases assume either big endian element order or 6102 // a splat-size that equates to the element size of the vector 6103 // to be built. An example that doesn't work for little endian is 6104 // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits 6105 // and a vector element size of 16 bits. The code below will 6106 // produce the vector in big endian element order, which for little 6107 // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. 6108 6109 // For now, just avoid these optimizations in that case. 6110 // FIXME: Develop correct optimizations for LE with mismatched 6111 // splat and element sizes. 6112 6113 if (Subtarget.isLittleEndian() && 6114 SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) 6115 return SDValue(); 6116 6117 // Check to see if this is a wide variety of vsplti*, binop self cases. 6118 static const signed char SplatCsts[] = { 6119 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 6120 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 6121 }; 6122 6123 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 6124 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 6125 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 6126 int i = SplatCsts[idx]; 6127 6128 // Figure out what shift amount will be used by altivec if shifted by i in 6129 // this splat size. 6130 unsigned TypeShiftAmt = i & (SplatBitSize-1); 6131 6132 // vsplti + shl self. 6133 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 6134 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6135 static const unsigned IIDs[] = { // Intrinsic to use for each size. 
6136 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 6137 Intrinsic::ppc_altivec_vslw 6138 }; 6139 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6140 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6141 } 6142 6143 // vsplti + srl self. 6144 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 6145 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6146 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6147 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 6148 Intrinsic::ppc_altivec_vsrw 6149 }; 6150 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6151 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6152 } 6153 6154 // vsplti + sra self. 6155 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 6156 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6157 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6158 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 6159 Intrinsic::ppc_altivec_vsraw 6160 }; 6161 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6162 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6163 } 6164 6165 // vsplti + rol self. 6166 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 6167 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 6168 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6169 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6170 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 6171 Intrinsic::ppc_altivec_vrlw 6172 }; 6173 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6174 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6175 } 6176 6177 // t = vsplti c, result = vsldoi t, t, 1 6178 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 
0xFF : 0))) { 6179 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6180 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 6181 } 6182 // t = vsplti c, result = vsldoi t, t, 2 6183 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 6184 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6185 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 6186 } 6187 // t = vsplti c, result = vsldoi t, t, 3 6188 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 6189 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6190 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 6191 } 6192 } 6193 6194 return SDValue(); 6195} 6196 6197/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 6198/// the specified operations to build the shuffle. 6199static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 6200 SDValue RHS, SelectionDAG &DAG, 6201 SDLoc dl) { 6202 unsigned OpNum = (PFEntry >> 26) & 0x0F; 6203 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 6204 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 6205 6206 enum { 6207 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 6208 OP_VMRGHW, 6209 OP_VMRGLW, 6210 OP_VSPLTISW0, 6211 OP_VSPLTISW1, 6212 OP_VSPLTISW2, 6213 OP_VSPLTISW3, 6214 OP_VSLDOI4, 6215 OP_VSLDOI8, 6216 OP_VSLDOI12 6217 }; 6218 6219 if (OpNum == OP_COPY) { 6220 if (LHSID == (1*9+2)*9+3) return LHS; 6221 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 6222 return RHS; 6223 } 6224 6225 SDValue OpLHS, OpRHS; 6226 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 6227 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 6228 6229 int ShufIdxs[16]; 6230 switch (OpNum) { 6231 default: llvm_unreachable("Unknown i32 permute!"); 6232 case OP_VMRGHW: 6233 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 6234 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; 
ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 6235 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 6236 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 6237 break; 6238 case OP_VMRGLW: 6239 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 6240 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 6241 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 6242 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 6243 break; 6244 case OP_VSPLTISW0: 6245 for (unsigned i = 0; i != 16; ++i) 6246 ShufIdxs[i] = (i&3)+0; 6247 break; 6248 case OP_VSPLTISW1: 6249 for (unsigned i = 0; i != 16; ++i) 6250 ShufIdxs[i] = (i&3)+4; 6251 break; 6252 case OP_VSPLTISW2: 6253 for (unsigned i = 0; i != 16; ++i) 6254 ShufIdxs[i] = (i&3)+8; 6255 break; 6256 case OP_VSPLTISW3: 6257 for (unsigned i = 0; i != 16; ++i) 6258 ShufIdxs[i] = (i&3)+12; 6259 break; 6260 case OP_VSLDOI4: 6261 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 6262 case OP_VSLDOI8: 6263 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 6264 case OP_VSLDOI12: 6265 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 6266 } 6267 EVT VT = OpLHS.getValueType(); 6268 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 6269 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 6270 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 6271 return DAG.getNode(ISD::BITCAST, dl, VT, T); 6272} 6273 6274/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 6275/// is a shuffle we can handle in a single instruction, return it. Otherwise, 6276/// return the code it can be lowered into. Worst case, it can always be 6277/// lowered into a vperm. 
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Strategy, in order: (1) leave masks that match a single-instruction
  // pattern untouched, (2) on big endian try the perfect-shuffle table for
  // shuffles of whole 4-byte elements, (3) otherwise emit a PPCISD::VPERM
  // with a constant byte mask.
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.getOpcode() == ISD::UNDEF) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  // The kind passed to the mask predicates is 2 on little endian, 0 otherwise
  // (the unary checks above pass 1).
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  // NOTE(review): the loop below reads PermMask[0..15], i.e. it assumes a
  // 16-entry (byte-granular) mask -- confirm against how this hook is
  // registered.
  ArrayRef<int> PermMask = SVOp->getMask();

  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      // Each defined byte must come from the same position j within its
      // source word ...
      unsigned ByteSource = PermMask[i*4+j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      // ... and all defined bytes of this word must share one source word.
      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    // The top two bits of the table entry hold the cost.
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask entries are mapped to source element 0.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
                                             MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                  ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}

/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
/// altivec comparison.  If it is, return true and fill in CompareOpc/isDot
/// with information about the intrinsic.
///
/// The intrinsic ID is read from operand 0 of Intrin.  On a false return
/// CompareOpc is -1 and isDot is false.  The "_p" (predicate) variants set
/// isDot; each shares its CompareOpc value with the plain variant.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
                                  bool &isDot) {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default: return false;
    // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;

    // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
  }
  return true;
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
///
/// Only the Altivec compare intrinsics recognized by getAltivecCompareInfo
/// are handled.  Plain compares become a PPCISD::VCMP node; predicate ("dot")
/// compares become a PPCISD::VCMPo node followed by a read of CR6 from which
/// a single bit is extracted as an i32.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  SDLoc dl(Op);
  int CompareOpc;
  bool isDot;
  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  // Here operands 1 and 2 are the two vectors being compared.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  // For the dot form operand 1 is the result selector (see the switch below),
  // so the compared vectors are operands 2 and 3.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                                DAG.getRegister(PPC::CR6, MVT::i32),
                                CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  // The shift amount is 8-(3-BitNo): 5 for BitNo 0 and 7 for BitNo 2.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8-(3-BitNo), MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, MVT::i32));
  return Flags;
}

/// LowerSIGN_EXTEND_INREG - Custom lowering for SIGN_EXTEND_INREG producing
/// v2i64.  A v2i32 source is returned unchanged; narrower sources are first
/// sign-extended in-register as v4i32 and then extended from v2i32.  Any
/// other result type returns null.
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
  // instructions), but for smaller types, we need to first extend up to v2i32
  // before going any farther.
  if (Op.getValueType() == MVT::v2i64) {
    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (ExtVT != MVT::v2i32) {
      // Step 1: view the input as v4i32 and sign-extend each lane from the
      // original element type (widened to a 4-element vector type).
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
                                        ExtVT.getVectorElementType(), 4)));
      // Step 2: view the result as v2i64 again and extend from v2i32.
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
                       DAG.getValueType(MVT::v2i32));
    }

    return Op;
  }

  return SDValue();
}

/// LowerSCALAR_TO_VECTOR - Lower SCALAR_TO_VECTOR by storing the scalar
/// operand to a fresh 16-byte, 16-byte-aligned stack object and loading the
/// whole slot back as the vector result type.
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy();
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
                               Op.getOperand(0), FIdx, MachinePointerInfo(),
                               false, false, 0);
  // Load it out.
  // The load is chained on the store so it observes the stored value.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
                     false, false, false, 0);
}

/// LowerMUL - Custom lowering of vector multiplies for v4i32, v8i16 and
/// v16i8 using Altivec multiply intrinsics.  Any other type is unreachable.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    // Cross products, via vmsumuhm against the half-swapped RHS with a zero
    // addend.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    // Multiply-add with a zero addend is a plain multiply.
    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
/// Dispatches on Op.getOpcode() to the matching Lower* routine and returns
/// its result.  Opcodes without a case hit llvm_unreachable.
/// INTRINSIC_W_CHAIN deliberately returns a null SDValue here.
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG, Subtarget);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG, Subtarget);

  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG, Subtarget);

  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, Subtarget);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);

  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                      SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
}

void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  const TargetMachine &TM = getTargetMachine();
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
|| TM.getSubtarget<PPCSubtarget>().isPPC64()) 6738 return; 6739 6740 EVT VT = N->getValueType(0); 6741 6742 if (VT == MVT::i64) { 6743 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget); 6744 6745 Results.push_back(NewNode); 6746 Results.push_back(NewNode.getValue(1)); 6747 } 6748 return; 6749 } 6750 case ISD::FP_ROUND_INREG: { 6751 assert(N->getValueType(0) == MVT::ppcf128); 6752 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 6753 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 6754 MVT::f64, N->getOperand(0), 6755 DAG.getIntPtrConstant(0)); 6756 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 6757 MVT::f64, N->getOperand(0), 6758 DAG.getIntPtrConstant(1)); 6759 6760 // Add the two halves of the long double in round-to-zero mode. 6761 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 6762 6763 // We know the low half is about to be thrown away, so just use something 6764 // convenient. 6765 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 6766 FPreg, FPreg)); 6767 return; 6768 } 6769 case ISD::FP_TO_SINT: 6770 // LowerFP_TO_INT() can only handle f32 and f64. 
6771 if (N->getOperand(0).getValueType() == MVT::ppcf128) 6772 return; 6773 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 6774 return; 6775 } 6776} 6777 6778 6779//===----------------------------------------------------------------------===// 6780// Other Lowering Code 6781//===----------------------------------------------------------------------===// 6782 6783static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { 6784 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 6785 Function *Func = Intrinsic::getDeclaration(M, Id); 6786 return Builder.CreateCall(Func); 6787} 6788 6789// The mappings for emitLeading/TrailingFence is taken from 6790// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html 6791Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 6792 AtomicOrdering Ord, bool IsStore, 6793 bool IsLoad) const { 6794 if (Ord == SequentiallyConsistent) 6795 return callIntrinsic(Builder, Intrinsic::ppc_sync); 6796 else if (isAtLeastRelease(Ord)) 6797 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 6798 else 6799 return nullptr; 6800} 6801 6802Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 6803 AtomicOrdering Ord, bool IsStore, 6804 bool IsLoad) const { 6805 if (IsLoad && isAtLeastAcquire(Ord)) 6806 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 6807 // FIXME: this is too conservative, a dependent branch + isync is enough. 6808 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and 6809 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html 6810 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. 6811 else 6812 return nullptr; 6813} 6814 6815MachineBasicBlock * 6816PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 6817 bool is64bit, unsigned BinOpcode) const { 6818 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
6819 const TargetInstrInfo *TII = 6820 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 6821 6822 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6823 MachineFunction *F = BB->getParent(); 6824 MachineFunction::iterator It = BB; 6825 ++It; 6826 6827 unsigned dest = MI->getOperand(0).getReg(); 6828 unsigned ptrA = MI->getOperand(1).getReg(); 6829 unsigned ptrB = MI->getOperand(2).getReg(); 6830 unsigned incr = MI->getOperand(3).getReg(); 6831 DebugLoc dl = MI->getDebugLoc(); 6832 6833 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 6834 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6835 F->insert(It, loopMBB); 6836 F->insert(It, exitMBB); 6837 exitMBB->splice(exitMBB->begin(), BB, 6838 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6839 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6840 6841 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6842 unsigned TmpReg = (!BinOpcode) ? incr : 6843 RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass 6844 : &PPC::GPRCRegClass); 6845 6846 // thisMBB: 6847 // ... 6848 // fallthrough --> loopMBB 6849 BB->addSuccessor(loopMBB); 6850 6851 // loopMBB: 6852 // l[wd]arx dest, ptr 6853 // add r0, dest, incr 6854 // st[wd]cx. r0, ptr 6855 // bne- loopMBB 6856 // fallthrough --> exitMBB 6857 BB = loopMBB; 6858 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 6859 .addReg(ptrA).addReg(ptrB); 6860 if (BinOpcode) 6861 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 6862 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 6863 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 6864 BuildMI(BB, dl, TII->get(PPC::BCC)) 6865 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 6866 BB->addSuccessor(loopMBB); 6867 BB->addSuccessor(exitMBB); 6868 6869 // exitMBB: 6870 // ... 
6871 BB = exitMBB; 6872 return BB; 6873} 6874 6875MachineBasicBlock * 6876PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 6877 MachineBasicBlock *BB, 6878 bool is8bit, // operation 6879 unsigned BinOpcode) const { 6880 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 6881 const TargetInstrInfo *TII = 6882 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 6883 // In 64 bit mode we have to use 64 bits for addresses, even though the 6884 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 6885 // registers without caring whether they're 32 or 64, but here we're 6886 // doing actual arithmetic on the addresses. 6887 bool is64bit = Subtarget.isPPC64(); 6888 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 6889 6890 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6891 MachineFunction *F = BB->getParent(); 6892 MachineFunction::iterator It = BB; 6893 ++It; 6894 6895 unsigned dest = MI->getOperand(0).getReg(); 6896 unsigned ptrA = MI->getOperand(1).getReg(); 6897 unsigned ptrB = MI->getOperand(2).getReg(); 6898 unsigned incr = MI->getOperand(3).getReg(); 6899 DebugLoc dl = MI->getDebugLoc(); 6900 6901 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 6902 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6903 F->insert(It, loopMBB); 6904 F->insert(It, exitMBB); 6905 exitMBB->splice(exitMBB->begin(), BB, 6906 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6907 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6908 6909 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6910 const TargetRegisterClass *RC = is64bit ? 
&PPC::G8RCRegClass 6911 : &PPC::GPRCRegClass; 6912 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 6913 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 6914 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 6915 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 6916 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 6917 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 6918 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 6919 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 6920 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 6921 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 6922 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 6923 unsigned Ptr1Reg; 6924 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 6925 6926 // thisMBB: 6927 // ... 6928 // fallthrough --> loopMBB 6929 BB->addSuccessor(loopMBB); 6930 6931 // The 4-byte load must be aligned, while a char or short may be 6932 // anywhere in the word. Hence all this nasty bookkeeping code. 6933 // add ptr1, ptrA, ptrB [copy if ptrA==0] 6934 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 6935 // xori shift, shift1, 24 [16] 6936 // rlwinm ptr, ptr1, 0, 0, 29 6937 // slw incr2, incr, shift 6938 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 6939 // slw mask, mask2, shift 6940 // loopMBB: 6941 // lwarx tmpDest, ptr 6942 // add tmp, tmpDest, incr2 6943 // andc tmp2, tmpDest, mask 6944 // and tmp3, tmp, mask 6945 // or tmp4, tmp3, tmp2 6946 // stwcx. tmp4, ptr 6947 // bne- loopMBB 6948 // fallthrough --> exitMBB 6949 // srw dest, tmpDest, shift 6950 if (ptrA != ZeroReg) { 6951 Ptr1Reg = RegInfo.createVirtualRegister(RC); 6952 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 6953 .addReg(ptrA).addReg(ptrB); 6954 } else { 6955 Ptr1Reg = ptrB; 6956 } 6957 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 6958 .addImm(3).addImm(27).addImm(is8bit ? 
28 : 27); 6959 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 6960 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 6961 if (is64bit) 6962 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 6963 .addReg(Ptr1Reg).addImm(0).addImm(61); 6964 else 6965 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 6966 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 6967 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 6968 .addReg(incr).addReg(ShiftReg); 6969 if (is8bit) 6970 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 6971 else { 6972 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 6973 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 6974 } 6975 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 6976 .addReg(Mask2Reg).addReg(ShiftReg); 6977 6978 BB = loopMBB; 6979 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 6980 .addReg(ZeroReg).addReg(PtrReg); 6981 if (BinOpcode) 6982 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 6983 .addReg(Incr2Reg).addReg(TmpDestReg); 6984 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 6985 .addReg(TmpDestReg).addReg(MaskReg); 6986 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 6987 .addReg(TmpReg).addReg(MaskReg); 6988 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 6989 .addReg(Tmp3Reg).addReg(Tmp2Reg); 6990 BuildMI(BB, dl, TII->get(PPC::STWCX)) 6991 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 6992 BuildMI(BB, dl, TII->get(PPC::BCC)) 6993 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 6994 BB->addSuccessor(loopMBB); 6995 BB->addSuccessor(exitMBB); 6996 6997 // exitMBB: 6998 // ... 
6999 BB = exitMBB; 7000 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 7001 .addReg(ShiftReg); 7002 return BB; 7003} 7004 7005llvm::MachineBasicBlock* 7006PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, 7007 MachineBasicBlock *MBB) const { 7008 DebugLoc DL = MI->getDebugLoc(); 7009 const TargetInstrInfo *TII = 7010 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 7011 7012 MachineFunction *MF = MBB->getParent(); 7013 MachineRegisterInfo &MRI = MF->getRegInfo(); 7014 7015 const BasicBlock *BB = MBB->getBasicBlock(); 7016 MachineFunction::iterator I = MBB; 7017 ++I; 7018 7019 // Memory Reference 7020 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 7021 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 7022 7023 unsigned DstReg = MI->getOperand(0).getReg(); 7024 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 7025 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 7026 unsigned mainDstReg = MRI.createVirtualRegister(RC); 7027 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 7028 7029 MVT PVT = getPointerTy(); 7030 assert((PVT == MVT::i64 || PVT == MVT::i32) && 7031 "Invalid Pointer Size!"); 7032 // For v = setjmp(buf), we generate 7033 // 7034 // thisMBB: 7035 // SjLjSetup mainMBB 7036 // bl mainMBB 7037 // v_restore = 1 7038 // b sinkMBB 7039 // 7040 // mainMBB: 7041 // buf[LabelOffset] = LR 7042 // v_main = 0 7043 // 7044 // sinkMBB: 7045 // v = phi(main, restore) 7046 // 7047 7048 MachineBasicBlock *thisMBB = MBB; 7049 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 7050 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 7051 MF->insert(I, mainMBB); 7052 MF->insert(I, sinkMBB); 7053 7054 MachineInstrBuilder MIB; 7055 7056 // Transfer the remainder of BB and its successor edges to sinkMBB. 
7057 sinkMBB->splice(sinkMBB->begin(), MBB, 7058 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 7059 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 7060 7061 // Note that the structure of the jmp_buf used here is not compatible 7062 // with that used by libc, and is not designed to be. Specifically, it 7063 // stores only those 'reserved' registers that LLVM does not otherwise 7064 // understand how to spill. Also, by convention, by the time this 7065 // intrinsic is called, Clang has already stored the frame address in the 7066 // first slot of the buffer and stack address in the third. Following the 7067 // X86 target code, we'll store the jump address in the second slot. We also 7068 // need to save the TOC pointer (R2) to handle jumps between shared 7069 // libraries, and that will be stored in the fourth slot. The thread 7070 // identifier (R13) is not affected. 7071 7072 // thisMBB: 7073 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 7074 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 7075 const int64_t BPOffset = 4 * PVT.getStoreSize(); 7076 7077 // Prepare IP either in reg. 7078 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 7079 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 7080 unsigned BufReg = MI->getOperand(1).getReg(); 7081 7082 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 7083 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 7084 .addReg(PPC::X2) 7085 .addImm(TOCOffset) 7086 .addReg(BufReg); 7087 MIB.setMemRefs(MMOBegin, MMOEnd); 7088 } 7089 7090 // Naked functions never have a base pointer, and so we use r1. For all 7091 // other functions, this decision must be delayed until during PEI. 7092 unsigned BaseReg; 7093 if (MF->getFunction()->getAttributes().hasAttribute( 7094 AttributeSet::FunctionIndex, Attribute::Naked)) 7095 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 7096 else 7097 BaseReg = Subtarget.isPPC64() ? 
PPC::BP8 : PPC::BP; 7098 7099 MIB = BuildMI(*thisMBB, MI, DL, 7100 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 7101 .addReg(BaseReg) 7102 .addImm(BPOffset) 7103 .addReg(BufReg); 7104 MIB.setMemRefs(MMOBegin, MMOEnd); 7105 7106 // Setup 7107 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 7108 const PPCRegisterInfo *TRI = 7109 getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo(); 7110 MIB.addRegMask(TRI->getNoPreservedMask()); 7111 7112 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 7113 7114 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 7115 .addMBB(mainMBB); 7116 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 7117 7118 thisMBB->addSuccessor(mainMBB, /* weight */ 0); 7119 thisMBB->addSuccessor(sinkMBB, /* weight */ 1); 7120 7121 // mainMBB: 7122 // mainDstReg = 0 7123 MIB = BuildMI(mainMBB, DL, 7124 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 7125 7126 // Store IP 7127 if (Subtarget.isPPC64()) { 7128 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 7129 .addReg(LabelReg) 7130 .addImm(LabelOffset) 7131 .addReg(BufReg); 7132 } else { 7133 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 7134 .addReg(LabelReg) 7135 .addImm(LabelOffset) 7136 .addReg(BufReg); 7137 } 7138 7139 MIB.setMemRefs(MMOBegin, MMOEnd); 7140 7141 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 7142 mainMBB->addSuccessor(sinkMBB); 7143 7144 // sinkMBB: 7145 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 7146 TII->get(PPC::PHI), DstReg) 7147 .addReg(mainDstReg).addMBB(mainMBB) 7148 .addReg(restoreDstReg).addMBB(thisMBB); 7149 7150 MI->eraseFromParent(); 7151 return sinkMBB; 7152} 7153 7154MachineBasicBlock * 7155PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, 7156 MachineBasicBlock *MBB) const { 7157 DebugLoc DL = MI->getDebugLoc(); 7158 const TargetInstrInfo *TII = 7159 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 7160 7161 MachineFunction *MF = 
MBB->getParent(); 7162 MachineRegisterInfo &MRI = MF->getRegInfo(); 7163 7164 // Memory Reference 7165 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 7166 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 7167 7168 MVT PVT = getPointerTy(); 7169 assert((PVT == MVT::i64 || PVT == MVT::i32) && 7170 "Invalid Pointer Size!"); 7171 7172 const TargetRegisterClass *RC = 7173 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 7174 unsigned Tmp = MRI.createVirtualRegister(RC); 7175 // Since FP is only updated here but NOT referenced, it's treated as GPR. 7176 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 7177 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 7178 unsigned BP = (PVT == MVT::i64) ? PPC::X30 : 7179 (Subtarget.isSVR4ABI() && 7180 MF->getTarget().getRelocationModel() == Reloc::PIC_ ? 7181 PPC::R29 : PPC::R30); 7182 7183 MachineInstrBuilder MIB; 7184 7185 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 7186 const int64_t SPOffset = 2 * PVT.getStoreSize(); 7187 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 7188 const int64_t BPOffset = 4 * PVT.getStoreSize(); 7189 7190 unsigned BufReg = MI->getOperand(0).getReg(); 7191 7192 // Reload FP (the jumped-to function may not have had a 7193 // frame pointer, and if so, then its r31 will be restored 7194 // as necessary). 
7195 if (PVT == MVT::i64) { 7196 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 7197 .addImm(0) 7198 .addReg(BufReg); 7199 } else { 7200 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 7201 .addImm(0) 7202 .addReg(BufReg); 7203 } 7204 MIB.setMemRefs(MMOBegin, MMOEnd); 7205 7206 // Reload IP 7207 if (PVT == MVT::i64) { 7208 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 7209 .addImm(LabelOffset) 7210 .addReg(BufReg); 7211 } else { 7212 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 7213 .addImm(LabelOffset) 7214 .addReg(BufReg); 7215 } 7216 MIB.setMemRefs(MMOBegin, MMOEnd); 7217 7218 // Reload SP 7219 if (PVT == MVT::i64) { 7220 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 7221 .addImm(SPOffset) 7222 .addReg(BufReg); 7223 } else { 7224 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 7225 .addImm(SPOffset) 7226 .addReg(BufReg); 7227 } 7228 MIB.setMemRefs(MMOBegin, MMOEnd); 7229 7230 // Reload BP 7231 if (PVT == MVT::i64) { 7232 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 7233 .addImm(BPOffset) 7234 .addReg(BufReg); 7235 } else { 7236 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 7237 .addImm(BPOffset) 7238 .addReg(BufReg); 7239 } 7240 MIB.setMemRefs(MMOBegin, MMOEnd); 7241 7242 // Reload TOC 7243 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 7244 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 7245 .addImm(TOCOffset) 7246 .addReg(BufReg); 7247 7248 MIB.setMemRefs(MMOBegin, MMOEnd); 7249 } 7250 7251 // Jump 7252 BuildMI(*MBB, MI, DL, 7253 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 7254 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? 
PPC::BCTR8 : PPC::BCTR)); 7255 7256 MI->eraseFromParent(); 7257 return MBB; 7258} 7259 7260MachineBasicBlock * 7261PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 7262 MachineBasicBlock *BB) const { 7263 if (MI->getOpcode() == TargetOpcode::STACKMAP || 7264 MI->getOpcode() == TargetOpcode::PATCHPOINT) 7265 return emitPatchPoint(MI, BB); 7266 7267 if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || 7268 MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { 7269 return emitEHSjLjSetJmp(MI, BB); 7270 } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || 7271 MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { 7272 return emitEHSjLjLongJmp(MI, BB); 7273 } 7274 7275 const TargetInstrInfo *TII = 7276 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 7277 7278 // To "insert" these instructions we actually have to insert their 7279 // control-flow patterns. 7280 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7281 MachineFunction::iterator It = BB; 7282 ++It; 7283 7284 MachineFunction *F = BB->getParent(); 7285 7286 if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || 7287 MI->getOpcode() == PPC::SELECT_CC_I8 || 7288 MI->getOpcode() == PPC::SELECT_I4 || 7289 MI->getOpcode() == PPC::SELECT_I8)) { 7290 SmallVector<MachineOperand, 2> Cond; 7291 if (MI->getOpcode() == PPC::SELECT_CC_I4 || 7292 MI->getOpcode() == PPC::SELECT_CC_I8) 7293 Cond.push_back(MI->getOperand(4)); 7294 else 7295 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 7296 Cond.push_back(MI->getOperand(1)); 7297 7298 DebugLoc dl = MI->getDebugLoc(); 7299 const TargetInstrInfo *TII = 7300 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 7301 TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), 7302 Cond, MI->getOperand(2).getReg(), 7303 MI->getOperand(3).getReg()); 7304 } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || 7305 MI->getOpcode() == PPC::SELECT_CC_I8 || 7306 MI->getOpcode() == PPC::SELECT_CC_F4 || 7307 MI->getOpcode() == PPC::SELECT_CC_F8 || 7308 
MI->getOpcode() == PPC::SELECT_CC_VRRC || 7309 MI->getOpcode() == PPC::SELECT_CC_VSFRC || 7310 MI->getOpcode() == PPC::SELECT_CC_VSRC || 7311 MI->getOpcode() == PPC::SELECT_I4 || 7312 MI->getOpcode() == PPC::SELECT_I8 || 7313 MI->getOpcode() == PPC::SELECT_F4 || 7314 MI->getOpcode() == PPC::SELECT_F8 || 7315 MI->getOpcode() == PPC::SELECT_VRRC || 7316 MI->getOpcode() == PPC::SELECT_VSFRC || 7317 MI->getOpcode() == PPC::SELECT_VSRC) { 7318 // The incoming instruction knows the destination vreg to set, the 7319 // condition code register to branch on, the true/false values to 7320 // select between, and a branch opcode to use. 7321 7322 // thisMBB: 7323 // ... 7324 // TrueVal = ... 7325 // cmpTY ccX, r1, r2 7326 // bCC copy1MBB 7327 // fallthrough --> copy0MBB 7328 MachineBasicBlock *thisMBB = BB; 7329 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7330 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7331 DebugLoc dl = MI->getDebugLoc(); 7332 F->insert(It, copy0MBB); 7333 F->insert(It, sinkMBB); 7334 7335 // Transfer the remainder of BB and its successor edges to sinkMBB. 7336 sinkMBB->splice(sinkMBB->begin(), BB, 7337 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7338 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7339 7340 // Next, add the true and fallthrough blocks as its successors. 
7341 BB->addSuccessor(copy0MBB); 7342 BB->addSuccessor(sinkMBB); 7343 7344 if (MI->getOpcode() == PPC::SELECT_I4 || 7345 MI->getOpcode() == PPC::SELECT_I8 || 7346 MI->getOpcode() == PPC::SELECT_F4 || 7347 MI->getOpcode() == PPC::SELECT_F8 || 7348 MI->getOpcode() == PPC::SELECT_VRRC || 7349 MI->getOpcode() == PPC::SELECT_VSFRC || 7350 MI->getOpcode() == PPC::SELECT_VSRC) { 7351 BuildMI(BB, dl, TII->get(PPC::BC)) 7352 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 7353 } else { 7354 unsigned SelectPred = MI->getOperand(4).getImm(); 7355 BuildMI(BB, dl, TII->get(PPC::BCC)) 7356 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 7357 } 7358 7359 // copy0MBB: 7360 // %FalseValue = ... 7361 // # fallthrough to sinkMBB 7362 BB = copy0MBB; 7363 7364 // Update machine-CFG edges 7365 BB->addSuccessor(sinkMBB); 7366 7367 // sinkMBB: 7368 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7369 // ... 7370 BB = sinkMBB; 7371 BuildMI(*BB, BB->begin(), dl, 7372 TII->get(PPC::PHI), MI->getOperand(0).getReg()) 7373 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 7374 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 7375 } else if (MI->getOpcode() == PPC::ReadTB) { 7376 // To read the 64-bit time-base register on a 32-bit target, we read the 7377 // two halves. Should the counter have wrapped while it was being read, we 7378 // need to try again. 7379 // ... 7380 // readLoop: 7381 // mfspr Rx,TBU # load from TBU 7382 // mfspr Ry,TB # load from TB 7383 // mfspr Rz,TBU # load from TBU 7384 // cmpw crX,Rx,Rz # check if ���old���=���new��� 7385 // bne readLoop # branch if they're not equal 7386 // ... 7387 7388 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 7389 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7390 DebugLoc dl = MI->getDebugLoc(); 7391 F->insert(It, readMBB); 7392 F->insert(It, sinkMBB); 7393 7394 // Transfer the remainder of BB and its successor edges to sinkMBB. 
7395 sinkMBB->splice(sinkMBB->begin(), BB, 7396 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7397 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7398 7399 BB->addSuccessor(readMBB); 7400 BB = readMBB; 7401 7402 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7403 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 7404 unsigned LoReg = MI->getOperand(0).getReg(); 7405 unsigned HiReg = MI->getOperand(1).getReg(); 7406 7407 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 7408 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 7409 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 7410 7411 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 7412 7413 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 7414 .addReg(HiReg).addReg(ReadAgainReg); 7415 BuildMI(BB, dl, TII->get(PPC::BCC)) 7416 .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); 7417 7418 BB->addSuccessor(readMBB); 7419 BB->addSuccessor(sinkMBB); 7420 } 7421 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 7422 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 7423 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 7424 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 7425 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 7426 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); 7427 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 7428 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); 7429 7430 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 7431 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 7432 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 7433 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 7434 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 7435 BB = EmitAtomicBinary(MI, BB, false, PPC::AND); 7436 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 7437 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); 7438 7439 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 
7440 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 7441 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 7442 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 7443 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 7444 BB = EmitAtomicBinary(MI, BB, false, PPC::OR); 7445 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 7446 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); 7447 7448 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 7449 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 7450 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 7451 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 7452 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 7453 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 7454 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 7455 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 7456 7457 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 7458 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 7459 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 7460 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 7461 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 7462 BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); 7463 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 7464 BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); 7465 7466 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 7467 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 7468 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 7469 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 7470 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 7471 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 7472 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 7473 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 7474 7475 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 7476 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 7477 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 7478 BB = 
EmitPartwordAtomicBinary(MI, BB, false, 0); 7479 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 7480 BB = EmitAtomicBinary(MI, BB, false, 0); 7481 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 7482 BB = EmitAtomicBinary(MI, BB, true, 0); 7483 7484 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 7485 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { 7486 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 7487 7488 unsigned dest = MI->getOperand(0).getReg(); 7489 unsigned ptrA = MI->getOperand(1).getReg(); 7490 unsigned ptrB = MI->getOperand(2).getReg(); 7491 unsigned oldval = MI->getOperand(3).getReg(); 7492 unsigned newval = MI->getOperand(4).getReg(); 7493 DebugLoc dl = MI->getDebugLoc(); 7494 7495 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 7496 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 7497 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 7498 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7499 F->insert(It, loop1MBB); 7500 F->insert(It, loop2MBB); 7501 F->insert(It, midMBB); 7502 F->insert(It, exitMBB); 7503 exitMBB->splice(exitMBB->begin(), BB, 7504 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7505 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7506 7507 // thisMBB: 7508 // ... 7509 // fallthrough --> loopMBB 7510 BB->addSuccessor(loop1MBB); 7511 7512 // loop1MBB: 7513 // l[wd]arx dest, ptr 7514 // cmp[wd] dest, oldval 7515 // bne- midMBB 7516 // loop2MBB: 7517 // st[wd]cx. newval, ptr 7518 // bne- loopMBB 7519 // b exitBB 7520 // midMBB: 7521 // st[wd]cx. dest, ptr 7522 // exitBB: 7523 BB = loop1MBB; 7524 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 7525 .addReg(ptrA).addReg(ptrB); 7526 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) 7527 .addReg(oldval).addReg(dest); 7528 BuildMI(BB, dl, TII->get(PPC::BCC)) 7529 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 7530 BB->addSuccessor(loop2MBB); 7531 BB->addSuccessor(midMBB); 7532 7533 BB = loop2MBB; 7534 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 7535 .addReg(newval).addReg(ptrA).addReg(ptrB); 7536 BuildMI(BB, dl, TII->get(PPC::BCC)) 7537 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 7538 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 7539 BB->addSuccessor(loop1MBB); 7540 BB->addSuccessor(exitMBB); 7541 7542 BB = midMBB; 7543 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 7544 .addReg(dest).addReg(ptrA).addReg(ptrB); 7545 BB->addSuccessor(exitMBB); 7546 7547 // exitMBB: 7548 // ... 7549 BB = exitMBB; 7550 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 7551 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 7552 // We must use 64-bit registers for addresses when targeting 64-bit, 7553 // since we're actually doing arithmetic on them. Other registers 7554 // can be 32-bit. 
7555 bool is64bit = Subtarget.isPPC64(); 7556 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 7557 7558 unsigned dest = MI->getOperand(0).getReg(); 7559 unsigned ptrA = MI->getOperand(1).getReg(); 7560 unsigned ptrB = MI->getOperand(2).getReg(); 7561 unsigned oldval = MI->getOperand(3).getReg(); 7562 unsigned newval = MI->getOperand(4).getReg(); 7563 DebugLoc dl = MI->getDebugLoc(); 7564 7565 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 7566 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 7567 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 7568 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7569 F->insert(It, loop1MBB); 7570 F->insert(It, loop2MBB); 7571 F->insert(It, midMBB); 7572 F->insert(It, exitMBB); 7573 exitMBB->splice(exitMBB->begin(), BB, 7574 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7575 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7576 7577 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7578 const TargetRegisterClass *RC = is64bit ? 
&PPC::G8RCRegClass 7579 : &PPC::GPRCRegClass; 7580 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 7581 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 7582 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 7583 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 7584 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 7585 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 7586 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 7587 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 7588 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 7589 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 7590 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 7591 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 7592 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 7593 unsigned Ptr1Reg; 7594 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 7595 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 7596 // thisMBB: 7597 // ... 7598 // fallthrough --> loopMBB 7599 BB->addSuccessor(loop1MBB); 7600 7601 // The 4-byte load must be aligned, while a char or short may be 7602 // anywhere in the word. Hence all this nasty bookkeeping code. 7603 // add ptr1, ptrA, ptrB [copy if ptrA==0] 7604 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 7605 // xori shift, shift1, 24 [16] 7606 // rlwinm ptr, ptr1, 0, 0, 29 7607 // slw newval2, newval, shift 7608 // slw oldval2, oldval,shift 7609 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 7610 // slw mask, mask2, shift 7611 // and newval3, newval2, mask 7612 // and oldval3, oldval2, mask 7613 // loop1MBB: 7614 // lwarx tmpDest, ptr 7615 // and tmp, tmpDest, mask 7616 // cmpw tmp, oldval3 7617 // bne- midMBB 7618 // loop2MBB: 7619 // andc tmp2, tmpDest, mask 7620 // or tmp4, tmp2, newval3 7621 // stwcx. tmp4, ptr 7622 // bne- loop1MBB 7623 // b exitBB 7624 // midMBB: 7625 // stwcx. 
tmpDest, ptr 7626 // exitBB: 7627 // srw dest, tmpDest, shift 7628 if (ptrA != ZeroReg) { 7629 Ptr1Reg = RegInfo.createVirtualRegister(RC); 7630 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 7631 .addReg(ptrA).addReg(ptrB); 7632 } else { 7633 Ptr1Reg = ptrB; 7634 } 7635 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 7636 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 7637 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 7638 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 7639 if (is64bit) 7640 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 7641 .addReg(Ptr1Reg).addImm(0).addImm(61); 7642 else 7643 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 7644 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 7645 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 7646 .addReg(newval).addReg(ShiftReg); 7647 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 7648 .addReg(oldval).addReg(ShiftReg); 7649 if (is8bit) 7650 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 7651 else { 7652 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 7653 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 7654 .addReg(Mask3Reg).addImm(65535); 7655 } 7656 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 7657 .addReg(Mask2Reg).addReg(ShiftReg); 7658 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 7659 .addReg(NewVal2Reg).addReg(MaskReg); 7660 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 7661 .addReg(OldVal2Reg).addReg(MaskReg); 7662 7663 BB = loop1MBB; 7664 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 7665 .addReg(ZeroReg).addReg(PtrReg); 7666 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 7667 .addReg(TmpDestReg).addReg(MaskReg); 7668 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 7669 .addReg(TmpReg).addReg(OldVal3Reg); 7670 BuildMI(BB, dl, TII->get(PPC::BCC)) 7671 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 7672 BB->addSuccessor(loop2MBB); 7673 BB->addSuccessor(midMBB); 7674 7675 BB = loop2MBB; 7676 BuildMI(BB, dl, 
TII->get(PPC::ANDC),Tmp2Reg) 7677 .addReg(TmpDestReg).addReg(MaskReg); 7678 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 7679 .addReg(Tmp2Reg).addReg(NewVal3Reg); 7680 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 7681 .addReg(ZeroReg).addReg(PtrReg); 7682 BuildMI(BB, dl, TII->get(PPC::BCC)) 7683 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 7684 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 7685 BB->addSuccessor(loop1MBB); 7686 BB->addSuccessor(exitMBB); 7687 7688 BB = midMBB; 7689 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 7690 .addReg(ZeroReg).addReg(PtrReg); 7691 BB->addSuccessor(exitMBB); 7692 7693 // exitMBB: 7694 // ... 7695 BB = exitMBB; 7696 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 7697 .addReg(ShiftReg); 7698 } else if (MI->getOpcode() == PPC::FADDrtz) { 7699 // This pseudo performs an FADD with rounding mode temporarily forced 7700 // to round-to-zero. We emit this via custom inserter since the FPSCR 7701 // is not modeled at the SelectionDAG level. 7702 unsigned Dest = MI->getOperand(0).getReg(); 7703 unsigned Src1 = MI->getOperand(1).getReg(); 7704 unsigned Src2 = MI->getOperand(2).getReg(); 7705 DebugLoc dl = MI->getDebugLoc(); 7706 7707 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7708 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 7709 7710 // Save FPSCR value. 7711 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 7712 7713 // Set rounding mode to round-to-zero. 7714 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 7715 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 7716 7717 // Perform addition. 7718 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 7719 7720 // Restore FPSCR value. 
7721 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); 7722 } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 7723 MI->getOpcode() == PPC::ANDIo_1_GT_BIT || 7724 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 7725 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { 7726 unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 7727 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? 7728 PPC::ANDIo8 : PPC::ANDIo; 7729 bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 7730 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); 7731 7732 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7733 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 7734 &PPC::GPRCRegClass : 7735 &PPC::G8RCRegClass); 7736 7737 DebugLoc dl = MI->getDebugLoc(); 7738 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 7739 .addReg(MI->getOperand(1).getReg()).addImm(1); 7740 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 7741 MI->getOperand(0).getReg()) 7742 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 7743 } else { 7744 llvm_unreachable("Unexpected instr type to insert"); 7745 } 7746 7747 MI->eraseFromParent(); // The pseudo instruction is gone now. 7748 return BB; 7749} 7750 7751//===----------------------------------------------------------------------===// 7752// Target Optimization Hooks 7753//===----------------------------------------------------------------------===// 7754 7755SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, 7756 DAGCombinerInfo &DCI, 7757 unsigned &RefinementSteps, 7758 bool &UseOneConstNR) const { 7759 EVT VT = Operand.getValueType(); 7760 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 7761 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 7762 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 7763 (VT == MVT::v2f64 && Subtarget.hasVSX())) { 7764 // Convergence is quadratic, so we essentially double the number of digits 7765 // correct after every iteration. For both FRE and FRSQRTE, the minimum 7766 // architected relative accuracy is 2^-5. 
When hasRecipPrec(), this is 7767 // 2^-14. IEEE float has 23 digits and double has 52 digits. 7768 RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; 7769 if (VT.getScalarType() == MVT::f64) 7770 ++RefinementSteps; 7771 UseOneConstNR = true; 7772 return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); 7773 } 7774 return SDValue(); 7775} 7776 7777SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, 7778 DAGCombinerInfo &DCI, 7779 unsigned &RefinementSteps) const { 7780 EVT VT = Operand.getValueType(); 7781 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 7782 (VT == MVT::f64 && Subtarget.hasFRE()) || 7783 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 7784 (VT == MVT::v2f64 && Subtarget.hasVSX())) { 7785 // Convergence is quadratic, so we essentially double the number of digits 7786 // correct after every iteration. For both FRE and FRSQRTE, the minimum 7787 // architected relative accuracy is 2^-5. When hasRecipPrec(), this is 7788 // 2^-14. IEEE float has 23 digits and double has 52 digits. 7789 RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; 7790 if (VT.getScalarType() == MVT::f64) 7791 ++RefinementSteps; 7792 return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); 7793 } 7794 return SDValue(); 7795} 7796 7797bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { 7798 // Note: This functionality is used only when unsafe-fp-math is enabled, and 7799 // on cores with reciprocal estimates (which are used when unsafe-fp-math is 7800 // enabled for division), this functionality is redundant with the default 7801 // combiner logic (once the division -> reciprocal/multiply transformation 7802 // has taken place). As a result, this matters more for older cores than for 7803 // newer ones. 
7804 7805 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 7806 // reciprocal if there are two or more FDIVs (for embedded cores with only 7807 // one FP pipeline) for three or more FDIVs (for generic OOO cores). 7808 switch (Subtarget.getDarwinDirective()) { 7809 default: 7810 return NumUsers > 2; 7811 case PPC::DIR_440: 7812 case PPC::DIR_A2: 7813 case PPC::DIR_E500mc: 7814 case PPC::DIR_E5500: 7815 return NumUsers > 1; 7816 } 7817} 7818 7819static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 7820 unsigned Bytes, int Dist, 7821 SelectionDAG &DAG) { 7822 if (VT.getSizeInBits() / 8 != Bytes) 7823 return false; 7824 7825 SDValue BaseLoc = Base->getBasePtr(); 7826 if (Loc.getOpcode() == ISD::FrameIndex) { 7827 if (BaseLoc.getOpcode() != ISD::FrameIndex) 7828 return false; 7829 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 7830 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 7831 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 7832 int FS = MFI->getObjectSize(FI); 7833 int BFS = MFI->getObjectSize(BFI); 7834 if (FS != BFS || FS != (int)Bytes) return false; 7835 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 7836 } 7837 7838 // Handle X+C 7839 if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && 7840 cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) 7841 return true; 7842 7843 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7844 const GlobalValue *GV1 = nullptr; 7845 const GlobalValue *GV2 = nullptr; 7846 int64_t Offset1 = 0; 7847 int64_t Offset2 = 0; 7848 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 7849 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 7850 if (isGA1 && isGA2 && GV1 == GV2) 7851 return Offset1 == (Offset2 + Dist*Bytes); 7852 return false; 7853} 7854 7855// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 7856// not enforce equality of 
the chain operands. 7857static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 7858 unsigned Bytes, int Dist, 7859 SelectionDAG &DAG) { 7860 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 7861 EVT VT = LS->getMemoryVT(); 7862 SDValue Loc = LS->getBasePtr(); 7863 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 7864 } 7865 7866 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 7867 EVT VT; 7868 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 7869 default: return false; 7870 case Intrinsic::ppc_altivec_lvx: 7871 case Intrinsic::ppc_altivec_lvxl: 7872 case Intrinsic::ppc_vsx_lxvw4x: 7873 VT = MVT::v4i32; 7874 break; 7875 case Intrinsic::ppc_vsx_lxvd2x: 7876 VT = MVT::v2f64; 7877 break; 7878 case Intrinsic::ppc_altivec_lvebx: 7879 VT = MVT::i8; 7880 break; 7881 case Intrinsic::ppc_altivec_lvehx: 7882 VT = MVT::i16; 7883 break; 7884 case Intrinsic::ppc_altivec_lvewx: 7885 VT = MVT::i32; 7886 break; 7887 } 7888 7889 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 7890 } 7891 7892 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 7893 EVT VT; 7894 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 7895 default: return false; 7896 case Intrinsic::ppc_altivec_stvx: 7897 case Intrinsic::ppc_altivec_stvxl: 7898 case Intrinsic::ppc_vsx_stxvw4x: 7899 VT = MVT::v4i32; 7900 break; 7901 case Intrinsic::ppc_vsx_stxvd2x: 7902 VT = MVT::v2f64; 7903 break; 7904 case Intrinsic::ppc_altivec_stvebx: 7905 VT = MVT::i8; 7906 break; 7907 case Intrinsic::ppc_altivec_stvehx: 7908 VT = MVT::i16; 7909 break; 7910 case Intrinsic::ppc_altivec_stvewx: 7911 VT = MVT::i32; 7912 break; 7913 } 7914 7915 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); 7916 } 7917 7918 return false; 7919} 7920 7921// Return true is there is a nearyby consecutive load to the one provided 7922// (regardless of alignment). We search up and down the chain, looking though 7923// token factors and other loads (but nothing else). 
As a result, a true result 7924// indicates that it is safe to create a new consecutive load adjacent to the 7925// load provided. 7926static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 7927 SDValue Chain = LD->getChain(); 7928 EVT VT = LD->getMemoryVT(); 7929 7930 SmallSet<SDNode *, 16> LoadRoots; 7931 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 7932 SmallSet<SDNode *, 16> Visited; 7933 7934 // First, search up the chain, branching to follow all token-factor operands. 7935 // If we find a consecutive load, then we're done, otherwise, record all 7936 // nodes just above the top-level loads and token factors. 7937 while (!Queue.empty()) { 7938 SDNode *ChainNext = Queue.pop_back_val(); 7939 if (!Visited.insert(ChainNext).second) 7940 continue; 7941 7942 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { 7943 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 7944 return true; 7945 7946 if (!Visited.count(ChainLD->getChain().getNode())) 7947 Queue.push_back(ChainLD->getChain().getNode()); 7948 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 7949 for (const SDUse &O : ChainNext->ops()) 7950 if (!Visited.count(O.getNode())) 7951 Queue.push_back(O.getNode()); 7952 } else 7953 LoadRoots.insert(ChainNext); 7954 } 7955 7956 // Second, search down the chain, starting from the top-level nodes recorded 7957 // in the first phase. These top-level nodes are the nodes just above all 7958 // loads and token factors. Starting with their uses, recursively look though 7959 // all loads (just the chain uses) and token factors to find a consecutive 7960 // load. 
7961 Visited.clear(); 7962 Queue.clear(); 7963 7964 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 7965 IE = LoadRoots.end(); I != IE; ++I) { 7966 Queue.push_back(*I); 7967 7968 while (!Queue.empty()) { 7969 SDNode *LoadRoot = Queue.pop_back_val(); 7970 if (!Visited.insert(LoadRoot).second) 7971 continue; 7972 7973 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 7974 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 7975 return true; 7976 7977 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 7978 UE = LoadRoot->use_end(); UI != UE; ++UI) 7979 if (((isa<MemSDNode>(*UI) && 7980 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 7981 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 7982 Queue.push_back(*UI); 7983 } 7984 } 7985 7986 return false; 7987} 7988 7989SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 7990 DAGCombinerInfo &DCI) const { 7991 SelectionDAG &DAG = DCI.DAG; 7992 SDLoc dl(N); 7993 7994 assert(Subtarget.useCRBits() && 7995 "Expecting to be tracking CR bits"); 7996 // If we're tracking CR bits, we need to be careful that we don't have: 7997 // trunc(binary-ops(zext(x), zext(y))) 7998 // or 7999 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 8000 // such that we're unnecessarily moving things into GPRs when it would be 8001 // better to keep them in CR bits. 8002 8003 // Note that trunc here can be an actual i1 trunc, or can be the effective 8004 // truncation that comes from a setcc or select_cc. 8005 if (N->getOpcode() == ISD::TRUNCATE && 8006 N->getValueType(0) != MVT::i1) 8007 return SDValue(); 8008 8009 if (N->getOperand(0).getValueType() != MVT::i32 && 8010 N->getOperand(0).getValueType() != MVT::i64) 8011 return SDValue(); 8012 8013 if (N->getOpcode() == ISD::SETCC || 8014 N->getOpcode() == ISD::SELECT_CC) { 8015 // If we're looking at a comparison, then we need to make sure that the 8016 // high bits (all except for the first) don't matter the result. 
8017 ISD::CondCode CC = 8018 cast<CondCodeSDNode>(N->getOperand( 8019 N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); 8020 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 8021 8022 if (ISD::isSignedIntSetCC(CC)) { 8023 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 8024 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 8025 return SDValue(); 8026 } else if (ISD::isUnsignedIntSetCC(CC)) { 8027 if (!DAG.MaskedValueIsZero(N->getOperand(0), 8028 APInt::getHighBitsSet(OpBits, OpBits-1)) || 8029 !DAG.MaskedValueIsZero(N->getOperand(1), 8030 APInt::getHighBitsSet(OpBits, OpBits-1))) 8031 return SDValue(); 8032 } else { 8033 // This is neither a signed nor an unsigned comparison, just make sure 8034 // that the high bits are equal. 8035 APInt Op1Zero, Op1One; 8036 APInt Op2Zero, Op2One; 8037 DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); 8038 DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); 8039 8040 // We don't really care about what is known about the first bit (if 8041 // anything), so clear it in all masks prior to comparing them. 8042 Op1Zero.clearBit(0); Op1One.clearBit(0); 8043 Op2Zero.clearBit(0); Op2One.clearBit(0); 8044 8045 if (Op1Zero != Op2Zero || Op1One != Op2One) 8046 return SDValue(); 8047 } 8048 } 8049 8050 // We now know that the higher-order bits are irrelevant, we just need to 8051 // make sure that all of the intermediate operations are bit operations, and 8052 // all inputs are extensions. 
8053 if (N->getOperand(0).getOpcode() != ISD::AND && 8054 N->getOperand(0).getOpcode() != ISD::OR && 8055 N->getOperand(0).getOpcode() != ISD::XOR && 8056 N->getOperand(0).getOpcode() != ISD::SELECT && 8057 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 8058 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 8059 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 8060 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 8061 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 8062 return SDValue(); 8063 8064 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 8065 N->getOperand(1).getOpcode() != ISD::AND && 8066 N->getOperand(1).getOpcode() != ISD::OR && 8067 N->getOperand(1).getOpcode() != ISD::XOR && 8068 N->getOperand(1).getOpcode() != ISD::SELECT && 8069 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 8070 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 8071 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 8072 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 8073 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 8074 return SDValue(); 8075 8076 SmallVector<SDValue, 4> Inputs; 8077 SmallVector<SDValue, 8> BinOps, PromOps; 8078 SmallPtrSet<SDNode *, 16> Visited; 8079 8080 for (unsigned i = 0; i < 2; ++i) { 8081 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 8082 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 8083 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 8084 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 8085 isa<ConstantSDNode>(N->getOperand(i))) 8086 Inputs.push_back(N->getOperand(i)); 8087 else 8088 BinOps.push_back(N->getOperand(i)); 8089 8090 if (N->getOpcode() == ISD::TRUNCATE) 8091 break; 8092 } 8093 8094 // Visit all inputs, collect all binary operations (and, or, xor and 8095 // select) that are all fed by extensions. 
8096 while (!BinOps.empty()) { 8097 SDValue BinOp = BinOps.back(); 8098 BinOps.pop_back(); 8099 8100 if (!Visited.insert(BinOp.getNode()).second) 8101 continue; 8102 8103 PromOps.push_back(BinOp); 8104 8105 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 8106 // The condition of the select is not promoted. 8107 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 8108 continue; 8109 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 8110 continue; 8111 8112 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 8113 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 8114 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 8115 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 8116 isa<ConstantSDNode>(BinOp.getOperand(i))) { 8117 Inputs.push_back(BinOp.getOperand(i)); 8118 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 8119 BinOp.getOperand(i).getOpcode() == ISD::OR || 8120 BinOp.getOperand(i).getOpcode() == ISD::XOR || 8121 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 8122 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 8123 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 8124 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 8125 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 8126 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 8127 BinOps.push_back(BinOp.getOperand(i)); 8128 } else { 8129 // We have an input that is not an extension or another binary 8130 // operation; we'll abort this transformation. 8131 return SDValue(); 8132 } 8133 } 8134 } 8135 8136 // Make sure that this is a self-contained cluster of operations (which 8137 // is not quite the same thing as saying that everything has only one 8138 // use). 
8139 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8140 if (isa<ConstantSDNode>(Inputs[i])) 8141 continue; 8142 8143 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 8144 UE = Inputs[i].getNode()->use_end(); 8145 UI != UE; ++UI) { 8146 SDNode *User = *UI; 8147 if (User != N && !Visited.count(User)) 8148 return SDValue(); 8149 8150 // Make sure that we're not going to promote the non-output-value 8151 // operand(s) or SELECT or SELECT_CC. 8152 // FIXME: Although we could sometimes handle this, and it does occur in 8153 // practice that one of the condition inputs to the select is also one of 8154 // the outputs, we currently can't deal with this. 8155 if (User->getOpcode() == ISD::SELECT) { 8156 if (User->getOperand(0) == Inputs[i]) 8157 return SDValue(); 8158 } else if (User->getOpcode() == ISD::SELECT_CC) { 8159 if (User->getOperand(0) == Inputs[i] || 8160 User->getOperand(1) == Inputs[i]) 8161 return SDValue(); 8162 } 8163 } 8164 } 8165 8166 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 8167 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 8168 UE = PromOps[i].getNode()->use_end(); 8169 UI != UE; ++UI) { 8170 SDNode *User = *UI; 8171 if (User != N && !Visited.count(User)) 8172 return SDValue(); 8173 8174 // Make sure that we're not going to promote the non-output-value 8175 // operand(s) or SELECT or SELECT_CC. 8176 // FIXME: Although we could sometimes handle this, and it does occur in 8177 // practice that one of the condition inputs to the select is also one of 8178 // the outputs, we currently can't deal with this. 8179 if (User->getOpcode() == ISD::SELECT) { 8180 if (User->getOperand(0) == PromOps[i]) 8181 return SDValue(); 8182 } else if (User->getOpcode() == ISD::SELECT_CC) { 8183 if (User->getOperand(0) == PromOps[i] || 8184 User->getOperand(1) == PromOps[i]) 8185 return SDValue(); 8186 } 8187 } 8188 } 8189 8190 // Replace all inputs with the extension operand. 
8191 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8192 // Constants may have users outside the cluster of to-be-promoted nodes, 8193 // and so we need to replace those as we do the promotions. 8194 if (isa<ConstantSDNode>(Inputs[i])) 8195 continue; 8196 else 8197 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 8198 } 8199 8200 // Replace all operations (these are all the same, but have a different 8201 // (i1) return type). DAG.getNode will validate that the types of 8202 // a binary operator match, so go through the list in reverse so that 8203 // we've likely promoted both operands first. Any intermediate truncations or 8204 // extensions disappear. 8205 while (!PromOps.empty()) { 8206 SDValue PromOp = PromOps.back(); 8207 PromOps.pop_back(); 8208 8209 if (PromOp.getOpcode() == ISD::TRUNCATE || 8210 PromOp.getOpcode() == ISD::SIGN_EXTEND || 8211 PromOp.getOpcode() == ISD::ZERO_EXTEND || 8212 PromOp.getOpcode() == ISD::ANY_EXTEND) { 8213 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 8214 PromOp.getOperand(0).getValueType() != MVT::i1) { 8215 // The operand is not yet ready (see comment below). 
8216 PromOps.insert(PromOps.begin(), PromOp); 8217 continue; 8218 } 8219 8220 SDValue RepValue = PromOp.getOperand(0); 8221 if (isa<ConstantSDNode>(RepValue)) 8222 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 8223 8224 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 8225 continue; 8226 } 8227 8228 unsigned C; 8229 switch (PromOp.getOpcode()) { 8230 default: C = 0; break; 8231 case ISD::SELECT: C = 1; break; 8232 case ISD::SELECT_CC: C = 2; break; 8233 } 8234 8235 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 8236 PromOp.getOperand(C).getValueType() != MVT::i1) || 8237 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 8238 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 8239 // The to-be-promoted operands of this node have not yet been 8240 // promoted (this should be rare because we're going through the 8241 // list backward, but if one of the operands has several users in 8242 // this cluster of to-be-promoted nodes, it is possible). 8243 PromOps.insert(PromOps.begin(), PromOp); 8244 continue; 8245 } 8246 8247 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 8248 PromOp.getNode()->op_end()); 8249 8250 // If there are any constant inputs, make sure they're replaced now. 8251 for (unsigned i = 0; i < 2; ++i) 8252 if (isa<ConstantSDNode>(Ops[C+i])) 8253 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 8254 8255 DAG.ReplaceAllUsesOfValueWith(PromOp, 8256 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 8257 } 8258 8259 // Now we're left with the initial truncation itself. 8260 if (N->getOpcode() == ISD::TRUNCATE) 8261 return N->getOperand(0); 8262 8263 // Otherwise, this is a comparison. The operands to be compared have just 8264 // changed type (to i1), but everything else is the same. 
8265 return SDValue(N, 0); 8266} 8267 8268SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 8269 DAGCombinerInfo &DCI) const { 8270 SelectionDAG &DAG = DCI.DAG; 8271 SDLoc dl(N); 8272 8273 // If we're tracking CR bits, we need to be careful that we don't have: 8274 // zext(binary-ops(trunc(x), trunc(y))) 8275 // or 8276 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 8277 // such that we're unnecessarily moving things into CR bits that can more 8278 // efficiently stay in GPRs. Note that if we're not certain that the high 8279 // bits are set as required by the final extension, we still may need to do 8280 // some masking to get the proper behavior. 8281 8282 // This same functionality is important on PPC64 when dealing with 8283 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 8284 // the return values of functions. Because it is so similar, it is handled 8285 // here as well. 8286 8287 if (N->getValueType(0) != MVT::i32 && 8288 N->getValueType(0) != MVT::i64) 8289 return SDValue(); 8290 8291 if (!((N->getOperand(0).getValueType() == MVT::i1 && 8292 Subtarget.useCRBits()) || 8293 (N->getOperand(0).getValueType() == MVT::i32 && 8294 Subtarget.isPPC64()))) 8295 return SDValue(); 8296 8297 if (N->getOperand(0).getOpcode() != ISD::AND && 8298 N->getOperand(0).getOpcode() != ISD::OR && 8299 N->getOperand(0).getOpcode() != ISD::XOR && 8300 N->getOperand(0).getOpcode() != ISD::SELECT && 8301 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 8302 return SDValue(); 8303 8304 SmallVector<SDValue, 4> Inputs; 8305 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 8306 SmallPtrSet<SDNode *, 16> Visited; 8307 8308 // Visit all inputs, collect all binary operations (and, or, xor and 8309 // select) that are all fed by truncations. 
8310 while (!BinOps.empty()) { 8311 SDValue BinOp = BinOps.back(); 8312 BinOps.pop_back(); 8313 8314 if (!Visited.insert(BinOp.getNode()).second) 8315 continue; 8316 8317 PromOps.push_back(BinOp); 8318 8319 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 8320 // The condition of the select is not promoted. 8321 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 8322 continue; 8323 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 8324 continue; 8325 8326 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 8327 isa<ConstantSDNode>(BinOp.getOperand(i))) { 8328 Inputs.push_back(BinOp.getOperand(i)); 8329 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 8330 BinOp.getOperand(i).getOpcode() == ISD::OR || 8331 BinOp.getOperand(i).getOpcode() == ISD::XOR || 8332 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 8333 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 8334 BinOps.push_back(BinOp.getOperand(i)); 8335 } else { 8336 // We have an input that is not a truncation or another binary 8337 // operation; we'll abort this transformation. 8338 return SDValue(); 8339 } 8340 } 8341 } 8342 8343 // The operands of a select that must be truncated when the select is 8344 // promoted because the operand is actually part of the to-be-promoted set. 8345 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 8346 8347 // Make sure that this is a self-contained cluster of operations (which 8348 // is not quite the same thing as saying that everything has only one 8349 // use). 8350 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8351 if (isa<ConstantSDNode>(Inputs[i])) 8352 continue; 8353 8354 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 8355 UE = Inputs[i].getNode()->use_end(); 8356 UI != UE; ++UI) { 8357 SDNode *User = *UI; 8358 if (User != N && !Visited.count(User)) 8359 return SDValue(); 8360 8361 // If we're going to promote the non-output-value operand(s) or SELECT or 8362 // SELECT_CC, record them for truncation. 
8363 if (User->getOpcode() == ISD::SELECT) { 8364 if (User->getOperand(0) == Inputs[i]) 8365 SelectTruncOp[0].insert(std::make_pair(User, 8366 User->getOperand(0).getValueType())); 8367 } else if (User->getOpcode() == ISD::SELECT_CC) { 8368 if (User->getOperand(0) == Inputs[i]) 8369 SelectTruncOp[0].insert(std::make_pair(User, 8370 User->getOperand(0).getValueType())); 8371 if (User->getOperand(1) == Inputs[i]) 8372 SelectTruncOp[1].insert(std::make_pair(User, 8373 User->getOperand(1).getValueType())); 8374 } 8375 } 8376 } 8377 8378 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 8379 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 8380 UE = PromOps[i].getNode()->use_end(); 8381 UI != UE; ++UI) { 8382 SDNode *User = *UI; 8383 if (User != N && !Visited.count(User)) 8384 return SDValue(); 8385 8386 // If we're going to promote the non-output-value operand(s) or SELECT or 8387 // SELECT_CC, record them for truncation. 8388 if (User->getOpcode() == ISD::SELECT) { 8389 if (User->getOperand(0) == PromOps[i]) 8390 SelectTruncOp[0].insert(std::make_pair(User, 8391 User->getOperand(0).getValueType())); 8392 } else if (User->getOpcode() == ISD::SELECT_CC) { 8393 if (User->getOperand(0) == PromOps[i]) 8394 SelectTruncOp[0].insert(std::make_pair(User, 8395 User->getOperand(0).getValueType())); 8396 if (User->getOperand(1) == PromOps[i]) 8397 SelectTruncOp[1].insert(std::make_pair(User, 8398 User->getOperand(1).getValueType())); 8399 } 8400 } 8401 } 8402 8403 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 8404 bool ReallyNeedsExt = false; 8405 if (N->getOpcode() != ISD::ANY_EXTEND) { 8406 // If all of the inputs are not already sign/zero extended, then 8407 // we'll still need to do that at the end. 
8408 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8409 if (isa<ConstantSDNode>(Inputs[i])) 8410 continue; 8411 8412 unsigned OpBits = 8413 Inputs[i].getOperand(0).getValueSizeInBits(); 8414 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 8415 8416 if ((N->getOpcode() == ISD::ZERO_EXTEND && 8417 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 8418 APInt::getHighBitsSet(OpBits, 8419 OpBits-PromBits))) || 8420 (N->getOpcode() == ISD::SIGN_EXTEND && 8421 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 8422 (OpBits-(PromBits-1)))) { 8423 ReallyNeedsExt = true; 8424 break; 8425 } 8426 } 8427 } 8428 8429 // Replace all inputs, either with the truncation operand, or a 8430 // truncation or extension to the final output type. 8431 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8432 // Constant inputs need to be replaced with the to-be-promoted nodes that 8433 // use them because they might have users outside of the cluster of 8434 // promoted nodes. 8435 if (isa<ConstantSDNode>(Inputs[i])) 8436 continue; 8437 8438 SDValue InSrc = Inputs[i].getOperand(0); 8439 if (Inputs[i].getValueType() == N->getValueType(0)) 8440 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 8441 else if (N->getOpcode() == ISD::SIGN_EXTEND) 8442 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8443 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 8444 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8445 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8446 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 8447 else 8448 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8449 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 8450 } 8451 8452 // Replace all operations (these are all the same, but have a different 8453 // (promoted) return type). DAG.getNode will validate that the types of 8454 // a binary operator match, so go through the list in reverse so that 8455 // we've likely promoted both operands first. 
8456 while (!PromOps.empty()) { 8457 SDValue PromOp = PromOps.back(); 8458 PromOps.pop_back(); 8459 8460 unsigned C; 8461 switch (PromOp.getOpcode()) { 8462 default: C = 0; break; 8463 case ISD::SELECT: C = 1; break; 8464 case ISD::SELECT_CC: C = 2; break; 8465 } 8466 8467 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 8468 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 8469 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 8470 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 8471 // The to-be-promoted operands of this node have not yet been 8472 // promoted (this should be rare because we're going through the 8473 // list backward, but if one of the operands has several users in 8474 // this cluster of to-be-promoted nodes, it is possible). 8475 PromOps.insert(PromOps.begin(), PromOp); 8476 continue; 8477 } 8478 8479 // For SELECT and SELECT_CC nodes, we do a similar check for any 8480 // to-be-promoted comparison inputs. 8481 if (PromOp.getOpcode() == ISD::SELECT || 8482 PromOp.getOpcode() == ISD::SELECT_CC) { 8483 if ((SelectTruncOp[0].count(PromOp.getNode()) && 8484 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 8485 (SelectTruncOp[1].count(PromOp.getNode()) && 8486 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 8487 PromOps.insert(PromOps.begin(), PromOp); 8488 continue; 8489 } 8490 } 8491 8492 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 8493 PromOp.getNode()->op_end()); 8494 8495 // If this node has constant inputs, then they'll need to be promoted here. 
8496 for (unsigned i = 0; i < 2; ++i) { 8497 if (!isa<ConstantSDNode>(Ops[C+i])) 8498 continue; 8499 if (Ops[C+i].getValueType() == N->getValueType(0)) 8500 continue; 8501 8502 if (N->getOpcode() == ISD::SIGN_EXTEND) 8503 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8504 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8505 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8506 else 8507 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8508 } 8509 8510 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 8511 // truncate them again to the original value type. 8512 if (PromOp.getOpcode() == ISD::SELECT || 8513 PromOp.getOpcode() == ISD::SELECT_CC) { 8514 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 8515 if (SI0 != SelectTruncOp[0].end()) 8516 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 8517 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 8518 if (SI1 != SelectTruncOp[1].end()) 8519 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 8520 } 8521 8522 DAG.ReplaceAllUsesOfValueWith(PromOp, 8523 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 8524 } 8525 8526 // Now we're left with the initial extension itself. 8527 if (!ReallyNeedsExt) 8528 return N->getOperand(0); 8529 8530 // To zero extend, just mask off everything except for the first bit (in the 8531 // i1 case). 
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       N->getValueType(0)));

  // To sign extend, shift the promoted value left so that bit (PromBits-1)
  // becomes the top bit of the result type, then arithmetic-shift it back
  // down by the same amount.
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
  SDValue ShiftCst =
    DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
  return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
                                 N->getOperand(0), ShiftCst), ShiftCst);
}

/// combineFPToIntToFP - Combine an int -> FP conversion (SINT_TO_FP or
/// UINT_TO_FP) whose integer operand is itself the result of an FP -> int
/// conversion into a direct FCTID[U]Z / FCFID* pair, so that the value never
/// has to leave the floating-point domain.
///
/// \param N   the SINT_TO_FP / UINT_TO_FP node (asserted).
/// \param DCI combiner state; newly created FP_EXTEND / FP_ROUND nodes are
///            pushed on its worklist.
/// \returns the replacement value, or an empty SDValue when any of the
///          following holds: the subtarget has no 64-bit support, the result
///          type is neither f32 nor f64, the integer operand is i1 or i32,
///          or the integer operand is not an FP_TO_SINT (or an FP_TO_UINT
///          when the subtarget has FPCVT).
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (!Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or i1 conversions.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
                   (Op.getOpcode() == ISD::UINT_TO_FP ?
                    PPCISD::FCFIDUS : PPCISD::FCFIDS) :
                   (Op.getOpcode() == ISD::UINT_TO_FP ?
                    PPCISD::FCFIDU : PPCISD::FCFID);
  // The result type of the FCF* node chosen above: f32 only for the
  // single-precision forms, f64 otherwise.
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
              MVT::f32 : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    // The FCTID* nodes below are built with an f64 operand, so widen an f32
    // source first.
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    }

    // Signed source conversion -> FCTIDZ, unsigned -> FCTIDUZ.
    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT the conversion above produced an f64; round it down when
    // the caller asked for f32.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}

// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
//
// N is either an ISD::LOAD or an ISD::INTRINSIC_W_CHAIN memory intrinsic; any
// other opcode is unreachable.  The result is an XXSWAPD node (value + chain)
// fed by a new LXVD2X memory-intrinsic node that reuses N's chain, base
// pointer and memory operand.  Both new nodes are added to DCI's worklist.
// Returns an empty SDValue for a plain load whose memory operand is smaller
// than 16 bytes.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    Base = Intrin->getBasePtr();
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();
  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(VecTy, MVT::Other),
                                         LoadOps, VecTy, MMO);
  DCI.AddToWorklist(Load.getNode());
  // Thread the swap onto the chain result of the new load.
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(VecTy, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());
  return Swap;
}

// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
//
// N is either an ISD::STORE or an ISD::INTRINSIC_VOID memory intrinsic; any
// other opcode is unreachable.  The stored value is first passed through an
// XXSWAPD node and then written by a new STXVD2X memory-intrinsic node that
// reuses N's base pointer and memory operand.  Both new nodes are added to
// DCI's worklist.  Returns an empty SDValue for a plain store whose memory
// operand is smaller than 16 bytes.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  // Index of the operand of N that holds the value being stored.
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(VecTy, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  // The store is chained after the swap and stores the swapped value.
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}

/// PerformDAGCombine - Target-specific DAG combine entry point.  Dispatches
/// on N's opcode and either
///   - returns a replacement value for N,
///   - returns SDValue(N, 0) after having already rewritten the DAG through
///     DCI.CombineTo (LOAD and BSWAP cases), or
///   - returns an empty SDValue when no combine applies.
///
/// Handled opcodes: PPCISD::SHL/SRL/SRA with a constant first operand, the
/// integer extensions, TRUNCATE/SETCC/SELECT_CC, SINT_TO_FP/UINT_TO_FP,
/// STORE, LOAD, INTRINSIC_WO_CHAIN, INTRINSIC_W_CHAIN, INTRINSIC_VOID, BSWAP,
/// PPCISD::VCMP, BRCOND and BR_CC.
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  const TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())    // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
  case ISD::SETCC:
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
  case ISD::STORE: {
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      // FCTIWZ is built on an f64 operand below; widen f32 inputs first.
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      // Operands: chain, converted value, address, and the type of the
      // original (integer) stored value.
      SDValue Ops[] = {
        N->getOperand(0), Val, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };

      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops,
              cast<StoreSDNode>(N)->getMemoryVT(),
              cast<StoreSDNode>(N)->getMemOperand());
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    // i64 is only accepted when the subtarget reports both LDBRX support and
    // PPC64.
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    EVT VT = N->getOperand(1).getValueType();
    if (VT.isSimple()) {
      MVT StoreVT = VT.getSimpleVT();
      if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
          TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
          TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
    if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
        TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
        // P8 and later hardware should just use LOAD.
        !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
        (VT == MVT::v16i8 || VT == MVT::v8i16 ||
         VT == MVT::v4i32 || VT == MVT::v4f32) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations.  The results of these permutations are the requested
      // loaded values.  The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr = (isLittleEndian ?
                            Intrinsic::ppc_altivec_lvsr :
                            Intrinsic::ppc_altivec_lvsl);
      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                -LD->getMemoryVT().getStoreSize()+1,
                                2*LD->getMemoryVT().getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
                                               getPointerTy());
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(MVT::v4i32, MVT::Other),
                                BaseLoadOps, MVT::v4i32, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*LD->getMemoryVT().getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(MVT::v4i32, MVT::Other),
                                ExtraLoadOps, MVT::v4i32, ExtraMMO);

      // Both new loads hang off the original chain; merge their output
      // chains so users of the old load's chain depend on both.
      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
        BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code.  We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      // The loads above were created as v4i32; cast back to the type the
      // original load produced.
      if (VT != MVT::v4i32)
        Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);

      // The output of the permutation is our loaded result, the TokenFactor is
      // our new chain.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    }
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    // Reuse an existing lvsl/lvsr when this one's address is that one's base
    // address plus an offset whose low four bits are known to be zero.
    bool isLittleEndian = Subtarget.isLittleEndian();
    Intrinsic::ID Intr = (isLittleEndian ?
                          Intrinsic::ppc_altivec_lvsr :
                          Intrinsic::ppc_altivec_lvsl);
    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      if (DAG.MaskedValueIsZero(Add->getOperand(1),
            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
              Add.getValueType().getScalarType().getSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
                Intr) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.

            return SDValue(*UI, 0);
          }
        }
      }
    }
    }

    break;
  case ISD::INTRINSIC_W_CHAIN: {
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
        TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
        TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    // i64 is only accepted when the subtarget reports both LDBRX support and
    // PPC64.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      // The LBRX node produces i64 only for an i64 bswap; i16 and i32 swaps
      // both produce an i32 value.
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if its flag result (value #1) has no
      // uses, don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    // A conditional branch on ppc_is_decremented_ctr_nonzero becomes a BDNZ
    // node that takes the branch's chain and target directly.
    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
  }
  break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
    // value. If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
          isZero())
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      // "intrinsic == <nonzero>" and "intrinsic != 0" both select BDNZ; the
      // remaining EQ/NE combinations select BDZ.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      // True for "pred == 1" and "pred != 0", i.e. when the branch is taken
      // if the predicate holds.
      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      // The branch consumes the glue result of the compare node.
      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

/// BuildSDIVPow2 - Fold a signed division of N's first operand by a constant
/// power of two (or the negation of one) into a PPCISD::SRA_ADDZE node,
/// followed by a subtraction from zero when the divisor is negative.
///
/// \param N       the division node; its result type must be i32, or i64 on
///                a PPC64 subtarget.
/// \param Divisor the constant divisor.
/// \param DAG     the DAG in which the new nodes are built.
/// \param Created if non-null, receives every node created here.
/// \returns the replacement value, or an empty SDValue when the type or the
///          divisor is not handled.
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  // Shift by log2 of the divisor's magnitude.
  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  if (Created)
    Created->push_back(Op.getNode());

  // For a negative divisor, negate the quotient: 0 - Op.
  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
    if (Created)
      Created->push_back(Op.getNode());
  }

  return Op;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// computeKnownBitsForTargetNode - Report bits of Op that are known to be
/// zero or one for the target-specific nodes handled here.  Both outputs are
/// first reset to "nothing known" (keeping KnownZero's bit width); then
///   - a PPCISD::LBRX whose VT operand is i16 has bits 16-31 known zero, and
///   - the listed Altivec predicate-compare intrinsics have bits 1-31 known
///     zero.
/// KnownOne is never set to anything but zero.  DAG and Depth are unused.
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      APInt &KnownZero,
                                                      APInt &KnownOne,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

/// getPrefLoopAlignment - For the CPU directives listed below, return 5 when
/// the summed size of all instructions in loop ML is greater than 16 and at
/// most 32 bytes.  In every other case (other directives, a null ML, or a
/// loop outside that size range) defer to
/// TargetLowering::getPrefLoopAlignment.
/// NOTE(review): the returned 5 is presumably a log2 alignment (2^5 = 32
/// bytes, matching the comment in the body) - confirm against the base-class
/// contract.
unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8: {
    if (!ML)
      break;

    const PPCInstrInfo *TII =
      static_cast<const PPCInstrInfo *>(getTargetMachine().getSubtargetImpl()->
                                          getInstrInfo());

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    // LoopSize is accumulated in bytes over every instruction of every block
    // in the loop.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
        LoopSize += TII->GetInstSizeInBytes(J);

    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
///
/// The single-letter constraints 'b', 'r', 'f', 'v' and 'y' and the
/// two-letter constraints "wc", "wa", "wd", "wf" and "ws" are register-class
/// constraints; 'Z' is a memory constraint.  Anything else is forwarded to
/// TargetLowering::getConstraintType.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
9359TargetLowering::ConstraintWeight 9360PPCTargetLowering::getSingleConstraintMatchWeight( 9361 AsmOperandInfo &info, const char *constraint) const { 9362 ConstraintWeight weight = CW_Invalid; 9363 Value *CallOperandVal = info.CallOperandVal; 9364 // If we don't have a value, we can't do a match, 9365 // but allow it at the lowest weight. 9366 if (!CallOperandVal) 9367 return CW_Default; 9368 Type *type = CallOperandVal->getType(); 9369 9370 // Look at the constraint type. 9371 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 9372 return CW_Register; // an individual CR bit. 9373 else if ((StringRef(constraint) == "wa" || 9374 StringRef(constraint) == "wd" || 9375 StringRef(constraint) == "wf") && 9376 type->isVectorTy()) 9377 return CW_Register; 9378 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 9379 return CW_Register; 9380 9381 switch (*constraint) { 9382 default: 9383 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 9384 break; 9385 case 'b': 9386 if (type->isIntegerTy()) 9387 weight = CW_Register; 9388 break; 9389 case 'f': 9390 if (type->isFloatTy()) 9391 weight = CW_Register; 9392 break; 9393 case 'd': 9394 if (type->isDoubleTy()) 9395 weight = CW_Register; 9396 break; 9397 case 'v': 9398 if (type->isVectorTy()) 9399 weight = CW_Register; 9400 break; 9401 case 'y': 9402 weight = CW_Register; 9403 break; 9404 case 'Z': 9405 weight = CW_Memory; 9406 break; 9407 } 9408 return weight; 9409} 9410 9411std::pair<unsigned, const TargetRegisterClass*> 9412PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 9413 MVT VT) const { 9414 if (Constraint.size() == 1) { 9415 // GCC RS6000 Constraint Letters 9416 switch (Constraint[0]) { 9417 case 'b': // R1-R31 9418 if (VT == MVT::i64 && Subtarget.isPPC64()) 9419 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 9420 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 9421 case 'r': // R0-R31 9422 if (VT == MVT::i64 && Subtarget.isPPC64()) 9423 
return std::make_pair(0U, &PPC::G8RCRegClass); 9424 return std::make_pair(0U, &PPC::GPRCRegClass); 9425 case 'f': 9426 if (VT == MVT::f32 || VT == MVT::i32) 9427 return std::make_pair(0U, &PPC::F4RCRegClass); 9428 if (VT == MVT::f64 || VT == MVT::i64) 9429 return std::make_pair(0U, &PPC::F8RCRegClass); 9430 break; 9431 case 'v': 9432 return std::make_pair(0U, &PPC::VRRCRegClass); 9433 case 'y': // crrc 9434 return std::make_pair(0U, &PPC::CRRCRegClass); 9435 } 9436 } else if (Constraint == "wc") { // an individual CR bit. 9437 return std::make_pair(0U, &PPC::CRBITRCRegClass); 9438 } else if (Constraint == "wa" || Constraint == "wd" || 9439 Constraint == "wf") { 9440 return std::make_pair(0U, &PPC::VSRCRegClass); 9441 } else if (Constraint == "ws") { 9442 return std::make_pair(0U, &PPC::VSFRCRegClass); 9443 } 9444 9445 std::pair<unsigned, const TargetRegisterClass*> R = 9446 TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 9447 9448 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 9449 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 9450 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 9451 // register. 9452 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 9453 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 9454 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && 9455 PPC::GPRCRegClass.contains(R.first)) { 9456 const TargetRegisterInfo *TRI = 9457 getTargetMachine().getSubtargetImpl()->getRegisterInfo(); 9458 return std::make_pair(TRI->getMatchingSuperReg(R.first, 9459 PPC::sub_32, &PPC::G8RCRegClass), 9460 &PPC::G8RCRegClass); 9461 } 9462 9463 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 
9464 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { 9465 R.first = PPC::CR0; 9466 R.second = &PPC::CRRCRegClass; 9467 } 9468 9469 return R; 9470} 9471 9472 9473/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 9474/// vector. If it is invalid, don't add anything to Ops. 9475void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 9476 std::string &Constraint, 9477 std::vector<SDValue>&Ops, 9478 SelectionDAG &DAG) const { 9479 SDValue Result; 9480 9481 // Only support length 1 constraints. 9482 if (Constraint.length() > 1) return; 9483 9484 char Letter = Constraint[0]; 9485 switch (Letter) { 9486 default: break; 9487 case 'I': 9488 case 'J': 9489 case 'K': 9490 case 'L': 9491 case 'M': 9492 case 'N': 9493 case 'O': 9494 case 'P': { 9495 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 9496 if (!CST) return; // Must be an immediate to match. 9497 int64_t Value = CST->getSExtValue(); 9498 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative 9499 // numbers are printed as such. 9500 switch (Letter) { 9501 default: llvm_unreachable("Unknown constraint letter!"); 9502 case 'I': // "I" is a signed 16-bit constant. 9503 if (isInt<16>(Value)) 9504 Result = DAG.getTargetConstant(Value, TCVT); 9505 break; 9506 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 9507 if (isShiftedUInt<16, 16>(Value)) 9508 Result = DAG.getTargetConstant(Value, TCVT); 9509 break; 9510 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 9511 if (isShiftedInt<16, 16>(Value)) 9512 Result = DAG.getTargetConstant(Value, TCVT); 9513 break; 9514 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 9515 if (isUInt<16>(Value)) 9516 Result = DAG.getTargetConstant(Value, TCVT); 9517 break; 9518 case 'M': // "M" is a constant that is greater than 31. 
9519 if (Value > 31) 9520 Result = DAG.getTargetConstant(Value, TCVT); 9521 break; 9522 case 'N': // "N" is a positive constant that is an exact power of two. 9523 if (Value > 0 && isPowerOf2_64(Value)) 9524 Result = DAG.getTargetConstant(Value, TCVT); 9525 break; 9526 case 'O': // "O" is the constant zero. 9527 if (Value == 0) 9528 Result = DAG.getTargetConstant(Value, TCVT); 9529 break; 9530 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 9531 if (isInt<16>(-Value)) 9532 Result = DAG.getTargetConstant(Value, TCVT); 9533 break; 9534 } 9535 break; 9536 } 9537 } 9538 9539 if (Result.getNode()) { 9540 Ops.push_back(Result); 9541 return; 9542 } 9543 9544 // Handle standard constraint letters. 9545 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 9546} 9547 9548// isLegalAddressingMode - Return true if the addressing mode represented 9549// by AM is legal for this target, for a load/store of the specified type. 9550bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 9551 Type *Ty) const { 9552 // FIXME: PPC does not allow r+i addressing modes for vectors! 9553 9554 // PPC allows a sign-extended 16-bit immediate field. 9555 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 9556 return false; 9557 9558 // No global is ever allowed as a base. 9559 if (AM.BaseGV) 9560 return false; 9561 9562 // PPC only support r+r, 9563 switch (AM.Scale) { 9564 case 0: // "r+i" or just "i", depending on HasBaseReg. 9565 break; 9566 case 1: 9567 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 9568 return false; 9569 // Otherwise we have r+r or r+i. 9570 break; 9571 case 2: 9572 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 9573 return false; 9574 // Allow 2*r as r+r. 9575 break; 9576 default: 9577 // No other scales are supported. 
9578 return false; 9579 } 9580 9581 return true; 9582} 9583 9584SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 9585 SelectionDAG &DAG) const { 9586 MachineFunction &MF = DAG.getMachineFunction(); 9587 MachineFrameInfo *MFI = MF.getFrameInfo(); 9588 MFI->setReturnAddressIsTaken(true); 9589 9590 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 9591 return SDValue(); 9592 9593 SDLoc dl(Op); 9594 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9595 9596 // Make sure the function does not optimize away the store of the RA to 9597 // the stack. 9598 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 9599 FuncInfo->setLRStoreRequired(); 9600 bool isPPC64 = Subtarget.isPPC64(); 9601 bool isDarwinABI = Subtarget.isDarwinABI(); 9602 9603 if (Depth > 0) { 9604 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 9605 SDValue Offset = 9606 9607 DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI), 9608 isPPC64? MVT::i64 : MVT::i32); 9609 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 9610 DAG.getNode(ISD::ADD, dl, getPointerTy(), 9611 FrameAddr, Offset), 9612 MachinePointerInfo(), false, false, false, 0); 9613 } 9614 9615 // Just load the return address off the stack. 9616 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 9617 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 9618 RetAddrFI, MachinePointerInfo(), false, false, false, 0); 9619} 9620 9621SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 9622 SelectionDAG &DAG) const { 9623 SDLoc dl(Op); 9624 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9625 9626 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 9627 bool isPPC64 = PtrVT == MVT::i64; 9628 9629 MachineFunction &MF = DAG.getMachineFunction(); 9630 MachineFrameInfo *MFI = MF.getFrameInfo(); 9631 MFI->setFrameAddressIsTaken(true); 9632 9633 // Naked functions never have a frame pointer, and so we use r1. 
For all 9634 // other functions, this decision must be delayed until during PEI. 9635 unsigned FrameReg; 9636 if (MF.getFunction()->getAttributes().hasAttribute( 9637 AttributeSet::FunctionIndex, Attribute::Naked)) 9638 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 9639 else 9640 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; 9641 9642 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 9643 PtrVT); 9644 while (Depth--) 9645 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 9646 FrameAddr, MachinePointerInfo(), false, false, 9647 false, 0); 9648 return FrameAddr; 9649} 9650 9651// FIXME? Maybe this could be a TableGen attribute on some registers and 9652// this table could be generated automatically from RegInfo. 9653unsigned PPCTargetLowering::getRegisterByName(const char* RegName, 9654 EVT VT) const { 9655 bool isPPC64 = Subtarget.isPPC64(); 9656 bool isDarwinABI = Subtarget.isDarwinABI(); 9657 9658 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || 9659 (!isPPC64 && VT != MVT::i32)) 9660 report_fatal_error("Invalid register global variable type"); 9661 9662 bool is64Bit = isPPC64 && VT == MVT::i64; 9663 unsigned Reg = StringSwitch<unsigned>(RegName) 9664 .Case("r1", is64Bit ? PPC::X1 : PPC::R1) 9665 .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2)) 9666 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : 9667 (is64Bit ? PPC::X13 : PPC::R13)) 9668 .Default(0); 9669 9670 if (Reg) 9671 return Reg; 9672 report_fatal_error("Invalid register name global variable"); 9673} 9674 9675bool 9676PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 9677 // The PowerPC target isn't yet aware of offsets. 
9678 return false; 9679} 9680 9681bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 9682 const CallInst &I, 9683 unsigned Intrinsic) const { 9684 9685 switch (Intrinsic) { 9686 case Intrinsic::ppc_altivec_lvx: 9687 case Intrinsic::ppc_altivec_lvxl: 9688 case Intrinsic::ppc_altivec_lvebx: 9689 case Intrinsic::ppc_altivec_lvehx: 9690 case Intrinsic::ppc_altivec_lvewx: 9691 case Intrinsic::ppc_vsx_lxvd2x: 9692 case Intrinsic::ppc_vsx_lxvw4x: { 9693 EVT VT; 9694 switch (Intrinsic) { 9695 case Intrinsic::ppc_altivec_lvebx: 9696 VT = MVT::i8; 9697 break; 9698 case Intrinsic::ppc_altivec_lvehx: 9699 VT = MVT::i16; 9700 break; 9701 case Intrinsic::ppc_altivec_lvewx: 9702 VT = MVT::i32; 9703 break; 9704 case Intrinsic::ppc_vsx_lxvd2x: 9705 VT = MVT::v2f64; 9706 break; 9707 default: 9708 VT = MVT::v4i32; 9709 break; 9710 } 9711 9712 Info.opc = ISD::INTRINSIC_W_CHAIN; 9713 Info.memVT = VT; 9714 Info.ptrVal = I.getArgOperand(0); 9715 Info.offset = -VT.getStoreSize()+1; 9716 Info.size = 2*VT.getStoreSize()-1; 9717 Info.align = 1; 9718 Info.vol = false; 9719 Info.readMem = true; 9720 Info.writeMem = false; 9721 return true; 9722 } 9723 case Intrinsic::ppc_altivec_stvx: 9724 case Intrinsic::ppc_altivec_stvxl: 9725 case Intrinsic::ppc_altivec_stvebx: 9726 case Intrinsic::ppc_altivec_stvehx: 9727 case Intrinsic::ppc_altivec_stvewx: 9728 case Intrinsic::ppc_vsx_stxvd2x: 9729 case Intrinsic::ppc_vsx_stxvw4x: { 9730 EVT VT; 9731 switch (Intrinsic) { 9732 case Intrinsic::ppc_altivec_stvebx: 9733 VT = MVT::i8; 9734 break; 9735 case Intrinsic::ppc_altivec_stvehx: 9736 VT = MVT::i16; 9737 break; 9738 case Intrinsic::ppc_altivec_stvewx: 9739 VT = MVT::i32; 9740 break; 9741 case Intrinsic::ppc_vsx_stxvd2x: 9742 VT = MVT::v2f64; 9743 break; 9744 default: 9745 VT = MVT::v4i32; 9746 break; 9747 } 9748 9749 Info.opc = ISD::INTRINSIC_VOID; 9750 Info.memVT = VT; 9751 Info.ptrVal = I.getArgOperand(1); 9752 Info.offset = -VT.getStoreSize()+1; 9753 Info.size = 2*VT.getStoreSize()-1; 
9754 Info.align = 1; 9755 Info.vol = false; 9756 Info.readMem = false; 9757 Info.writeMem = true; 9758 return true; 9759 } 9760 default: 9761 break; 9762 } 9763 9764 return false; 9765} 9766 9767/// getOptimalMemOpType - Returns the target specific optimal type for load 9768/// and store operations as a result of memset, memcpy, and memmove 9769/// lowering. If DstAlign is zero that means it's safe to destination 9770/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 9771/// means there isn't a need to check it against alignment requirement, 9772/// probably because the source does not need to be loaded. If 'IsMemset' is 9773/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 9774/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 9775/// source is constant so it does not need to be loaded. 9776/// It returns EVT::Other if the type should be determined using generic 9777/// target-independent logic. 9778EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, 9779 unsigned DstAlign, unsigned SrcAlign, 9780 bool IsMemset, bool ZeroMemset, 9781 bool MemcpyStrSrc, 9782 MachineFunction &MF) const { 9783 if (Subtarget.isPPC64()) { 9784 return MVT::i64; 9785 } else { 9786 return MVT::i32; 9787 } 9788} 9789 9790/// \brief Returns true if it is beneficial to convert a load of a constant 9791/// to just the constant itself. 
9792bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 9793 Type *Ty) const { 9794 assert(Ty->isIntegerTy()); 9795 9796 unsigned BitSize = Ty->getPrimitiveSizeInBits(); 9797 if (BitSize == 0 || BitSize > 64) 9798 return false; 9799 return true; 9800} 9801 9802bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { 9803 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 9804 return false; 9805 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 9806 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 9807 return NumBits1 == 64 && NumBits2 == 32; 9808} 9809 9810bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { 9811 if (!VT1.isInteger() || !VT2.isInteger()) 9812 return false; 9813 unsigned NumBits1 = VT1.getSizeInBits(); 9814 unsigned NumBits2 = VT2.getSizeInBits(); 9815 return NumBits1 == 64 && NumBits2 == 32; 9816} 9817 9818bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 9819 // Generally speaking, zexts are not free, but they are free when they can be 9820 // folded with other operations. 9821 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) { 9822 EVT MemVT = LD->getMemoryVT(); 9823 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || 9824 (Subtarget.isPPC64() && MemVT == MVT::i32)) && 9825 (LD->getExtensionType() == ISD::NON_EXTLOAD || 9826 LD->getExtensionType() == ISD::ZEXTLOAD)) 9827 return true; 9828 } 9829 9830 // FIXME: Add other cases... 9831 // - 32-bit shifts with a zext to i64 9832 // - zext after ctlz, bswap, etc. 
9833 // - zext after and by a constant mask 9834 9835 return TargetLowering::isZExtFree(Val, VT2); 9836} 9837 9838bool PPCTargetLowering::isFPExtFree(EVT VT) const { 9839 assert(VT.isFloatingPoint()); 9840 return true; 9841} 9842 9843bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 9844 return isInt<16>(Imm) || isUInt<16>(Imm); 9845} 9846 9847bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { 9848 return isInt<16>(Imm) || isUInt<16>(Imm); 9849} 9850 9851bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 9852 unsigned, 9853 unsigned, 9854 bool *Fast) const { 9855 if (DisablePPCUnaligned) 9856 return false; 9857 9858 // PowerPC supports unaligned memory access for simple non-vector types. 9859 // Although accessing unaligned addresses is not as efficient as accessing 9860 // aligned addresses, it is generally more efficient than manual expansion, 9861 // and generally only traps for software emulation when crossing page 9862 // boundaries. 9863 9864 if (!VT.isSimple()) 9865 return false; 9866 9867 if (VT.getSimpleVT().isVector()) { 9868 if (Subtarget.hasVSX()) { 9869 if (VT != MVT::v2f64 && VT != MVT::v2i64 && 9870 VT != MVT::v4f32 && VT != MVT::v4i32) 9871 return false; 9872 } else { 9873 return false; 9874 } 9875 } 9876 9877 if (VT == MVT::ppcf128) 9878 return false; 9879 9880 if (Fast) 9881 *Fast = true; 9882 9883 return true; 9884} 9885 9886bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 9887 VT = VT.getScalarType(); 9888 9889 if (!VT.isSimple()) 9890 return false; 9891 9892 switch (VT.getSimpleVT().SimpleTy) { 9893 case MVT::f32: 9894 case MVT::f64: 9895 return true; 9896 default: 9897 break; 9898 } 9899 9900 return false; 9901} 9902 9903const MCPhysReg * 9904PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { 9905 // LR is a callee-save register, but we must treat it as clobbered by any call 9906 // site. 
Hence we include LR in the scratch registers, which are in turn added 9907 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies 9908 // to CTR, which is used by any indirect call. 9909 static const MCPhysReg ScratchRegs[] = { 9910 PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0 9911 }; 9912 9913 return ScratchRegs; 9914} 9915 9916bool 9917PPCTargetLowering::shouldExpandBuildVectorWithShuffles( 9918 EVT VT , unsigned DefinedValues) const { 9919 if (VT == MVT::v2i64) 9920 return false; 9921 9922 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); 9923} 9924 9925Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 9926 if (DisableILPPref || Subtarget.enableMachineScheduler()) 9927 return TargetLowering::getSchedulingPreference(N); 9928 9929 return Sched::ILP; 9930} 9931 9932// Create a fast isel object. 9933FastISel * 9934PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, 9935 const TargetLibraryInfo *LibInfo) const { 9936 return PPC::createFastISel(FuncInfo, LibInfo); 9937}
| 3096 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3097 ObjSize = Flags.getByValSize(); 3098 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3099 // Objects of size 1 and 2 are right justified, everything else is 3100 // left justified. This means the memory address is adjusted forwards. 3101 if (ObjSize==1 || ObjSize==2) { 3102 CurArgOffset = CurArgOffset + (4 - ObjSize); 3103 } 3104 // The value of the object is its address. 3105 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); 3106 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3107 InVals.push_back(FIN); 3108 if (ObjSize==1 || ObjSize==2) { 3109 if (GPR_idx != Num_GPR_Regs) { 3110 unsigned VReg; 3111 if (isPPC64) 3112 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3113 else 3114 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3115 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3116 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 3117 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 3118 MachinePointerInfo(FuncArg), 3119 ObjType, false, false, 0); 3120 MemOps.push_back(Store); 3121 ++GPR_idx; 3122 } 3123 3124 ArgOffset += PtrByteSize; 3125 3126 continue; 3127 } 3128 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3129 // Store whatever pieces of the object are in registers 3130 // to memory. ArgOffset will be the address of the beginning 3131 // of the object. 
3132 if (GPR_idx != Num_GPR_Regs) { 3133 unsigned VReg; 3134 if (isPPC64) 3135 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3136 else 3137 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3138 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 3139 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3140 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3141 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3142 MachinePointerInfo(FuncArg, j), 3143 false, false, 0); 3144 MemOps.push_back(Store); 3145 ++GPR_idx; 3146 ArgOffset += PtrByteSize; 3147 } else { 3148 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 3149 break; 3150 } 3151 } 3152 continue; 3153 } 3154 3155 switch (ObjectVT.getSimpleVT().SimpleTy) { 3156 default: llvm_unreachable("Unhandled argument type!"); 3157 case MVT::i1: 3158 case MVT::i32: 3159 if (!isPPC64) { 3160 if (GPR_idx != Num_GPR_Regs) { 3161 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3162 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 3163 3164 if (ObjectVT == MVT::i1) 3165 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 3166 3167 ++GPR_idx; 3168 } else { 3169 needsLoad = true; 3170 ArgSize = PtrByteSize; 3171 } 3172 // All int arguments reserve stack space in the Darwin ABI. 3173 ArgOffset += PtrByteSize; 3174 break; 3175 } 3176 // FALLTHROUGH 3177 case MVT::i64: // PPC64 3178 if (GPR_idx != Num_GPR_Regs) { 3179 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3180 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3181 3182 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3183 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3184 // value to MVT::i64 and then truncate to the correct register size. 3185 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3186 3187 ++GPR_idx; 3188 } else { 3189 needsLoad = true; 3190 ArgSize = PtrByteSize; 3191 } 3192 // All int arguments reserve stack space in the Darwin ABI. 
3193 ArgOffset += 8; 3194 break; 3195 3196 case MVT::f32: 3197 case MVT::f64: 3198 // Every 4 bytes of argument space consumes one of the GPRs available for 3199 // argument passing. 3200 if (GPR_idx != Num_GPR_Regs) { 3201 ++GPR_idx; 3202 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 3203 ++GPR_idx; 3204 } 3205 if (FPR_idx != Num_FPR_Regs) { 3206 unsigned VReg; 3207 3208 if (ObjectVT == MVT::f32) 3209 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 3210 else 3211 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 3212 3213 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3214 ++FPR_idx; 3215 } else { 3216 needsLoad = true; 3217 } 3218 3219 // All FP arguments reserve stack space in the Darwin ABI. 3220 ArgOffset += isPPC64 ? 8 : ObjSize; 3221 break; 3222 case MVT::v4f32: 3223 case MVT::v4i32: 3224 case MVT::v8i16: 3225 case MVT::v16i8: 3226 // Note that vector arguments in registers don't reserve stack space, 3227 // except in varargs functions. 3228 if (VR_idx != Num_VR_Regs) { 3229 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3230 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3231 if (isVarArg) { 3232 while ((ArgOffset % 16) != 0) { 3233 ArgOffset += PtrByteSize; 3234 if (GPR_idx != Num_GPR_Regs) 3235 GPR_idx++; 3236 } 3237 ArgOffset += 16; 3238 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 3239 } 3240 ++VR_idx; 3241 } else { 3242 if (!isVarArg && !isPPC64) { 3243 // Vectors go after all the nonvectors. 3244 CurArgOffset = VecArgOffset; 3245 VecArgOffset += 16; 3246 } else { 3247 // Vectors are aligned. 3248 ArgOffset = ((ArgOffset+15)/16)*16; 3249 CurArgOffset = ArgOffset; 3250 ArgOffset += 16; 3251 } 3252 needsLoad = true; 3253 } 3254 break; 3255 } 3256 3257 // We need to load the argument to a virtual register if we determined above 3258 // that we ran out of physical registers of the appropriate type. 
3259 if (needsLoad) { 3260 int FI = MFI->CreateFixedObject(ObjSize, 3261 CurArgOffset + (ArgSize - ObjSize), 3262 isImmutable); 3263 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3264 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 3265 false, false, false, 0); 3266 } 3267 3268 InVals.push_back(ArgVal); 3269 } 3270 3271 // Allow for Altivec parameters at the end, if needed. 3272 if (nAltivecParamsAtEnd) { 3273 MinReservedArea = ((MinReservedArea+15)/16)*16; 3274 MinReservedArea += 16*nAltivecParamsAtEnd; 3275 } 3276 3277 // Area that is at least reserved in the caller of this function. 3278 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); 3279 3280 // Set the size that is at least reserved in caller of this function. Tail 3281 // call optimized functions' reserved stack space needs to be aligned so that 3282 // taking the difference between two stack areas will result in an aligned 3283 // stack. 3284 MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); 3285 FuncInfo->setMinReservedArea(MinReservedArea); 3286 3287 // If the function takes variable number of arguments, make a frame index for 3288 // the start of the first vararg value... for expansion of llvm.va_start. 3289 if (isVarArg) { 3290 int Depth = ArgOffset; 3291 3292 FuncInfo->setVarArgsFrameIndex( 3293 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 3294 Depth, true)); 3295 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3296 3297 // If this function is vararg, store any remaining integer argument regs 3298 // to their spots on the stack so that they may be loaded by deferencing the 3299 // result of va_next. 
3300 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 3301 unsigned VReg; 3302 3303 if (isPPC64) 3304 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3305 else 3306 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3307 3308 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3309 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3310 MachinePointerInfo(), false, false, 0); 3311 MemOps.push_back(Store); 3312 // Increment the address by four for the next argument to store 3313 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 3314 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3315 } 3316 } 3317 3318 if (!MemOps.empty()) 3319 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3320 3321 return Chain; 3322} 3323 3324/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 3325/// adjusted to accommodate the arguments for the tailcall. 3326static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 3327 unsigned ParamSize) { 3328 3329 if (!isTailCall) return 0; 3330 3331 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 3332 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 3333 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 3334 // Remember only if the new adjustement is bigger. 3335 if (SPDiff < FI->getTailCallSPDelta()) 3336 FI->setTailCallSPDelta(SPDiff); 3337 3338 return SPDiff; 3339} 3340 3341/// IsEligibleForTailCallOptimization - Check whether the call is eligible 3342/// for tail call optimization. Targets which want to do tail call 3343/// optimization should implement this function. 
3344bool 3345PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 3346 CallingConv::ID CalleeCC, 3347 bool isVarArg, 3348 const SmallVectorImpl<ISD::InputArg> &Ins, 3349 SelectionDAG& DAG) const { 3350 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 3351 return false; 3352 3353 // Variable argument functions are not supported. 3354 if (isVarArg) 3355 return false; 3356 3357 MachineFunction &MF = DAG.getMachineFunction(); 3358 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 3359 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 3360 // Functions containing by val parameters are not supported. 3361 for (unsigned i = 0; i != Ins.size(); i++) { 3362 ISD::ArgFlagsTy Flags = Ins[i].Flags; 3363 if (Flags.isByVal()) return false; 3364 } 3365 3366 // Non-PIC/GOT tail calls are supported. 3367 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 3368 return true; 3369 3370 // At the moment we can only do local tail calls (in same module, hidden 3371 // or protected) if we are generating PIC. 3372 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3373 return G->getGlobal()->hasHiddenVisibility() 3374 || G->getGlobal()->hasProtectedVisibility(); 3375 } 3376 3377 return false; 3378} 3379 3380/// isCallCompatibleAddress - Return the immediate to use if the specified 3381/// 32-bit value is representable in the immediate field of a BxA instruction. 3382static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 3383 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 3384 if (!C) return nullptr; 3385 3386 int Addr = C->getZExtValue(); 3387 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 3388 SignExtend32<26>(Addr) != Addr) 3389 return nullptr; // Top 6 bits have to be sext of immediate. 
3390 3391 return DAG.getConstant((int)C->getZExtValue() >> 2, 3392 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 3393} 3394 3395namespace { 3396 3397struct TailCallArgumentInfo { 3398 SDValue Arg; 3399 SDValue FrameIdxOp; 3400 int FrameIdx; 3401 3402 TailCallArgumentInfo() : FrameIdx(0) {} 3403}; 3404 3405} 3406 3407/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 3408static void 3409StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 3410 SDValue Chain, 3411 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 3412 SmallVectorImpl<SDValue> &MemOpChains, 3413 SDLoc dl) { 3414 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 3415 SDValue Arg = TailCallArgs[i].Arg; 3416 SDValue FIN = TailCallArgs[i].FrameIdxOp; 3417 int FI = TailCallArgs[i].FrameIdx; 3418 // Store relative to framepointer. 3419 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 3420 MachinePointerInfo::getFixedStack(FI), 3421 false, false, 0)); 3422 } 3423} 3424 3425/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 3426/// the appropriate stack slot for the tail call optimized function call. 3427static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 3428 MachineFunction &MF, 3429 SDValue Chain, 3430 SDValue OldRetAddr, 3431 SDValue OldFP, 3432 int SPDiff, 3433 bool isPPC64, 3434 bool isDarwinABI, 3435 SDLoc dl) { 3436 if (SPDiff) { 3437 // Calculate the new stack slot for the return address. 3438 int SlotSize = isPPC64 ? 8 : 4; 3439 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, 3440 isDarwinABI); 3441 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 3442 NewRetAddrLoc, true); 3443 EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; 3444 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 3445 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 3446 MachinePointerInfo::getFixedStack(NewRetAddr), 3447 false, false, 0); 3448 3449 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 3450 // slot as the FP is never overwritten. 3451 if (isDarwinABI) { 3452 int NewFPLoc = 3453 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); 3454 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 3455 true); 3456 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 3457 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 3458 MachinePointerInfo::getFixedStack(NewFPIdx), 3459 false, false, 0); 3460 } 3461 } 3462 return Chain; 3463} 3464 3465/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 3466/// the position of the argument. 3467static void 3468CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 3469 SDValue Arg, int SPDiff, unsigned ArgOffset, 3470 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 3471 int Offset = ArgOffset + SPDiff; 3472 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 3473 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 3474 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 3475 SDValue FIN = DAG.getFrameIndex(FI, VT); 3476 TailCallArgumentInfo Info; 3477 Info.Arg = Arg; 3478 Info.FrameIdxOp = FIN; 3479 Info.FrameIdx = FI; 3480 TailCallArguments.push_back(Info); 3481} 3482 3483/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 3484/// stack slot. Returns the chain as result and the loaded frame pointers in 3485/// LROpOut/FPOpout. Used when tail calling. 
3486SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 3487 int SPDiff, 3488 SDValue Chain, 3489 SDValue &LROpOut, 3490 SDValue &FPOpOut, 3491 bool isDarwinABI, 3492 SDLoc dl) const { 3493 if (SPDiff) { 3494 // Load the LR and FP stack slot for later adjusting. 3495 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 3496 LROpOut = getReturnAddrFrameIndex(DAG); 3497 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 3498 false, false, false, 0); 3499 Chain = SDValue(LROpOut.getNode(), 1); 3500 3501 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 3502 // slot as the FP is never overwritten. 3503 if (isDarwinABI) { 3504 FPOpOut = getFramePointerFrameIndex(DAG); 3505 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 3506 false, false, false, 0); 3507 Chain = SDValue(FPOpOut.getNode(), 1); 3508 } 3509 } 3510 return Chain; 3511} 3512 3513/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 3514/// by "Src" to address "Dst" of size "Size". Alignment information is 3515/// specified by the specific parameter attribute. The copy will be passed as 3516/// a byval function parameter. 3517/// Sometimes what we are copying is the end of a larger object, the part that 3518/// does not fit in registers. 3519static SDValue 3520CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 3521 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 3522 SDLoc dl) { 3523 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 3524 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 3525 false, false, MachinePointerInfo(), 3526 MachinePointerInfo()); 3527} 3528 3529/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 3530/// tail calls. 
3531static void 3532LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 3533 SDValue Arg, SDValue PtrOff, int SPDiff, 3534 unsigned ArgOffset, bool isPPC64, bool isTailCall, 3535 bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 3536 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, 3537 SDLoc dl) { 3538 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3539 if (!isTailCall) { 3540 if (isVector) { 3541 SDValue StackPtr; 3542 if (isPPC64) 3543 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3544 else 3545 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3546 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3547 DAG.getConstant(ArgOffset, PtrVT)); 3548 } 3549 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3550 MachinePointerInfo(), false, false, 0)); 3551 // Calculate and remember argument location. 3552 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 3553 TailCallArguments); 3554} 3555 3556static 3557void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 3558 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 3559 SDValue LROp, SDValue FPOp, bool isDarwinABI, 3560 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 3561 MachineFunction &MF = DAG.getMachineFunction(); 3562 3563 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 3564 // might overwrite each other in case of tail call optimization. 3565 SmallVector<SDValue, 8> MemOpChains2; 3566 // Do not flag preceding copytoreg stuff together with the following stuff. 3567 InFlag = SDValue(); 3568 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 3569 MemOpChains2, dl); 3570 if (!MemOpChains2.empty()) 3571 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 3572 3573 // Store the return address to the appropriate stack slot. 
3574 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 3575 isPPC64, isDarwinABI, dl); 3576 3577 // Emit callseq_end just before tailcall node. 3578 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3579 DAG.getIntPtrConstant(0, true), InFlag, dl); 3580 InFlag = Chain.getValue(1); 3581} 3582 3583// Is this global address that of a function that can be called by name? (as 3584// opposed to something that must hold a descriptor for an indirect call). 3585static bool isFunctionGlobalAddress(SDValue Callee) { 3586 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3587 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 3588 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 3589 return false; 3590 3591 return G->getGlobal()->getType()->getElementType()->isFunctionTy(); 3592 } 3593 3594 return false; 3595} 3596 3597static 3598unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 3599 SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, 3600 bool IsPatchPoint, 3601 SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, 3602 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 3603 const PPCSubtarget &Subtarget) { 3604 3605 bool isPPC64 = Subtarget.isPPC64(); 3606 bool isSVR4ABI = Subtarget.isSVR4ABI(); 3607 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3608 3609 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3610 NodeTys.push_back(MVT::Other); // Returns a chain 3611 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 3612 3613 unsigned CallOpc = PPCISD::CALL; 3614 3615 bool needIndirectCall = true; 3616 if (!isSVR4ABI || !isPPC64) 3617 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 3618 // If this is an absolute destination address, use the munged value. 
3619 Callee = SDValue(Dest, 0); 3620 needIndirectCall = false; 3621 } 3622 3623 if (isFunctionGlobalAddress(Callee)) { 3624 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 3625 // A call to a TLS address is actually an indirect call to a 3626 // thread-specific pointer. 3627 unsigned OpFlags = 0; 3628 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 3629 (Subtarget.getTargetTriple().isMacOSX() && 3630 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 3631 (G->getGlobal()->isDeclaration() || 3632 G->getGlobal()->isWeakForLinker())) || 3633 (Subtarget.isTargetELF() && !isPPC64 && 3634 !G->getGlobal()->hasLocalLinkage() && 3635 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3636 // PC-relative references to external symbols should go through $stub, 3637 // unless we're building with the leopard linker or later, which 3638 // automatically synthesizes these stubs. 3639 OpFlags = PPCII::MO_PLT_OR_STUB; 3640 } 3641 3642 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 3643 // every direct call is) turn it into a TargetGlobalAddress / 3644 // TargetExternalSymbol node so that legalize doesn't hack it. 3645 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 3646 Callee.getValueType(), 0, OpFlags); 3647 needIndirectCall = false; 3648 } 3649 3650 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3651 unsigned char OpFlags = 0; 3652 3653 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 3654 (Subtarget.getTargetTriple().isMacOSX() && 3655 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) || 3656 (Subtarget.isTargetELF() && !isPPC64 && 3657 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3658 // PC-relative references to external symbols should go through $stub, 3659 // unless we're building with the leopard linker or later, which 3660 // automatically synthesizes these stubs. 
3661 OpFlags = PPCII::MO_PLT_OR_STUB; 3662 } 3663 3664 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 3665 OpFlags); 3666 needIndirectCall = false; 3667 } 3668 3669 if (IsPatchPoint) { 3670 // We'll form an invalid direct call when lowering a patchpoint; the full 3671 // sequence for an indirect call is complicated, and many of the 3672 // instructions introduced might have side effects (and, thus, can't be 3673 // removed later). The call itself will be removed as soon as the 3674 // argument/return lowering is complete, so the fact that it has the wrong 3675 // kind of operands should not really matter. 3676 needIndirectCall = false; 3677 } 3678 3679 if (needIndirectCall) { 3680 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 3681 // to do the call, we can't use PPCISD::CALL. 3682 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 3683 3684 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 3685 // Function pointers in the 64-bit SVR4 ABI do not point to the function 3686 // entry point, but to the function descriptor (the function entry point 3687 // address is part of the function descriptor though). 3688 // The function descriptor is a three doubleword structure with the 3689 // following fields: function entry point, TOC base address and 3690 // environment pointer. 3691 // Thus for a call through a function pointer, the following actions need 3692 // to be performed: 3693 // 1. Save the TOC of the caller in the TOC save area of its stack 3694 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 3695 // 2. Load the address of the function entry point from the function 3696 // descriptor. 3697 // 3. Load the TOC of the callee from the function descriptor into r2. 3698 // 4. Load the environment pointer from the function descriptor into 3699 // r11. 3700 // 5. Branch to the function entry point address. 3701 // 6. 
On return of the callee, the TOC of the caller needs to be 3702 // restored (this is done in FinishCall()). 3703 // 3704 // All those operations are flagged together to ensure that no other 3705 // operations can be scheduled in between. E.g. without flagging the 3706 // operations together, a TOC access in the caller could be scheduled 3707 // between the load of the callee TOC and the branch to the callee, which 3708 // results in the TOC access going through the TOC of the callee instead 3709 // of going through the TOC of the caller, which leads to incorrect code. 3710 3711 // Load the address of the function entry point from the function 3712 // descriptor. 3713 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); 3714 SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, 3715 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 3716 Chain = LoadFuncPtr.getValue(1); 3717 InFlag = LoadFuncPtr.getValue(2); 3718 3719 // Load environment pointer into r11. 3720 // Offset of the environment pointer within the function descriptor. 3721 SDValue PtrOff = DAG.getIntPtrConstant(16); 3722 3723 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 3724 SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, 3725 InFlag); 3726 Chain = LoadEnvPtr.getValue(1); 3727 InFlag = LoadEnvPtr.getValue(2); 3728 3729 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 3730 InFlag); 3731 Chain = EnvVal.getValue(0); 3732 InFlag = EnvVal.getValue(1); 3733 3734 // Load TOC of the callee into r2. We are using a target-specific load 3735 // with r2 hard coded, because the result of a target-independent load 3736 // would never go directly into r2, since r2 is a reserved register (which 3737 // prevents the register allocator from allocating it), resulting in an 3738 // additional register being allocated and an unnecessary move instruction 3739 // being generated. 
3740 VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3741 SDValue TOCOff = DAG.getIntPtrConstant(8); 3742 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 3743 SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, 3744 AddTOC, InFlag); 3745 Chain = LoadTOCPtr.getValue(0); 3746 InFlag = LoadTOCPtr.getValue(1); 3747 3748 MTCTROps[0] = Chain; 3749 MTCTROps[1] = LoadFuncPtr; 3750 MTCTROps[2] = InFlag; 3751 } 3752 3753 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 3754 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 3755 InFlag = Chain.getValue(1); 3756 3757 NodeTys.clear(); 3758 NodeTys.push_back(MVT::Other); 3759 NodeTys.push_back(MVT::Glue); 3760 Ops.push_back(Chain); 3761 CallOpc = PPCISD::BCTRL; 3762 Callee.setNode(nullptr); 3763 // Add use of X11 (holding environment pointer) 3764 if (isSVR4ABI && isPPC64 && !isELFv2ABI) 3765 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 3766 // Add CTR register as callee so a bctr can be emitted later. 3767 if (isTailCall) 3768 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 3769 } 3770 3771 // If this is a direct call, pass the chain and the callee. 3772 if (Callee.getNode()) { 3773 Ops.push_back(Chain); 3774 Ops.push_back(Callee); 3775 3776 // If this is a call to __tls_get_addr, find the symbol whose address 3777 // is to be taken and add it to the list. This will be used to 3778 // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld). 3779 // We find the symbol by walking the chain to the CopyFromReg, walking 3780 // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and 3781 // pulling the symbol from that node. 
3782 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 3783 if (!strcmp(S->getSymbol(), "__tls_get_addr")) { 3784 assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); 3785 SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); 3786 SDValue TGTAddr = AddI->getOperand(1); 3787 assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && 3788 "Didn't find target global TLS address where we expected one"); 3789 Ops.push_back(TGTAddr); 3790 CallOpc = PPCISD::CALL_TLS; 3791 } 3792 } 3793 // If this is a tail call add stack pointer delta. 3794 if (isTailCall) 3795 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 3796 3797 // Add argument registers to the end of the list so that they are known live 3798 // into the call. 3799 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 3800 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 3801 RegsToPass[i].second.getValueType())); 3802 3803 // Direct calls in the ELFv2 ABI need the TOC register live into the call. 3804 if (Callee.getNode() && isELFv2ABI && !IsPatchPoint) 3805 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 3806 3807 return CallOpc; 3808} 3809 3810static 3811bool isLocalCall(const SDValue &Callee) 3812{ 3813 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3814 return !G->getGlobal()->isDeclaration() && 3815 !G->getGlobal()->isWeakForLinker(); 3816 return false; 3817} 3818 3819SDValue 3820PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 3821 CallingConv::ID CallConv, bool isVarArg, 3822 const SmallVectorImpl<ISD::InputArg> &Ins, 3823 SDLoc dl, SelectionDAG &DAG, 3824 SmallVectorImpl<SDValue> &InVals) const { 3825 3826 SmallVector<CCValAssign, 16> RVLocs; 3827 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 3828 *DAG.getContext()); 3829 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 3830 3831 // Copy all of the result registers out of their specified physreg. 
3832 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 3833 CCValAssign &VA = RVLocs[i]; 3834 assert(VA.isRegLoc() && "Can only return in registers!"); 3835 3836 SDValue Val = DAG.getCopyFromReg(Chain, dl, 3837 VA.getLocReg(), VA.getLocVT(), InFlag); 3838 Chain = Val.getValue(1); 3839 InFlag = Val.getValue(2); 3840 3841 switch (VA.getLocInfo()) { 3842 default: llvm_unreachable("Unknown loc info!"); 3843 case CCValAssign::Full: break; 3844 case CCValAssign::AExt: 3845 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3846 break; 3847 case CCValAssign::ZExt: 3848 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 3849 DAG.getValueType(VA.getValVT())); 3850 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3851 break; 3852 case CCValAssign::SExt: 3853 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 3854 DAG.getValueType(VA.getValVT())); 3855 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3856 break; 3857 } 3858 3859 InVals.push_back(Val); 3860 } 3861 3862 return Chain; 3863} 3864 3865SDValue 3866PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, 3867 bool isTailCall, bool isVarArg, bool IsPatchPoint, 3868 SelectionDAG &DAG, 3869 SmallVector<std::pair<unsigned, SDValue>, 8> 3870 &RegsToPass, 3871 SDValue InFlag, SDValue Chain, 3872 SDValue &Callee, 3873 int SPDiff, unsigned NumBytes, 3874 const SmallVectorImpl<ISD::InputArg> &Ins, 3875 SmallVectorImpl<SDValue> &InVals) const { 3876 3877 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3878 std::vector<EVT> NodeTys; 3879 SmallVector<SDValue, 8> Ops; 3880 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, 3881 isTailCall, IsPatchPoint, RegsToPass, Ops, 3882 NodeTys, Subtarget); 3883 3884 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 3885 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 3886 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 3887 3888 // When performing tail call optimization the callee pops its 
arguments off 3889 // the stack. Account for this here so these bytes can be pushed back on in 3890 // PPCFrameLowering::eliminateCallFramePseudoInstr. 3891 int BytesCalleePops = 3892 (CallConv == CallingConv::Fast && 3893 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 3894 3895 // Add a register mask operand representing the call-preserved registers. 3896 const TargetRegisterInfo *TRI = 3897 getTargetMachine().getSubtargetImpl()->getRegisterInfo(); 3898 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 3899 assert(Mask && "Missing call preserved mask for calling convention"); 3900 Ops.push_back(DAG.getRegisterMask(Mask)); 3901 3902 if (InFlag.getNode()) 3903 Ops.push_back(InFlag); 3904 3905 // Emit tail call. 3906 if (isTailCall) { 3907 assert(((Callee.getOpcode() == ISD::Register && 3908 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 3909 Callee.getOpcode() == ISD::TargetExternalSymbol || 3910 Callee.getOpcode() == ISD::TargetGlobalAddress || 3911 isa<ConstantSDNode>(Callee)) && 3912 "Expecting an global address, external symbol, absolute value or register"); 3913 3914 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); 3915 } 3916 3917 // Add a NOP immediately after the branch instruction when using the 64-bit 3918 // SVR4 ABI. At link time, if caller and callee are in a different module and 3919 // thus have a different TOC, the call will be replaced with a call to a stub 3920 // function which saves the current TOC, loads the TOC of the callee and 3921 // branches to the callee. The NOP will be replaced with a load instruction 3922 // which restores the TOC of the caller from the TOC save slot of the current 3923 // stack frame. If caller and callee belong to the same module (and have the 3924 // same TOC), the NOP will remain unchanged. 
3925 3926 if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && 3927 !IsPatchPoint) { 3928 if (CallOpc == PPCISD::BCTRL) { 3929 // This is a call through a function pointer. 3930 // Restore the caller TOC from the save area into R2. 3931 // See PrepareCall() for more information about calls through function 3932 // pointers in the 64-bit SVR4 ABI. 3933 // We are using a target-specific load with r2 hard coded, because the 3934 // result of a target-independent load would never go directly into r2, 3935 // since r2 is a reserved register (which prevents the register allocator 3936 // from allocating it), resulting in an additional register being 3937 // allocated and an unnecessary move instruction being generated. 3938 CallOpc = PPCISD::BCTRL_LOAD_TOC; 3939 3940 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3941 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); 3942 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); 3943 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); 3944 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); 3945 3946 // The address needs to go after the chain input but before the flag (or 3947 // any other variadic arguments). 3948 Ops.insert(std::next(Ops.begin()), AddTOC); 3949 } else if ((CallOpc == PPCISD::CALL) && 3950 (!isLocalCall(Callee) || 3951 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3952 // Otherwise insert NOP for non-local calls. 3953 CallOpc = PPCISD::CALL_NOP; 3954 } else if (CallOpc == PPCISD::CALL_TLS) 3955 // For 64-bit SVR4, TLS calls are always non-local. 
3956 CallOpc = PPCISD::CALL_NOP_TLS; 3957 } 3958 3959 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 3960 InFlag = Chain.getValue(1); 3961 3962 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3963 DAG.getIntPtrConstant(BytesCalleePops, true), 3964 InFlag, dl); 3965 if (!Ins.empty()) 3966 InFlag = Chain.getValue(1); 3967 3968 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 3969 Ins, dl, DAG, InVals); 3970} 3971 3972SDValue 3973PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 3974 SmallVectorImpl<SDValue> &InVals) const { 3975 SelectionDAG &DAG = CLI.DAG; 3976 SDLoc &dl = CLI.DL; 3977 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 3978 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 3979 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 3980 SDValue Chain = CLI.Chain; 3981 SDValue Callee = CLI.Callee; 3982 bool &isTailCall = CLI.IsTailCall; 3983 CallingConv::ID CallConv = CLI.CallConv; 3984 bool isVarArg = CLI.IsVarArg; 3985 bool IsPatchPoint = CLI.IsPatchPoint; 3986 3987 if (isTailCall) 3988 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 3989 Ins, DAG); 3990 3991 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) 3992 report_fatal_error("failed to perform tail call elimination on a call " 3993 "site marked musttail"); 3994 3995 if (Subtarget.isSVR4ABI()) { 3996 if (Subtarget.isPPC64()) 3997 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 3998 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 3999 dl, DAG, InVals); 4000 else 4001 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 4002 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 4003 dl, DAG, InVals); 4004 } 4005 4006 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 4007 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 4008 dl, DAG, InVals); 4009} 4010 4011SDValue 4012PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, 4013 CallingConv::ID CallConv, bool isVarArg, 4014 bool isTailCall, bool 
IsPatchPoint, 4015 const SmallVectorImpl<ISD::OutputArg> &Outs, 4016 const SmallVectorImpl<SDValue> &OutVals, 4017 const SmallVectorImpl<ISD::InputArg> &Ins, 4018 SDLoc dl, SelectionDAG &DAG, 4019 SmallVectorImpl<SDValue> &InVals) const { 4020 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 4021 // of the 32-bit SVR4 ABI stack frame layout. 4022 4023 assert((CallConv == CallingConv::C || 4024 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 4025 4026 unsigned PtrByteSize = 4; 4027 4028 MachineFunction &MF = DAG.getMachineFunction(); 4029 4030 // Mark this function as potentially containing a function that contains a 4031 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4032 // and restoring the callers stack pointer in this functions epilog. This is 4033 // done because by tail calling the called function might overwrite the value 4034 // in this function's (MF) stack pointer stack slot 0(SP). 4035 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4036 CallConv == CallingConv::Fast) 4037 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4038 4039 // Count how many bytes are to be pushed on the stack, including the linkage 4040 // area, parameter list area and the part of the local variable space which 4041 // contains copies of aggregates which are passed by value. 4042 4043 // Assign locations to all of the outgoing arguments. 4044 SmallVector<CCValAssign, 16> ArgLocs; 4045 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 4046 *DAG.getContext()); 4047 4048 // Reserve space for the linkage area on the stack. 4049 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false), 4050 PtrByteSize); 4051 4052 if (isVarArg) { 4053 // Handle fixed and variable vector arguments differently. 4054 // Fixed vector arguments go into registers as long as registers are 4055 // available. Variable vector arguments always go into memory. 
4056 unsigned NumArgs = Outs.size(); 4057 4058 for (unsigned i = 0; i != NumArgs; ++i) { 4059 MVT ArgVT = Outs[i].VT; 4060 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4061 bool Result; 4062 4063 if (Outs[i].IsFixed) { 4064 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 4065 CCInfo); 4066 } else { 4067 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 4068 ArgFlags, CCInfo); 4069 } 4070 4071 if (Result) { 4072#ifndef NDEBUG 4073 errs() << "Call operand #" << i << " has unhandled type " 4074 << EVT(ArgVT).getEVTString() << "\n"; 4075#endif 4076 llvm_unreachable(nullptr); 4077 } 4078 } 4079 } else { 4080 // All arguments are treated the same. 4081 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 4082 } 4083 4084 // Assign locations to all of the outgoing aggregate by value arguments. 4085 SmallVector<CCValAssign, 16> ByValArgLocs; 4086 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4087 ByValArgLocs, *DAG.getContext()); 4088 4089 // Reserve stack space for the allocations in CCInfo. 4090 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4091 4092 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4093 4094 // Size of the linkage area, parameter list area and the part of the local 4095 // space variable where copies of aggregates which are passed by value are 4096 // stored. 4097 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4098 4099 // Calculate by how many bytes the stack has to be adjusted in case of tail 4100 // call optimization. 4101 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4102 4103 // Adjust the stack pointer for the new arguments... 4104 // These operations are automatically eliminated by the prolog/epilog pass 4105 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4106 dl); 4107 SDValue CallSeqStart = Chain; 4108 4109 // Load the return address and frame pointer so it can be moved somewhere else 4110 // later. 
4111 SDValue LROp, FPOp; 4112 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 4113 dl); 4114 4115 // Set up a copy of the stack pointer for use loading and storing any 4116 // arguments that may not fit in the registers available for argument 4117 // passing. 4118 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4119 4120 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4121 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4122 SmallVector<SDValue, 8> MemOpChains; 4123 4124 bool seenFloatArg = false; 4125 // Walk the register/memloc assignments, inserting copies/loads. 4126 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4127 i != e; 4128 ++i) { 4129 CCValAssign &VA = ArgLocs[i]; 4130 SDValue Arg = OutVals[i]; 4131 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4132 4133 if (Flags.isByVal()) { 4134 // Argument is an aggregate which is passed by value, thus we need to 4135 // create a copy of it in the local variable space of the current stack 4136 // frame (which is the stack frame of the caller) and pass the address of 4137 // this copy to the callee. 4138 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4139 CCValAssign &ByValVA = ByValArgLocs[j++]; 4140 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4141 4142 // Memory reserved in the local variable space of the callers stack frame. 4143 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4144 4145 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 4146 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 4147 4148 // Create a copy of the argument in the local area of the current 4149 // stack frame. 4150 SDValue MemcpyCall = 4151 CreateCopyOfByValArgument(Arg, PtrOff, 4152 CallSeqStart.getNode()->getOperand(0), 4153 Flags, DAG, dl); 4154 4155 // This must go outside the CALLSEQ_START..END. 
4156 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4157 CallSeqStart.getNode()->getOperand(1), 4158 SDLoc(MemcpyCall)); 4159 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4160 NewCallSeqStart.getNode()); 4161 Chain = CallSeqStart = NewCallSeqStart; 4162 4163 // Pass the address of the aggregate copy on the stack either in a 4164 // physical register or in the parameter list area of the current stack 4165 // frame to the callee. 4166 Arg = PtrOff; 4167 } 4168 4169 if (VA.isRegLoc()) { 4170 if (Arg.getValueType() == MVT::i1) 4171 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4172 4173 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4174 // Put argument in a physical register. 4175 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4176 } else { 4177 // Put argument in the parameter list area of the current stack frame. 4178 assert(VA.isMemLoc()); 4179 unsigned LocMemOffset = VA.getLocMemOffset(); 4180 4181 if (!isTailCall) { 4182 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 4183 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 4184 4185 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 4186 MachinePointerInfo(), 4187 false, false, 0)); 4188 } else { 4189 // Calculate and remember argument location. 4190 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4191 TailCallArguments); 4192 } 4193 } 4194 } 4195 4196 if (!MemOpChains.empty()) 4197 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4198 4199 // Build a sequence of copy-to-reg nodes chained together with token chain 4200 // and flag operands which copy the outgoing args into the appropriate regs. 
4201 SDValue InFlag; 4202 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4203 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4204 RegsToPass[i].second, InFlag); 4205 InFlag = Chain.getValue(1); 4206 } 4207 4208 // Set CR bit 6 to true if this is a vararg call with floating args passed in 4209 // registers. 4210 if (isVarArg) { 4211 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 4212 SDValue Ops[] = { Chain, InFlag }; 4213 4214 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 4215 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); 4216 4217 InFlag = Chain.getValue(1); 4218 } 4219 4220 if (isTailCall) 4221 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, 4222 false, TailCallArguments); 4223 4224 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 4225 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4226 Ins, InVals); 4227} 4228 4229// Copy an argument into memory, being careful to do this outside the 4230// call sequence for the call to which the argument belongs. 4231SDValue 4232PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, 4233 SDValue CallSeqStart, 4234 ISD::ArgFlagsTy Flags, 4235 SelectionDAG &DAG, 4236 SDLoc dl) const { 4237 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 4238 CallSeqStart.getNode()->getOperand(0), 4239 Flags, DAG, dl); 4240 // The MEMCPY must go outside the CALLSEQ_START..END. 
4241 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4242 CallSeqStart.getNode()->getOperand(1), 4243 SDLoc(MemcpyCall)); 4244 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4245 NewCallSeqStart.getNode()); 4246 return NewCallSeqStart; 4247} 4248 4249SDValue 4250PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, 4251 CallingConv::ID CallConv, bool isVarArg, 4252 bool isTailCall, bool IsPatchPoint, 4253 const SmallVectorImpl<ISD::OutputArg> &Outs, 4254 const SmallVectorImpl<SDValue> &OutVals, 4255 const SmallVectorImpl<ISD::InputArg> &Ins, 4256 SDLoc dl, SelectionDAG &DAG, 4257 SmallVectorImpl<SDValue> &InVals) const { 4258 4259 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4260 bool isLittleEndian = Subtarget.isLittleEndian(); 4261 unsigned NumOps = Outs.size(); 4262 4263 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4264 unsigned PtrByteSize = 8; 4265 4266 MachineFunction &MF = DAG.getMachineFunction(); 4267 4268 // Mark this function as potentially containing a function that contains a 4269 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4270 // and restoring the callers stack pointer in this functions epilog. This is 4271 // done because by tail calling the called function might overwrite the value 4272 // in this function's (MF) stack pointer stack slot 0(SP). 4273 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4274 CallConv == CallingConv::Fast) 4275 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4276 4277 // Count how many bytes are to be pushed on the stack, including the linkage 4278 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 4279 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 4280 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 4281 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, 4282 isELFv2ABI); 4283 unsigned NumBytes = LinkageSize; 4284 4285 // Add up all the space actually used. 
4286 for (unsigned i = 0; i != NumOps; ++i) { 4287 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4288 EVT ArgVT = Outs[i].VT; 4289 EVT OrigVT = Outs[i].ArgVT; 4290 4291 /* Respect alignment of argument on the stack. */ 4292 unsigned Align = 4293 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4294 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 4295 4296 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 4297 if (Flags.isInConsecutiveRegsLast()) 4298 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4299 } 4300 4301 unsigned NumBytesActuallyUsed = NumBytes; 4302 4303 // The prolog code of the callee may store up to 8 GPR argument registers to 4304 // the stack, allowing va_start to index over them in memory if its varargs. 4305 // Because we cannot tell if this is needed on the caller side, we have to 4306 // conservatively assume that it is needed. As such, make sure we have at 4307 // least enough stack space for the caller to store the 8 GPRs. 4308 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. 4309 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 4310 4311 // Tail call needs the stack to be aligned. 4312 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4313 CallConv == CallingConv::Fast) 4314 NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); 4315 4316 // Calculate by how many bytes the stack has to be adjusted in case of tail 4317 // call optimization. 4318 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4319 4320 // To protect arguments on the stack from being clobbered in a tail call, 4321 // force all the loads to happen before doing any other lowering. 4322 if (isTailCall) 4323 Chain = DAG.getStackArgumentTokenFactor(Chain); 4324 4325 // Adjust the stack pointer for the new arguments... 
4326 // These operations are automatically eliminated by the prolog/epilog pass 4327 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4328 dl); 4329 SDValue CallSeqStart = Chain; 4330 4331 // Load the return address and frame pointer so it can be move somewhere else 4332 // later. 4333 SDValue LROp, FPOp; 4334 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4335 dl); 4336 4337 // Set up a copy of the stack pointer for use loading and storing any 4338 // arguments that may not fit in the registers available for argument 4339 // passing. 4340 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4341 4342 // Figure out which arguments are going to go in registers, and which in 4343 // memory. Also, if this is a vararg function, floating point operations 4344 // must be stored to our stack, and loaded into integer regs as well, if 4345 // any integer regs are available for argument passing. 4346 unsigned ArgOffset = LinkageSize; 4347 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; 4348 4349 static const MCPhysReg GPR[] = { 4350 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4351 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4352 }; 4353 static const MCPhysReg *FPR = GetFPR(); 4354 4355 static const MCPhysReg VR[] = { 4356 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4357 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4358 }; 4359 static const MCPhysReg VSRH[] = { 4360 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 4361 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 4362 }; 4363 4364 const unsigned NumGPRs = array_lengthof(GPR); 4365 const unsigned NumFPRs = 13; 4366 const unsigned NumVRs = array_lengthof(VR); 4367 4368 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4369 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4370 4371 SmallVector<SDValue, 8> MemOpChains; 4372 for (unsigned i = 0; i != NumOps; ++i) { 4373 SDValue Arg = OutVals[i]; 4374 ISD::ArgFlagsTy 
Flags = Outs[i].Flags; 4375 EVT ArgVT = Outs[i].VT; 4376 EVT OrigVT = Outs[i].ArgVT; 4377 4378 /* Respect alignment of argument on the stack. */ 4379 unsigned Align = 4380 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4381 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 4382 4383 /* Compute GPR index associated with argument offset. */ 4384 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 4385 GPR_idx = std::min(GPR_idx, NumGPRs); 4386 4387 // PtrOff will be used to store the current argument to the stack if a 4388 // register cannot be found for it. 4389 SDValue PtrOff; 4390 4391 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4392 4393 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4394 4395 // Promote integers to 64-bit values. 4396 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 4397 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4398 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4399 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4400 } 4401 4402 // FIXME memcpy is used way more than necessary. Correctness first. 4403 // Note: "by value" is code for passing a structure by value, not 4404 // basic types. 4405 if (Flags.isByVal()) { 4406 // Note: Size includes alignment padding, so 4407 // struct x { short a; char b; } 4408 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 4409 // These are the proper values we need for right-justifying the 4410 // aggregate in a parameter register. 4411 unsigned Size = Flags.getByValSize(); 4412 4413 // An empty aggregate parameter takes up no storage and no 4414 // registers. 4415 if (Size == 0) 4416 continue; 4417 4418 // All aggregates smaller than 8 bytes must be passed right-justified. 4419 if (Size==1 || Size==2 || Size==4) { 4420 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? 
MVT::i16 : MVT::i32); 4421 if (GPR_idx != NumGPRs) { 4422 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4423 MachinePointerInfo(), VT, 4424 false, false, false, 0); 4425 MemOpChains.push_back(Load.getValue(1)); 4426 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); 4427 4428 ArgOffset += PtrByteSize; 4429 continue; 4430 } 4431 } 4432 4433 if (GPR_idx == NumGPRs && Size < 8) { 4434 SDValue AddPtr = PtrOff; 4435 if (!isLittleEndian) { 4436 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4437 PtrOff.getValueType()); 4438 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4439 } 4440 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4441 CallSeqStart, 4442 Flags, DAG, dl); 4443 ArgOffset += PtrByteSize; 4444 continue; 4445 } 4446 // Copy entire object into memory. There are cases where gcc-generated 4447 // code assumes it is there, even if it could be put entirely into 4448 // registers. (This is not what the doc says.) 4449 4450 // FIXME: The above statement is likely due to a misunderstanding of the 4451 // documents. All arguments must be copied into the parameter area BY 4452 // THE CALLEE in the event that the callee takes the address of any 4453 // formal argument. That has not yet been implemented. However, it is 4454 // reasonable to use the stack area as a staging area for the register 4455 // load. 4456 4457 // Skip this for small aggregates, as we will use the same slot for a 4458 // right-justified copy, below. 4459 if (Size >= 8) 4460 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4461 CallSeqStart, 4462 Flags, DAG, dl); 4463 4464 // When a register is available, pass a small aggregate right-justified. 4465 if (Size < 8 && GPR_idx != NumGPRs) { 4466 // The easiest way to get this right-justified in a register 4467 // is to copy the structure into the rightmost portion of a 4468 // local variable slot, then load the whole slot into the 4469 // register. 
4470 // FIXME: The memcpy seems to produce pretty awful code for 4471 // small aggregates, particularly for packed ones. 4472 // FIXME: It would be preferable to use the slot in the 4473 // parameter save area instead of a new local variable. 4474 SDValue AddPtr = PtrOff; 4475 if (!isLittleEndian) { 4476 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); 4477 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4478 } 4479 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4480 CallSeqStart, 4481 Flags, DAG, dl); 4482 4483 // Load the slot into the register. 4484 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 4485 MachinePointerInfo(), 4486 false, false, false, 0); 4487 MemOpChains.push_back(Load.getValue(1)); 4488 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); 4489 4490 // Done with this argument. 4491 ArgOffset += PtrByteSize; 4492 continue; 4493 } 4494 4495 // For aggregates larger than PtrByteSize, copy the pieces of the 4496 // object that fit into registers from the parameter save area. 4497 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4498 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4499 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4500 if (GPR_idx != NumGPRs) { 4501 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4502 MachinePointerInfo(), 4503 false, false, false, 0); 4504 MemOpChains.push_back(Load.getValue(1)); 4505 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4506 ArgOffset += PtrByteSize; 4507 } else { 4508 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4509 break; 4510 } 4511 } 4512 continue; 4513 } 4514 4515 switch (Arg.getSimpleValueType().SimpleTy) { 4516 default: llvm_unreachable("Unexpected ValueType for argument!"); 4517 case MVT::i1: 4518 case MVT::i32: 4519 case MVT::i64: 4520 // These can be scalar arguments or elements of an integer array type 4521 // passed directly. 
Clang may use those instead of "byval" aggregate 4522 // types to avoid forcing arguments to memory unnecessarily. 4523 if (GPR_idx != NumGPRs) { 4524 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg)); 4525 } else { 4526 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4527 true, isTailCall, false, MemOpChains, 4528 TailCallArguments, dl); 4529 } 4530 ArgOffset += PtrByteSize; 4531 break; 4532 case MVT::f32: 4533 case MVT::f64: { 4534 // These can be scalar arguments or elements of a float array type 4535 // passed directly. The latter are used to implement ELFv2 homogenous 4536 // float aggregates. 4537 4538 // Named arguments go into FPRs first, and once they overflow, the 4539 // remaining arguments go into GPRs and then the parameter save area. 4540 // Unnamed arguments for vararg functions always go to GPRs and 4541 // then the parameter save area. For now, put all arguments to vararg 4542 // routines always in both locations (FPR *and* GPR or stack slot). 4543 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 4544 4545 // First load the argument into the next available FPR. 4546 if (FPR_idx != NumFPRs) 4547 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4548 4549 // Next, load the argument into GPR or stack slot if needed. 4550 if (!NeedGPROrStack) 4551 ; 4552 else if (GPR_idx != NumGPRs) { 4553 // In the non-vararg case, this can only ever happen in the 4554 // presence of f32 array types, since otherwise we never run 4555 // out of FPRs before running out of GPRs. 4556 SDValue ArgVal; 4557 4558 // Double values are always passed in a single GPR. 4559 if (Arg.getValueType() != MVT::f32) { 4560 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 4561 4562 // Non-array float values are extended and passed in a GPR. 
4563 } else if (!Flags.isInConsecutiveRegs()) { 4564 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4565 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4566 4567 // If we have an array of floats, we collect every odd element 4568 // together with its predecessor into one GPR. 4569 } else if (ArgOffset % PtrByteSize != 0) { 4570 SDValue Lo, Hi; 4571 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 4572 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4573 if (!isLittleEndian) 4574 std::swap(Lo, Hi); 4575 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4576 4577 // The final element, if even, goes into the first half of a GPR. 4578 } else if (Flags.isInConsecutiveRegsLast()) { 4579 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4580 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4581 if (!isLittleEndian) 4582 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 4583 DAG.getConstant(32, MVT::i32)); 4584 4585 // Non-final even elements are skipped; they will be handled 4586 // together the with subsequent argument on the next go-around. 4587 } else 4588 ArgVal = SDValue(); 4589 4590 if (ArgVal.getNode()) 4591 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal)); 4592 } else { 4593 // Single-precision floating-point values are mapped to the 4594 // second (rightmost) word of the stack doubleword. 4595 if (Arg.getValueType() == MVT::f32 && 4596 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 4597 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4598 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4599 } 4600 4601 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4602 true, isTailCall, false, MemOpChains, 4603 TailCallArguments, dl); 4604 } 4605 // When passing an array of floats, the array occupies consecutive 4606 // space in the argument area; only round up to the next doubleword 4607 // at the end of the array. 
Otherwise, each float takes 8 bytes. 4608 ArgOffset += (Arg.getValueType() == MVT::f32 && 4609 Flags.isInConsecutiveRegs()) ? 4 : 8; 4610 if (Flags.isInConsecutiveRegsLast()) 4611 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4612 break; 4613 } 4614 case MVT::v4f32: 4615 case MVT::v4i32: 4616 case MVT::v8i16: 4617 case MVT::v16i8: 4618 case MVT::v2f64: 4619 case MVT::v2i64: 4620 // These can be scalar arguments or elements of a vector array type 4621 // passed directly. The latter are used to implement ELFv2 homogenous 4622 // vector aggregates. 4623 4624 // For a varargs call, named arguments go into VRs or on the stack as 4625 // usual; unnamed arguments always go to the stack or the corresponding 4626 // GPRs when within range. For now, we always put the value in both 4627 // locations (or even all three). 4628 if (isVarArg) { 4629 // We could elide this store in the case where the object fits 4630 // entirely in R registers. Maybe later. 4631 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4632 MachinePointerInfo(), false, false, 0); 4633 MemOpChains.push_back(Store); 4634 if (VR_idx != NumVRs) { 4635 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4636 MachinePointerInfo(), 4637 false, false, false, 0); 4638 MemOpChains.push_back(Load.getValue(1)); 4639 4640 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 4641 Arg.getSimpleValueType() == MVT::v2i64) ? 
4642 VSRH[VR_idx] : VR[VR_idx]; 4643 ++VR_idx; 4644 4645 RegsToPass.push_back(std::make_pair(VReg, Load)); 4646 } 4647 ArgOffset += 16; 4648 for (unsigned i=0; i<16; i+=PtrByteSize) { 4649 if (GPR_idx == NumGPRs) 4650 break; 4651 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4652 DAG.getConstant(i, PtrVT)); 4653 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4654 false, false, false, 0); 4655 MemOpChains.push_back(Load.getValue(1)); 4656 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4657 } 4658 break; 4659 } 4660 4661 // Non-varargs Altivec params go into VRs or on the stack. 4662 if (VR_idx != NumVRs) { 4663 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 4664 Arg.getSimpleValueType() == MVT::v2i64) ? 4665 VSRH[VR_idx] : VR[VR_idx]; 4666 ++VR_idx; 4667 4668 RegsToPass.push_back(std::make_pair(VReg, Arg)); 4669 } else { 4670 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4671 true, isTailCall, true, MemOpChains, 4672 TailCallArguments, dl); 4673 } 4674 ArgOffset += 16; 4675 break; 4676 } 4677 } 4678 4679 assert(NumBytesActuallyUsed == ArgOffset); 4680 (void)NumBytesActuallyUsed; 4681 4682 if (!MemOpChains.empty()) 4683 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4684 4685 // Check if this is an indirect call (MTCTR/BCTRL). 4686 // See PrepareCall() for more information about calls through function 4687 // pointers in the 64-bit SVR4 ABI. 4688 if (!isTailCall && !IsPatchPoint && 4689 !isFunctionGlobalAddress(Callee) && 4690 !isa<ExternalSymbolSDNode>(Callee)) { 4691 // Load r2 into a virtual register and store it to the TOC save area. 4692 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 4693 // TOC save area offset. 
4694 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); 4695 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); 4696 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4697 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), 4698 false, false, 0); 4699 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 4700 // This does not mean the MTCTR instruction must use R12; it's easier 4701 // to model this as an extra parameter, so do that. 4702 if (isELFv2ABI && !IsPatchPoint) 4703 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 4704 } 4705 4706 // Build a sequence of copy-to-reg nodes chained together with token chain 4707 // and flag operands which copy the outgoing args into the appropriate regs. 4708 SDValue InFlag; 4709 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4710 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4711 RegsToPass[i].second, InFlag); 4712 InFlag = Chain.getValue(1); 4713 } 4714 4715 if (isTailCall) 4716 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 4717 FPOp, true, TailCallArguments); 4718 4719 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 4720 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4721 Ins, InVals); 4722} 4723 4724SDValue 4725PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 4726 CallingConv::ID CallConv, bool isVarArg, 4727 bool isTailCall, bool IsPatchPoint, 4728 const SmallVectorImpl<ISD::OutputArg> &Outs, 4729 const SmallVectorImpl<SDValue> &OutVals, 4730 const SmallVectorImpl<ISD::InputArg> &Ins, 4731 SDLoc dl, SelectionDAG &DAG, 4732 SmallVectorImpl<SDValue> &InVals) const { 4733 4734 unsigned NumOps = Outs.size(); 4735 4736 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4737 bool isPPC64 = PtrVT == MVT::i64; 4738 unsigned PtrByteSize = isPPC64 ? 
8 : 4; 4739 4740 MachineFunction &MF = DAG.getMachineFunction(); 4741 4742 // Mark this function as potentially containing a function that contains a 4743 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4744 // and restoring the callers stack pointer in this functions epilog. This is 4745 // done because by tail calling the called function might overwrite the value 4746 // in this function's (MF) stack pointer stack slot 0(SP). 4747 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4748 CallConv == CallingConv::Fast) 4749 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4750 4751 // Count how many bytes are to be pushed on the stack, including the linkage 4752 // area, and parameter passing area. We start with 24/48 bytes, which is 4753 // prereserved space for [SP][CR][LR][3 x unused]. 4754 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true, 4755 false); 4756 unsigned NumBytes = LinkageSize; 4757 4758 // Add up all the space actually used. 4759 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 4760 // they all go in registers, but we must reserve stack space for them for 4761 // possible use by the caller. In varargs or 64-bit calls, parameters are 4762 // assigned stack space in order, with padding so Altivec parameters are 4763 // 16-byte aligned. 4764 unsigned nAltivecParamsAtEnd = 0; 4765 for (unsigned i = 0; i != NumOps; ++i) { 4766 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4767 EVT ArgVT = Outs[i].VT; 4768 // Varargs Altivec parameters are padded to a 16 byte boundary. 4769 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 4770 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 4771 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 4772 if (!isVarArg && !isPPC64) { 4773 // Non-varargs Altivec parameters go after all the non-Altivec 4774 // parameters; handle those later so we know how much padding we need. 
4775 nAltivecParamsAtEnd++; 4776 continue; 4777 } 4778 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 4779 NumBytes = ((NumBytes+15)/16)*16; 4780 } 4781 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 4782 } 4783 4784 // Allow for Altivec parameters at the end, if needed. 4785 if (nAltivecParamsAtEnd) { 4786 NumBytes = ((NumBytes+15)/16)*16; 4787 NumBytes += 16*nAltivecParamsAtEnd; 4788 } 4789 4790 // The prolog code of the callee may store up to 8 GPR argument registers to 4791 // the stack, allowing va_start to index over them in memory if its varargs. 4792 // Because we cannot tell if this is needed on the caller side, we have to 4793 // conservatively assume that it is needed. As such, make sure we have at 4794 // least enough stack space for the caller to store the 8 GPRs. 4795 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 4796 4797 // Tail call needs the stack to be aligned. 4798 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4799 CallConv == CallingConv::Fast) 4800 NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); 4801 4802 // Calculate by how many bytes the stack has to be adjusted in case of tail 4803 // call optimization. 4804 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4805 4806 // To protect arguments on the stack from being clobbered in a tail call, 4807 // force all the loads to happen before doing any other lowering. 4808 if (isTailCall) 4809 Chain = DAG.getStackArgumentTokenFactor(Chain); 4810 4811 // Adjust the stack pointer for the new arguments... 4812 // These operations are automatically eliminated by the prolog/epilog pass 4813 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4814 dl); 4815 SDValue CallSeqStart = Chain; 4816 4817 // Load the return address and frame pointer so it can be move somewhere else 4818 // later. 
4819 SDValue LROp, FPOp; 4820 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4821 dl); 4822 4823 // Set up a copy of the stack pointer for use loading and storing any 4824 // arguments that may not fit in the registers available for argument 4825 // passing. 4826 SDValue StackPtr; 4827 if (isPPC64) 4828 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4829 else 4830 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4831 4832 // Figure out which arguments are going to go in registers, and which in 4833 // memory. Also, if this is a vararg function, floating point operations 4834 // must be stored to our stack, and loaded into integer regs as well, if 4835 // any integer regs are available for argument passing. 4836 unsigned ArgOffset = LinkageSize; 4837 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4838 4839 static const MCPhysReg GPR_32[] = { // 32-bit registers. 4840 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4841 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4842 }; 4843 static const MCPhysReg GPR_64[] = { // 64-bit registers. 4844 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4845 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4846 }; 4847 static const MCPhysReg *FPR = GetFPR(); 4848 4849 static const MCPhysReg VR[] = { 4850 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4851 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4852 }; 4853 const unsigned NumGPRs = array_lengthof(GPR_32); 4854 const unsigned NumFPRs = 13; 4855 const unsigned NumVRs = array_lengthof(VR); 4856 4857 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 4858 4859 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4860 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4861 4862 SmallVector<SDValue, 8> MemOpChains; 4863 for (unsigned i = 0; i != NumOps; ++i) { 4864 SDValue Arg = OutVals[i]; 4865 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4866 4867 // PtrOff will be used to store the current argument to the stack if a 4868 // register cannot be found for it. 
4869 SDValue PtrOff; 4870 4871 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4872 4873 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4874 4875 // On PPC64, promote integers to 64-bit values. 4876 if (isPPC64 && Arg.getValueType() == MVT::i32) { 4877 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4878 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4879 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4880 } 4881 4882 // FIXME memcpy is used way more than necessary. Correctness first. 4883 // Note: "by value" is code for passing a structure by value, not 4884 // basic types. 4885 if (Flags.isByVal()) { 4886 unsigned Size = Flags.getByValSize(); 4887 // Very small objects are passed right-justified. Everything else is 4888 // passed left-justified. 4889 if (Size==1 || Size==2) { 4890 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 4891 if (GPR_idx != NumGPRs) { 4892 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4893 MachinePointerInfo(), VT, 4894 false, false, false, 0); 4895 MemOpChains.push_back(Load.getValue(1)); 4896 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4897 4898 ArgOffset += PtrByteSize; 4899 } else { 4900 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4901 PtrOff.getValueType()); 4902 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4903 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4904 CallSeqStart, 4905 Flags, DAG, dl); 4906 ArgOffset += PtrByteSize; 4907 } 4908 continue; 4909 } 4910 // Copy entire object into memory. There are cases where gcc-generated 4911 // code assumes it is there, even if it could be put entirely into 4912 // registers. (This is not what the doc says.) 
4913 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4914 CallSeqStart, 4915 Flags, DAG, dl); 4916 4917 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 4918 // copy the pieces of the object that fit into registers from the 4919 // parameter save area. 4920 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4921 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4922 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4923 if (GPR_idx != NumGPRs) { 4924 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4925 MachinePointerInfo(), 4926 false, false, false, 0); 4927 MemOpChains.push_back(Load.getValue(1)); 4928 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4929 ArgOffset += PtrByteSize; 4930 } else { 4931 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4932 break; 4933 } 4934 } 4935 continue; 4936 } 4937 4938 switch (Arg.getSimpleValueType().SimpleTy) { 4939 default: llvm_unreachable("Unexpected ValueType for argument!"); 4940 case MVT::i1: 4941 case MVT::i32: 4942 case MVT::i64: 4943 if (GPR_idx != NumGPRs) { 4944 if (Arg.getValueType() == MVT::i1) 4945 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 4946 4947 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4948 } else { 4949 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4950 isPPC64, isTailCall, false, MemOpChains, 4951 TailCallArguments, dl); 4952 } 4953 ArgOffset += PtrByteSize; 4954 break; 4955 case MVT::f32: 4956 case MVT::f64: 4957 if (FPR_idx != NumFPRs) { 4958 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4959 4960 if (isVarArg) { 4961 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4962 MachinePointerInfo(), false, false, 0); 4963 MemOpChains.push_back(Store); 4964 4965 // Float varargs are always shadowed in available integer registers 4966 if (GPR_idx != NumGPRs) { 4967 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4968 MachinePointerInfo(), false, false, 4969 
false, 0); 4970 MemOpChains.push_back(Load.getValue(1)); 4971 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4972 } 4973 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 4974 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4975 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4976 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4977 MachinePointerInfo(), 4978 false, false, false, 0); 4979 MemOpChains.push_back(Load.getValue(1)); 4980 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4981 } 4982 } else { 4983 // If we have any FPRs remaining, we may also have GPRs remaining. 4984 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 4985 // GPRs. 4986 if (GPR_idx != NumGPRs) 4987 ++GPR_idx; 4988 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 4989 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 4990 ++GPR_idx; 4991 } 4992 } else 4993 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4994 isPPC64, isTailCall, false, MemOpChains, 4995 TailCallArguments, dl); 4996 if (isPPC64) 4997 ArgOffset += 8; 4998 else 4999 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 5000 break; 5001 case MVT::v4f32: 5002 case MVT::v4i32: 5003 case MVT::v8i16: 5004 case MVT::v16i8: 5005 if (isVarArg) { 5006 // These go aligned on the stack, or in the corresponding R registers 5007 // when within range. The Darwin PPC ABI doc claims they also go in 5008 // V registers; in fact gcc does this only for arguments that are 5009 // prototyped, not for those that match the ... We do it for all 5010 // arguments, seems to work. 5011 while (ArgOffset % 16 !=0) { 5012 ArgOffset += PtrByteSize; 5013 if (GPR_idx != NumGPRs) 5014 GPR_idx++; 5015 } 5016 // We could elide this store in the case where the object fits 5017 // entirely in R registers. Maybe later. 
5018 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 5019 DAG.getConstant(ArgOffset, PtrVT)); 5020 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5021 MachinePointerInfo(), false, false, 0); 5022 MemOpChains.push_back(Store); 5023 if (VR_idx != NumVRs) { 5024 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 5025 MachinePointerInfo(), 5026 false, false, false, 0); 5027 MemOpChains.push_back(Load.getValue(1)); 5028 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5029 } 5030 ArgOffset += 16; 5031 for (unsigned i=0; i<16; i+=PtrByteSize) { 5032 if (GPR_idx == NumGPRs) 5033 break; 5034 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5035 DAG.getConstant(i, PtrVT)); 5036 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 5037 false, false, false, 0); 5038 MemOpChains.push_back(Load.getValue(1)); 5039 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5040 } 5041 break; 5042 } 5043 5044 // Non-varargs Altivec params generally go in registers, but have 5045 // stack space allocated at the end. 5046 if (VR_idx != NumVRs) { 5047 // Doesn't have GPR space allocated. 5048 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5049 } else if (nAltivecParamsAtEnd==0) { 5050 // We are emitting Altivec params in order. 5051 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5052 isPPC64, isTailCall, true, MemOpChains, 5053 TailCallArguments, dl); 5054 ArgOffset += 16; 5055 } 5056 break; 5057 } 5058 } 5059 // If all Altivec parameters fit in registers, as they usually do, 5060 // they get stack space following the non-Altivec parameters. We 5061 // don't track this here because nobody below needs it. 5062 // If there are more Altivec parameters than fit in registers emit 5063 // the stores here. 5064 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 5065 unsigned j = 0; 5066 // Offset is aligned; skip 1st 12 params which go in V registers. 
5067 ArgOffset = ((ArgOffset+15)/16)*16; 5068 ArgOffset += 12*16; 5069 for (unsigned i = 0; i != NumOps; ++i) { 5070 SDValue Arg = OutVals[i]; 5071 EVT ArgType = Outs[i].VT; 5072 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 5073 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 5074 if (++j > NumVRs) { 5075 SDValue PtrOff; 5076 // We are emitting Altivec params in order. 5077 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5078 isPPC64, isTailCall, true, MemOpChains, 5079 TailCallArguments, dl); 5080 ArgOffset += 16; 5081 } 5082 } 5083 } 5084 } 5085 5086 if (!MemOpChains.empty()) 5087 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5088 5089 // On Darwin, R12 must contain the address of an indirect callee. This does 5090 // not mean the MTCTR instruction must use R12; it's easier to model this as 5091 // an extra parameter, so do that. 5092 if (!isTailCall && 5093 !isFunctionGlobalAddress(Callee) && 5094 !isa<ExternalSymbolSDNode>(Callee) && 5095 !isBLACompatibleAddress(Callee, DAG)) 5096 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 5097 PPC::R12), Callee)); 5098 5099 // Build a sequence of copy-to-reg nodes chained together with token chain 5100 // and flag operands which copy the outgoing args into the appropriate regs. 
5101 SDValue InFlag; 5102 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5103 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5104 RegsToPass[i].second, InFlag); 5105 InFlag = Chain.getValue(1); 5106 } 5107 5108 if (isTailCall) 5109 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 5110 FPOp, true, TailCallArguments); 5111 5112 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 5113 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 5114 Ins, InVals); 5115} 5116 5117bool 5118PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 5119 MachineFunction &MF, bool isVarArg, 5120 const SmallVectorImpl<ISD::OutputArg> &Outs, 5121 LLVMContext &Context) const { 5122 SmallVector<CCValAssign, 16> RVLocs; 5123 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 5124 return CCInfo.CheckReturn(Outs, RetCC_PPC); 5125} 5126 5127SDValue 5128PPCTargetLowering::LowerReturn(SDValue Chain, 5129 CallingConv::ID CallConv, bool isVarArg, 5130 const SmallVectorImpl<ISD::OutputArg> &Outs, 5131 const SmallVectorImpl<SDValue> &OutVals, 5132 SDLoc dl, SelectionDAG &DAG) const { 5133 5134 SmallVector<CCValAssign, 16> RVLocs; 5135 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 5136 *DAG.getContext()); 5137 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 5138 5139 SDValue Flag; 5140 SmallVector<SDValue, 4> RetOps(1, Chain); 5141 5142 // Copy the result values into the output registers. 
5143 for (unsigned i = 0; i != RVLocs.size(); ++i) { 5144 CCValAssign &VA = RVLocs[i]; 5145 assert(VA.isRegLoc() && "Can only return in registers!"); 5146 5147 SDValue Arg = OutVals[i]; 5148 5149 switch (VA.getLocInfo()) { 5150 default: llvm_unreachable("Unknown loc info!"); 5151 case CCValAssign::Full: break; 5152 case CCValAssign::AExt: 5153 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 5154 break; 5155 case CCValAssign::ZExt: 5156 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 5157 break; 5158 case CCValAssign::SExt: 5159 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 5160 break; 5161 } 5162 5163 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 5164 Flag = Chain.getValue(1); 5165 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 5166 } 5167 5168 RetOps[0] = Chain; // Update chain. 5169 5170 // Add the flag if we have it. 5171 if (Flag.getNode()) 5172 RetOps.push_back(Flag); 5173 5174 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); 5175} 5176 5177SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 5178 const PPCSubtarget &Subtarget) const { 5179 // When we pop the dynamic allocation we need to restore the SP link. 5180 SDLoc dl(Op); 5181 5182 // Get the corect type for pointers. 5183 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5184 5185 // Construct the stack pointer operand. 5186 bool isPPC64 = Subtarget.isPPC64(); 5187 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 5188 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 5189 5190 // Get the operands for the STACKRESTORE. 5191 SDValue Chain = Op.getOperand(0); 5192 SDValue SaveSP = Op.getOperand(1); 5193 5194 // Load the old link SP. 5195 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, 5196 MachinePointerInfo(), 5197 false, false, false, 0); 5198 5199 // Restore the stack pointer. 5200 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 5201 5202 // Store the old link SP. 
5203 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(), 5204 false, false, 0); 5205} 5206 5207 5208 5209SDValue 5210PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { 5211 MachineFunction &MF = DAG.getMachineFunction(); 5212 bool isPPC64 = Subtarget.isPPC64(); 5213 bool isDarwinABI = Subtarget.isDarwinABI(); 5214 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5215 5216 // Get current frame pointer save index. The users of this index will be 5217 // primarily DYNALLOC instructions. 5218 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 5219 int RASI = FI->getReturnAddrSaveIndex(); 5220 5221 // If the frame pointer save index hasn't been defined yet. 5222 if (!RASI) { 5223 // Find out what the fix offset of the frame pointer save area. 5224 int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); 5225 // Allocate the frame index for frame pointer save area. 5226 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false); 5227 // Save the result. 5228 FI->setReturnAddrSaveIndex(RASI); 5229 } 5230 return DAG.getFrameIndex(RASI, PtrVT); 5231} 5232 5233SDValue 5234PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 5235 MachineFunction &MF = DAG.getMachineFunction(); 5236 bool isPPC64 = Subtarget.isPPC64(); 5237 bool isDarwinABI = Subtarget.isDarwinABI(); 5238 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5239 5240 // Get current frame pointer save index. The users of this index will be 5241 // primarily DYNALLOC instructions. 5242 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 5243 int FPSI = FI->getFramePointerSaveIndex(); 5244 5245 // If the frame pointer save index hasn't been defined yet. 5246 if (!FPSI) { 5247 // Find out what the fix offset of the frame pointer save area. 5248 int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, 5249 isDarwinABI); 5250 5251 // Allocate the frame index for frame pointer save area. 
5252 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 5253 // Save the result. 5254 FI->setFramePointerSaveIndex(FPSI); 5255 } 5256 return DAG.getFrameIndex(FPSI, PtrVT); 5257} 5258 5259SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 5260 SelectionDAG &DAG, 5261 const PPCSubtarget &Subtarget) const { 5262 // Get the inputs. 5263 SDValue Chain = Op.getOperand(0); 5264 SDValue Size = Op.getOperand(1); 5265 SDLoc dl(Op); 5266 5267 // Get the corect type for pointers. 5268 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5269 // Negate the size. 5270 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 5271 DAG.getConstant(0, PtrVT), Size); 5272 // Construct a node for the frame pointer save index. 5273 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 5274 // Build a DYNALLOC node. 5275 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 5276 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 5277 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 5278} 5279 5280SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 5281 SelectionDAG &DAG) const { 5282 SDLoc DL(Op); 5283 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 5284 DAG.getVTList(MVT::i32, MVT::Other), 5285 Op.getOperand(0), Op.getOperand(1)); 5286} 5287 5288SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 5289 SelectionDAG &DAG) const { 5290 SDLoc DL(Op); 5291 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 5292 Op.getOperand(0), Op.getOperand(1)); 5293} 5294 5295SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 5296 assert(Op.getValueType() == MVT::i1 && 5297 "Custom lowering only for i1 loads"); 5298 5299 // First, load 8 bits into 32 bits, then truncate to 1 bit. 
5300 5301 SDLoc dl(Op); 5302 LoadSDNode *LD = cast<LoadSDNode>(Op); 5303 5304 SDValue Chain = LD->getChain(); 5305 SDValue BasePtr = LD->getBasePtr(); 5306 MachineMemOperand *MMO = LD->getMemOperand(); 5307 5308 SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, 5309 BasePtr, MVT::i8, MMO); 5310 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 5311 5312 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 5313 return DAG.getMergeValues(Ops, dl); 5314} 5315 5316SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 5317 assert(Op.getOperand(1).getValueType() == MVT::i1 && 5318 "Custom lowering only for i1 stores"); 5319 5320 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 5321 5322 SDLoc dl(Op); 5323 StoreSDNode *ST = cast<StoreSDNode>(Op); 5324 5325 SDValue Chain = ST->getChain(); 5326 SDValue BasePtr = ST->getBasePtr(); 5327 SDValue Value = ST->getValue(); 5328 MachineMemOperand *MMO = ST->getMemOperand(); 5329 5330 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); 5331 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 5332} 5333 5334// FIXME: Remove this once the ANDI glue bug is fixed: 5335SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 5336 assert(Op.getValueType() == MVT::i1 && 5337 "Custom lowering only for i1 results"); 5338 5339 SDLoc DL(Op); 5340 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 5341 Op.getOperand(0)); 5342} 5343 5344/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 5345/// possible. 5346SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 5347 // Not FP? Not a fsel. 
5348 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 5349 !Op.getOperand(2).getValueType().isFloatingPoint()) 5350 return Op; 5351 5352 // We might be able to do better than this under some circumstances, but in 5353 // general, fsel-based lowering of select is a finite-math-only optimization. 5354 // For more information, see section F.3 of the 2.06 ISA specification. 5355 if (!DAG.getTarget().Options.NoInfsFPMath || 5356 !DAG.getTarget().Options.NoNaNsFPMath) 5357 return Op; 5358 5359 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 5360 5361 EVT ResVT = Op.getValueType(); 5362 EVT CmpVT = Op.getOperand(0).getValueType(); 5363 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5364 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 5365 SDLoc dl(Op); 5366 5367 // If the RHS of the comparison is a 0.0, we don't need to do the 5368 // subtraction at all. 5369 SDValue Sel1; 5370 if (isFloatingPointZero(RHS)) 5371 switch (CC) { 5372 default: break; // SETUO etc aren't handled by fsel. 
5373 case ISD::SETNE: 5374 std::swap(TV, FV); 5375 case ISD::SETEQ: 5376 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5377 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5378 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 5379 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 5380 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 5381 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5382 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 5383 case ISD::SETULT: 5384 case ISD::SETLT: 5385 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 5386 case ISD::SETOGE: 5387 case ISD::SETGE: 5388 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5389 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5390 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 5391 case ISD::SETUGT: 5392 case ISD::SETGT: 5393 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 5394 case ISD::SETOLE: 5395 case ISD::SETLE: 5396 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5397 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5398 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5399 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 5400 } 5401 5402 SDValue Cmp; 5403 switch (CC) { 5404 default: break; // SETUO etc aren't handled by fsel. 
5405 case ISD::SETNE: 5406 std::swap(TV, FV); 5407 case ISD::SETEQ: 5408 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5409 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5410 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5411 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5412 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 5413 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 5414 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5415 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 5416 case ISD::SETULT: 5417 case ISD::SETLT: 5418 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5419 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5420 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5421 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 5422 case ISD::SETOGE: 5423 case ISD::SETGE: 5424 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5425 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5426 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5427 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5428 case ISD::SETUGT: 5429 case ISD::SETGT: 5430 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 5431 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5432 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5433 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 5434 case ISD::SETOLE: 5435 case ISD::SETLE: 5436 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 5437 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5438 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5439 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5440 } 5441 return Op; 5442} 5443 5444void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 5445 SelectionDAG &DAG, 5446 SDLoc dl) const { 5447 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 5448 SDValue Src = 
Op.getOperand(0); 5449 if (Src.getValueType() == MVT::f32) 5450 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 5451 5452 SDValue Tmp; 5453 switch (Op.getSimpleValueType().SimpleTy) { 5454 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 5455 case MVT::i32: 5456 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : 5457 (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : 5458 PPCISD::FCTIDZ), 5459 dl, MVT::f64, Src); 5460 break; 5461 case MVT::i64: 5462 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 5463 "i64 FP_TO_UINT is supported only with FPCVT"); 5464 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 5465 PPCISD::FCTIDUZ, 5466 dl, MVT::f64, Src); 5467 break; 5468 } 5469 5470 // Convert the FP value to an int value through memory. 5471 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 5472 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 5473 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 5474 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 5475 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); 5476 5477 // Emit a store to the stack slot. 5478 SDValue Chain; 5479 if (i32Stack) { 5480 MachineFunction &MF = DAG.getMachineFunction(); 5481 MachineMemOperand *MMO = 5482 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 5483 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 5484 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 5485 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 5486 } else 5487 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 5488 MPI, false, false, 0); 5489 5490 // Result is a load from the stack slot. If loading 4 bytes, make sure to 5491 // add in a bias. 
5492 if (Op.getValueType() == MVT::i32 && !i32Stack) { 5493 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 5494 DAG.getConstant(4, FIPtr.getValueType())); 5495 MPI = MPI.getWithOffset(4); 5496 } 5497 5498 RLI.Chain = Chain; 5499 RLI.Ptr = FIPtr; 5500 RLI.MPI = MPI; 5501} 5502 5503SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 5504 SDLoc dl) const { 5505 ReuseLoadInfo RLI; 5506 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 5507 5508 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, 5509 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, 5510 RLI.Ranges); 5511} 5512 5513// We're trying to insert a regular store, S, and then a load, L. If the 5514// incoming value, O, is a load, we might just be able to have our load use the 5515// address used by O. However, we don't know if anything else will store to 5516// that address before we can load from it. To prevent this situation, we need 5517// to insert our load, L, into the chain as a peer of O. To do this, we give L 5518// the same chain operand as O, we create a token factor from the chain results 5519// of O and L, and we replace all uses of O's chain result with that token 5520// factor (see spliceIntoChain below for this last part). 
5521bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 5522 ReuseLoadInfo &RLI, 5523 SelectionDAG &DAG, 5524 ISD::LoadExtType ET) const { 5525 SDLoc dl(Op); 5526 if (ET == ISD::NON_EXTLOAD && 5527 (Op.getOpcode() == ISD::FP_TO_UINT || 5528 Op.getOpcode() == ISD::FP_TO_SINT) && 5529 isOperationLegalOrCustom(Op.getOpcode(), 5530 Op.getOperand(0).getValueType())) { 5531 5532 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 5533 return true; 5534 } 5535 5536 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 5537 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 5538 LD->isNonTemporal()) 5539 return false; 5540 if (LD->getMemoryVT() != MemVT) 5541 return false; 5542 5543 RLI.Ptr = LD->getBasePtr(); 5544 if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) { 5545 assert(LD->getAddressingMode() == ISD::PRE_INC && 5546 "Non-pre-inc AM on PPC?"); 5547 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 5548 LD->getOffset()); 5549 } 5550 5551 RLI.Chain = LD->getChain(); 5552 RLI.MPI = LD->getPointerInfo(); 5553 RLI.IsInvariant = LD->isInvariant(); 5554 RLI.Alignment = LD->getAlignment(); 5555 RLI.AAInfo = LD->getAAInfo(); 5556 RLI.Ranges = LD->getRanges(); 5557 5558 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 5559 return true; 5560} 5561 5562// Given the head of the old chain, ResChain, insert a token factor containing 5563// it and NewResChain, and make users of ResChain now be users of that token 5564// factor. 
5565void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 5566 SDValue NewResChain, 5567 SelectionDAG &DAG) const { 5568 if (!ResChain) 5569 return; 5570 5571 SDLoc dl(NewResChain); 5572 5573 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 5574 NewResChain, DAG.getUNDEF(MVT::Other)); 5575 assert(TF.getNode() != NewResChain.getNode() && 5576 "A new TF really is required here"); 5577 5578 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 5579 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 5580} 5581 5582SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 5583 SelectionDAG &DAG) const { 5584 SDLoc dl(Op); 5585 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 5586 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 5587 return SDValue(); 5588 5589 if (Op.getOperand(0).getValueType() == MVT::i1) 5590 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 5591 DAG.getConstantFP(1.0, Op.getValueType()), 5592 DAG.getConstantFP(0.0, Op.getValueType())); 5593 5594 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 5595 "UINT_TO_FP is supported only with FPCVT"); 5596 5597 // If we have FCFIDS, then use it when converting to single-precision. 5598 // Otherwise, convert to double-precision and then round. 5599 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 5600 (Op.getOpcode() == ISD::UINT_TO_FP ? 5601 PPCISD::FCFIDUS : PPCISD::FCFIDS) : 5602 (Op.getOpcode() == ISD::UINT_TO_FP ? 5603 PPCISD::FCFIDU : PPCISD::FCFID); 5604 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 5605 MVT::f32 : MVT::f64; 5606 5607 if (Op.getOperand(0).getValueType() == MVT::i64) { 5608 SDValue SINT = Op.getOperand(0); 5609 // When converting to single-precision, we actually need to convert 5610 // to double-precision first and then round to single-precision. 
5611 // To avoid double-rounding effects during that operation, we have 5612 // to prepare the input operand. Bits that might be truncated when 5613 // converting to double-precision are replaced by a bit that won't 5614 // be lost at this stage, but is below the single-precision rounding 5615 // position. 5616 // 5617 // However, if -enable-unsafe-fp-math is in effect, accept double 5618 // rounding to avoid the extra overhead. 5619 if (Op.getValueType() == MVT::f32 && 5620 !Subtarget.hasFPCVT() && 5621 !DAG.getTarget().Options.UnsafeFPMath) { 5622 5623 // Twiddle input to make sure the low 11 bits are zero. (If this 5624 // is the case, we are guaranteed the value will fit into the 53 bit 5625 // mantissa of an IEEE double-precision value without rounding.) 5626 // If any of those low 11 bits were not zero originally, make sure 5627 // bit 12 (value 2048) is set instead, so that the final rounding 5628 // to single-precision gets the correct result. 5629 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 5630 SINT, DAG.getConstant(2047, MVT::i64)); 5631 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 5632 Round, DAG.getConstant(2047, MVT::i64)); 5633 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 5634 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 5635 Round, DAG.getConstant(-2048, MVT::i64)); 5636 5637 // However, we cannot use that value unconditionally: if the magnitude 5638 // of the input value is small, the bit-twiddling we did above might 5639 // end up visibly changing the output. Fortunately, in that case, we 5640 // don't need to twiddle bits since the original input will convert 5641 // exactly to double-precision floating-point already. Therefore, 5642 // construct a conditional to use the original value if the top 11 5643 // bits are all sign-bit copies, and use the rounded value computed 5644 // above otherwise. 
5645 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 5646 SINT, DAG.getConstant(53, MVT::i32)); 5647 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 5648 Cond, DAG.getConstant(1, MVT::i64)); 5649 Cond = DAG.getSetCC(dl, MVT::i32, 5650 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 5651 5652 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 5653 } 5654 5655 ReuseLoadInfo RLI; 5656 SDValue Bits; 5657 5658 MachineFunction &MF = DAG.getMachineFunction(); 5659 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 5660 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, 5661 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, 5662 RLI.Ranges); 5663 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 5664 } else if (Subtarget.hasLFIWAX() && 5665 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 5666 MachineMemOperand *MMO = 5667 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 5668 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 5669 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 5670 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 5671 DAG.getVTList(MVT::f64, MVT::Other), 5672 Ops, MVT::i32, MMO); 5673 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 5674 } else if (Subtarget.hasFPCVT() && 5675 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 5676 MachineMemOperand *MMO = 5677 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 5678 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 5679 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 5680 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 5681 DAG.getVTList(MVT::f64, MVT::Other), 5682 Ops, MVT::i32, MMO); 5683 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 5684 } else if (((Subtarget.hasLFIWAX() && 5685 SINT.getOpcode() == ISD::SIGN_EXTEND) || 5686 (Subtarget.hasFPCVT() && 5687 SINT.getOpcode() == ISD::ZERO_EXTEND)) && 5688 SINT.getOperand(0).getValueType() == MVT::i32) { 5689 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 5690 EVT PtrVT = 
DAG.getTargetLoweringInfo().getPointerTy(); 5691 5692 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 5693 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5694 5695 SDValue Store = 5696 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 5697 MachinePointerInfo::getFixedStack(FrameIdx), 5698 false, false, 0); 5699 5700 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 5701 "Expected an i32 store"); 5702 5703 RLI.Ptr = FIdx; 5704 RLI.Chain = Store; 5705 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); 5706 RLI.Alignment = 4; 5707 5708 MachineMemOperand *MMO = 5709 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 5710 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 5711 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 5712 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 5713 PPCISD::LFIWZX : PPCISD::LFIWAX, 5714 dl, DAG.getVTList(MVT::f64, MVT::Other), 5715 Ops, MVT::i32, MMO); 5716 } else 5717 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 5718 5719 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 5720 5721 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 5722 FP = DAG.getNode(ISD::FP_ROUND, dl, 5723 MVT::f32, FP, DAG.getIntPtrConstant(0)); 5724 return FP; 5725 } 5726 5727 assert(Op.getOperand(0).getValueType() == MVT::i32 && 5728 "Unhandled INT_TO_FP type in custom expander!"); 5729 // Since we only generate this in 64-bit mode, we can take advantage of 5730 // 64-bit registers. In particular, sign extend the input value into the 5731 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 5732 // then lfd it and fcfid it. 
MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                                   MachinePointerInfo::getFixedStack(FrameIdx),
                                   false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
                                 MachinePointerInfo::getFixedStack(FrameIdx),
                                 false, false, 0);

    // Load the value as a double.
    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
                     MachinePointerInfo::getFixedStack(FrameIdx),
                     false, false, false, 0);
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}

/// LowerFLT_ROUNDS_ - Lower FLT_ROUNDS_ by reading the FP status/control
/// register with MFFS, spilling it to an 8-byte stack slot, reloading one
/// 32-bit word of it as an integer, and remapping the two rounding-mode bits
/// to the FLT_ROUNDS encoding.  The result is truncated or zero-extended to
/// the node's value type.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))

  Worked through for each mode value m = FPSCR & 0x3:
    m = 0:  0 ^ (3 >> 1) = 1
    m = 1:  1 ^ (2 >> 1) = 0
    m = 2:  2 ^ (1 >> 1) = 2
    m = 3:  3 ^ (0 >> 1) = 3
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Save FP Control Word to register.  Note that, despite its name, 'Chain'
  // is the MFFS node itself; it is used below as the *value* being stored.
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
                               StackSlot, MachinePointerInfo(), false, false,0);

  // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): the word at byte offset 4 is the low half of the 8-byte
  // slot only under big-endian layout -- confirm behavior for little-endian
  // subtargets.
  SDValue Four = DAG.getConstant(4, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
                            false, false, false, 0);

  // Transform as necessary
  // CWD1 = CWD & 3
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, MVT::i32));
  // CWD2 = ((CWD ^ 3) & 3) >> 1, i.e. (~CWD & 3) >> 1
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, MVT::i32)),
                            DAG.getConstant(3, MVT::i32)),
                DAG.getConstant(1, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // RetVal is i32; adapt it to the requested result type.
  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}

/// LowerSHL_PARTS - Lower a SHL_PARTS node.  Operands are (Lo, Hi, Amt), where
/// Lo and Hi are the two halves of the value being shifted left and Amt is the
/// shift amount.  Returns the merged pair { OutLo, OutHi }.
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  // Tmp1 = BitWidth - Amt
  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  // Tmp4 = (Hi << Amt) | (Lo >> (BitWidth - Amt))
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
  // Tmp5 = Amt - BitWidth; Tmp6 = Lo << (Amt - BitWidth)
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

/// LowerSRL_PARTS - Lower a SRL_PARTS node.  Operands are (Lo, Hi, Amt), where
/// Lo and Hi are the two halves of the value being logically shifted right and
/// Amt is the shift amount.  Returns the merged pair { OutLo, OutHi }.
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  // Tmp1 = BitWidth - Amt
  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  // Tmp4 = (Lo >> Amt) | (Hi << (BitWidth - Amt))
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  // Tmp5 = Amt - BitWidth; Tmp6 = Hi >> (Amt - BitWidth)
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

/// LowerSRA_PARTS - Lower a SRA_PARTS node.  Operands are (Lo, Hi, Amt), where
/// Lo and Hi are the two halves of the value being arithmetically shifted
/// right and Amt is the shift amount.  Returns the merged pair
/// { OutLo, OutHi }.
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  // Tmp1 = BitWidth - Amt
  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  // Tmp4 = (Lo >> Amt) | (Hi << (BitWidth - Amt))
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  // Tmp5 = Amt - BitWidth; Tmp6 = Hi >>s (Amt - BitWidth)
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  // OutLo = (Amt - BitWidth <= 0) ? Tmp4 : Tmp6
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.  If VT is MVT::Other, the canonical
/// vector type for SplatSize (1 -> v16i8, 2 -> v8i16, 4 -> v4i32) is used as
/// the result type instead.  Val must be in the range [-16, 15].
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, SDLoc dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const EVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  // The requested type is resolved before SplatSize is possibly overridden
  // below, so the -1 canonicalization does not change the result type.
  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDValue Elt = DAG.getConstant(Val, MVT::i32);
  SmallVector<SDValue, 8> Ops;
  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.  If DestVT is MVT::Other, the result type is the
/// type of Op.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
                                SelectionDAG &DAG, SDLoc dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.  If DestVT is MVT::Other, the result type is the
/// type of LHS.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, SDLoc dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.  If DestVT is MVT::Other, the result type is the
/// type of Op0.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG,
                                SDLoc dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}


/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.  Both inputs are bitcast
/// to v16i8 and the shuffle mask selects bytes Amt .. Amt+15 of the
/// concatenation of LHS and RHS.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
                              EVT VT, SelectionDAG &DAG, SDLoc dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
//
// Only constant splats whose splat width is at most 32 bits are handled;
// everything else returns a null SDValue.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, true) || SplatBitSize > 32)
    return SDValue();

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;   // Splat element size in bytes.

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, MVT::i32);
      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  // The shift pair sign-extends the SplatBitSize-wide value to 32 bits.
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);


  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.  (Only the 0x7FFF_FFFF form is matched by this 'if'.)
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 with vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // The remaining cases assume either big endian element order or
  // a splat-size that equates to the element size of the vector
  // to be built.  An example that doesn't work for little endian is
  // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
  // and a vector element size of 16 bits.  The code below will
  // produce the vector in big endian element order, which for little
  // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.

  // For now, just avoid these optimizations in that case.
  // FIXME: Develop correct optimizations for LE with mismatched
  // splat and element sizes.

  // NOTE(review): SplatSize is in bytes (SplatBitSize / 8) while the right
  // hand side is in bits, so the two look like they can never be equal and
  // this appears to bail out for every little-endian case -- confirm intent.
  if (Subtarget.isLittleEndian() &&
      SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
    return SDValue();

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // The IIDs tables below are indexed by SplatSize-1 (bytes); the unused
    // slot for a 3-byte size holds 0.

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    // NOTE(review): this condition is identical to the 'srl self' test just
    // above, which returns when it holds, so this branch looks unreachable.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.  As decoded below, an entry
/// holds an operation number in bits 26-29 and two 13-bit operand IDs (bits
/// 13-25 and 0-12) that index back into PerfectShuffleTable; the operands are
/// generated recursively until an OP_COPY entry is reached.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      SDLoc dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // The IDs are base-9 encodings of the four element indices:
    // <0,1,2,3> selects LHS and <4,5,6,7> selects RHS.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Byte-level shuffle mask; indices 0-15 select from OpLHS and 16-31 from
  // OpRHS.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Emit the merge/splat as a v16i8 shuffle and cast back to the operand type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
6284SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 6285 SelectionDAG &DAG) const { 6286 SDLoc dl(Op); 6287 SDValue V1 = Op.getOperand(0); 6288 SDValue V2 = Op.getOperand(1); 6289 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 6290 EVT VT = Op.getValueType(); 6291 bool isLittleEndian = Subtarget.isLittleEndian(); 6292 6293 // Cases that are handled by instructions that take permute immediates 6294 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 6295 // selected by the instruction selector. 6296 if (V2.getOpcode() == ISD::UNDEF) { 6297 if (PPC::isSplatShuffleMask(SVOp, 1) || 6298 PPC::isSplatShuffleMask(SVOp, 2) || 6299 PPC::isSplatShuffleMask(SVOp, 4) || 6300 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 6301 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 6302 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 6303 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 6304 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 6305 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 6306 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 6307 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 6308 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { 6309 return Op; 6310 } 6311 } 6312 6313 // Altivec has a variety of "shuffle immediates" that take two vector inputs 6314 // and produce a fixed permutation. If any of these match, do not lower to 6315 // VPERM. 6316 unsigned int ShuffleKind = isLittleEndian ? 
2 : 0; 6317 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 6318 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 6319 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 6320 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 6321 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 6322 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 6323 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 6324 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 6325 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) 6326 return Op; 6327 6328 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 6329 // perfect shuffle table to emit an optimal matching sequence. 6330 ArrayRef<int> PermMask = SVOp->getMask(); 6331 6332 unsigned PFIndexes[4]; 6333 bool isFourElementShuffle = true; 6334 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 6335 unsigned EltNo = 8; // Start out undef. 6336 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 6337 if (PermMask[i*4+j] < 0) 6338 continue; // Undef, ignore it. 6339 6340 unsigned ByteSource = PermMask[i*4+j]; 6341 if ((ByteSource & 3) != j) { 6342 isFourElementShuffle = false; 6343 break; 6344 } 6345 6346 if (EltNo == 8) { 6347 EltNo = ByteSource/4; 6348 } else if (EltNo != ByteSource/4) { 6349 isFourElementShuffle = false; 6350 break; 6351 } 6352 } 6353 PFIndexes[i] = EltNo; 6354 } 6355 6356 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 6357 // perfect shuffle vector to determine if it is cost effective to do this as 6358 // discrete instructions, or whether we should use a vperm. 6359 // For now, we skip this for little endian until such time as we have a 6360 // little-endian perfect shuffle table. 6361 if (isFourElementShuffle && !isLittleEndian) { 6362 // Compute the index in the perfect shuffle table. 
6363 unsigned PFTableIndex = 6364 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 6365 6366 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 6367 unsigned Cost = (PFEntry >> 30); 6368 6369 // Determining when to avoid vperm is tricky. Many things affect the cost 6370 // of vperm, particularly how many times the perm mask needs to be computed. 6371 // For example, if the perm mask can be hoisted out of a loop or is already 6372 // used (perhaps because there are multiple permutes with the same shuffle 6373 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 6374 // the loop requires an extra register. 6375 // 6376 // As a compromise, we only emit discrete instructions if the shuffle can be 6377 // generated in 3 or fewer operations. When we have loop information 6378 // available, if this block is within a loop, we should avoid using vperm 6379 // for 3-operation perms and use a constant pool load instead. 6380 if (Cost < 3) 6381 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 6382 } 6383 6384 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 6385 // vector that will get spilled to the constant pool. 6386 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 6387 6388 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 6389 // that it is in input element units, not in bytes. Convert now. 6390 6391 // For little endian, the order of the input vectors is reversed, and 6392 // the permutation mask is complemented with respect to 31. This is 6393 // necessary to produce proper semantics with the big-endian-biased vperm 6394 // instruction. 6395 EVT EltVT = V1.getValueType().getVectorElementType(); 6396 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 6397 6398 SmallVector<SDValue, 16> ResultMask; 6399 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 6400 unsigned SrcElt = PermMask[i] < 0 ? 
0 : PermMask[i]; 6401 6402 for (unsigned j = 0; j != BytesPerElement; ++j) 6403 if (isLittleEndian) 6404 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j), 6405 MVT::i32)); 6406 else 6407 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 6408 MVT::i32)); 6409 } 6410 6411 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 6412 ResultMask); 6413 if (isLittleEndian) 6414 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 6415 V2, V1, VPermMask); 6416 else 6417 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 6418 V1, V2, VPermMask); 6419} 6420 6421/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 6422/// altivec comparison. If it is, return true and fill in Opc/isDot with 6423/// information about the intrinsic. 6424static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 6425 bool &isDot) { 6426 unsigned IntrinsicID = 6427 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 6428 CompareOpc = -1; 6429 isDot = false; 6430 switch (IntrinsicID) { 6431 default: return false; 6432 // Comparison predicates. 
6433 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 6434 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 6435 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 6436 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 6437 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 6438 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 6439 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 6440 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 6441 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 6442 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 6443 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 6444 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 6445 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 6446 6447 // Normal Comparisons. 
6448 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 6449 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 6450 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 6451 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 6452 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 6453 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 6454 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 6455 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 6456 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 6457 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 6458 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 6459 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 6460 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 6461 } 6462 return true; 6463} 6464 6465/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 6466/// lower, do it, otherwise return null. 6467SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 6468 SelectionDAG &DAG) const { 6469 // If this is a lowered altivec predicate compare, CompareOpc is set to the 6470 // opcode number of the comparison. 6471 SDLoc dl(Op); 6472 int CompareOpc; 6473 bool isDot; 6474 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 6475 return SDValue(); // Don't custom lower most intrinsics. 6476 6477 // If this is a non-dot comparison, make the VCMP node and we are done. 6478 if (!isDot) { 6479 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 6480 Op.getOperand(1), Op.getOperand(2), 6481 DAG.getConstant(CompareOpc, MVT::i32)); 6482 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 6483 } 6484 6485 // Create the PPCISD altivec 'dot' comparison node. 
6486 SDValue Ops[] = { 6487 Op.getOperand(2), // LHS 6488 Op.getOperand(3), // RHS 6489 DAG.getConstant(CompareOpc, MVT::i32) 6490 }; 6491 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 6492 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 6493 6494 // Now that we have the comparison, emit a copy from the CR to a GPR. 6495 // This is flagged to the above dot comparison. 6496 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 6497 DAG.getRegister(PPC::CR6, MVT::i32), 6498 CompNode.getValue(1)); 6499 6500 // Unpack the result based on how the target uses it. 6501 unsigned BitNo; // Bit # of CR6. 6502 bool InvertBit; // Invert result? 6503 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 6504 default: // Can't happen, don't crash on invalid number though. 6505 case 0: // Return the value of the EQ bit of CR6. 6506 BitNo = 0; InvertBit = false; 6507 break; 6508 case 1: // Return the inverted value of the EQ bit of CR6. 6509 BitNo = 0; InvertBit = true; 6510 break; 6511 case 2: // Return the value of the LT bit of CR6. 6512 BitNo = 2; InvertBit = false; 6513 break; 6514 case 3: // Return the inverted value of the LT bit of CR6. 6515 BitNo = 2; InvertBit = true; 6516 break; 6517 } 6518 6519 // Shift the bit into the low position. 6520 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 6521 DAG.getConstant(8-(3-BitNo), MVT::i32)); 6522 // Isolate the bit. 6523 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 6524 DAG.getConstant(1, MVT::i32)); 6525 6526 // If we are supposed to, toggle the bit. 
6527 if (InvertBit) 6528 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 6529 DAG.getConstant(1, MVT::i32)); 6530 return Flags; 6531} 6532 6533SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 6534 SelectionDAG &DAG) const { 6535 SDLoc dl(Op); 6536 // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int 6537 // instructions), but for smaller types, we need to first extend up to v2i32 6538 // before doing going farther. 6539 if (Op.getValueType() == MVT::v2i64) { 6540 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 6541 if (ExtVT != MVT::v2i32) { 6542 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); 6543 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, 6544 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), 6545 ExtVT.getVectorElementType(), 4))); 6546 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); 6547 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, 6548 DAG.getValueType(MVT::v2i32)); 6549 } 6550 6551 return Op; 6552 } 6553 6554 return SDValue(); 6555} 6556 6557SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 6558 SelectionDAG &DAG) const { 6559 SDLoc dl(Op); 6560 // Create a stack slot that is 16-byte aligned. 6561 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 6562 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 6563 EVT PtrVT = getPointerTy(); 6564 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6565 6566 // Store the input value into Value#0 of the stack slot. 6567 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 6568 Op.getOperand(0), FIdx, MachinePointerInfo(), 6569 false, false, 0); 6570 // Load it out. 
6571 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 6572 false, false, false, 0); 6573} 6574 6575SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 6576 SDLoc dl(Op); 6577 if (Op.getValueType() == MVT::v4i32) { 6578 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6579 6580 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 6581 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 6582 6583 SDValue RHSSwap = // = vrlw RHS, 16 6584 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 6585 6586 // Shrinkify inputs to v8i16. 6587 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 6588 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 6589 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 6590 6591 // Low parts multiplied together, generating 32-bit results (we ignore the 6592 // top parts). 6593 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 6594 LHS, RHS, DAG, dl, MVT::v4i32); 6595 6596 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 6597 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 6598 // Shift the high parts up 16 bits. 6599 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 6600 Neg16, DAG, dl); 6601 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 6602 } else if (Op.getValueType() == MVT::v8i16) { 6603 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6604 6605 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 6606 6607 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 6608 LHS, RHS, Zero, DAG, dl); 6609 } else if (Op.getValueType() == MVT::v16i8) { 6610 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6611 bool isLittleEndian = Subtarget.isLittleEndian(); 6612 6613 // Multiply the even 8-bit parts, producing 16-bit sums. 
6614 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 6615 LHS, RHS, DAG, dl, MVT::v8i16); 6616 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 6617 6618 // Multiply the odd 8-bit parts, producing 16-bit sums. 6619 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 6620 LHS, RHS, DAG, dl, MVT::v8i16); 6621 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 6622 6623 // Merge the results together. Because vmuleub and vmuloub are 6624 // instructions with a big-endian bias, we must reverse the 6625 // element numbering and reverse the meaning of "odd" and "even" 6626 // when generating little endian code. 6627 int Ops[16]; 6628 for (unsigned i = 0; i != 8; ++i) { 6629 if (isLittleEndian) { 6630 Ops[i*2 ] = 2*i; 6631 Ops[i*2+1] = 2*i+16; 6632 } else { 6633 Ops[i*2 ] = 2*i+1; 6634 Ops[i*2+1] = 2*i+1+16; 6635 } 6636 } 6637 if (isLittleEndian) 6638 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 6639 else 6640 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 6641 } else { 6642 llvm_unreachable("Unknown mul to lower!"); 6643 } 6644} 6645 6646/// LowerOperation - Provide custom lowering hooks for some operations. 
///
/// Dispatches on the opcode of \p Op to the matching Lower*/lower* routine and
/// returns whatever that routine returns. ISD::INTRINSIC_W_CHAIN yields an
/// empty SDValue here; any opcode without a case is a fatal error.
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG, Subtarget);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG, Subtarget);

  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG, Subtarget);

  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, Subtarget);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);

  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  // Both FP -> integer conversions share one routine.
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                       SDLoc(Op));
  // Both integer -> FP conversions share one routine.
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
}

/// ReplaceNodeResults - Custom type-legalization hook. For the node \p N,
/// replacement values are appended to \p Results; several paths below return
/// (or break) without appending anything. An opcode without a case is a fatal
/// error.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  const TargetMachine &TM = getTargetMachine();
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    // Replace with a READ_TIME_BASE node that yields two i32 values plus a
    // chain, and hand back all three results.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Only the CTR-decrement intrinsic is handled; anything else is left
    // alone (nothing is appended to Results).
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    // Rebuild the intrinsic node with the setcc result type in place of i1,
    // keeping the chain result and the first two operands.
    EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    // Only the SVR4 ABI on a non-64-bit subtarget is handled here.
    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
        || TM.getSubtarget<PPCSubtarget>().isPPC64())
      return;

    EVT VT = N->getValueType(0);

    // Only an i64 va_arg is replaced; other types append nothing.
    if (VT == MVT::i64) {
      // NOTE(review): LowerVAARG is handed result #1 of N (not #0) -- confirm
      // that this is intended.
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    // Split the ppcf128 operand into its two f64 elements.
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1));

    // Add the two halves of the long double in round-to-zero mode.
    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                  FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  }
}


//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

/// Emit, at \p Builder's insertion point, an argument-less call to the
/// intrinsic \p Id, declared in the module that owns the insertion block.
/// Returns the value produced by IRBuilder::CreateCall.
static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func);
}

// The mappings for emitLeading/TrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html

/// Fence to place before an atomic operation with ordering \p Ord:
/// ppc_sync for sequentially consistent, ppc_lwsync for any other ordering
/// that is at least release, and nothing (nullptr) otherwise. \p IsStore and
/// \p IsLoad are not consulted.
Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                         AtomicOrdering Ord, bool IsStore,
                                         bool IsLoad) const {
  if (Ord == SequentiallyConsistent)
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  else if (isAtLeastRelease(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  else
    return nullptr;
}

/// Fence to place after an atomic operation with ordering \p Ord:
/// ppc_lwsync when the operation is a load and \p Ord is at least acquire,
/// nothing (nullptr) otherwise. \p IsStore is not consulted.
Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
  if (IsLoad && isAtLeastAcquire(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  // FIXME: this is too conservative, a dependent branch + isync is enough.
  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
  else
    return nullptr;
}

/// EmitAtomicBinary - Expand a word/doubleword atomic read-modify-write
/// pseudo \p MI into a load-reserve / store-conditional retry loop.
///
/// \p MI's operands are read as: 0 = destination register, 1 and 2 = the two
/// address registers, 3 = the operand value. \p is64bit selects the
/// doubleword instructions and register class. \p BinOpcode is the machine
/// opcode applied to (operand, loaded value); 0 means a plain swap, in which
/// case the operand itself is stored.
///
/// Everything after \p MI in \p BB is moved into a new exit block, which is
/// returned. \p MI itself is not erased here.
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                    bool is64bit, unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII =
      getTargetMachine().getSubtargetImpl()->getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  // Insertion point for the new blocks: right after BB.
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  // Move everything following MI, and BB's successor edges, to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // For a swap the value to store is the operand itself; otherwise a fresh
  // virtual register receives the result of BinOpcode.
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
                                           : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  // Retry when CR0 reports "not equal" after the conditional store.
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}

/// EmitPartwordAtomicBinary - Expand a byte/halfword atomic read-modify-write
/// pseudo \p MI into a word-sized lwarx/stwcx. retry loop that only changes
/// the addressed part of the containing word.
///
/// \p MI's operands are read as in EmitAtomicBinary (0 = destination, 1 and
/// 2 = address registers, 3 = operand value). \p is8bit selects byte (true)
/// versus halfword (false) handling. \p BinOpcode is the machine opcode to
/// apply; 0 means a plain swap.
///
/// Everything after \p MI in \p BB is moved into a new exit block, which is
/// returned after a final shift that extracts the old value into the
/// destination. \p MI itself is not erased here.
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit,    // operation
                                            unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII =
      getTargetMachine().getSubtargetImpl()->getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  // Insertion point for the new blocks: right after BB.
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  // Move everything following MI, and BB's successor edges, to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                          : &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  // Assigned on both paths of the ptrA check below.
  unsigned Ptr1Reg;
  // For a swap the (shifted) operand is merged in directly; otherwise a fresh
  // virtual register receives the result of BinOpcode.
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    // The first address register is the zero register: no add is needed.
    Ptr1Reg = ptrB;
  }
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  // Compute the address used by the word-sized lwarx/stwcx. below.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  // Build the unshifted mask: 255 for a byte, 65535 for a halfword.
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  // Merge: keep the loaded word outside the mask, take the new value inside.
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  // Retry when CR0 reports "not equal" after the conditional store.
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Inserted at the very start of exitMBB: shift the loaded word down into
  // the destination register.
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}

/// emitEHSjLjSetJmp - Expand the SjLj setjmp pseudo \p MI.
///
/// \p MI's operand 0 is the destination register (must be able to hold i32)
/// and operand 1 is the register holding the buffer address. \p MBB is split
/// into thisMBB / mainMBB / sinkMBB as sketched in the body; the destination
/// becomes a PHI of 0 (via mainMBB) and 1 (directly from thisMBB). \p MI is
/// erased and the sink block, which receives everything that followed \p MI,
/// is returned.
llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII =
      getTargetMachine().getSubtargetImpl()->getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  // Insertion point for the new blocks: right after MBB.
  MachineFunction::iterator I = MBB;
  ++I;

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  unsigned DstReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(RC->hasType(MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Slot offsets are multiples of the pointer store size; the base-pointer
  // register stored below goes into the fifth slot.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare a register to hold the IP (the label address).
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI->getOperand(1).getReg();

  // Save the TOC pointer (X2); only done for 64-bit SVR4.
  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction()->getAttributes().hasAttribute(
          AttributeSet::FunctionIndex, Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
          .addReg(BaseReg)
          .addImm(BPOffset)
          .addReg(BufReg);
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Setup
  // The branch-and-link to mainMBB carries the "no preserved registers" mask.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  const PPCRegisterInfo *TRI =
    getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);

  // mainMBB:
  //  mainDstReg = 0
  // First read the link register into LabelReg.
  MIB = BuildMI(mainMBB, DL,
    TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }

  MIB.setMemRefs(MMOBegin, MMOEnd);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI->eraseFromParent();
  return sinkMBB;
}

/// emitEHSjLjLongJmp - Expand the SjLj longjmp pseudo \p MI.
///
/// \p MI's operand 0 is the register holding the buffer address. In front of
/// \p MI this emits reloads of the frame pointer, the jump address, the stack
/// pointer, the base pointer and (64-bit SVR4 only) the TOC pointer from the
/// buffer, followed by a move of the jump address to CTR and a branch through
/// it. \p MI is erased and \p MBB is returned; no new blocks are created.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII =
      getTargetMachine().getSubtargetImpl()->getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  // Virtual register that receives the jump address.
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  // Base pointer register: X30 with 64-bit pointers; otherwise R29 for SVR4
  // with the PIC relocation model, R30 in all remaining cases.
  unsigned BP  = (PVT == MVT::i64) ? PPC::X30 :
                  (Subtarget.isSVR4ABI() &&
                   MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
                     PPC::R29 : PPC::R30);

  MachineInstrBuilder MIB;

  // Slot offsets within the buffer, in multiples of the pointer store size
  // (the frame pointer is read from offset 0).
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  unsigned BufReg = MI->getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);

    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI->eraseFromParent();
  return MBB;
}

MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  if (MI->getOpcode() == TargetOpcode::STACKMAP ||
      MI->getOpcode() == TargetOpcode::PATCHPOINT)
    return emitPatchPoint(MI, BB);

  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII =
      getTargetMachine().getSubtargetImpl()->getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  MachineFunction *F = BB->getParent();

  if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                              MI->getOpcode() == PPC::SELECT_CC_I8 ||
                              MI->getOpcode() == PPC::SELECT_I4 ||
                              MI->getOpcode() == PPC::SELECT_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
        MI->getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI->getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI->getOperand(1));

    DebugLoc dl = MI->getDebugLoc();
    const TargetInstrInfo *TII =
        getTargetMachine().getSubtargetImpl()->getInstrInfo();
    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
                      Cond, MI->getOperand(2).getReg(),
                      MI->getOperand(3).getReg());
  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
             MI->getOpcode() == PPC::SELECT_CC_I8 ||
             MI->getOpcode() == PPC::SELECT_CC_F4 ||
             MI->getOpcode() == PPC::SELECT_CC_F8 ||
MI->getOpcode() == PPC::SELECT_CC_VRRC || 7315 MI->getOpcode() == PPC::SELECT_CC_VSFRC || 7316 MI->getOpcode() == PPC::SELECT_CC_VSRC || 7317 MI->getOpcode() == PPC::SELECT_I4 || 7318 MI->getOpcode() == PPC::SELECT_I8 || 7319 MI->getOpcode() == PPC::SELECT_F4 || 7320 MI->getOpcode() == PPC::SELECT_F8 || 7321 MI->getOpcode() == PPC::SELECT_VRRC || 7322 MI->getOpcode() == PPC::SELECT_VSFRC || 7323 MI->getOpcode() == PPC::SELECT_VSRC) { 7324 // The incoming instruction knows the destination vreg to set, the 7325 // condition code register to branch on, the true/false values to 7326 // select between, and a branch opcode to use. 7327 7328 // thisMBB: 7329 // ... 7330 // TrueVal = ... 7331 // cmpTY ccX, r1, r2 7332 // bCC copy1MBB 7333 // fallthrough --> copy0MBB 7334 MachineBasicBlock *thisMBB = BB; 7335 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7336 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7337 DebugLoc dl = MI->getDebugLoc(); 7338 F->insert(It, copy0MBB); 7339 F->insert(It, sinkMBB); 7340 7341 // Transfer the remainder of BB and its successor edges to sinkMBB. 7342 sinkMBB->splice(sinkMBB->begin(), BB, 7343 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7344 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7345 7346 // Next, add the true and fallthrough blocks as its successors. 
7347 BB->addSuccessor(copy0MBB); 7348 BB->addSuccessor(sinkMBB); 7349 7350 if (MI->getOpcode() == PPC::SELECT_I4 || 7351 MI->getOpcode() == PPC::SELECT_I8 || 7352 MI->getOpcode() == PPC::SELECT_F4 || 7353 MI->getOpcode() == PPC::SELECT_F8 || 7354 MI->getOpcode() == PPC::SELECT_VRRC || 7355 MI->getOpcode() == PPC::SELECT_VSFRC || 7356 MI->getOpcode() == PPC::SELECT_VSRC) { 7357 BuildMI(BB, dl, TII->get(PPC::BC)) 7358 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 7359 } else { 7360 unsigned SelectPred = MI->getOperand(4).getImm(); 7361 BuildMI(BB, dl, TII->get(PPC::BCC)) 7362 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 7363 } 7364 7365 // copy0MBB: 7366 // %FalseValue = ... 7367 // # fallthrough to sinkMBB 7368 BB = copy0MBB; 7369 7370 // Update machine-CFG edges 7371 BB->addSuccessor(sinkMBB); 7372 7373 // sinkMBB: 7374 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7375 // ... 7376 BB = sinkMBB; 7377 BuildMI(*BB, BB->begin(), dl, 7378 TII->get(PPC::PHI), MI->getOperand(0).getReg()) 7379 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 7380 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 7381 } else if (MI->getOpcode() == PPC::ReadTB) { 7382 // To read the 64-bit time-base register on a 32-bit target, we read the 7383 // two halves. Should the counter have wrapped while it was being read, we 7384 // need to try again. 7385 // ... 7386 // readLoop: 7387 // mfspr Rx,TBU # load from TBU 7388 // mfspr Ry,TB # load from TB 7389 // mfspr Rz,TBU # load from TBU 7390 // cmpw crX,Rx,Rz # check if ���old���=���new��� 7391 // bne readLoop # branch if they're not equal 7392 // ... 7393 7394 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 7395 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7396 DebugLoc dl = MI->getDebugLoc(); 7397 F->insert(It, readMBB); 7398 F->insert(It, sinkMBB); 7399 7400 // Transfer the remainder of BB and its successor edges to sinkMBB. 
7401 sinkMBB->splice(sinkMBB->begin(), BB, 7402 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7403 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7404 7405 BB->addSuccessor(readMBB); 7406 BB = readMBB; 7407 7408 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7409 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 7410 unsigned LoReg = MI->getOperand(0).getReg(); 7411 unsigned HiReg = MI->getOperand(1).getReg(); 7412 7413 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 7414 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 7415 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 7416 7417 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 7418 7419 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 7420 .addReg(HiReg).addReg(ReadAgainReg); 7421 BuildMI(BB, dl, TII->get(PPC::BCC)) 7422 .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); 7423 7424 BB->addSuccessor(readMBB); 7425 BB->addSuccessor(sinkMBB); 7426 } 7427 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 7428 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 7429 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 7430 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 7431 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 7432 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); 7433 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 7434 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); 7435 7436 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 7437 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 7438 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 7439 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 7440 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 7441 BB = EmitAtomicBinary(MI, BB, false, PPC::AND); 7442 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 7443 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); 7444 7445 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 
7446 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 7447 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 7448 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 7449 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 7450 BB = EmitAtomicBinary(MI, BB, false, PPC::OR); 7451 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 7452 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); 7453 7454 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 7455 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 7456 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 7457 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 7458 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 7459 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 7460 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 7461 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 7462 7463 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 7464 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 7465 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 7466 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 7467 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 7468 BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); 7469 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 7470 BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); 7471 7472 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 7473 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 7474 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 7475 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 7476 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 7477 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 7478 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 7479 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 7480 7481 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 7482 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 7483 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 7484 BB = 
EmitPartwordAtomicBinary(MI, BB, false, 0); 7485 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 7486 BB = EmitAtomicBinary(MI, BB, false, 0); 7487 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 7488 BB = EmitAtomicBinary(MI, BB, true, 0); 7489 7490 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 7491 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { 7492 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 7493 7494 unsigned dest = MI->getOperand(0).getReg(); 7495 unsigned ptrA = MI->getOperand(1).getReg(); 7496 unsigned ptrB = MI->getOperand(2).getReg(); 7497 unsigned oldval = MI->getOperand(3).getReg(); 7498 unsigned newval = MI->getOperand(4).getReg(); 7499 DebugLoc dl = MI->getDebugLoc(); 7500 7501 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 7502 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 7503 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 7504 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7505 F->insert(It, loop1MBB); 7506 F->insert(It, loop2MBB); 7507 F->insert(It, midMBB); 7508 F->insert(It, exitMBB); 7509 exitMBB->splice(exitMBB->begin(), BB, 7510 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7511 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7512 7513 // thisMBB: 7514 // ... 7515 // fallthrough --> loopMBB 7516 BB->addSuccessor(loop1MBB); 7517 7518 // loop1MBB: 7519 // l[wd]arx dest, ptr 7520 // cmp[wd] dest, oldval 7521 // bne- midMBB 7522 // loop2MBB: 7523 // st[wd]cx. newval, ptr 7524 // bne- loopMBB 7525 // b exitBB 7526 // midMBB: 7527 // st[wd]cx. dest, ptr 7528 // exitBB: 7529 BB = loop1MBB; 7530 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 7531 .addReg(ptrA).addReg(ptrB); 7532 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) 7533 .addReg(oldval).addReg(dest); 7534 BuildMI(BB, dl, TII->get(PPC::BCC)) 7535 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 7536 BB->addSuccessor(loop2MBB); 7537 BB->addSuccessor(midMBB); 7538 7539 BB = loop2MBB; 7540 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 7541 .addReg(newval).addReg(ptrA).addReg(ptrB); 7542 BuildMI(BB, dl, TII->get(PPC::BCC)) 7543 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 7544 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 7545 BB->addSuccessor(loop1MBB); 7546 BB->addSuccessor(exitMBB); 7547 7548 BB = midMBB; 7549 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 7550 .addReg(dest).addReg(ptrA).addReg(ptrB); 7551 BB->addSuccessor(exitMBB); 7552 7553 // exitMBB: 7554 // ... 7555 BB = exitMBB; 7556 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 7557 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 7558 // We must use 64-bit registers for addresses when targeting 64-bit, 7559 // since we're actually doing arithmetic on them. Other registers 7560 // can be 32-bit. 
7561 bool is64bit = Subtarget.isPPC64(); 7562 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 7563 7564 unsigned dest = MI->getOperand(0).getReg(); 7565 unsigned ptrA = MI->getOperand(1).getReg(); 7566 unsigned ptrB = MI->getOperand(2).getReg(); 7567 unsigned oldval = MI->getOperand(3).getReg(); 7568 unsigned newval = MI->getOperand(4).getReg(); 7569 DebugLoc dl = MI->getDebugLoc(); 7570 7571 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 7572 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 7573 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 7574 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7575 F->insert(It, loop1MBB); 7576 F->insert(It, loop2MBB); 7577 F->insert(It, midMBB); 7578 F->insert(It, exitMBB); 7579 exitMBB->splice(exitMBB->begin(), BB, 7580 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7581 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7582 7583 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7584 const TargetRegisterClass *RC = is64bit ? 
&PPC::G8RCRegClass 7585 : &PPC::GPRCRegClass; 7586 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 7587 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 7588 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 7589 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 7590 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 7591 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 7592 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 7593 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 7594 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 7595 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 7596 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 7597 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 7598 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 7599 unsigned Ptr1Reg; 7600 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 7601 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 7602 // thisMBB: 7603 // ... 7604 // fallthrough --> loopMBB 7605 BB->addSuccessor(loop1MBB); 7606 7607 // The 4-byte load must be aligned, while a char or short may be 7608 // anywhere in the word. Hence all this nasty bookkeeping code. 7609 // add ptr1, ptrA, ptrB [copy if ptrA==0] 7610 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 7611 // xori shift, shift1, 24 [16] 7612 // rlwinm ptr, ptr1, 0, 0, 29 7613 // slw newval2, newval, shift 7614 // slw oldval2, oldval,shift 7615 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 7616 // slw mask, mask2, shift 7617 // and newval3, newval2, mask 7618 // and oldval3, oldval2, mask 7619 // loop1MBB: 7620 // lwarx tmpDest, ptr 7621 // and tmp, tmpDest, mask 7622 // cmpw tmp, oldval3 7623 // bne- midMBB 7624 // loop2MBB: 7625 // andc tmp2, tmpDest, mask 7626 // or tmp4, tmp2, newval3 7627 // stwcx. tmp4, ptr 7628 // bne- loop1MBB 7629 // b exitBB 7630 // midMBB: 7631 // stwcx. 
tmpDest, ptr 7632 // exitBB: 7633 // srw dest, tmpDest, shift 7634 if (ptrA != ZeroReg) { 7635 Ptr1Reg = RegInfo.createVirtualRegister(RC); 7636 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 7637 .addReg(ptrA).addReg(ptrB); 7638 } else { 7639 Ptr1Reg = ptrB; 7640 } 7641 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 7642 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 7643 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 7644 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 7645 if (is64bit) 7646 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 7647 .addReg(Ptr1Reg).addImm(0).addImm(61); 7648 else 7649 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 7650 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 7651 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 7652 .addReg(newval).addReg(ShiftReg); 7653 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 7654 .addReg(oldval).addReg(ShiftReg); 7655 if (is8bit) 7656 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 7657 else { 7658 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 7659 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 7660 .addReg(Mask3Reg).addImm(65535); 7661 } 7662 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 7663 .addReg(Mask2Reg).addReg(ShiftReg); 7664 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 7665 .addReg(NewVal2Reg).addReg(MaskReg); 7666 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 7667 .addReg(OldVal2Reg).addReg(MaskReg); 7668 7669 BB = loop1MBB; 7670 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 7671 .addReg(ZeroReg).addReg(PtrReg); 7672 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 7673 .addReg(TmpDestReg).addReg(MaskReg); 7674 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 7675 .addReg(TmpReg).addReg(OldVal3Reg); 7676 BuildMI(BB, dl, TII->get(PPC::BCC)) 7677 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 7678 BB->addSuccessor(loop2MBB); 7679 BB->addSuccessor(midMBB); 7680 7681 BB = loop2MBB; 7682 BuildMI(BB, dl, 
TII->get(PPC::ANDC),Tmp2Reg) 7683 .addReg(TmpDestReg).addReg(MaskReg); 7684 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 7685 .addReg(Tmp2Reg).addReg(NewVal3Reg); 7686 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 7687 .addReg(ZeroReg).addReg(PtrReg); 7688 BuildMI(BB, dl, TII->get(PPC::BCC)) 7689 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 7690 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 7691 BB->addSuccessor(loop1MBB); 7692 BB->addSuccessor(exitMBB); 7693 7694 BB = midMBB; 7695 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 7696 .addReg(ZeroReg).addReg(PtrReg); 7697 BB->addSuccessor(exitMBB); 7698 7699 // exitMBB: 7700 // ... 7701 BB = exitMBB; 7702 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 7703 .addReg(ShiftReg); 7704 } else if (MI->getOpcode() == PPC::FADDrtz) { 7705 // This pseudo performs an FADD with rounding mode temporarily forced 7706 // to round-to-zero. We emit this via custom inserter since the FPSCR 7707 // is not modeled at the SelectionDAG level. 7708 unsigned Dest = MI->getOperand(0).getReg(); 7709 unsigned Src1 = MI->getOperand(1).getReg(); 7710 unsigned Src2 = MI->getOperand(2).getReg(); 7711 DebugLoc dl = MI->getDebugLoc(); 7712 7713 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7714 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 7715 7716 // Save FPSCR value. 7717 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 7718 7719 // Set rounding mode to round-to-zero. 7720 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 7721 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 7722 7723 // Perform addition. 7724 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 7725 7726 // Restore FPSCR value. 
7727 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); 7728 } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 7729 MI->getOpcode() == PPC::ANDIo_1_GT_BIT || 7730 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 7731 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { 7732 unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 7733 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? 7734 PPC::ANDIo8 : PPC::ANDIo; 7735 bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 7736 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); 7737 7738 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7739 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 7740 &PPC::GPRCRegClass : 7741 &PPC::G8RCRegClass); 7742 7743 DebugLoc dl = MI->getDebugLoc(); 7744 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 7745 .addReg(MI->getOperand(1).getReg()).addImm(1); 7746 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 7747 MI->getOperand(0).getReg()) 7748 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 7749 } else { 7750 llvm_unreachable("Unexpected instr type to insert"); 7751 } 7752 7753 MI->eraseFromParent(); // The pseudo instruction is gone now. 7754 return BB; 7755} 7756 7757//===----------------------------------------------------------------------===// 7758// Target Optimization Hooks 7759//===----------------------------------------------------------------------===// 7760 7761SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, 7762 DAGCombinerInfo &DCI, 7763 unsigned &RefinementSteps, 7764 bool &UseOneConstNR) const { 7765 EVT VT = Operand.getValueType(); 7766 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 7767 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 7768 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 7769 (VT == MVT::v2f64 && Subtarget.hasVSX())) { 7770 // Convergence is quadratic, so we essentially double the number of digits 7771 // correct after every iteration. For both FRE and FRSQRTE, the minimum 7772 // architected relative accuracy is 2^-5. 
When hasRecipPrec(), this is 7773 // 2^-14. IEEE float has 23 digits and double has 52 digits. 7774 RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; 7775 if (VT.getScalarType() == MVT::f64) 7776 ++RefinementSteps; 7777 UseOneConstNR = true; 7778 return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); 7779 } 7780 return SDValue(); 7781} 7782 7783SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, 7784 DAGCombinerInfo &DCI, 7785 unsigned &RefinementSteps) const { 7786 EVT VT = Operand.getValueType(); 7787 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 7788 (VT == MVT::f64 && Subtarget.hasFRE()) || 7789 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 7790 (VT == MVT::v2f64 && Subtarget.hasVSX())) { 7791 // Convergence is quadratic, so we essentially double the number of digits 7792 // correct after every iteration. For both FRE and FRSQRTE, the minimum 7793 // architected relative accuracy is 2^-5. When hasRecipPrec(), this is 7794 // 2^-14. IEEE float has 23 digits and double has 52 digits. 7795 RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; 7796 if (VT.getScalarType() == MVT::f64) 7797 ++RefinementSteps; 7798 return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); 7799 } 7800 return SDValue(); 7801} 7802 7803bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { 7804 // Note: This functionality is used only when unsafe-fp-math is enabled, and 7805 // on cores with reciprocal estimates (which are used when unsafe-fp-math is 7806 // enabled for division), this functionality is redundant with the default 7807 // combiner logic (once the division -> reciprocal/multiply transformation 7808 // has taken place). As a result, this matters more for older cores than for 7809 // newer ones. 
7810 7811 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 7812 // reciprocal if there are two or more FDIVs (for embedded cores with only 7813 // one FP pipeline) for three or more FDIVs (for generic OOO cores). 7814 switch (Subtarget.getDarwinDirective()) { 7815 default: 7816 return NumUsers > 2; 7817 case PPC::DIR_440: 7818 case PPC::DIR_A2: 7819 case PPC::DIR_E500mc: 7820 case PPC::DIR_E5500: 7821 return NumUsers > 1; 7822 } 7823} 7824 7825static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 7826 unsigned Bytes, int Dist, 7827 SelectionDAG &DAG) { 7828 if (VT.getSizeInBits() / 8 != Bytes) 7829 return false; 7830 7831 SDValue BaseLoc = Base->getBasePtr(); 7832 if (Loc.getOpcode() == ISD::FrameIndex) { 7833 if (BaseLoc.getOpcode() != ISD::FrameIndex) 7834 return false; 7835 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 7836 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 7837 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 7838 int FS = MFI->getObjectSize(FI); 7839 int BFS = MFI->getObjectSize(BFI); 7840 if (FS != BFS || FS != (int)Bytes) return false; 7841 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 7842 } 7843 7844 // Handle X+C 7845 if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && 7846 cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) 7847 return true; 7848 7849 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7850 const GlobalValue *GV1 = nullptr; 7851 const GlobalValue *GV2 = nullptr; 7852 int64_t Offset1 = 0; 7853 int64_t Offset2 = 0; 7854 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 7855 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 7856 if (isGA1 && isGA2 && GV1 == GV2) 7857 return Offset1 == (Offset2 + Dist*Bytes); 7858 return false; 7859} 7860 7861// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 7862// not enforce equality of 
the chain operands. 7863static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 7864 unsigned Bytes, int Dist, 7865 SelectionDAG &DAG) { 7866 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 7867 EVT VT = LS->getMemoryVT(); 7868 SDValue Loc = LS->getBasePtr(); 7869 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 7870 } 7871 7872 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 7873 EVT VT; 7874 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 7875 default: return false; 7876 case Intrinsic::ppc_altivec_lvx: 7877 case Intrinsic::ppc_altivec_lvxl: 7878 case Intrinsic::ppc_vsx_lxvw4x: 7879 VT = MVT::v4i32; 7880 break; 7881 case Intrinsic::ppc_vsx_lxvd2x: 7882 VT = MVT::v2f64; 7883 break; 7884 case Intrinsic::ppc_altivec_lvebx: 7885 VT = MVT::i8; 7886 break; 7887 case Intrinsic::ppc_altivec_lvehx: 7888 VT = MVT::i16; 7889 break; 7890 case Intrinsic::ppc_altivec_lvewx: 7891 VT = MVT::i32; 7892 break; 7893 } 7894 7895 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 7896 } 7897 7898 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 7899 EVT VT; 7900 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 7901 default: return false; 7902 case Intrinsic::ppc_altivec_stvx: 7903 case Intrinsic::ppc_altivec_stvxl: 7904 case Intrinsic::ppc_vsx_stxvw4x: 7905 VT = MVT::v4i32; 7906 break; 7907 case Intrinsic::ppc_vsx_stxvd2x: 7908 VT = MVT::v2f64; 7909 break; 7910 case Intrinsic::ppc_altivec_stvebx: 7911 VT = MVT::i8; 7912 break; 7913 case Intrinsic::ppc_altivec_stvehx: 7914 VT = MVT::i16; 7915 break; 7916 case Intrinsic::ppc_altivec_stvewx: 7917 VT = MVT::i32; 7918 break; 7919 } 7920 7921 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); 7922 } 7923 7924 return false; 7925} 7926 7927// Return true is there is a nearyby consecutive load to the one provided 7928// (regardless of alignment). We search up and down the chain, looking though 7929// token factors and other loads (but nothing else). 
As a result, a true result 7930// indicates that it is safe to create a new consecutive load adjacent to the 7931// load provided. 7932static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 7933 SDValue Chain = LD->getChain(); 7934 EVT VT = LD->getMemoryVT(); 7935 7936 SmallSet<SDNode *, 16> LoadRoots; 7937 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 7938 SmallSet<SDNode *, 16> Visited; 7939 7940 // First, search up the chain, branching to follow all token-factor operands. 7941 // If we find a consecutive load, then we're done, otherwise, record all 7942 // nodes just above the top-level loads and token factors. 7943 while (!Queue.empty()) { 7944 SDNode *ChainNext = Queue.pop_back_val(); 7945 if (!Visited.insert(ChainNext).second) 7946 continue; 7947 7948 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { 7949 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 7950 return true; 7951 7952 if (!Visited.count(ChainLD->getChain().getNode())) 7953 Queue.push_back(ChainLD->getChain().getNode()); 7954 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 7955 for (const SDUse &O : ChainNext->ops()) 7956 if (!Visited.count(O.getNode())) 7957 Queue.push_back(O.getNode()); 7958 } else 7959 LoadRoots.insert(ChainNext); 7960 } 7961 7962 // Second, search down the chain, starting from the top-level nodes recorded 7963 // in the first phase. These top-level nodes are the nodes just above all 7964 // loads and token factors. Starting with their uses, recursively look though 7965 // all loads (just the chain uses) and token factors to find a consecutive 7966 // load. 
7967 Visited.clear(); 7968 Queue.clear(); 7969 7970 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 7971 IE = LoadRoots.end(); I != IE; ++I) { 7972 Queue.push_back(*I); 7973 7974 while (!Queue.empty()) { 7975 SDNode *LoadRoot = Queue.pop_back_val(); 7976 if (!Visited.insert(LoadRoot).second) 7977 continue; 7978 7979 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 7980 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 7981 return true; 7982 7983 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 7984 UE = LoadRoot->use_end(); UI != UE; ++UI) 7985 if (((isa<MemSDNode>(*UI) && 7986 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 7987 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 7988 Queue.push_back(*UI); 7989 } 7990 } 7991 7992 return false; 7993} 7994 7995SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 7996 DAGCombinerInfo &DCI) const { 7997 SelectionDAG &DAG = DCI.DAG; 7998 SDLoc dl(N); 7999 8000 assert(Subtarget.useCRBits() && 8001 "Expecting to be tracking CR bits"); 8002 // If we're tracking CR bits, we need to be careful that we don't have: 8003 // trunc(binary-ops(zext(x), zext(y))) 8004 // or 8005 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 8006 // such that we're unnecessarily moving things into GPRs when it would be 8007 // better to keep them in CR bits. 8008 8009 // Note that trunc here can be an actual i1 trunc, or can be the effective 8010 // truncation that comes from a setcc or select_cc. 8011 if (N->getOpcode() == ISD::TRUNCATE && 8012 N->getValueType(0) != MVT::i1) 8013 return SDValue(); 8014 8015 if (N->getOperand(0).getValueType() != MVT::i32 && 8016 N->getOperand(0).getValueType() != MVT::i64) 8017 return SDValue(); 8018 8019 if (N->getOpcode() == ISD::SETCC || 8020 N->getOpcode() == ISD::SELECT_CC) { 8021 // If we're looking at a comparison, then we need to make sure that the 8022 // high bits (all except for the first) don't matter the result. 
// The condition code is operand 2 of a SETCC and operand 4 of a SELECT_CC.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed comparison: every bit of both operands must be a sign bit.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned comparison: the upper OpBits-1 bits of both operands must be
      // known to be zero.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return SDValue();
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      APInt Op1Zero, Op1One;
      APInt Op2Zero, Op2One;
      DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
      DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Zero.clearBit(0); Op1One.clearBit(0);
      Op2Zero.clearBit(0); Op2One.clearBit(0);

      if (Op1Zero != Op2Zero || Op1One != Op2One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // For comparisons the second operand must satisfy the same opcode
  // restriction as the first.
  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // Inputs:  leaves of the cluster (extensions from i1, or constants).
  // BinOps:  worklist of interior nodes still to be visited.
  // PromOps: interior nodes, in visit order, that will be rebuilt as i1.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the lists from N's operand(s): only operand 0 for a TRUNCATE, the
  // two compared operands otherwise.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    // Each node is processed at most once.
    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      // For SELECT_CC only operands 2 and 3 are examined.
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      // Every user must be N itself or a node collected above.
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  // Apply the same self-containment test to the interior nodes.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOps.empty()) {
    SDValue PromOp = PromOps.back();
    PromOps.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        // Requeue at the front so it is retried after the other entries.
        PromOps.insert(PromOps.begin(), PromOp);
        continue;
      }

      // The truncation/extension is replaced by its operand; a constant
      // operand is first truncated to i1.
      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first of the two value operands that get
    // promoted: 0 by default, 1 for SELECT, 2 for SELECT_CC.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOps.insert(PromOps.begin(), PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    // Rebuild the node with the same opcode and an i1 result type.
    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}

SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 &&
         Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 &&
         Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
8316 while (!BinOps.empty()) { 8317 SDValue BinOp = BinOps.back(); 8318 BinOps.pop_back(); 8319 8320 if (!Visited.insert(BinOp.getNode()).second) 8321 continue; 8322 8323 PromOps.push_back(BinOp); 8324 8325 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 8326 // The condition of the select is not promoted. 8327 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 8328 continue; 8329 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 8330 continue; 8331 8332 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 8333 isa<ConstantSDNode>(BinOp.getOperand(i))) { 8334 Inputs.push_back(BinOp.getOperand(i)); 8335 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 8336 BinOp.getOperand(i).getOpcode() == ISD::OR || 8337 BinOp.getOperand(i).getOpcode() == ISD::XOR || 8338 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 8339 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 8340 BinOps.push_back(BinOp.getOperand(i)); 8341 } else { 8342 // We have an input that is not a truncation or another binary 8343 // operation; we'll abort this transformation. 8344 return SDValue(); 8345 } 8346 } 8347 } 8348 8349 // The operands of a select that must be truncated when the select is 8350 // promoted because the operand is actually part of the to-be-promoted set. 8351 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 8352 8353 // Make sure that this is a self-contained cluster of operations (which 8354 // is not quite the same thing as saying that everything has only one 8355 // use). 8356 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8357 if (isa<ConstantSDNode>(Inputs[i])) 8358 continue; 8359 8360 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 8361 UE = Inputs[i].getNode()->use_end(); 8362 UI != UE; ++UI) { 8363 SDNode *User = *UI; 8364 if (User != N && !Visited.count(User)) 8365 return SDValue(); 8366 8367 // If we're going to promote the non-output-value operand(s) or SELECT or 8368 // SELECT_CC, record them for truncation. 
8369 if (User->getOpcode() == ISD::SELECT) { 8370 if (User->getOperand(0) == Inputs[i]) 8371 SelectTruncOp[0].insert(std::make_pair(User, 8372 User->getOperand(0).getValueType())); 8373 } else if (User->getOpcode() == ISD::SELECT_CC) { 8374 if (User->getOperand(0) == Inputs[i]) 8375 SelectTruncOp[0].insert(std::make_pair(User, 8376 User->getOperand(0).getValueType())); 8377 if (User->getOperand(1) == Inputs[i]) 8378 SelectTruncOp[1].insert(std::make_pair(User, 8379 User->getOperand(1).getValueType())); 8380 } 8381 } 8382 } 8383 8384 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 8385 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 8386 UE = PromOps[i].getNode()->use_end(); 8387 UI != UE; ++UI) { 8388 SDNode *User = *UI; 8389 if (User != N && !Visited.count(User)) 8390 return SDValue(); 8391 8392 // If we're going to promote the non-output-value operand(s) or SELECT or 8393 // SELECT_CC, record them for truncation. 8394 if (User->getOpcode() == ISD::SELECT) { 8395 if (User->getOperand(0) == PromOps[i]) 8396 SelectTruncOp[0].insert(std::make_pair(User, 8397 User->getOperand(0).getValueType())); 8398 } else if (User->getOpcode() == ISD::SELECT_CC) { 8399 if (User->getOperand(0) == PromOps[i]) 8400 SelectTruncOp[0].insert(std::make_pair(User, 8401 User->getOperand(0).getValueType())); 8402 if (User->getOperand(1) == PromOps[i]) 8403 SelectTruncOp[1].insert(std::make_pair(User, 8404 User->getOperand(1).getValueType())); 8405 } 8406 } 8407 } 8408 8409 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 8410 bool ReallyNeedsExt = false; 8411 if (N->getOpcode() != ISD::ANY_EXTEND) { 8412 // If all of the inputs are not already sign/zero extended, then 8413 // we'll still need to do that at the end. 
8414 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8415 if (isa<ConstantSDNode>(Inputs[i])) 8416 continue; 8417 8418 unsigned OpBits = 8419 Inputs[i].getOperand(0).getValueSizeInBits(); 8420 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 8421 8422 if ((N->getOpcode() == ISD::ZERO_EXTEND && 8423 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 8424 APInt::getHighBitsSet(OpBits, 8425 OpBits-PromBits))) || 8426 (N->getOpcode() == ISD::SIGN_EXTEND && 8427 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 8428 (OpBits-(PromBits-1)))) { 8429 ReallyNeedsExt = true; 8430 break; 8431 } 8432 } 8433 } 8434 8435 // Replace all inputs, either with the truncation operand, or a 8436 // truncation or extension to the final output type. 8437 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8438 // Constant inputs need to be replaced with the to-be-promoted nodes that 8439 // use them because they might have users outside of the cluster of 8440 // promoted nodes. 8441 if (isa<ConstantSDNode>(Inputs[i])) 8442 continue; 8443 8444 SDValue InSrc = Inputs[i].getOperand(0); 8445 if (Inputs[i].getValueType() == N->getValueType(0)) 8446 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 8447 else if (N->getOpcode() == ISD::SIGN_EXTEND) 8448 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8449 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 8450 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8451 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8452 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 8453 else 8454 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8455 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 8456 } 8457 8458 // Replace all operations (these are all the same, but have a different 8459 // (promoted) return type). DAG.getNode will validate that the types of 8460 // a binary operator match, so go through the list in reverse so that 8461 // we've likely promoted both operands first. 
8462 while (!PromOps.empty()) { 8463 SDValue PromOp = PromOps.back(); 8464 PromOps.pop_back(); 8465 8466 unsigned C; 8467 switch (PromOp.getOpcode()) { 8468 default: C = 0; break; 8469 case ISD::SELECT: C = 1; break; 8470 case ISD::SELECT_CC: C = 2; break; 8471 } 8472 8473 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 8474 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 8475 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 8476 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 8477 // The to-be-promoted operands of this node have not yet been 8478 // promoted (this should be rare because we're going through the 8479 // list backward, but if one of the operands has several users in 8480 // this cluster of to-be-promoted nodes, it is possible). 8481 PromOps.insert(PromOps.begin(), PromOp); 8482 continue; 8483 } 8484 8485 // For SELECT and SELECT_CC nodes, we do a similar check for any 8486 // to-be-promoted comparison inputs. 8487 if (PromOp.getOpcode() == ISD::SELECT || 8488 PromOp.getOpcode() == ISD::SELECT_CC) { 8489 if ((SelectTruncOp[0].count(PromOp.getNode()) && 8490 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 8491 (SelectTruncOp[1].count(PromOp.getNode()) && 8492 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 8493 PromOps.insert(PromOps.begin(), PromOp); 8494 continue; 8495 } 8496 } 8497 8498 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 8499 PromOp.getNode()->op_end()); 8500 8501 // If this node has constant inputs, then they'll need to be promoted here. 
8502 for (unsigned i = 0; i < 2; ++i) { 8503 if (!isa<ConstantSDNode>(Ops[C+i])) 8504 continue; 8505 if (Ops[C+i].getValueType() == N->getValueType(0)) 8506 continue; 8507 8508 if (N->getOpcode() == ISD::SIGN_EXTEND) 8509 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8510 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8511 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8512 else 8513 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8514 } 8515 8516 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 8517 // truncate them again to the original value type. 8518 if (PromOp.getOpcode() == ISD::SELECT || 8519 PromOp.getOpcode() == ISD::SELECT_CC) { 8520 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 8521 if (SI0 != SelectTruncOp[0].end()) 8522 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 8523 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 8524 if (SI1 != SelectTruncOp[1].end()) 8525 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 8526 } 8527 8528 DAG.ReplaceAllUsesOfValueWith(PromOp, 8529 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 8530 } 8531 8532 // Now we're left with the initial extension itself. 8533 if (!ReallyNeedsExt) 8534 return N->getOperand(0); 8535 8536 // To zero extend, just mask off everything except for the first bit (in the 8537 // i1 case). 
8538 if (N->getOpcode() == ISD::ZERO_EXTEND) 8539 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 8540 DAG.getConstant(APInt::getLowBitsSet( 8541 N->getValueSizeInBits(0), PromBits), 8542 N->getValueType(0))); 8543 8544 assert(N->getOpcode() == ISD::SIGN_EXTEND && 8545 "Invalid extension type"); 8546 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); 8547 SDValue ShiftCst = 8548 DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy); 8549 return DAG.getNode(ISD::SRA, dl, N->getValueType(0), 8550 DAG.getNode(ISD::SHL, dl, N->getValueType(0), 8551 N->getOperand(0), ShiftCst), ShiftCst); 8552} 8553 8554SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, 8555 DAGCombinerInfo &DCI) const { 8556 assert((N->getOpcode() == ISD::SINT_TO_FP || 8557 N->getOpcode() == ISD::UINT_TO_FP) && 8558 "Need an int -> FP conversion node here"); 8559 8560 if (!Subtarget.has64BitSupport()) 8561 return SDValue(); 8562 8563 SelectionDAG &DAG = DCI.DAG; 8564 SDLoc dl(N); 8565 SDValue Op(N, 0); 8566 8567 // Don't handle ppc_fp128 here or i1 conversions. 8568 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 8569 return SDValue(); 8570 if (Op.getOperand(0).getValueType() == MVT::i1) 8571 return SDValue(); 8572 8573 // For i32 intermediate values, unfortunately, the conversion functions 8574 // leave the upper 32 bits of the value are undefined. Within the set of 8575 // scalar instructions, we have no method for zero- or sign-extending the 8576 // value. Thus, we cannot handle i32 intermediate values here. 8577 if (Op.getOperand(0).getValueType() == MVT::i32) 8578 return SDValue(); 8579 8580 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 8581 "UINT_TO_FP is supported only with FPCVT"); 8582 8583 // If we have FCFIDS, then use it when converting to single-precision. 8584 // Otherwise, convert to double-precision and then round. 8585 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 
8586 (Op.getOpcode() == ISD::UINT_TO_FP ? 8587 PPCISD::FCFIDUS : PPCISD::FCFIDS) : 8588 (Op.getOpcode() == ISD::UINT_TO_FP ? 8589 PPCISD::FCFIDU : PPCISD::FCFID); 8590 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 8591 MVT::f32 : MVT::f64; 8592 8593 // If we're converting from a float, to an int, and back to a float again, 8594 // then we don't need the store/load pair at all. 8595 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && 8596 Subtarget.hasFPCVT()) || 8597 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { 8598 SDValue Src = Op.getOperand(0).getOperand(0); 8599 if (Src.getValueType() == MVT::f32) { 8600 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 8601 DCI.AddToWorklist(Src.getNode()); 8602 } 8603 8604 unsigned FCTOp = 8605 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 8606 PPCISD::FCTIDUZ; 8607 8608 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); 8609 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); 8610 8611 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { 8612 FP = DAG.getNode(ISD::FP_ROUND, dl, 8613 MVT::f32, FP, DAG.getIntPtrConstant(0)); 8614 DCI.AddToWorklist(FP.getNode()); 8615 } 8616 8617 return FP; 8618 } 8619 8620 return SDValue(); 8621} 8622 8623// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for 8624// builtins) into loads with swaps. 8625SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 8626 DAGCombinerInfo &DCI) const { 8627 SelectionDAG &DAG = DCI.DAG; 8628 SDLoc dl(N); 8629 SDValue Chain; 8630 SDValue Base; 8631 MachineMemOperand *MMO; 8632 8633 switch (N->getOpcode()) { 8634 default: 8635 llvm_unreachable("Unexpected opcode for little endian VSX load"); 8636 case ISD::LOAD: { 8637 LoadSDNode *LD = cast<LoadSDNode>(N); 8638 Chain = LD->getChain(); 8639 Base = LD->getBasePtr(); 8640 MMO = LD->getMemOperand(); 8641 // If the MMO suggests this isn't a load of a full vector, leave 8642 // things alone. 
For a built-in, we have to make the change for 8643 // correctness, so if there is a size problem that will be a bug. 8644 if (MMO->getSize() < 16) 8645 return SDValue(); 8646 break; 8647 } 8648 case ISD::INTRINSIC_W_CHAIN: { 8649 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 8650 Chain = Intrin->getChain(); 8651 Base = Intrin->getBasePtr(); 8652 MMO = Intrin->getMemOperand(); 8653 break; 8654 } 8655 } 8656 8657 MVT VecTy = N->getValueType(0).getSimpleVT(); 8658 SDValue LoadOps[] = { Chain, Base }; 8659 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 8660 DAG.getVTList(VecTy, MVT::Other), 8661 LoadOps, VecTy, MMO); 8662 DCI.AddToWorklist(Load.getNode()); 8663 Chain = Load.getValue(1); 8664 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 8665 DAG.getVTList(VecTy, MVT::Other), Chain, Load); 8666 DCI.AddToWorklist(Swap.getNode()); 8667 return Swap; 8668} 8669 8670// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 8671// builtins) into stores with swaps. 8672SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 8673 DAGCombinerInfo &DCI) const { 8674 SelectionDAG &DAG = DCI.DAG; 8675 SDLoc dl(N); 8676 SDValue Chain; 8677 SDValue Base; 8678 unsigned SrcOpnd; 8679 MachineMemOperand *MMO; 8680 8681 switch (N->getOpcode()) { 8682 default: 8683 llvm_unreachable("Unexpected opcode for little endian VSX store"); 8684 case ISD::STORE: { 8685 StoreSDNode *ST = cast<StoreSDNode>(N); 8686 Chain = ST->getChain(); 8687 Base = ST->getBasePtr(); 8688 MMO = ST->getMemOperand(); 8689 SrcOpnd = 1; 8690 // If the MMO suggests this isn't a store of a full vector, leave 8691 // things alone. For a built-in, we have to make the change for 8692 // correctness, so if there is a size problem that will be a bug. 
8693 if (MMO->getSize() < 16) 8694 return SDValue(); 8695 break; 8696 } 8697 case ISD::INTRINSIC_VOID: { 8698 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 8699 Chain = Intrin->getChain(); 8700 // Intrin->getBasePtr() oddly does not get what we want. 8701 Base = Intrin->getOperand(3); 8702 MMO = Intrin->getMemOperand(); 8703 SrcOpnd = 2; 8704 break; 8705 } 8706 } 8707 8708 SDValue Src = N->getOperand(SrcOpnd); 8709 MVT VecTy = Src.getValueType().getSimpleVT(); 8710 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 8711 DAG.getVTList(VecTy, MVT::Other), Chain, Src); 8712 DCI.AddToWorklist(Swap.getNode()); 8713 Chain = Swap.getValue(1); 8714 SDValue StoreOps[] = { Chain, Swap, Base }; 8715 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 8716 DAG.getVTList(MVT::Other), 8717 StoreOps, VecTy, MMO); 8718 DCI.AddToWorklist(Store.getNode()); 8719 return Store; 8720} 8721 8722SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 8723 DAGCombinerInfo &DCI) const { 8724 const TargetMachine &TM = getTargetMachine(); 8725 SelectionDAG &DAG = DCI.DAG; 8726 SDLoc dl(N); 8727 switch (N->getOpcode()) { 8728 default: break; 8729 case PPCISD::SHL: 8730 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 8731 if (C->isNullValue()) // 0 << V -> 0. 8732 return N->getOperand(0); 8733 } 8734 break; 8735 case PPCISD::SRL: 8736 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 8737 if (C->isNullValue()) // 0 >>u V -> 0. 8738 return N->getOperand(0); 8739 } 8740 break; 8741 case PPCISD::SRA: 8742 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 8743 if (C->isNullValue() || // 0 >>s V -> 0. 8744 C->isAllOnesValue()) // -1 >>s V -> -1. 
8745 return N->getOperand(0); 8746 } 8747 break; 8748 case ISD::SIGN_EXTEND: 8749 case ISD::ZERO_EXTEND: 8750 case ISD::ANY_EXTEND: 8751 return DAGCombineExtBoolTrunc(N, DCI); 8752 case ISD::TRUNCATE: 8753 case ISD::SETCC: 8754 case ISD::SELECT_CC: 8755 return DAGCombineTruncBoolExt(N, DCI); 8756 case ISD::SINT_TO_FP: 8757 case ISD::UINT_TO_FP: 8758 return combineFPToIntToFP(N, DCI); 8759 case ISD::STORE: { 8760 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 8761 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 8762 !cast<StoreSDNode>(N)->isTruncatingStore() && 8763 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 8764 N->getOperand(1).getValueType() == MVT::i32 && 8765 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 8766 SDValue Val = N->getOperand(1).getOperand(0); 8767 if (Val.getValueType() == MVT::f32) { 8768 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 8769 DCI.AddToWorklist(Val.getNode()); 8770 } 8771 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 8772 DCI.AddToWorklist(Val.getNode()); 8773 8774 SDValue Ops[] = { 8775 N->getOperand(0), Val, N->getOperand(2), 8776 DAG.getValueType(N->getOperand(1).getValueType()) 8777 }; 8778 8779 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 8780 DAG.getVTList(MVT::Other), Ops, 8781 cast<StoreSDNode>(N)->getMemoryVT(), 8782 cast<StoreSDNode>(N)->getMemOperand()); 8783 DCI.AddToWorklist(Val.getNode()); 8784 return Val; 8785 } 8786 8787 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 
8788 if (cast<StoreSDNode>(N)->isUnindexed() && 8789 N->getOperand(1).getOpcode() == ISD::BSWAP && 8790 N->getOperand(1).getNode()->hasOneUse() && 8791 (N->getOperand(1).getValueType() == MVT::i32 || 8792 N->getOperand(1).getValueType() == MVT::i16 || 8793 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 8794 TM.getSubtarget<PPCSubtarget>().isPPC64() && 8795 N->getOperand(1).getValueType() == MVT::i64))) { 8796 SDValue BSwapOp = N->getOperand(1).getOperand(0); 8797 // Do an any-extend to 32-bits if this is a half-word input. 8798 if (BSwapOp.getValueType() == MVT::i16) 8799 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 8800 8801 SDValue Ops[] = { 8802 N->getOperand(0), BSwapOp, N->getOperand(2), 8803 DAG.getValueType(N->getOperand(1).getValueType()) 8804 }; 8805 return 8806 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 8807 Ops, cast<StoreSDNode>(N)->getMemoryVT(), 8808 cast<StoreSDNode>(N)->getMemOperand()); 8809 } 8810 8811 // For little endian, VSX stores require generating xxswapd/lxvd2x. 8812 EVT VT = N->getOperand(1).getValueType(); 8813 if (VT.isSimple()) { 8814 MVT StoreVT = VT.getSimpleVT(); 8815 if (TM.getSubtarget<PPCSubtarget>().hasVSX() && 8816 TM.getSubtarget<PPCSubtarget>().isLittleEndian() && 8817 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || 8818 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) 8819 return expandVSXStoreForLE(N, DCI); 8820 } 8821 break; 8822 } 8823 case ISD::LOAD: { 8824 LoadSDNode *LD = cast<LoadSDNode>(N); 8825 EVT VT = LD->getValueType(0); 8826 8827 // For little endian, VSX loads require generating lxvd2x/xxswapd. 
8828 if (VT.isSimple()) { 8829 MVT LoadVT = VT.getSimpleVT(); 8830 if (TM.getSubtarget<PPCSubtarget>().hasVSX() && 8831 TM.getSubtarget<PPCSubtarget>().isLittleEndian() && 8832 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || 8833 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) 8834 return expandVSXLoadForLE(N, DCI); 8835 } 8836 8837 Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); 8838 unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); 8839 if (ISD::isNON_EXTLoad(N) && VT.isVector() && 8840 TM.getSubtarget<PPCSubtarget>().hasAltivec() && 8841 // P8 and later hardware should just use LOAD. 8842 !TM.getSubtarget<PPCSubtarget>().hasP8Vector() && 8843 (VT == MVT::v16i8 || VT == MVT::v8i16 || 8844 VT == MVT::v4i32 || VT == MVT::v4f32) && 8845 LD->getAlignment() < ABIAlignment) { 8846 // This is a type-legal unaligned Altivec load. 8847 SDValue Chain = LD->getChain(); 8848 SDValue Ptr = LD->getBasePtr(); 8849 bool isLittleEndian = Subtarget.isLittleEndian(); 8850 8851 // This implements the loading of unaligned vectors as described in 8852 // the venerable Apple Velocity Engine overview. Specifically: 8853 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 8854 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 8855 // 8856 // The general idea is to expand a sequence of one or more unaligned 8857 // loads into an alignment-based permutation-control instruction (lvsl 8858 // or lvsr), a series of regular vector loads (which always truncate 8859 // their input address to an aligned address), and a series of 8860 // permutations. The results of these permutations are the requested 8861 // loaded values. The trick is that the last "extra" load is not taken 8862 // from the address you might suspect (sizeof(vector) bytes after the 8863 // last requested load), but rather sizeof(vector) - 1 bytes after the 8864 // last requested vector. 
The point of this is to avoid a page fault if 8865 // the base address happened to be aligned. This works because if the 8866 // base address is aligned, then adding less than a full vector length 8867 // will cause the last vector in the sequence to be (re)loaded. 8868 // Otherwise, the next vector will be fetched as you might suspect was 8869 // necessary. 8870 8871 // We might be able to reuse the permutation generation from 8872 // a different base address offset from this one by an aligned amount. 8873 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 8874 // optimization later. 8875 Intrinsic::ID Intr = (isLittleEndian ? 8876 Intrinsic::ppc_altivec_lvsr : 8877 Intrinsic::ppc_altivec_lvsl); 8878 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8); 8879 8880 // Create the new MMO for the new base load. It is like the original MMO, 8881 // but represents an area in memory almost twice the vector size centered 8882 // on the original address. If the address is unaligned, we might start 8883 // reading up to (sizeof(vector)-1) bytes below the address of the 8884 // original unaligned load. 8885 MachineFunction &MF = DAG.getMachineFunction(); 8886 MachineMemOperand *BaseMMO = 8887 MF.getMachineMemOperand(LD->getMemOperand(), 8888 -LD->getMemoryVT().getStoreSize()+1, 8889 2*LD->getMemoryVT().getStoreSize()-1); 8890 8891 // Create the new base load. 8892 SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx, 8893 getPointerTy()); 8894 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 8895 SDValue BaseLoad = 8896 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 8897 DAG.getVTList(MVT::v4i32, MVT::Other), 8898 BaseLoadOps, MVT::v4i32, BaseMMO); 8899 8900 // Note that the value of IncOffset (which is provided to the next 8901 // load's pointer info offset value, and thus used to calculate the 8902 // alignment), and the value of IncValue (which is actually used to 8903 // increment the pointer value) are different! 
This is because we 8904 // require the next load to appear to be aligned, even though it 8905 // is actually offset from the base pointer by a lesser amount. 8906 int IncOffset = VT.getSizeInBits() / 8; 8907 int IncValue = IncOffset; 8908 8909 // Walk (both up and down) the chain looking for another load at the real 8910 // (aligned) offset (the alignment of the other load does not matter in 8911 // this case). If found, then do not use the offset reduction trick, as 8912 // that will prevent the loads from being later combined (as they would 8913 // otherwise be duplicates). 8914 if (!findConsecutiveLoad(LD, DAG)) 8915 --IncValue; 8916 8917 SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); 8918 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 8919 8920 MachineMemOperand *ExtraMMO = 8921 MF.getMachineMemOperand(LD->getMemOperand(), 8922 1, 2*LD->getMemoryVT().getStoreSize()-1); 8923 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 8924 SDValue ExtraLoad = 8925 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 8926 DAG.getVTList(MVT::v4i32, MVT::Other), 8927 ExtraLoadOps, MVT::v4i32, ExtraMMO); 8928 8929 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 8930 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 8931 8932 // Because vperm has a big-endian bias, we must reverse the order 8933 // of the input vectors and complement the permute control vector 8934 // when generating little endian code. We have already handled the 8935 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 8936 // and ExtraLoad here. 
8937 SDValue Perm; 8938 if (isLittleEndian) 8939 Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, 8940 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 8941 else 8942 Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, 8943 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 8944 8945 if (VT != MVT::v4i32) 8946 Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); 8947 8948 // The output of the permutation is our loaded result, the TokenFactor is 8949 // our new chain. 8950 DCI.CombineTo(N, Perm, TF); 8951 return SDValue(N, 0); 8952 } 8953 } 8954 break; 8955 case ISD::INTRINSIC_WO_CHAIN: { 8956 bool isLittleEndian = Subtarget.isLittleEndian(); 8957 Intrinsic::ID Intr = (isLittleEndian ? 8958 Intrinsic::ppc_altivec_lvsr : 8959 Intrinsic::ppc_altivec_lvsl); 8960 if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr && 8961 N->getOperand(1)->getOpcode() == ISD::ADD) { 8962 SDValue Add = N->getOperand(1); 8963 8964 if (DAG.MaskedValueIsZero(Add->getOperand(1), 8965 APInt::getAllOnesValue(4 /* 16 byte alignment */).zext( 8966 Add.getValueType().getScalarType().getSizeInBits()))) { 8967 SDNode *BasePtr = Add->getOperand(0).getNode(); 8968 for (SDNode::use_iterator UI = BasePtr->use_begin(), 8969 UE = BasePtr->use_end(); UI != UE; ++UI) { 8970 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 8971 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == 8972 Intr) { 8973 // We've found another LVSL/LVSR, and this address is an aligned 8974 // multiple of that one. The results will be the same, so use the 8975 // one we've just found instead. 8976 8977 return SDValue(*UI, 0); 8978 } 8979 } 8980 } 8981 } 8982 } 8983 8984 break; 8985 case ISD::INTRINSIC_W_CHAIN: { 8986 // For little endian, VSX loads require generating lxvd2x/xxswapd. 
8987 if (TM.getSubtarget<PPCSubtarget>().hasVSX() && 8988 TM.getSubtarget<PPCSubtarget>().isLittleEndian()) { 8989 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 8990 default: 8991 break; 8992 case Intrinsic::ppc_vsx_lxvw4x: 8993 case Intrinsic::ppc_vsx_lxvd2x: 8994 return expandVSXLoadForLE(N, DCI); 8995 } 8996 } 8997 break; 8998 } 8999 case ISD::INTRINSIC_VOID: { 9000 // For little endian, VSX stores require generating xxswapd/stxvd2x. 9001 if (TM.getSubtarget<PPCSubtarget>().hasVSX() && 9002 TM.getSubtarget<PPCSubtarget>().isLittleEndian()) { 9003 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9004 default: 9005 break; 9006 case Intrinsic::ppc_vsx_stxvw4x: 9007 case Intrinsic::ppc_vsx_stxvd2x: 9008 return expandVSXStoreForLE(N, DCI); 9009 } 9010 } 9011 break; 9012 } 9013 case ISD::BSWAP: 9014 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 9015 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 9016 N->getOperand(0).hasOneUse() && 9017 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 9018 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 9019 TM.getSubtarget<PPCSubtarget>().isPPC64() && 9020 N->getValueType(0) == MVT::i64))) { 9021 SDValue Load = N->getOperand(0); 9022 LoadSDNode *LD = cast<LoadSDNode>(Load); 9023 // Create the byte-swapping load. 9024 SDValue Ops[] = { 9025 LD->getChain(), // Chain 9026 LD->getBasePtr(), // Ptr 9027 DAG.getValueType(N->getValueType(0)) // VT 9028 }; 9029 SDValue BSLoad = 9030 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 9031 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 9032 MVT::i64 : MVT::i32, MVT::Other), 9033 Ops, LD->getMemoryVT(), LD->getMemOperand()); 9034 9035 // If this is an i16 load, insert the truncate. 9036 SDValue ResVal = BSLoad; 9037 if (N->getValueType(0) == MVT::i16) 9038 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 9039 9040 // First, combine the bswap away. This makes the value produced by the 9041 // load dead. 
9042 DCI.CombineTo(N, ResVal); 9043 9044 // Next, combine the load away, we give it a bogus result value but a real 9045 // chain result. The result value is dead because the bswap is dead. 9046 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 9047 9048 // Return N so it doesn't get rechecked! 9049 return SDValue(N, 0); 9050 } 9051 9052 break; 9053 case PPCISD::VCMP: { 9054 // If a VCMPo node already exists with exactly the same operands as this 9055 // node, use its result instead of this node (VCMPo computes both a CR6 and 9056 // a normal output). 9057 // 9058 if (!N->getOperand(0).hasOneUse() && 9059 !N->getOperand(1).hasOneUse() && 9060 !N->getOperand(2).hasOneUse()) { 9061 9062 // Scan all of the users of the LHS, looking for VCMPo's that match. 9063 SDNode *VCMPoNode = nullptr; 9064 9065 SDNode *LHSN = N->getOperand(0).getNode(); 9066 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 9067 UI != E; ++UI) 9068 if (UI->getOpcode() == PPCISD::VCMPo && 9069 UI->getOperand(1) == N->getOperand(1) && 9070 UI->getOperand(2) == N->getOperand(2) && 9071 UI->getOperand(0) == N->getOperand(0)) { 9072 VCMPoNode = *UI; 9073 break; 9074 } 9075 9076 // If there is no VCMPo node, or if the flag value has a single use, don't 9077 // transform this. 9078 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 9079 break; 9080 9081 // Look at the (necessarily single) use of the flag value. If it has a 9082 // chain, this transformation is more complex. Note that multiple things 9083 // could use the value result, which we should ignore. 
9084 SDNode *FlagUser = nullptr; 9085 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 9086 FlagUser == nullptr; ++UI) { 9087 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 9088 SDNode *User = *UI; 9089 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 9090 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 9091 FlagUser = User; 9092 break; 9093 } 9094 } 9095 } 9096 9097 // If the user is a MFOCRF instruction, we know this is safe. 9098 // Otherwise we give up for right now. 9099 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 9100 return SDValue(VCMPoNode, 0); 9101 } 9102 break; 9103 } 9104 case ISD::BRCOND: { 9105 SDValue Cond = N->getOperand(1); 9106 SDValue Target = N->getOperand(2); 9107 9108 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 9109 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 9110 Intrinsic::ppc_is_decremented_ctr_nonzero) { 9111 9112 // We now need to make the intrinsic dead (it cannot be instruction 9113 // selected). 9114 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 9115 assert(Cond.getNode()->hasOneUse() && 9116 "Counter decrement has more than one use"); 9117 9118 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 9119 N->getOperand(0), Target); 9120 } 9121 } 9122 break; 9123 case ISD::BR_CC: { 9124 // If this is a branch on an altivec predicate comparison, lower this so 9125 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 9126 // lowering is done pre-legalize, because the legalizer lowers the predicate 9127 // compare down to code that is difficult to reassemble. 9128 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 9129 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 9130 9131 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 9132 // value. If so, pass-through the AND to get to the intrinsic. 
9133 if (LHS.getOpcode() == ISD::AND && 9134 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 9135 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 9136 Intrinsic::ppc_is_decremented_ctr_nonzero && 9137 isa<ConstantSDNode>(LHS.getOperand(1)) && 9138 !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()-> 9139 isZero()) 9140 LHS = LHS.getOperand(0); 9141 9142 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 9143 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 9144 Intrinsic::ppc_is_decremented_ctr_nonzero && 9145 isa<ConstantSDNode>(RHS)) { 9146 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 9147 "Counter decrement comparison is not EQ or NE"); 9148 9149 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 9150 bool isBDNZ = (CC == ISD::SETEQ && Val) || 9151 (CC == ISD::SETNE && !Val); 9152 9153 // We now need to make the intrinsic dead (it cannot be instruction 9154 // selected). 9155 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 9156 assert(LHS.getNode()->hasOneUse() && 9157 "Counter decrement has more than one use"); 9158 9159 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 9160 N->getOperand(0), N->getOperand(4)); 9161 } 9162 9163 int CompareOpc; 9164 bool isDot; 9165 9166 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 9167 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 9168 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 9169 assert(isDot && "Can't compare against a vector result!"); 9170 9171 // If this is a comparison against something other than 0/1, then we know 9172 // that the condition is never/always true. 9173 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 9174 if (Val != 0 && Val != 1) { 9175 if (CC == ISD::SETEQ) // Cond never true, remove branch. 9176 return N->getOperand(0); 9177 // Always !=, turn it into an unconditional branch. 
9178 return DAG.getNode(ISD::BR, dl, MVT::Other, 9179 N->getOperand(0), N->getOperand(4)); 9180 } 9181 9182 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 9183 9184 // Create the PPCISD altivec 'dot' comparison node. 9185 SDValue Ops[] = { 9186 LHS.getOperand(2), // LHS of compare 9187 LHS.getOperand(3), // RHS of compare 9188 DAG.getConstant(CompareOpc, MVT::i32) 9189 }; 9190 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 9191 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 9192 9193 // Unpack the result based on how the target uses it. 9194 PPC::Predicate CompOpc; 9195 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 9196 default: // Can't happen, don't crash on invalid number though. 9197 case 0: // Branch on the value of the EQ bit of CR6. 9198 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 9199 break; 9200 case 1: // Branch on the inverted value of the EQ bit of CR6. 9201 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 9202 break; 9203 case 2: // Branch on the value of the LT bit of CR6. 9204 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 9205 break; 9206 case 3: // Branch on the inverted value of the LT bit of CR6. 9207 CompOpc = BranchOnWhenPredTrue ? 
PPC::PRED_GE : PPC::PRED_LT; 9208 break; 9209 } 9210 9211 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 9212 DAG.getConstant(CompOpc, MVT::i32), 9213 DAG.getRegister(PPC::CR6, MVT::i32), 9214 N->getOperand(4), CompNode.getValue(1)); 9215 } 9216 break; 9217 } 9218 } 9219 9220 return SDValue(); 9221} 9222 9223SDValue 9224PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 9225 SelectionDAG &DAG, 9226 std::vector<SDNode *> *Created) const { 9227 // fold (sdiv X, pow2) 9228 EVT VT = N->getValueType(0); 9229 if (VT == MVT::i64 && !Subtarget.isPPC64()) 9230 return SDValue(); 9231 if ((VT != MVT::i32 && VT != MVT::i64) || 9232 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) 9233 return SDValue(); 9234 9235 SDLoc DL(N); 9236 SDValue N0 = N->getOperand(0); 9237 9238 bool IsNegPow2 = (-Divisor).isPowerOf2(); 9239 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); 9240 SDValue ShiftAmt = DAG.getConstant(Lg2, VT); 9241 9242 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); 9243 if (Created) 9244 Created->push_back(Op.getNode()); 9245 9246 if (IsNegPow2) { 9247 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op); 9248 if (Created) 9249 Created->push_back(Op.getNode()); 9250 } 9251 9252 return Op; 9253} 9254 9255//===----------------------------------------------------------------------===// 9256// Inline Assembly Support 9257//===----------------------------------------------------------------------===// 9258 9259void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 9260 APInt &KnownZero, 9261 APInt &KnownOne, 9262 const SelectionDAG &DAG, 9263 unsigned Depth) const { 9264 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 9265 switch (Op.getOpcode()) { 9266 default: break; 9267 case PPCISD::LBRX: { 9268 // lhbrx is known to have the top bits cleared out. 
9269 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 9270 KnownZero = 0xFFFF0000; 9271 break; 9272 } 9273 case ISD::INTRINSIC_WO_CHAIN: { 9274 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 9275 default: break; 9276 case Intrinsic::ppc_altivec_vcmpbfp_p: 9277 case Intrinsic::ppc_altivec_vcmpeqfp_p: 9278 case Intrinsic::ppc_altivec_vcmpequb_p: 9279 case Intrinsic::ppc_altivec_vcmpequh_p: 9280 case Intrinsic::ppc_altivec_vcmpequw_p: 9281 case Intrinsic::ppc_altivec_vcmpgefp_p: 9282 case Intrinsic::ppc_altivec_vcmpgtfp_p: 9283 case Intrinsic::ppc_altivec_vcmpgtsb_p: 9284 case Intrinsic::ppc_altivec_vcmpgtsh_p: 9285 case Intrinsic::ppc_altivec_vcmpgtsw_p: 9286 case Intrinsic::ppc_altivec_vcmpgtub_p: 9287 case Intrinsic::ppc_altivec_vcmpgtuh_p: 9288 case Intrinsic::ppc_altivec_vcmpgtuw_p: 9289 KnownZero = ~1U; // All bits but the low one are known to be zero. 9290 break; 9291 } 9292 } 9293 } 9294} 9295 9296unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { 9297 switch (Subtarget.getDarwinDirective()) { 9298 default: break; 9299 case PPC::DIR_970: 9300 case PPC::DIR_PWR4: 9301 case PPC::DIR_PWR5: 9302 case PPC::DIR_PWR5X: 9303 case PPC::DIR_PWR6: 9304 case PPC::DIR_PWR6X: 9305 case PPC::DIR_PWR7: 9306 case PPC::DIR_PWR8: { 9307 if (!ML) 9308 break; 9309 9310 const PPCInstrInfo *TII = 9311 static_cast<const PPCInstrInfo *>(getTargetMachine().getSubtargetImpl()-> 9312 getInstrInfo()); 9313 9314 // For small loops (between 5 and 8 instructions), align to a 32-byte 9315 // boundary so that the entire loop fits in one instruction-cache line. 
9316 uint64_t LoopSize = 0; 9317 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) 9318 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) 9319 LoopSize += TII->GetInstSizeInBytes(J); 9320 9321 if (LoopSize > 16 && LoopSize <= 32) 9322 return 5; 9323 9324 break; 9325 } 9326 } 9327 9328 return TargetLowering::getPrefLoopAlignment(ML); 9329} 9330 9331/// getConstraintType - Given a constraint, return the type of 9332/// constraint it is for this target. 9333PPCTargetLowering::ConstraintType 9334PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 9335 if (Constraint.size() == 1) { 9336 switch (Constraint[0]) { 9337 default: break; 9338 case 'b': 9339 case 'r': 9340 case 'f': 9341 case 'v': 9342 case 'y': 9343 return C_RegisterClass; 9344 case 'Z': 9345 // FIXME: While Z does indicate a memory constraint, it specifically 9346 // indicates an r+r address (used in conjunction with the 'y' modifier 9347 // in the replacement string). Currently, we're forcing the base 9348 // register to be r0 in the asm printer (which is interpreted as zero) 9349 // and forming the complete address in the second register. This is 9350 // suboptimal. 9351 return C_Memory; 9352 } 9353 } else if (Constraint == "wc") { // individual CR bits. 9354 return C_RegisterClass; 9355 } else if (Constraint == "wa" || Constraint == "wd" || 9356 Constraint == "wf" || Constraint == "ws") { 9357 return C_RegisterClass; // VSX registers. 9358 } 9359 return TargetLowering::getConstraintType(Constraint); 9360} 9361 9362/// Examine constraint type and operand type and determine a weight value. 9363/// This object must already have been set up with the operand type 9364/// and the current alternative constraint selected. 
9365TargetLowering::ConstraintWeight 9366PPCTargetLowering::getSingleConstraintMatchWeight( 9367 AsmOperandInfo &info, const char *constraint) const { 9368 ConstraintWeight weight = CW_Invalid; 9369 Value *CallOperandVal = info.CallOperandVal; 9370 // If we don't have a value, we can't do a match, 9371 // but allow it at the lowest weight. 9372 if (!CallOperandVal) 9373 return CW_Default; 9374 Type *type = CallOperandVal->getType(); 9375 9376 // Look at the constraint type. 9377 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 9378 return CW_Register; // an individual CR bit. 9379 else if ((StringRef(constraint) == "wa" || 9380 StringRef(constraint) == "wd" || 9381 StringRef(constraint) == "wf") && 9382 type->isVectorTy()) 9383 return CW_Register; 9384 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 9385 return CW_Register; 9386 9387 switch (*constraint) { 9388 default: 9389 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 9390 break; 9391 case 'b': 9392 if (type->isIntegerTy()) 9393 weight = CW_Register; 9394 break; 9395 case 'f': 9396 if (type->isFloatTy()) 9397 weight = CW_Register; 9398 break; 9399 case 'd': 9400 if (type->isDoubleTy()) 9401 weight = CW_Register; 9402 break; 9403 case 'v': 9404 if (type->isVectorTy()) 9405 weight = CW_Register; 9406 break; 9407 case 'y': 9408 weight = CW_Register; 9409 break; 9410 case 'Z': 9411 weight = CW_Memory; 9412 break; 9413 } 9414 return weight; 9415} 9416 9417std::pair<unsigned, const TargetRegisterClass*> 9418PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 9419 MVT VT) const { 9420 if (Constraint.size() == 1) { 9421 // GCC RS6000 Constraint Letters 9422 switch (Constraint[0]) { 9423 case 'b': // R1-R31 9424 if (VT == MVT::i64 && Subtarget.isPPC64()) 9425 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 9426 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 9427 case 'r': // R0-R31 9428 if (VT == MVT::i64 && Subtarget.isPPC64()) 9429 
return std::make_pair(0U, &PPC::G8RCRegClass); 9430 return std::make_pair(0U, &PPC::GPRCRegClass); 9431 case 'f': 9432 if (VT == MVT::f32 || VT == MVT::i32) 9433 return std::make_pair(0U, &PPC::F4RCRegClass); 9434 if (VT == MVT::f64 || VT == MVT::i64) 9435 return std::make_pair(0U, &PPC::F8RCRegClass); 9436 break; 9437 case 'v': 9438 return std::make_pair(0U, &PPC::VRRCRegClass); 9439 case 'y': // crrc 9440 return std::make_pair(0U, &PPC::CRRCRegClass); 9441 } 9442 } else if (Constraint == "wc") { // an individual CR bit. 9443 return std::make_pair(0U, &PPC::CRBITRCRegClass); 9444 } else if (Constraint == "wa" || Constraint == "wd" || 9445 Constraint == "wf") { 9446 return std::make_pair(0U, &PPC::VSRCRegClass); 9447 } else if (Constraint == "ws") { 9448 return std::make_pair(0U, &PPC::VSFRCRegClass); 9449 } 9450 9451 std::pair<unsigned, const TargetRegisterClass*> R = 9452 TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 9453 9454 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 9455 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 9456 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 9457 // register. 9458 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 9459 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 9460 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && 9461 PPC::GPRCRegClass.contains(R.first)) { 9462 const TargetRegisterInfo *TRI = 9463 getTargetMachine().getSubtargetImpl()->getRegisterInfo(); 9464 return std::make_pair(TRI->getMatchingSuperReg(R.first, 9465 PPC::sub_32, &PPC::G8RCRegClass), 9466 &PPC::G8RCRegClass); 9467 } 9468 9469 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 
9470 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { 9471 R.first = PPC::CR0; 9472 R.second = &PPC::CRRCRegClass; 9473 } 9474 9475 return R; 9476} 9477 9478 9479/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 9480/// vector. If it is invalid, don't add anything to Ops. 9481void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 9482 std::string &Constraint, 9483 std::vector<SDValue>&Ops, 9484 SelectionDAG &DAG) const { 9485 SDValue Result; 9486 9487 // Only support length 1 constraints. 9488 if (Constraint.length() > 1) return; 9489 9490 char Letter = Constraint[0]; 9491 switch (Letter) { 9492 default: break; 9493 case 'I': 9494 case 'J': 9495 case 'K': 9496 case 'L': 9497 case 'M': 9498 case 'N': 9499 case 'O': 9500 case 'P': { 9501 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 9502 if (!CST) return; // Must be an immediate to match. 9503 int64_t Value = CST->getSExtValue(); 9504 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative 9505 // numbers are printed as such. 9506 switch (Letter) { 9507 default: llvm_unreachable("Unknown constraint letter!"); 9508 case 'I': // "I" is a signed 16-bit constant. 9509 if (isInt<16>(Value)) 9510 Result = DAG.getTargetConstant(Value, TCVT); 9511 break; 9512 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 9513 if (isShiftedUInt<16, 16>(Value)) 9514 Result = DAG.getTargetConstant(Value, TCVT); 9515 break; 9516 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 9517 if (isShiftedInt<16, 16>(Value)) 9518 Result = DAG.getTargetConstant(Value, TCVT); 9519 break; 9520 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 9521 if (isUInt<16>(Value)) 9522 Result = DAG.getTargetConstant(Value, TCVT); 9523 break; 9524 case 'M': // "M" is a constant that is greater than 31. 
9525 if (Value > 31) 9526 Result = DAG.getTargetConstant(Value, TCVT); 9527 break; 9528 case 'N': // "N" is a positive constant that is an exact power of two. 9529 if (Value > 0 && isPowerOf2_64(Value)) 9530 Result = DAG.getTargetConstant(Value, TCVT); 9531 break; 9532 case 'O': // "O" is the constant zero. 9533 if (Value == 0) 9534 Result = DAG.getTargetConstant(Value, TCVT); 9535 break; 9536 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 9537 if (isInt<16>(-Value)) 9538 Result = DAG.getTargetConstant(Value, TCVT); 9539 break; 9540 } 9541 break; 9542 } 9543 } 9544 9545 if (Result.getNode()) { 9546 Ops.push_back(Result); 9547 return; 9548 } 9549 9550 // Handle standard constraint letters. 9551 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 9552} 9553 9554// isLegalAddressingMode - Return true if the addressing mode represented 9555// by AM is legal for this target, for a load/store of the specified type. 9556bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 9557 Type *Ty) const { 9558 // FIXME: PPC does not allow r+i addressing modes for vectors! 9559 9560 // PPC allows a sign-extended 16-bit immediate field. 9561 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 9562 return false; 9563 9564 // No global is ever allowed as a base. 9565 if (AM.BaseGV) 9566 return false; 9567 9568 // PPC only support r+r, 9569 switch (AM.Scale) { 9570 case 0: // "r+i" or just "i", depending on HasBaseReg. 9571 break; 9572 case 1: 9573 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 9574 return false; 9575 // Otherwise we have r+r or r+i. 9576 break; 9577 case 2: 9578 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 9579 return false; 9580 // Allow 2*r as r+r. 9581 break; 9582 default: 9583 // No other scales are supported. 
9584 return false; 9585 } 9586 9587 return true; 9588} 9589 9590SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 9591 SelectionDAG &DAG) const { 9592 MachineFunction &MF = DAG.getMachineFunction(); 9593 MachineFrameInfo *MFI = MF.getFrameInfo(); 9594 MFI->setReturnAddressIsTaken(true); 9595 9596 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 9597 return SDValue(); 9598 9599 SDLoc dl(Op); 9600 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9601 9602 // Make sure the function does not optimize away the store of the RA to 9603 // the stack. 9604 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 9605 FuncInfo->setLRStoreRequired(); 9606 bool isPPC64 = Subtarget.isPPC64(); 9607 bool isDarwinABI = Subtarget.isDarwinABI(); 9608 9609 if (Depth > 0) { 9610 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 9611 SDValue Offset = 9612 9613 DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI), 9614 isPPC64? MVT::i64 : MVT::i32); 9615 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 9616 DAG.getNode(ISD::ADD, dl, getPointerTy(), 9617 FrameAddr, Offset), 9618 MachinePointerInfo(), false, false, false, 0); 9619 } 9620 9621 // Just load the return address off the stack. 9622 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 9623 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 9624 RetAddrFI, MachinePointerInfo(), false, false, false, 0); 9625} 9626 9627SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 9628 SelectionDAG &DAG) const { 9629 SDLoc dl(Op); 9630 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9631 9632 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 9633 bool isPPC64 = PtrVT == MVT::i64; 9634 9635 MachineFunction &MF = DAG.getMachineFunction(); 9636 MachineFrameInfo *MFI = MF.getFrameInfo(); 9637 MFI->setFrameAddressIsTaken(true); 9638 9639 // Naked functions never have a frame pointer, and so we use r1. 
For all 9640 // other functions, this decision must be delayed until during PEI. 9641 unsigned FrameReg; 9642 if (MF.getFunction()->getAttributes().hasAttribute( 9643 AttributeSet::FunctionIndex, Attribute::Naked)) 9644 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 9645 else 9646 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; 9647 9648 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 9649 PtrVT); 9650 while (Depth--) 9651 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 9652 FrameAddr, MachinePointerInfo(), false, false, 9653 false, 0); 9654 return FrameAddr; 9655} 9656 9657// FIXME? Maybe this could be a TableGen attribute on some registers and 9658// this table could be generated automatically from RegInfo. 9659unsigned PPCTargetLowering::getRegisterByName(const char* RegName, 9660 EVT VT) const { 9661 bool isPPC64 = Subtarget.isPPC64(); 9662 bool isDarwinABI = Subtarget.isDarwinABI(); 9663 9664 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || 9665 (!isPPC64 && VT != MVT::i32)) 9666 report_fatal_error("Invalid register global variable type"); 9667 9668 bool is64Bit = isPPC64 && VT == MVT::i64; 9669 unsigned Reg = StringSwitch<unsigned>(RegName) 9670 .Case("r1", is64Bit ? PPC::X1 : PPC::R1) 9671 .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2)) 9672 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : 9673 (is64Bit ? PPC::X13 : PPC::R13)) 9674 .Default(0); 9675 9676 if (Reg) 9677 return Reg; 9678 report_fatal_error("Invalid register name global variable"); 9679} 9680 9681bool 9682PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 9683 // The PowerPC target isn't yet aware of offsets. 
9684 return false; 9685} 9686 9687bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 9688 const CallInst &I, 9689 unsigned Intrinsic) const { 9690 9691 switch (Intrinsic) { 9692 case Intrinsic::ppc_altivec_lvx: 9693 case Intrinsic::ppc_altivec_lvxl: 9694 case Intrinsic::ppc_altivec_lvebx: 9695 case Intrinsic::ppc_altivec_lvehx: 9696 case Intrinsic::ppc_altivec_lvewx: 9697 case Intrinsic::ppc_vsx_lxvd2x: 9698 case Intrinsic::ppc_vsx_lxvw4x: { 9699 EVT VT; 9700 switch (Intrinsic) { 9701 case Intrinsic::ppc_altivec_lvebx: 9702 VT = MVT::i8; 9703 break; 9704 case Intrinsic::ppc_altivec_lvehx: 9705 VT = MVT::i16; 9706 break; 9707 case Intrinsic::ppc_altivec_lvewx: 9708 VT = MVT::i32; 9709 break; 9710 case Intrinsic::ppc_vsx_lxvd2x: 9711 VT = MVT::v2f64; 9712 break; 9713 default: 9714 VT = MVT::v4i32; 9715 break; 9716 } 9717 9718 Info.opc = ISD::INTRINSIC_W_CHAIN; 9719 Info.memVT = VT; 9720 Info.ptrVal = I.getArgOperand(0); 9721 Info.offset = -VT.getStoreSize()+1; 9722 Info.size = 2*VT.getStoreSize()-1; 9723 Info.align = 1; 9724 Info.vol = false; 9725 Info.readMem = true; 9726 Info.writeMem = false; 9727 return true; 9728 } 9729 case Intrinsic::ppc_altivec_stvx: 9730 case Intrinsic::ppc_altivec_stvxl: 9731 case Intrinsic::ppc_altivec_stvebx: 9732 case Intrinsic::ppc_altivec_stvehx: 9733 case Intrinsic::ppc_altivec_stvewx: 9734 case Intrinsic::ppc_vsx_stxvd2x: 9735 case Intrinsic::ppc_vsx_stxvw4x: { 9736 EVT VT; 9737 switch (Intrinsic) { 9738 case Intrinsic::ppc_altivec_stvebx: 9739 VT = MVT::i8; 9740 break; 9741 case Intrinsic::ppc_altivec_stvehx: 9742 VT = MVT::i16; 9743 break; 9744 case Intrinsic::ppc_altivec_stvewx: 9745 VT = MVT::i32; 9746 break; 9747 case Intrinsic::ppc_vsx_stxvd2x: 9748 VT = MVT::v2f64; 9749 break; 9750 default: 9751 VT = MVT::v4i32; 9752 break; 9753 } 9754 9755 Info.opc = ISD::INTRINSIC_VOID; 9756 Info.memVT = VT; 9757 Info.ptrVal = I.getArgOperand(1); 9758 Info.offset = -VT.getStoreSize()+1; 9759 Info.size = 2*VT.getStoreSize()-1; 
9760 Info.align = 1; 9761 Info.vol = false; 9762 Info.readMem = false; 9763 Info.writeMem = true; 9764 return true; 9765 } 9766 default: 9767 break; 9768 } 9769 9770 return false; 9771} 9772 9773/// getOptimalMemOpType - Returns the target specific optimal type for load 9774/// and store operations as a result of memset, memcpy, and memmove 9775/// lowering. If DstAlign is zero that means it's safe to destination 9776/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 9777/// means there isn't a need to check it against alignment requirement, 9778/// probably because the source does not need to be loaded. If 'IsMemset' is 9779/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 9780/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 9781/// source is constant so it does not need to be loaded. 9782/// It returns EVT::Other if the type should be determined using generic 9783/// target-independent logic. 9784EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, 9785 unsigned DstAlign, unsigned SrcAlign, 9786 bool IsMemset, bool ZeroMemset, 9787 bool MemcpyStrSrc, 9788 MachineFunction &MF) const { 9789 if (Subtarget.isPPC64()) { 9790 return MVT::i64; 9791 } else { 9792 return MVT::i32; 9793 } 9794} 9795 9796/// \brief Returns true if it is beneficial to convert a load of a constant 9797/// to just the constant itself. 
9798bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 9799 Type *Ty) const { 9800 assert(Ty->isIntegerTy()); 9801 9802 unsigned BitSize = Ty->getPrimitiveSizeInBits(); 9803 if (BitSize == 0 || BitSize > 64) 9804 return false; 9805 return true; 9806} 9807 9808bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { 9809 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 9810 return false; 9811 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 9812 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 9813 return NumBits1 == 64 && NumBits2 == 32; 9814} 9815 9816bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { 9817 if (!VT1.isInteger() || !VT2.isInteger()) 9818 return false; 9819 unsigned NumBits1 = VT1.getSizeInBits(); 9820 unsigned NumBits2 = VT2.getSizeInBits(); 9821 return NumBits1 == 64 && NumBits2 == 32; 9822} 9823 9824bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 9825 // Generally speaking, zexts are not free, but they are free when they can be 9826 // folded with other operations. 9827 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) { 9828 EVT MemVT = LD->getMemoryVT(); 9829 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || 9830 (Subtarget.isPPC64() && MemVT == MVT::i32)) && 9831 (LD->getExtensionType() == ISD::NON_EXTLOAD || 9832 LD->getExtensionType() == ISD::ZEXTLOAD)) 9833 return true; 9834 } 9835 9836 // FIXME: Add other cases... 9837 // - 32-bit shifts with a zext to i64 9838 // - zext after ctlz, bswap, etc. 
9839 // - zext after and by a constant mask 9840 9841 return TargetLowering::isZExtFree(Val, VT2); 9842} 9843 9844bool PPCTargetLowering::isFPExtFree(EVT VT) const { 9845 assert(VT.isFloatingPoint()); 9846 return true; 9847} 9848 9849bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 9850 return isInt<16>(Imm) || isUInt<16>(Imm); 9851} 9852 9853bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { 9854 return isInt<16>(Imm) || isUInt<16>(Imm); 9855} 9856 9857bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 9858 unsigned, 9859 unsigned, 9860 bool *Fast) const { 9861 if (DisablePPCUnaligned) 9862 return false; 9863 9864 // PowerPC supports unaligned memory access for simple non-vector types. 9865 // Although accessing unaligned addresses is not as efficient as accessing 9866 // aligned addresses, it is generally more efficient than manual expansion, 9867 // and generally only traps for software emulation when crossing page 9868 // boundaries. 9869 9870 if (!VT.isSimple()) 9871 return false; 9872 9873 if (VT.getSimpleVT().isVector()) { 9874 if (Subtarget.hasVSX()) { 9875 if (VT != MVT::v2f64 && VT != MVT::v2i64 && 9876 VT != MVT::v4f32 && VT != MVT::v4i32) 9877 return false; 9878 } else { 9879 return false; 9880 } 9881 } 9882 9883 if (VT == MVT::ppcf128) 9884 return false; 9885 9886 if (Fast) 9887 *Fast = true; 9888 9889 return true; 9890} 9891 9892bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 9893 VT = VT.getScalarType(); 9894 9895 if (!VT.isSimple()) 9896 return false; 9897 9898 switch (VT.getSimpleVT().SimpleTy) { 9899 case MVT::f32: 9900 case MVT::f64: 9901 return true; 9902 default: 9903 break; 9904 } 9905 9906 return false; 9907} 9908 9909const MCPhysReg * 9910PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { 9911 // LR is a callee-save register, but we must treat it as clobbered by any call 9912 // site. 
Hence we include LR in the scratch registers, which are in turn added 9913 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies 9914 // to CTR, which is used by any indirect call. 9915 static const MCPhysReg ScratchRegs[] = { 9916 PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0 9917 }; 9918 9919 return ScratchRegs; 9920} 9921 9922bool 9923PPCTargetLowering::shouldExpandBuildVectorWithShuffles( 9924 EVT VT , unsigned DefinedValues) const { 9925 if (VT == MVT::v2i64) 9926 return false; 9927 9928 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); 9929} 9930 9931Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 9932 if (DisableILPPref || Subtarget.enableMachineScheduler()) 9933 return TargetLowering::getSchedulingPreference(N); 9934 9935 return Sched::ILP; 9936} 9937 9938// Create a fast isel object. 9939FastISel * 9940PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, 9941 const TargetLibraryInfo *LibInfo) const { 9942 return PPC::createFastISel(FuncInfo, LibInfo); 9943}
|