1263320SdimPull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen): 2263320Sdim 3263320Sdim Implement atomicrmw operations in 32 and 64 bits for SPARCv9. 4263320Sdim 5263320Sdim These all use the compare-and-swap CASA/CASXA instructions. 6263320Sdim 7269012SemasteIntroduced here: http://svnweb.freebsd.org/changeset/base/262261 8263320Sdim 9263320SdimIndex: test/CodeGen/SPARC/atomics.ll 10263320Sdim=================================================================== 11263320Sdim--- test/CodeGen/SPARC/atomics.ll 12263320Sdim+++ test/CodeGen/SPARC/atomics.ll 13263320Sdim@@ -1,4 +1,4 @@ 14263320Sdim-; RUN: llc < %s -march=sparcv9 | FileCheck %s 15263320Sdim+; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s 16263320Sdim 17263320Sdim ; CHECK-LABEL: test_atomic_i32 18263320Sdim ; CHECK: ld [%o0] 19263320Sdim@@ -61,3 +61,84 @@ entry: 20263320Sdim %b = atomicrmw xchg i32* %ptr, i32 42 monotonic 21263320Sdim ret i32 %b 22263320Sdim } 23263320Sdim+ 24263320Sdim+; CHECK-LABEL: test_load_add_32 25263320Sdim+; CHECK: membar 26263320Sdim+; CHECK: add 27263320Sdim+; CHECK: cas [%o0] 28263320Sdim+; CHECK: membar 29263320Sdim+define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) { 30263320Sdim+entry: 31263320Sdim+ %0 = atomicrmw add i32* %p, i32 %v seq_cst 32263320Sdim+ ret i32 %0 33263320Sdim+} 34263320Sdim+ 35263320Sdim+; CHECK-LABEL: test_load_sub_64 36263320Sdim+; CHECK: membar 37263320Sdim+; CHECK: sub 38263320Sdim+; CHECK: casx [%o0] 39263320Sdim+; CHECK: membar 40263320Sdim+define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) { 41263320Sdim+entry: 42263320Sdim+ %0 = atomicrmw sub i64* %p, i64 %v seq_cst 43263320Sdim+ ret i64 %0 44263320Sdim+} 45263320Sdim+ 46263320Sdim+; CHECK-LABEL: test_load_xor_32 47263320Sdim+; CHECK: membar 48263320Sdim+; CHECK: xor 49263320Sdim+; CHECK: cas [%o0] 50263320Sdim+; CHECK: membar 51263320Sdim+define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) { 52263320Sdim+entry: 53263320Sdim+ %0 = atomicrmw xor i32* %p, i32 %v seq_cst 54263320Sdim+ ret i32 %0 55263320Sdim+} 56263320Sdim+ 57263320Sdim+; CHECK-LABEL: test_load_and_32 58263320Sdim+; CHECK: membar 59263320Sdim+; CHECK: and 60263320Sdim+; CHECK-NOT: xor 61263320Sdim+; CHECK: cas [%o0] 62263320Sdim+; CHECK: membar 63263320Sdim+define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) { 64263320Sdim+entry: 65263320Sdim+ %0 = atomicrmw and i32* %p, i32 %v seq_cst 66263320Sdim+ ret i32 %0 67263320Sdim+} 68263320Sdim+ 69263320Sdim+; CHECK-LABEL: test_load_nand_32 70263320Sdim+; CHECK: membar 71263320Sdim+; CHECK: and 72263320Sdim+; CHECK: xor 73263320Sdim+; CHECK: cas [%o0] 74263320Sdim+; CHECK: membar 75263320Sdim+define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) { 76263320Sdim+entry: 77263320Sdim+ %0 = atomicrmw nand i32* %p, i32 %v seq_cst 78263320Sdim+ ret i32 %0 79263320Sdim+} 80263320Sdim+ 81263320Sdim+; CHECK-LABEL: test_load_max_64 82263320Sdim+; CHECK: membar 83263320Sdim+; CHECK: cmp 84263320Sdim+; CHECK: movg %xcc 85263320Sdim+; CHECK: casx [%o0] 86263320Sdim+; CHECK: membar 87263320Sdim+define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) { 88263320Sdim+entry: 89263320Sdim+ %0 = atomicrmw max i64* %p, i64 %v seq_cst 90263320Sdim+ ret i64 %0 91263320Sdim+} 92263320Sdim+ 93263320Sdim+; CHECK-LABEL: test_load_umin_32 94263320Sdim+; CHECK: membar 95263320Sdim+; CHECK: cmp 96263320Sdim+; CHECK: movleu %icc 97263320Sdim+; CHECK: cas [%o0] 98263320Sdim+; CHECK: membar 99263320Sdim+define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) { 100263320Sdim+entry: 101263320Sdim+ %0 = atomicrmw umin i32* %p, i32 %v seq_cst 102263320Sdim+ ret i32 %0 103263320Sdim+} 104263320SdimIndex: lib/Target/Sparc/SparcInstr64Bit.td 105263320Sdim=================================================================== 106263320Sdim--- lib/Target/Sparc/SparcInstr64Bit.td 107263320Sdim+++ lib/Target/Sparc/SparcInstr64Bit.td 108263320Sdim@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S 109263320Sdim 110263320Sdim } // Predicates = [Is64Bit] 111263320Sdim 112263320Sdim+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1, 113263320Sdim+ Defs = [ICC] in 114263320Sdim+multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> { 115263320Sdim+ 116263320Sdim+ def _32 : Pseudo<(outs IntRegs:$rd), 117263320Sdim+ (ins ptr_rc:$addr, IntRegs:$rs2), "", 118263320Sdim+ [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>; 119263320Sdim+ 120263320Sdim+ let Predicates = [Is64Bit] in 121263320Sdim+ def _64 : Pseudo<(outs I64Regs:$rd), 122263320Sdim+ (ins ptr_rc:$addr, I64Regs:$rs2), "", 123263320Sdim+ [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>; 124263320Sdim+} 125263320Sdim+ 126263320Sdim+defm ATOMIC_LOAD_ADD : AtomicRMW<atomic_load_add_32, atomic_load_add_64>; 127263320Sdim+defm ATOMIC_LOAD_SUB : AtomicRMW<atomic_load_sub_32, atomic_load_sub_64>; 128263320Sdim+defm ATOMIC_LOAD_AND : AtomicRMW<atomic_load_and_32, atomic_load_and_64>; 129263320Sdim+defm ATOMIC_LOAD_OR : AtomicRMW<atomic_load_or_32, atomic_load_or_64>; 130263320Sdim+defm ATOMIC_LOAD_XOR : AtomicRMW<atomic_load_xor_32, atomic_load_xor_64>; 131263320Sdim+defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>; 132263320Sdim+defm ATOMIC_LOAD_MIN : AtomicRMW<atomic_load_min_32, atomic_load_min_64>; 133263320Sdim+defm ATOMIC_LOAD_MAX : AtomicRMW<atomic_load_max_32, atomic_load_max_64>; 134263320Sdim+defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>; 135263320Sdim+defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>; 136263320Sdim+ 137263320Sdim // Global addresses, constant pool entries 138263320Sdim let Predicates = [Is64Bit] in { 139263320Sdim 140263320SdimIndex: lib/Target/Sparc/SparcISelLowering.cpp 141263320Sdim=================================================================== 142263320Sdim--- lib/Target/Sparc/SparcISelLowering.cpp 143263320Sdim+++ lib/Target/Sparc/SparcISelLowering.cpp 144263320Sdim@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons 145263320Sdim MachineBasicBlock * 146263320Sdim SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 147263320Sdim MachineBasicBlock *BB) const { 148263320Sdim- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); 149263320Sdim- unsigned BROpcode; 150263320Sdim- unsigned CC; 151263320Sdim- DebugLoc dl = MI->getDebugLoc(); 152263320Sdim- // Figure out the conditional branch opcode to use for this select_cc. 153263320Sdim switch (MI->getOpcode()) { 154263320Sdim default: llvm_unreachable("Unknown SELECT_CC!"); 155263320Sdim case SP::SELECT_CC_Int_ICC: 156263320Sdim@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M 157263320Sdim case SP::SELECT_CC_FP_ICC: 158263320Sdim case SP::SELECT_CC_DFP_ICC: 159263320Sdim case SP::SELECT_CC_QFP_ICC: 160263320Sdim- BROpcode = SP::BCOND; 161263320Sdim- break; 162263320Sdim+ return expandSelectCC(MI, BB, SP::BCOND); 163263320Sdim case SP::SELECT_CC_Int_FCC: 164263320Sdim case SP::SELECT_CC_FP_FCC: 165263320Sdim case SP::SELECT_CC_DFP_FCC: 166263320Sdim case SP::SELECT_CC_QFP_FCC: 167263320Sdim- BROpcode = SP::FBCOND; 168263320Sdim- break; 169263320Sdim+ return expandSelectCC(MI, BB, SP::FBCOND); 170263320Sdim+ 171263320Sdim+ case SP::ATOMIC_LOAD_ADD_32: 172263320Sdim+ return expandAtomicRMW(MI, BB, SP::ADDrr); 173263320Sdim+ case SP::ATOMIC_LOAD_ADD_64: 174263320Sdim+ return expandAtomicRMW(MI, BB, SP::ADDXrr); 175263320Sdim+ case SP::ATOMIC_LOAD_SUB_32: 176263320Sdim+ return expandAtomicRMW(MI, BB, SP::SUBrr); 177263320Sdim+ case SP::ATOMIC_LOAD_SUB_64: 178263320Sdim+ return expandAtomicRMW(MI, BB, SP::SUBXrr); 179263320Sdim+ case SP::ATOMIC_LOAD_AND_32: 180263320Sdim+ return expandAtomicRMW(MI, BB, SP::ANDrr); 181263320Sdim+ case SP::ATOMIC_LOAD_AND_64: 182263320Sdim+ return expandAtomicRMW(MI, BB, SP::ANDXrr); 183263320Sdim+ case SP::ATOMIC_LOAD_OR_32: 184263320Sdim+ return expandAtomicRMW(MI, BB, SP::ORrr); 185263320Sdim+ case SP::ATOMIC_LOAD_OR_64: 186263320Sdim+ return expandAtomicRMW(MI, BB, SP::ORXrr); 187263320Sdim+ case SP::ATOMIC_LOAD_XOR_32: 188263320Sdim+ return expandAtomicRMW(MI, BB, SP::XORrr); 189263320Sdim+ case SP::ATOMIC_LOAD_XOR_64: 190263320Sdim+ return expandAtomicRMW(MI, BB, SP::XORXrr); 191263320Sdim+ case SP::ATOMIC_LOAD_NAND_32: 192263320Sdim+ return expandAtomicRMW(MI, BB, SP::ANDrr); 193263320Sdim+ case SP::ATOMIC_LOAD_NAND_64: 194263320Sdim+ return expandAtomicRMW(MI, BB, SP::ANDXrr); 195263320Sdim+ 196263320Sdim+ case SP::ATOMIC_LOAD_MAX_32: 197263320Sdim+ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G); 198263320Sdim+ case SP::ATOMIC_LOAD_MAX_64: 199263320Sdim+ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G); 200263320Sdim+ case SP::ATOMIC_LOAD_MIN_32: 201263320Sdim+ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE); 202263320Sdim+ case SP::ATOMIC_LOAD_MIN_64: 203263320Sdim+ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE); 204263320Sdim+ case SP::ATOMIC_LOAD_UMAX_32: 205263320Sdim+ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU); 206263320Sdim+ case SP::ATOMIC_LOAD_UMAX_64: 207263320Sdim+ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU); 208263320Sdim+ case SP::ATOMIC_LOAD_UMIN_32: 209263320Sdim+ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU); 210263320Sdim+ case SP::ATOMIC_LOAD_UMIN_64: 211263320Sdim+ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU); 212263320Sdim } 213263320Sdim+} 214263320Sdim 215263320Sdim- CC = (SPCC::CondCodes)MI->getOperand(3).getImm(); 216263320Sdim+MachineBasicBlock* 217263320Sdim+SparcTargetLowering::expandSelectCC(MachineInstr *MI, 218263320Sdim+ MachineBasicBlock *BB, 219263320Sdim+ unsigned BROpcode) const { 220263320Sdim+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); 221263320Sdim+ DebugLoc dl = MI->getDebugLoc(); 222263320Sdim+ unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm(); 223263320Sdim 224263320Sdim // To "insert" a SELECT_CC instruction, we actually have to insert the diamond 225263320Sdim // control-flow pattern. The incoming instruction knows the destination vreg 226263320Sdim@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M 227263320Sdim return BB; 228263320Sdim } 229263320Sdim 230263320Sdim+MachineBasicBlock* 231263320Sdim+SparcTargetLowering::expandAtomicRMW(MachineInstr *MI, 232263320Sdim+ MachineBasicBlock *MBB, 233263320Sdim+ unsigned Opcode, 234263320Sdim+ unsigned CondCode) const { 235263320Sdim+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); 236263320Sdim+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 237263320Sdim+ DebugLoc DL = MI->getDebugLoc(); 238263320Sdim+ 239263320Sdim+ // MI is an atomic read-modify-write instruction of the form: 240263320Sdim+ // 241263320Sdim+ // rd = atomicrmw<op> addr, rs2 242263320Sdim+ // 243263320Sdim+ // All three operands are registers. 244263320Sdim+ unsigned DestReg = MI->getOperand(0).getReg(); 245263320Sdim+ unsigned AddrReg = MI->getOperand(1).getReg(); 246263320Sdim+ unsigned Rs2Reg = MI->getOperand(2).getReg(); 247263320Sdim+ 248263320Sdim+ // SelectionDAG has already inserted memory barriers before and after MI, so 249263320Sdim+ // we simply have to implement the operatiuon in terms of compare-and-swap. 250263320Sdim+ // 251263320Sdim+ // %val0 = load %addr 252263320Sdim+ // loop: 253263320Sdim+ // %val = phi %val0, %dest 254263320Sdim+ // %upd = op %val, %rs2 255263320Sdim+ // %dest = cas %addr, %upd, %val 256263320Sdim+ // cmp %val, %dest 257263320Sdim+ // bne loop 258263320Sdim+ // done: 259263320Sdim+ // 260263320Sdim+ bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg)); 261263320Sdim+ const TargetRegisterClass *ValueRC = 262263320Sdim+ is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass; 263263320Sdim+ unsigned Val0Reg = MRI.createVirtualRegister(ValueRC); 264263320Sdim+ 265263320Sdim+ BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg) 266263320Sdim+ .addReg(AddrReg).addImm(0); 267263320Sdim+ 268263320Sdim+ // Split the basic block MBB before MI and insert the loop block in the hole. 269263320Sdim+ MachineFunction::iterator MFI = MBB; 270263320Sdim+ const BasicBlock *LLVM_BB = MBB->getBasicBlock(); 271263320Sdim+ MachineFunction *MF = MBB->getParent(); 272263320Sdim+ MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 273263320Sdim+ MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB); 274263320Sdim+ ++MFI; 275263320Sdim+ MF->insert(MFI, LoopMBB); 276263320Sdim+ MF->insert(MFI, DoneMBB); 277263320Sdim+ 278263320Sdim+ // Move MI and following instructions to DoneMBB. 279263320Sdim+ DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end()); 280263320Sdim+ DoneMBB->transferSuccessorsAndUpdatePHIs(MBB); 281263320Sdim+ 282263320Sdim+ // Connect the CFG again. 283263320Sdim+ MBB->addSuccessor(LoopMBB); 284263320Sdim+ LoopMBB->addSuccessor(LoopMBB); 285263320Sdim+ LoopMBB->addSuccessor(DoneMBB); 286263320Sdim+ 287263320Sdim+ // Build the loop block. 288263320Sdim+ unsigned ValReg = MRI.createVirtualRegister(ValueRC); 289263320Sdim+ unsigned UpdReg = MRI.createVirtualRegister(ValueRC); 290263320Sdim+ 291263320Sdim+ BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg) 292263320Sdim+ .addReg(Val0Reg).addMBB(MBB) 293263320Sdim+ .addReg(DestReg).addMBB(LoopMBB); 294263320Sdim+ 295263320Sdim+ if (CondCode) { 296263320Sdim+ // This is one of the min/max operations. We need a CMPrr followed by a 297263320Sdim+ // MOVXCC/MOVICC. 298263320Sdim+ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg); 299263320Sdim+ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg) 300263320Sdim+ .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode); 301263320Sdim+ } else { 302263320Sdim+ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg) 303263320Sdim+ .addReg(ValReg).addReg(Rs2Reg); 304263320Sdim+ } 305263320Sdim+ 306263320Sdim+ if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 || 307263320Sdim+ MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) { 308263320Sdim+ unsigned TmpReg = UpdReg; 309263320Sdim+ UpdReg = MRI.createVirtualRegister(ValueRC); 310263320Sdim+ BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1); 311263320Sdim+ } 312263320Sdim+ 313263320Sdim+ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg) 314263320Sdim+ .addReg(AddrReg).addReg(UpdReg).addReg(ValReg) 315263320Sdim+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 316263320Sdim+ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg); 317263320Sdim+ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND)) 318263320Sdim+ .addMBB(LoopMBB).addImm(SPCC::ICC_NE); 319263320Sdim+ 320263320Sdim+ MI->eraseFromParent(); 321263320Sdim+ return DoneMBB; 322263320Sdim+} 323263320Sdim+ 324263320Sdim //===----------------------------------------------------------------------===// 325263320Sdim // Sparc Inline Assembly Support 326263320Sdim //===----------------------------------------------------------------------===// 327263320SdimIndex: lib/Target/Sparc/SparcISelLowering.h 328263320Sdim=================================================================== 329263320Sdim--- lib/Target/Sparc/SparcISelLowering.h 330263320Sdim+++ lib/Target/Sparc/SparcISelLowering.h 331263320Sdim@@ -165,6 +165,13 @@ namespace llvm { 332263320Sdim virtual void ReplaceNodeResults(SDNode *N, 333263320Sdim SmallVectorImpl<SDValue>& Results, 334263320Sdim SelectionDAG &DAG) const; 335263320Sdim+ 336263320Sdim+ MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB, 337263320Sdim+ unsigned BROpcode) const; 338263320Sdim+ MachineBasicBlock *expandAtomicRMW(MachineInstr *MI, 339263320Sdim+ MachineBasicBlock *BB, 340263320Sdim+ unsigned Opcode, 341263320Sdim+ unsigned CondCode = 0) const; 342263320Sdim }; 343263320Sdim } // end namespace llvm 344263320Sdim 345