Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen):

  Implement atomicrmw operations in 32 and 64 bits for SPARCv9.

  These all use the compare-and-swap CASA/CASXA instructions.

Introduced here: http://svnweb.freebsd.org/changeset/base/262261

Index: test/CodeGen/SPARC/atomics.ll
===================================================================
--- test/CodeGen/SPARC/atomics.ll
+++ test/CodeGen/SPARC/atomics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
 
 ; CHECK-LABEL: test_atomic_i32
 ; CHECK: ld [%o0]
@@ -61,3 +61,84 @@ entry:
   %b = atomicrmw xchg i32* %ptr, i32 42 monotonic
   ret i32 %b
 }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: membar
+; CHECK: add
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: membar
+; CHECK: sub
+; CHECK: casx [%o0]
+; CHECK: membar
+define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_load_xor_32
+; CHECK: membar
+; CHECK: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw xor i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_and_32
+; CHECK: membar
+; CHECK: and
+; CHECK-NOT: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw and i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_nand_32
+; CHECK: membar
+; CHECK: and
+; CHECK: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw nand i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_max_64
+; CHECK: membar
+; CHECK: cmp
+; CHECK: movg %xcc
+; CHECK: casx [%o0]
+; CHECK: membar
+define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) {
+entry:
+  %0 = atomicrmw max i64* %p, i64 %v seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_load_umin_32
+; CHECK: membar
+; CHECK: cmp
+; CHECK: movleu %icc
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw umin i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
Index: lib/Target/Sparc/SparcInstr64Bit.td
===================================================================
--- lib/Target/Sparc/SparcInstr64Bit.td
+++ lib/Target/Sparc/SparcInstr64Bit.td
@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S
 
 } // Predicates = [Is64Bit]
 
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1,
+    Defs = [ICC] in
+multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> {
+
+  def _32 : Pseudo<(outs IntRegs:$rd),
+                   (ins ptr_rc:$addr, IntRegs:$rs2), "",
+                   [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>;
+
+  let Predicates = [Is64Bit] in
+  def _64 : Pseudo<(outs I64Regs:$rd),
+                   (ins ptr_rc:$addr, I64Regs:$rs2), "",
+                   [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>;
+}
+
+defm ATOMIC_LOAD_ADD  : AtomicRMW<atomic_load_add_32,  atomic_load_add_64>;
+defm ATOMIC_LOAD_SUB  : AtomicRMW<atomic_load_sub_32,  atomic_load_sub_64>;
+defm ATOMIC_LOAD_AND  : AtomicRMW<atomic_load_and_32,  atomic_load_and_64>;
+defm ATOMIC_LOAD_OR   : AtomicRMW<atomic_load_or_32,   atomic_load_or_64>;
+defm ATOMIC_LOAD_XOR  : AtomicRMW<atomic_load_xor_32,  atomic_load_xor_64>;
+defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>;
+defm ATOMIC_LOAD_MIN  : AtomicRMW<atomic_load_min_32,  atomic_load_min_64>;
+defm ATOMIC_LOAD_MAX  : AtomicRMW<atomic_load_max_32,  atomic_load_max_64>;
+defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>;
+defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>;
+
 // Global addresses, constant pool entries
 let Predicates = [Is64Bit] in {
 
Index: lib/Target/Sparc/SparcISelLowering.cpp
===================================================================
--- lib/Target/Sparc/SparcISelLowering.cpp
+++ lib/Target/Sparc/SparcISelLowering.cpp
@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons
 MachineBasicBlock *
 SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *BB) const {
-  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
-  unsigned BROpcode;
-  unsigned CC;
-  DebugLoc dl = MI->getDebugLoc();
-  // Figure out the conditional branch opcode to use for this select_cc.
   switch (MI->getOpcode()) {
   default: llvm_unreachable("Unknown SELECT_CC!");
   case SP::SELECT_CC_Int_ICC:
@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
   case SP::SELECT_CC_FP_ICC:
   case SP::SELECT_CC_DFP_ICC:
   case SP::SELECT_CC_QFP_ICC:
-    BROpcode = SP::BCOND;
-    break;
+    return expandSelectCC(MI, BB, SP::BCOND);
   case SP::SELECT_CC_Int_FCC:
   case SP::SELECT_CC_FP_FCC:
   case SP::SELECT_CC_DFP_FCC:
   case SP::SELECT_CC_QFP_FCC:
-    BROpcode = SP::FBCOND;
-    break;
+    return expandSelectCC(MI, BB, SP::FBCOND);
+
+  case SP::ATOMIC_LOAD_ADD_32:
+    return expandAtomicRMW(MI, BB, SP::ADDrr);
+  case SP::ATOMIC_LOAD_ADD_64:
+    return expandAtomicRMW(MI, BB, SP::ADDXrr);
+  case SP::ATOMIC_LOAD_SUB_32:
+    return expandAtomicRMW(MI, BB, SP::SUBrr);
+  case SP::ATOMIC_LOAD_SUB_64:
+    return expandAtomicRMW(MI, BB, SP::SUBXrr);
+  case SP::ATOMIC_LOAD_AND_32:
+    return expandAtomicRMW(MI, BB, SP::ANDrr);
+  case SP::ATOMIC_LOAD_AND_64:
+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
+  case SP::ATOMIC_LOAD_OR_32:
+    return expandAtomicRMW(MI, BB, SP::ORrr);
+  case SP::ATOMIC_LOAD_OR_64:
+    return expandAtomicRMW(MI, BB, SP::ORXrr);
+  case SP::ATOMIC_LOAD_XOR_32:
+    return expandAtomicRMW(MI, BB, SP::XORrr);
+  case SP::ATOMIC_LOAD_XOR_64:
+    return expandAtomicRMW(MI, BB, SP::XORXrr);
+  case SP::ATOMIC_LOAD_NAND_32:
+    return expandAtomicRMW(MI, BB, SP::ANDrr);
+  case SP::ATOMIC_LOAD_NAND_64:
+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
+
+  case SP::ATOMIC_LOAD_MAX_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G);
+  case SP::ATOMIC_LOAD_MAX_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G);
+  case SP::ATOMIC_LOAD_MIN_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE);
+  case SP::ATOMIC_LOAD_MIN_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE);
+  case SP::ATOMIC_LOAD_UMAX_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU);
+  case SP::ATOMIC_LOAD_UMAX_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
+  case SP::ATOMIC_LOAD_UMIN_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
+  case SP::ATOMIC_LOAD_UMIN_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
   }
+}
 
-  CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+MachineBasicBlock*
+SparcTargetLowering::expandSelectCC(MachineInstr *MI,
+                                    MachineBasicBlock *BB,
+                                    unsigned BROpcode) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
 
   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
   // control-flow pattern.  The incoming instruction knows the destination vreg
@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
   return BB;
 }
 
+MachineBasicBlock*
+SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
+                                     MachineBasicBlock *MBB,
+                                     unsigned Opcode,
+                                     unsigned CondCode) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // MI is an atomic read-modify-write instruction of the form:
+  //
+  //   rd = atomicrmw<op> addr, rs2
+  //
+  // All three operands are registers.
+  unsigned DestReg = MI->getOperand(0).getReg();
+  unsigned AddrReg = MI->getOperand(1).getReg();
+  unsigned Rs2Reg = MI->getOperand(2).getReg();
+
+  // SelectionDAG has already inserted memory barriers before and after MI, so
+  // we simply have to implement the operatiuon in terms of compare-and-swap.
+  //
+  //   %val0 = load %addr
+  // loop:
+  //   %val = phi %val0, %dest
+  //   %upd = op %val, %rs2
+  //   %dest = cas %addr, %upd, %val
+  //   cmp %val, %dest
+  //   bne loop
+  // done:
+  //
+  bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
+  const TargetRegisterClass *ValueRC =
+    is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+  unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
+
+  BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
+    .addReg(AddrReg).addImm(0);
+
+  // Split the basic block MBB before MI and insert the loop block in the hole.
+  MachineFunction::iterator MFI = MBB;
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction *MF = MBB->getParent();
+  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  ++MFI;
+  MF->insert(MFI, LoopMBB);
+  MF->insert(MFI, DoneMBB);
+
+  // Move MI and following instructions to DoneMBB.
+  DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
+  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Connect the CFG again.
+  MBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+
+  // Build the loop block.
+  unsigned ValReg = MRI.createVirtualRegister(ValueRC);
+  unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
+
+  BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
+    .addReg(Val0Reg).addMBB(MBB)
+    .addReg(DestReg).addMBB(LoopMBB);
+
+  if (CondCode) {
+    // This is one of the min/max operations. We need a CMPrr followed by a
+    // MOVXCC/MOVICC.
+    BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+      .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
+  } else {
+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+      .addReg(ValReg).addReg(Rs2Reg);
+  }
+
+  if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
+      MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
+    unsigned TmpReg = UpdReg;
+    UpdReg = MRI.createVirtualRegister(ValueRC);
+    BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
+  }
+
+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
+    .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
+    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
+    .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
+
+  MI->eraseFromParent();
+  return DoneMBB;
+}
+
 //===----------------------------------------------------------------------===//
 // Sparc Inline Assembly Support
 //===----------------------------------------------------------------------===//
Index: lib/Target/Sparc/SparcISelLowering.h
===================================================================
--- lib/Target/Sparc/SparcISelLowering.h
+++ lib/Target/Sparc/SparcISelLowering.h
@@ -165,6 +165,13 @@ namespace llvm {
     virtual void ReplaceNodeResults(SDNode *N,
                                     SmallVectorImpl<SDValue>& Results,
                                     SelectionDAG &DAG) const;
+
+    MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
+                                      unsigned BROpcode) const;
+    MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
+                                       MachineBasicBlock *BB,
+                                       unsigned Opcode,
+                                       unsigned CondCode = 0) const;
   };
 } // end namespace llvm
 