1263320SdimPull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen):
2263320Sdim
3263320Sdim  Implement atomicrmw operations in 32 and 64 bits for SPARCv9.
4263320Sdim
5263320Sdim  These all use the compare-and-swap CASA/CASXA instructions.
6263320Sdim
7269012SemasteIntroduced here: http://svnweb.freebsd.org/changeset/base/262261
8263320Sdim
9263320SdimIndex: test/CodeGen/SPARC/atomics.ll
10263320Sdim===================================================================
11263320Sdim--- test/CodeGen/SPARC/atomics.ll
12263320Sdim+++ test/CodeGen/SPARC/atomics.ll
13263320Sdim@@ -1,4 +1,4 @@
14263320Sdim-; RUN: llc < %s -march=sparcv9 | FileCheck %s
15263320Sdim+; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
16263320Sdim 
17263320Sdim ; CHECK-LABEL: test_atomic_i32
18263320Sdim ; CHECK:       ld [%o0]
19263320Sdim@@ -61,3 +61,84 @@ entry:
20263320Sdim   %b = atomicrmw xchg i32* %ptr, i32 42 monotonic
21263320Sdim   ret i32 %b
22263320Sdim }
23263320Sdim+
24263320Sdim+; CHECK-LABEL: test_load_add_32
25263320Sdim+; CHECK: membar
26263320Sdim+; CHECK: add
27263320Sdim+; CHECK: cas [%o0]
28263320Sdim+; CHECK: membar
29263320Sdim+define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
30263320Sdim+entry:
31263320Sdim+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
32263320Sdim+  ret i32 %0
33263320Sdim+}
34263320Sdim+
35263320Sdim+; CHECK-LABEL: test_load_sub_64
36263320Sdim+; CHECK: membar
37263320Sdim+; CHECK: sub
38263320Sdim+; CHECK: casx [%o0]
39263320Sdim+; CHECK: membar
40263320Sdim+define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) {
41263320Sdim+entry:
42263320Sdim+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
43263320Sdim+  ret i64 %0
44263320Sdim+}
45263320Sdim+
46263320Sdim+; CHECK-LABEL: test_load_xor_32
47263320Sdim+; CHECK: membar
48263320Sdim+; CHECK: xor
49263320Sdim+; CHECK: cas [%o0]
50263320Sdim+; CHECK: membar
51263320Sdim+define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) {
52263320Sdim+entry:
53263320Sdim+  %0 = atomicrmw xor i32* %p, i32 %v seq_cst
54263320Sdim+  ret i32 %0
55263320Sdim+}
56263320Sdim+
57263320Sdim+; CHECK-LABEL: test_load_and_32
58263320Sdim+; CHECK: membar
59263320Sdim+; CHECK: and
60263320Sdim+; CHECK-NOT: xor
61263320Sdim+; CHECK: cas [%o0]
62263320Sdim+; CHECK: membar
63263320Sdim+define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) {
64263320Sdim+entry:
65263320Sdim+  %0 = atomicrmw and i32* %p, i32 %v seq_cst
66263320Sdim+  ret i32 %0
67263320Sdim+}
68263320Sdim+
69263320Sdim+; CHECK-LABEL: test_load_nand_32
70263320Sdim+; CHECK: membar
71263320Sdim+; CHECK: and
72263320Sdim+; CHECK: xor
73263320Sdim+; CHECK: cas [%o0]
74263320Sdim+; CHECK: membar
75263320Sdim+define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) {
76263320Sdim+entry:
77263320Sdim+  %0 = atomicrmw nand i32* %p, i32 %v seq_cst
78263320Sdim+  ret i32 %0
79263320Sdim+}
80263320Sdim+
81263320Sdim+; CHECK-LABEL: test_load_max_64
82263320Sdim+; CHECK: membar
83263320Sdim+; CHECK: cmp
84263320Sdim+; CHECK: movg %xcc
85263320Sdim+; CHECK: casx [%o0]
86263320Sdim+; CHECK: membar
87263320Sdim+define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) {
88263320Sdim+entry:
89263320Sdim+  %0 = atomicrmw max i64* %p, i64 %v seq_cst
90263320Sdim+  ret i64 %0
91263320Sdim+}
92263320Sdim+
93263320Sdim+; CHECK-LABEL: test_load_umin_32
94263320Sdim+; CHECK: membar
95263320Sdim+; CHECK: cmp
96263320Sdim+; CHECK: movleu %icc
97263320Sdim+; CHECK: cas [%o0]
98263320Sdim+; CHECK: membar
99263320Sdim+define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) {
100263320Sdim+entry:
101263320Sdim+  %0 = atomicrmw umin i32* %p, i32 %v seq_cst
102263320Sdim+  ret i32 %0
103263320Sdim+}
104263320SdimIndex: lib/Target/Sparc/SparcInstr64Bit.td
105263320Sdim===================================================================
106263320Sdim--- lib/Target/Sparc/SparcInstr64Bit.td
107263320Sdim+++ lib/Target/Sparc/SparcInstr64Bit.td
108263320Sdim@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S
109263320Sdim 
110263320Sdim } // Predicates = [Is64Bit]
111263320Sdim 
112263320Sdim+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1,
113263320Sdim+    Defs = [ICC] in
114263320Sdim+multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> {
115263320Sdim+
116263320Sdim+  def _32 : Pseudo<(outs IntRegs:$rd),
117263320Sdim+                   (ins ptr_rc:$addr, IntRegs:$rs2), "",
118263320Sdim+                   [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>;
119263320Sdim+
120263320Sdim+  let Predicates = [Is64Bit] in
121263320Sdim+  def _64 : Pseudo<(outs I64Regs:$rd),
122263320Sdim+                   (ins ptr_rc:$addr, I64Regs:$rs2), "",
123263320Sdim+                   [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>;
124263320Sdim+}
125263320Sdim+
126263320Sdim+defm ATOMIC_LOAD_ADD  : AtomicRMW<atomic_load_add_32,  atomic_load_add_64>;
127263320Sdim+defm ATOMIC_LOAD_SUB  : AtomicRMW<atomic_load_sub_32,  atomic_load_sub_64>;
128263320Sdim+defm ATOMIC_LOAD_AND  : AtomicRMW<atomic_load_and_32,  atomic_load_and_64>;
129263320Sdim+defm ATOMIC_LOAD_OR   : AtomicRMW<atomic_load_or_32,   atomic_load_or_64>;
130263320Sdim+defm ATOMIC_LOAD_XOR  : AtomicRMW<atomic_load_xor_32,  atomic_load_xor_64>;
131263320Sdim+defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>;
132263320Sdim+defm ATOMIC_LOAD_MIN  : AtomicRMW<atomic_load_min_32,  atomic_load_min_64>;
133263320Sdim+defm ATOMIC_LOAD_MAX  : AtomicRMW<atomic_load_max_32,  atomic_load_max_64>;
134263320Sdim+defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>;
135263320Sdim+defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>;
136263320Sdim+
137263320Sdim // Global addresses, constant pool entries
138263320Sdim let Predicates = [Is64Bit] in {
139263320Sdim 
140263320SdimIndex: lib/Target/Sparc/SparcISelLowering.cpp
141263320Sdim===================================================================
142263320Sdim--- lib/Target/Sparc/SparcISelLowering.cpp
143263320Sdim+++ lib/Target/Sparc/SparcISelLowering.cpp
144263320Sdim@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons
145263320Sdim MachineBasicBlock *
146263320Sdim SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
147263320Sdim                                                  MachineBasicBlock *BB) const {
148263320Sdim-  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
149263320Sdim-  unsigned BROpcode;
150263320Sdim-  unsigned CC;
151263320Sdim-  DebugLoc dl = MI->getDebugLoc();
152263320Sdim-  // Figure out the conditional branch opcode to use for this select_cc.
153263320Sdim   switch (MI->getOpcode()) {
154263320Sdim   default: llvm_unreachable("Unknown SELECT_CC!");
155263320Sdim   case SP::SELECT_CC_Int_ICC:
156263320Sdim@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
157263320Sdim   case SP::SELECT_CC_FP_ICC:
158263320Sdim   case SP::SELECT_CC_DFP_ICC:
159263320Sdim   case SP::SELECT_CC_QFP_ICC:
160263320Sdim-    BROpcode = SP::BCOND;
161263320Sdim-    break;
162263320Sdim+    return expandSelectCC(MI, BB, SP::BCOND);
163263320Sdim   case SP::SELECT_CC_Int_FCC:
164263320Sdim   case SP::SELECT_CC_FP_FCC:
165263320Sdim   case SP::SELECT_CC_DFP_FCC:
166263320Sdim   case SP::SELECT_CC_QFP_FCC:
167263320Sdim-    BROpcode = SP::FBCOND;
168263320Sdim-    break;
169263320Sdim+    return expandSelectCC(MI, BB, SP::FBCOND);
170263320Sdim+
171263320Sdim+  case SP::ATOMIC_LOAD_ADD_32:
172263320Sdim+    return expandAtomicRMW(MI, BB, SP::ADDrr);
173263320Sdim+  case SP::ATOMIC_LOAD_ADD_64:
174263320Sdim+    return expandAtomicRMW(MI, BB, SP::ADDXrr);
175263320Sdim+  case SP::ATOMIC_LOAD_SUB_32:
176263320Sdim+    return expandAtomicRMW(MI, BB, SP::SUBrr);
177263320Sdim+  case SP::ATOMIC_LOAD_SUB_64:
178263320Sdim+    return expandAtomicRMW(MI, BB, SP::SUBXrr);
179263320Sdim+  case SP::ATOMIC_LOAD_AND_32:
180263320Sdim+    return expandAtomicRMW(MI, BB, SP::ANDrr);
181263320Sdim+  case SP::ATOMIC_LOAD_AND_64:
182263320Sdim+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
183263320Sdim+  case SP::ATOMIC_LOAD_OR_32:
184263320Sdim+    return expandAtomicRMW(MI, BB, SP::ORrr);
185263320Sdim+  case SP::ATOMIC_LOAD_OR_64:
186263320Sdim+    return expandAtomicRMW(MI, BB, SP::ORXrr);
187263320Sdim+  case SP::ATOMIC_LOAD_XOR_32:
188263320Sdim+    return expandAtomicRMW(MI, BB, SP::XORrr);
189263320Sdim+  case SP::ATOMIC_LOAD_XOR_64:
190263320Sdim+    return expandAtomicRMW(MI, BB, SP::XORXrr);
191263320Sdim+  case SP::ATOMIC_LOAD_NAND_32:
192263320Sdim+    return expandAtomicRMW(MI, BB, SP::ANDrr);
193263320Sdim+  case SP::ATOMIC_LOAD_NAND_64:
194263320Sdim+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
195263320Sdim+
196263320Sdim+  case SP::ATOMIC_LOAD_MAX_32:
197263320Sdim+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G);
198263320Sdim+  case SP::ATOMIC_LOAD_MAX_64:
199263320Sdim+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G);
200263320Sdim+  case SP::ATOMIC_LOAD_MIN_32:
201263320Sdim+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE);
202263320Sdim+  case SP::ATOMIC_LOAD_MIN_64:
203263320Sdim+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE);
204263320Sdim+  case SP::ATOMIC_LOAD_UMAX_32:
205263320Sdim+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU);
206263320Sdim+  case SP::ATOMIC_LOAD_UMAX_64:
207263320Sdim+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
208263320Sdim+  case SP::ATOMIC_LOAD_UMIN_32:
209263320Sdim+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
210263320Sdim+  case SP::ATOMIC_LOAD_UMIN_64:
211263320Sdim+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
212263320Sdim   }
213263320Sdim+}
214263320Sdim 
215263320Sdim-  CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
216263320Sdim+MachineBasicBlock*
217263320Sdim+SparcTargetLowering::expandSelectCC(MachineInstr *MI,
218263320Sdim+                                    MachineBasicBlock *BB,
219263320Sdim+                                    unsigned BROpcode) const {
220263320Sdim+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
221263320Sdim+  DebugLoc dl = MI->getDebugLoc();
222263320Sdim+  unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
223263320Sdim 
224263320Sdim   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
225263320Sdim   // control-flow pattern.  The incoming instruction knows the destination vreg
226263320Sdim@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
227263320Sdim   return BB;
228263320Sdim }
229263320Sdim 
230263320Sdim+MachineBasicBlock*
231263320Sdim+SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
232263320Sdim+                                     MachineBasicBlock *MBB,
233263320Sdim+                                     unsigned Opcode,
234263320Sdim+                                     unsigned CondCode) const {
235263320Sdim+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
236263320Sdim+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
237263320Sdim+  DebugLoc DL = MI->getDebugLoc();
238263320Sdim+
239263320Sdim+  // MI is an atomic read-modify-write instruction of the form:
240263320Sdim+  //
241263320Sdim+  //   rd = atomicrmw<op> addr, rs2
242263320Sdim+  //
243263320Sdim+  // All three operands are registers.
244263320Sdim+  unsigned DestReg = MI->getOperand(0).getReg();
245263320Sdim+  unsigned AddrReg = MI->getOperand(1).getReg();
246263320Sdim+  unsigned Rs2Reg  = MI->getOperand(2).getReg();
247263320Sdim+
248263320Sdim+  // SelectionDAG has already inserted memory barriers before and after MI, so
249263320Sdim+  // we simply have to implement the operation in terms of compare-and-swap.
250263320Sdim+  //
251263320Sdim+  //   %val0 = load %addr
252263320Sdim+  // loop:
253263320Sdim+  //   %val = phi %val0, %dest
254263320Sdim+  //   %upd = op %val, %rs2
255263320Sdim+  //   %dest = cas %addr, %upd, %val
256263320Sdim+  //   cmp %val, %dest
257263320Sdim+  //   bne loop
258263320Sdim+  // done:
259263320Sdim+  //
260263320Sdim+  bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
261263320Sdim+  const TargetRegisterClass *ValueRC =
262263320Sdim+    is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
263263320Sdim+  unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
264263320Sdim+
265263320Sdim+  BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
266263320Sdim+    .addReg(AddrReg).addImm(0);
267263320Sdim+
268263320Sdim+  // Split the basic block MBB before MI and insert the loop block in the hole.
269263320Sdim+  MachineFunction::iterator MFI = MBB;
270263320Sdim+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
271263320Sdim+  MachineFunction *MF = MBB->getParent();
272263320Sdim+  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
273263320Sdim+  MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
274263320Sdim+  ++MFI;
275263320Sdim+  MF->insert(MFI, LoopMBB);
276263320Sdim+  MF->insert(MFI, DoneMBB);
277263320Sdim+
278263320Sdim+  // Move MI and following instructions to DoneMBB.
279263320Sdim+  DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
280263320Sdim+  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
281263320Sdim+
282263320Sdim+  // Connect the CFG again.
283263320Sdim+  MBB->addSuccessor(LoopMBB);
284263320Sdim+  LoopMBB->addSuccessor(LoopMBB);
285263320Sdim+  LoopMBB->addSuccessor(DoneMBB);
286263320Sdim+
287263320Sdim+  // Build the loop block.
288263320Sdim+  unsigned ValReg = MRI.createVirtualRegister(ValueRC);
289263320Sdim+  unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
290263320Sdim+
291263320Sdim+  BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
292263320Sdim+    .addReg(Val0Reg).addMBB(MBB)
293263320Sdim+    .addReg(DestReg).addMBB(LoopMBB);
294263320Sdim+
295263320Sdim+  if (CondCode) {
296263320Sdim+    // This is one of the min/max operations. We need a CMPrr followed by a
297263320Sdim+    // MOVXCC/MOVICC.
298263320Sdim+    BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
299263320Sdim+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
300263320Sdim+      .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
301263320Sdim+  } else {
302263320Sdim+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
303263320Sdim+      .addReg(ValReg).addReg(Rs2Reg);
304263320Sdim+  }
305263320Sdim+
306263320Sdim+  if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
307263320Sdim+      MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
308263320Sdim+    unsigned TmpReg = UpdReg;
309263320Sdim+    UpdReg = MRI.createVirtualRegister(ValueRC);
310263320Sdim+    BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
311263320Sdim+  }
312263320Sdim+
313263320Sdim+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
314263320Sdim+    .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
315263320Sdim+    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
316263320Sdim+  BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
317263320Sdim+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
318263320Sdim+    .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
319263320Sdim+
320263320Sdim+  MI->eraseFromParent();
321263320Sdim+  return DoneMBB;
322263320Sdim+}
323263320Sdim+
324263320Sdim //===----------------------------------------------------------------------===//
325263320Sdim //                         Sparc Inline Assembly Support
326263320Sdim //===----------------------------------------------------------------------===//
327263320SdimIndex: lib/Target/Sparc/SparcISelLowering.h
328263320Sdim===================================================================
329263320Sdim--- lib/Target/Sparc/SparcISelLowering.h
330263320Sdim+++ lib/Target/Sparc/SparcISelLowering.h
331263320Sdim@@ -165,6 +165,13 @@ namespace llvm {
332263320Sdim     virtual void ReplaceNodeResults(SDNode *N,
333263320Sdim                                     SmallVectorImpl<SDValue>& Results,
334263320Sdim                                     SelectionDAG &DAG) const;
335263320Sdim+
336263320Sdim+    MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
337263320Sdim+                                      unsigned BROpcode) const;
338263320Sdim+    MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
339263320Sdim+                                       MachineBasicBlock *BB,
340263320Sdim+                                       unsigned Opcode,
341263320Sdim+                                       unsigned CondCode = 0) const;
342263320Sdim   };
343263320Sdim } // end namespace llvm
344263320Sdim 
345