1//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements IR expansion for reduction intrinsics, allowing targets
10// to enable the experimental intrinsics until just before codegen.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/ExpandReductions.h"
15#include "llvm/Analysis/TargetTransformInfo.h"
16#include "llvm/CodeGen/Passes.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/InstIterator.h"
20#include "llvm/IR/IntrinsicInst.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/Module.h"
23#include "llvm/InitializePasses.h"
24#include "llvm/Pass.h"
25#include "llvm/Transforms/Utils/LoopUtils.h"
26
27using namespace llvm;
28
29namespace {
30
31unsigned getOpcode(Intrinsic::ID ID) {
32  switch (ID) {
33  case Intrinsic::experimental_vector_reduce_v2_fadd:
34    return Instruction::FAdd;
35  case Intrinsic::experimental_vector_reduce_v2_fmul:
36    return Instruction::FMul;
37  case Intrinsic::experimental_vector_reduce_add:
38    return Instruction::Add;
39  case Intrinsic::experimental_vector_reduce_mul:
40    return Instruction::Mul;
41  case Intrinsic::experimental_vector_reduce_and:
42    return Instruction::And;
43  case Intrinsic::experimental_vector_reduce_or:
44    return Instruction::Or;
45  case Intrinsic::experimental_vector_reduce_xor:
46    return Instruction::Xor;
47  case Intrinsic::experimental_vector_reduce_smax:
48  case Intrinsic::experimental_vector_reduce_smin:
49  case Intrinsic::experimental_vector_reduce_umax:
50  case Intrinsic::experimental_vector_reduce_umin:
51    return Instruction::ICmp;
52  case Intrinsic::experimental_vector_reduce_fmax:
53  case Intrinsic::experimental_vector_reduce_fmin:
54    return Instruction::FCmp;
55  default:
56    llvm_unreachable("Unexpected ID");
57  }
58}
59
60RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
61  switch (ID) {
62  case Intrinsic::experimental_vector_reduce_smax:
63    return RecurrenceDescriptor::MRK_SIntMax;
64  case Intrinsic::experimental_vector_reduce_smin:
65    return RecurrenceDescriptor::MRK_SIntMin;
66  case Intrinsic::experimental_vector_reduce_umax:
67    return RecurrenceDescriptor::MRK_UIntMax;
68  case Intrinsic::experimental_vector_reduce_umin:
69    return RecurrenceDescriptor::MRK_UIntMin;
70  case Intrinsic::experimental_vector_reduce_fmax:
71    return RecurrenceDescriptor::MRK_FloatMax;
72  case Intrinsic::experimental_vector_reduce_fmin:
73    return RecurrenceDescriptor::MRK_FloatMin;
74  default:
75    return RecurrenceDescriptor::MRK_Invalid;
76  }
77}
78
79bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
80  bool Changed = false;
81  SmallVector<IntrinsicInst *, 4> Worklist;
82  for (auto &I : instructions(F)) {
83    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
84      switch (II->getIntrinsicID()) {
85      default: break;
86      case Intrinsic::experimental_vector_reduce_v2_fadd:
87      case Intrinsic::experimental_vector_reduce_v2_fmul:
88      case Intrinsic::experimental_vector_reduce_add:
89      case Intrinsic::experimental_vector_reduce_mul:
90      case Intrinsic::experimental_vector_reduce_and:
91      case Intrinsic::experimental_vector_reduce_or:
92      case Intrinsic::experimental_vector_reduce_xor:
93      case Intrinsic::experimental_vector_reduce_smax:
94      case Intrinsic::experimental_vector_reduce_smin:
95      case Intrinsic::experimental_vector_reduce_umax:
96      case Intrinsic::experimental_vector_reduce_umin:
97      case Intrinsic::experimental_vector_reduce_fmax:
98      case Intrinsic::experimental_vector_reduce_fmin:
99        if (TTI->shouldExpandReduction(II))
100          Worklist.push_back(II);
101
102        break;
103      }
104    }
105  }
106
107  for (auto *II : Worklist) {
108    FastMathFlags FMF =
109        isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
110    Intrinsic::ID ID = II->getIntrinsicID();
111    RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
112
113    Value *Rdx = nullptr;
114    IRBuilder<> Builder(II);
115    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
116    Builder.setFastMathFlags(FMF);
117    switch (ID) {
118    default: llvm_unreachable("Unexpected intrinsic!");
119    case Intrinsic::experimental_vector_reduce_v2_fadd:
120    case Intrinsic::experimental_vector_reduce_v2_fmul: {
121      // FMFs must be attached to the call, otherwise it's an ordered reduction
122      // and it can't be handled by generating a shuffle sequence.
123      Value *Acc = II->getArgOperand(0);
124      Value *Vec = II->getArgOperand(1);
125      if (!FMF.allowReassoc())
126        Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
127      else {
128        if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
129          continue;
130
131        Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
132        Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
133                                  Acc, Rdx, "bin.rdx");
134      }
135      break;
136    }
137    case Intrinsic::experimental_vector_reduce_add:
138    case Intrinsic::experimental_vector_reduce_mul:
139    case Intrinsic::experimental_vector_reduce_and:
140    case Intrinsic::experimental_vector_reduce_or:
141    case Intrinsic::experimental_vector_reduce_xor:
142    case Intrinsic::experimental_vector_reduce_smax:
143    case Intrinsic::experimental_vector_reduce_smin:
144    case Intrinsic::experimental_vector_reduce_umax:
145    case Intrinsic::experimental_vector_reduce_umin:
146    case Intrinsic::experimental_vector_reduce_fmax:
147    case Intrinsic::experimental_vector_reduce_fmin: {
148      Value *Vec = II->getArgOperand(0);
149      if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
150        continue;
151
152      Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
153      break;
154    }
155    }
156    II->replaceAllUsesWith(Rdx);
157    II->eraseFromParent();
158    Changed = true;
159  }
160  return Changed;
161}
162
163class ExpandReductions : public FunctionPass {
164public:
165  static char ID;
166  ExpandReductions() : FunctionPass(ID) {
167    initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
168  }
169
170  bool runOnFunction(Function &F) override {
171    const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
172    return expandReductions(F, TTI);
173  }
174
175  void getAnalysisUsage(AnalysisUsage &AU) const override {
176    AU.addRequired<TargetTransformInfoWrapperPass>();
177    AU.setPreservesCFG();
178  }
179};
180}
181
182char ExpandReductions::ID;
183INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
184                      "Expand reduction intrinsics", false, false)
185INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
186INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
187                    "Expand reduction intrinsics", false, false)
188
189FunctionPass *llvm::createExpandReductionsPass() {
190  return new ExpandReductions();
191}
192
193PreservedAnalyses ExpandReductionsPass::run(Function &F,
194                                            FunctionAnalysisManager &AM) {
195  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
196  if (!expandReductions(F, &TTI))
197    return PreservedAnalyses::all();
198  PreservedAnalyses PA;
199  PA.preserveSet<CFGAnalyses>();
200  return PA;
201}
202