1//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7/// \file 8//===----------------------------------------------------------------------===// 9 10#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13#include "llvm/IR/PassManager.h" 14#include "llvm/Pass.h" 15#include "llvm/Support/AMDGPUAddrSpace.h" 16#include "llvm/Support/CodeGen.h" 17 18namespace llvm { 19 20class AMDGPUTargetMachine; 21class TargetMachine; 22 23// GlobalISel passes 24void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 25FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 26void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 27FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 28FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 29void initializeAMDGPURegBankCombinerPass(PassRegistry &); 30 31void initializeAMDGPURegBankSelectPass(PassRegistry &); 32 33// SI Passes 34FunctionPass *createGCNDPPCombinePass(); 35FunctionPass *createSIAnnotateControlFlowPass(); 36FunctionPass *createSIFoldOperandsPass(); 37FunctionPass *createSIPeepholeSDWAPass(); 38FunctionPass *createSILowerI1CopiesPass(); 39FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); 40FunctionPass *createSIShrinkInstructionsPass(); 41FunctionPass *createSILoadStoreOptimizerPass(); 42FunctionPass *createSIWholeQuadModePass(); 43FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 44FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 45FunctionPass *createSIOptimizeVGPRLiveRangePass(); 46FunctionPass *createSIFixSGPRCopiesPass(); 47FunctionPass *createLowerWWMCopiesPass(); 48FunctionPass *createSIMemoryLegalizerPass(); 49FunctionPass *createSIInsertWaitcntsPass(); 50FunctionPass *createSIPreAllocateWWMRegsPass(); 51FunctionPass *createSIFormMemoryClausesPass(); 52 53FunctionPass *createSIPostRABundlerPass(); 54FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); 55ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); 56FunctionPass *createAMDGPUCodeGenPreparePass(); 57FunctionPass *createAMDGPULateCodeGenPreparePass(); 58FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 59FunctionPass *createAMDGPURewriteOutArgumentsPass(); 60ModulePass * 61createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr); 62FunctionPass *createSIModeRegisterPass(); 63FunctionPass *createGCNPreRAOptimizationsPass(); 64 65struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 66 AMDGPUSimplifyLibCallsPass() {} 67 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 68}; 69 70struct AMDGPUImageIntrinsicOptimizerPass 71 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> { 72 AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {} 73 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 74 75private: 76 TargetMachine &TM; 77}; 78 79struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 80 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 81}; 82 83void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 84 85void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 86extern char &AMDGPUMachineCFGStructurizerID; 87 88void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 89 90Pass *createAMDGPUAnnotateKernelFeaturesPass(); 91Pass *createAMDGPUAttributorLegacyPass(); 92void initializeAMDGPUAttributorLegacyPass(PassRegistry &); 93void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 94extern char &AMDGPUAnnotateKernelFeaturesID; 95 96// DPP/Iterative option enables the atomic optimizer with given strategy 97// whereas None disables the atomic optimizer. 98enum class ScanOptions { DPP, Iterative, None }; 99FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy); 100void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 101extern char &AMDGPUAtomicOptimizerID; 102 103ModulePass *createAMDGPUCtorDtorLoweringLegacyPass(); 104void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &); 105extern char &AMDGPUCtorDtorLoweringLegacyPassID; 106 107FunctionPass *createAMDGPULowerKernelArgumentsPass(); 108void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 109extern char &AMDGPULowerKernelArgumentsID; 110 111FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 112void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 113extern char &AMDGPUPromoteKernelArgumentsID; 114 115struct AMDGPUPromoteKernelArgumentsPass 116 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 117 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 118}; 119 120ModulePass *createAMDGPULowerKernelAttributesPass(); 121void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 122extern char &AMDGPULowerKernelAttributesID; 123 124struct AMDGPULowerKernelAttributesPass 125 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 126 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 127}; 128 129void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &); 130extern char &AMDGPULowerModuleLDSLegacyPassID; 131 132struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 133 const AMDGPUTargetMachine &TM; 134 AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {} 135 136 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 137}; 138 139void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 140extern char &AMDGPURewriteOutArgumentsID; 141 142void initializeGCNDPPCombinePass(PassRegistry &); 143extern char &GCNDPPCombineID; 144 145void initializeSIFoldOperandsPass(PassRegistry &); 146extern char &SIFoldOperandsID; 147 148void initializeSIPeepholeSDWAPass(PassRegistry &); 149extern char &SIPeepholeSDWAID; 150 151void initializeSIShrinkInstructionsPass(PassRegistry&); 152extern char &SIShrinkInstructionsID; 153 154void initializeSIFixSGPRCopiesPass(PassRegistry &); 155extern char &SIFixSGPRCopiesID; 156 157void initializeSIFixVGPRCopiesPass(PassRegistry &); 158extern char &SIFixVGPRCopiesID; 159 160void initializeSILowerWWMCopiesPass(PassRegistry &); 161extern char &SILowerWWMCopiesID; 162 163void initializeSILowerI1CopiesPass(PassRegistry &); 164extern char &SILowerI1CopiesID; 165 166void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); 167extern char &AMDGPUGlobalISelDivergenceLoweringID; 168 169void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &); 170extern char &AMDGPUMarkLastScratchLoadID; 171 172void initializeSILowerSGPRSpillsPass(PassRegistry &); 173extern char &SILowerSGPRSpillsID; 174 175void initializeSILoadStoreOptimizerPass(PassRegistry &); 176extern char &SILoadStoreOptimizerID; 177 178void initializeSIWholeQuadModePass(PassRegistry &); 179extern char &SIWholeQuadModeID; 180 181void initializeSILowerControlFlowPass(PassRegistry &); 182extern char &SILowerControlFlowID; 183 184void initializeSIPreEmitPeepholePass(PassRegistry &); 185extern char &SIPreEmitPeepholeID; 186 187void initializeSILateBranchLoweringPass(PassRegistry &); 188extern char &SILateBranchLoweringPassID; 189 190void initializeSIOptimizeExecMaskingPass(PassRegistry &); 191extern char &SIOptimizeExecMaskingID; 192 193void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 194extern char &SIPreAllocateWWMRegsID; 195 196void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); 197extern char &AMDGPUImageIntrinsicOptimizerID; 198 199void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 200extern char &AMDGPUPerfHintAnalysisID; 201 202void initializeGCNRegPressurePrinterPass(PassRegistry &); 203extern char &GCNRegPressurePrinterID; 204 205// Passes common to R600 and SI 206FunctionPass *createAMDGPUPromoteAlloca(); 207void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 208extern char &AMDGPUPromoteAllocaID; 209 210FunctionPass *createAMDGPUPromoteAllocaToVector(); 211void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 212extern char &AMDGPUPromoteAllocaToVectorID; 213 214struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 215 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 216 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 217 218private: 219 TargetMachine &TM; 220}; 221 222struct AMDGPUPromoteAllocaToVectorPass 223 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 224 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 225 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 226 227private: 228 TargetMachine &TM; 229}; 230 231struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> { 232 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl) 233 : TM(TM), ScanImpl(ScanImpl) {} 234 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 235 236private: 237 TargetMachine &TM; 238 ScanOptions ScanImpl; 239}; 240 241Pass *createAMDGPUStructurizeCFGPass(); 242FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel); 243ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 244 245struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 246 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 247 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 248 249private: 250 bool GlobalOpt; 251}; 252 253class AMDGPUCodeGenPreparePass 254 : public PassInfoMixin<AMDGPUCodeGenPreparePass> { 255private: 256 TargetMachine &TM; 257 258public: 259 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){}; 260 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 261}; 262 263class AMDGPULowerKernelArgumentsPass 264 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> { 265private: 266 TargetMachine &TM; 267 268public: 269 AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){}; 270 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 271}; 272 273class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> { 274private: 275 TargetMachine &TM; 276 277public: 278 AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){}; 279 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 280}; 281 282FunctionPass *createAMDGPUAnnotateUniformValues(); 283 284ModulePass *createAMDGPUPrintfRuntimeBinding(); 285void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 286extern char &AMDGPUPrintfRuntimeBindingID; 287 288void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 289extern char &AMDGPUResourceUsageAnalysisID; 290 291struct AMDGPUPrintfRuntimeBindingPass 292 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 293 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 294}; 295 296ModulePass* createAMDGPUUnifyMetadataPass(); 297void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 298extern char &AMDGPUUnifyMetadataID; 299 300struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 301 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 302}; 303 304void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 305extern char &SIOptimizeExecMaskingPreRAID; 306 307void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 308extern char &SIOptimizeVGPRLiveRangeID; 309 310void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 311extern char &AMDGPUAnnotateUniformValuesPassID; 312 313void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 314extern char &AMDGPUCodeGenPrepareID; 315 316void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); 317extern char &AMDGPURemoveIncompatibleFunctionsID; 318 319void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 320extern char &AMDGPULateCodeGenPrepareID; 321 322FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); 323void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); 324extern char &AMDGPURewriteUndefForPHILegacyPassID; 325 326class AMDGPURewriteUndefForPHIPass 327 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> { 328public: 329 AMDGPURewriteUndefForPHIPass() = default; 330 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 331}; 332 333void initializeSIAnnotateControlFlowPass(PassRegistry&); 334extern char &SIAnnotateControlFlowPassID; 335 336void initializeSIMemoryLegalizerPass(PassRegistry&); 337extern char &SIMemoryLegalizerID; 338 339void initializeSIModeRegisterPass(PassRegistry&); 340extern char &SIModeRegisterID; 341 342void initializeAMDGPUInsertDelayAluPass(PassRegistry &); 343extern char &AMDGPUInsertDelayAluID; 344 345void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &); 346extern char &AMDGPUInsertSingleUseVDSTID; 347 348void initializeSIInsertHardClausesPass(PassRegistry &); 349extern char &SIInsertHardClausesID; 350 351void initializeSIInsertWaitcntsPass(PassRegistry&); 352extern char &SIInsertWaitcntsID; 353 354void initializeSIFormMemoryClausesPass(PassRegistry&); 355extern char &SIFormMemoryClausesID; 356 357void initializeSIPostRABundlerPass(PassRegistry&); 358extern char &SIPostRABundlerID; 359 360void initializeGCNCreateVOPDPass(PassRegistry &); 361extern char &GCNCreateVOPDID; 362 363void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 364extern char &AMDGPUUnifyDivergentExitNodesID; 365 366ImmutablePass *createAMDGPUAAWrapperPass(); 367void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 368ImmutablePass *createAMDGPUExternalAAWrapperPass(); 369void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 370 371void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 372 373ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 374void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 375extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 376 377void initializeGCNNSAReassignPass(PassRegistry &); 378extern char &GCNNSAReassignID; 379 380void initializeGCNPreRALongBranchRegPass(PassRegistry &); 381extern char &GCNPreRALongBranchRegID; 382 383void initializeGCNPreRAOptimizationsPass(PassRegistry &); 384extern char &GCNPreRAOptimizationsID; 385 386FunctionPass *createAMDGPUSetWavePriorityPass(); 387void initializeAMDGPUSetWavePriorityPass(PassRegistry &); 388 389void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &); 390extern char &GCNRewritePartialRegUsesID; 391 392namespace AMDGPU { 393enum TargetIndex { 394 TI_CONSTDATA_START, 395 TI_SCRATCH_RSRC_DWORD0, 396 TI_SCRATCH_RSRC_DWORD1, 397 TI_SCRATCH_RSRC_DWORD2, 398 TI_SCRATCH_RSRC_DWORD3 399}; 400 401// FIXME: Missing constant_32bit 402inline bool isFlatGlobalAddrSpace(unsigned AS) { 403 return AS == AMDGPUAS::GLOBAL_ADDRESS || 404 AS == AMDGPUAS::FLAT_ADDRESS || 405 AS == AMDGPUAS::CONSTANT_ADDRESS || 406 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 407} 408 409inline bool isExtendedGlobalAddrSpace(unsigned AS) { 410 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS || 411 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || 412 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 413} 414 415static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) { 416 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range"); 417 418 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) 419 return true; 420 421 // This array is indexed by address space value enum elements 0 ... to 9 422 // clang-format off 423 static const bool ASAliasRules[10][10] = { 424 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */ 425 /* Flat */ {true, true, false, true, true, true, true, true, true, true}, 426 /* Global */ {true, true, false, false, true, false, true, true, true, true}, 427 /* Region */ {false, false, true, false, false, false, false, false, false, false}, 428 /* Group */ {true, false, false, true, false, false, false, false, false, false}, 429 /* Constant */ {true, true, false, false, false, false, true, true, true, true}, 430 /* Private */ {true, false, false, false, false, true, false, false, false, false}, 431 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true}, 432 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true}, 433 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true}, 434 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true}, 435 }; 436 // clang-format on 437 438 return ASAliasRules[AS1][AS2]; 439} 440 441} 442 443} // End namespace llvm 444 445#endif 446