// R600Instructions.td revision 266715
//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//

include "R600Intrinsics.td"
include "R600InstrFormats.td"

// An InstR600 that always uses the NullALU itinerary and lives in the
// AMDGPU namespace.
class InstR600ISA<dag outs, dag ins, string asm, list<dag> pattern>
  : InstR600<outs, ins, asm, pattern, NullALU> {
  let Namespace = "AMDGPU";
}

// Memory operand made of an X-channel register pointer and an immediate index.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
  let PrintMethod = "printMemOperand";
}

// Memory operand made of a register pointer and a register index.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}

// Operands for non-registers

// An i32 operand carrying a default value (Default) and printed with the
// printer method named by PM.
class InstFlag<string PM = "printOperand", int Default = 0>
  : OperandWithDefaultOps<i32, (ops (i32 Default))> {
  let PrintMethod = PM;
}

// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
def SEL : OperandWithDefaultOps<i32, (ops (i32 -1))> {
  let PrintMethod = "printSel";
}

def BANK_SWIZZLE : OperandWithDefaultOps<i32, (ops (i32 0))> {
  let PrintMethod = "printBankSwizzle";
}

def LITERAL : InstFlag<"printLiteral">;

// Per-instruction modifier flags. All default to 0 except WRITE, which
// defaults to 1.
def WRITE : InstFlag<"printWrite", 1>;
def OMOD  : InstFlag<"printOMOD">;
def REL   : InstFlag<"printRel">;
def CLAMP : InstFlag<"printClamp">;
def NEG   : InstFlag<"printNeg">;
def ABS   : InstFlag<"printAbs">;
def UEM   : InstFlag<"printUpdateExecMask">;
def UP    : InstFlag<"printUpdatePred">;

// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
// Once we start using the packetizer in this backend we should have this
// default to 0.
def LAST : InstFlag<"printLast", 1>;

def RSel : Operand<i32> {
  let PrintMethod = "printRSel";
}

def CT : Operand<i32> {
  let PrintMethod = "printCT";
}

// Frame operand made of a register pointer and an immediate index.
def FRAMEri : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}

// Addressing-mode complex patterns; the string names the selector function
// each one is matched by.
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET
  : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET
  : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;


// Predicate operand; PRED_SEL_OFF is the default.
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
                                 (ops PRED_SEL_OFF)>;


let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {

// Class for instructions with only one source register.
// If you add new ins to this instruction, make sure they are listed before
// $literal, because the backend currently assumes that the last operand is
// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
// and R600InstrInfo::getOperandIdx().
// NOTE(review): R600_1OP currently lists BANK_SWIZZLE:$bank_swizzle after
// $literal, so the "last operand" statement above looks stale -- confirm.
// ALU instruction with a single source register (src0). The src1 fields and
// the update_exec_mask / update_pred fields are fixed at 0. $literal is
// excluded from the generated encoding through DisableEncoding.
//   inst    - 11-bit opcode handed to R600ALU_Word1_OP2.
//   opName  - mnemonic placed at the front of the asm string.
//   pattern - selection pattern(s).
//   itin    - instruction itinerary, AnyALU unless overridden.
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
                   ABS:$src0_abs, SEL:$src0_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName,
                         "$clamp $last $dst$write$dst_rel$omod, "
                         "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                         "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;
  let update_exec_mask = 0;
  let update_pred = 0;
  let HasNativeOperands = 1;
  let Op1 = 1;
  let ALUInst = 1;
  let DisableEncoding = "$literal";
  let UseNamedOperandTable = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// R600_1OP whose pattern is simply (node $src0).
// The itin argument is now forwarded to R600_1OP; previously it was accepted
// but dropped, so every helper instance got the AnyALU default regardless of
// what the caller passed.
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_1OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))],
              itin
>;

// If you add or change the operands for R600_2OP instructions, you must
// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
// ALU instruction with two source registers (src0, src1). $literal is
// excluded from the generated encoding through DisableEncoding.
//   inst    - 11-bit opcode handed to R600ALU_Word1_OP2.
//   opName  - mnemonic placed at the front of the asm string.
//   pattern - selection pattern(s).
//   itin    - instruction itinerary, AnyALU unless overridden.
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                   OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
                   ABS:$src0_abs, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
                   ABS:$src1_abs, SEL:$src1_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName,
                         "$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                         "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                         "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
                         "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let HasNativeOperands = 1;
  let Op2 = 1;
  let ALUInst = 1;
  let DisableEncoding = "$literal";
  let UseNamedOperandTable = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// R600_2OP whose pattern is simply (node $src0, $src1).
// The itinerary parameter was misspelled "itim" and never passed on; it is
// now named itin (template arguments are positional, so callers are
// unaffected) and forwarded to R600_2OP.
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_2OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
                                           R600_Reg32:$src1))],
              itin
>;

// If you add or change the operands for R600_3OP instructions, you must
// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and
// R600InstrInfo::getOperandIdx().
// ALU instruction with three source registers (src0, src1, src2). Unlike
// R600_1OP / R600_2OP it takes a 5-bit opcode and has no write / omod / abs
// operands. $literal is excluded from the generated encoding.
// NOTE(review): "$pred_sel" and "$bank_swizzle" are concatenated without a
// separating space here, unlike R600_1OP / R600_2OP -- confirm whether that
// is intended before changing the printed form.
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
                   R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName, "$clamp $last $dst$dst_rel, "
                         "$src0_neg$src0$src0_rel, "
                         "$src1_neg$src1$src1_rel, "
                         "$src2_neg$src2$src2_rel, "
                         "$pred_sel"
                         "$bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP3<inst> {

  let HasNativeOperands = 1;
  let DisableEncoding = "$literal";
  let Op3 = 1;
  let UseNamedOperandTable = 1;
  let ALUInst = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// Instruction with a single R600_Reg32 result and caller-supplied inputs and
// asm string; the itinerary defaults to VecALU. The inst argument is not
// referenced inside this class.
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU> :
    InstR600 <(outs R600_Reg32:$dst),
              ins,
              asm,
              pattern,
              itin>;



} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

// Immediate leaves that classify a texture-type immediate by its numeric
// value.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

def TEX_MSAA : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 14;
  }]
>;

def TEX_ARRAY_MSAA : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 15;
  }]
>;

// Control-flow RAT export instruction. rat_id, rat_inst, comp_mask and
// cf_inst come from the template arguments; every other field set below is
// a constant.
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
                 dag outs, dag ins, string asm, list<dag> pattern> :
    InstR600ISA <outs, ins, asm, pattern>,
    CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {

  let rat_id = ratid;
  let rat_inst = ratinst;
  let rim = 0;
  // XXX: Have a separate instruction for non-indexed writes.
  let type = 1;
  let rw_rel = 0;
  let elem_size = 0;

  let array_size = 0;
  let comp_mask = mask;
  let burst_count = 0;
  let vpm = 0;
  let cf_inst = cfinst;
  let mark = 0;
  let barrier = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
  let IsExport = 1;

}

// Vertex fetch (read) instruction taking a MEMxi source. Only Word1 is
// assigned to Inst here. The buffer_id argument is not referenced inside
// this class.
class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    : InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>,
      VTX_WORD1_GPR {

  // Static fields
  let DST_REL = 0;
  // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
  // however, based on my testing if USE_CONST_FIELDS is set, then all
  // these fields need to be set to 0.
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL = 1;
  let FORMAT_COMP_ALL = 0;
  let SRF_MODE_ALL = 0;

  let Inst{63-32} = Word1;
  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2> ENDIAN_SWAP = 0;
  // bits<1> CONST_BUF_NO_STRIDE = 0;
  // bits<1> MEGA_FETCH = 0;
  // bits<1> ALT_CONST = 0;
  // bits<2> BUFFER_INDEX_MODE = 0;

  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter)
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82} = CONST_BUF_NO_STRIDE;
  // Inst{83} = MEGA_FETCH;
  // Inst{84} = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;

  let VTXInst = 1;
}

// Load fragment that matches only when isConstantLoad(N, 0) holds for the
// load node.
class LoadParamFrag <PatFrag load_type> : PatFrag <
  (ops node:$ptr), (load_type node:$ptr),
  [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
>;

def load_param : LoadParamFrag<load>;
def load_param_exti8 : LoadParamFrag<az_extloadi8>;
def load_param_exti16 : LoadParamFrag<az_extloadi16>;

// Subtarget predicates used to guard the instruction definitions.
def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">;
def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">;
def isEG : Predicate<
  "Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
  "Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
  "!Subtarget.hasCaymanISA()">;

def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
def isEGorCayman : Predicate<"Subtarget.getGeneration() == "
                             "AMDGPUSubtarget::EVERGREEN"
                             "|| Subtarget.getGeneration() =="
                             "AMDGPUSubtarget::NORTHERN_ISLANDS">;

def isR600toCayman : Predicate<
  "Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;

//===----------------------------------------------------------------------===//
// R600 SDNodes
//===----------------------------------------------------------------------===//

// Interpolation pseudo producing an X/Y register pair. The asm string now
// prints the second result as $dst1; it previously contained the literal
// text "dst1".
def INTERP_PAIR_XY : AMDGPUShaderInst <
  (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
  (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
  "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
  []>;

// Interpolation pseudo producing a Z/W register pair (same $dst1 fix as
// INTERP_PAIR_XY).
def INTERP_PAIR_ZW : AMDGPUShaderInst <
  (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
  (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
  "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
  []>;

def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
  SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
  [SDNPVariadic]
>;

// One FP result from eight f32 operands.
def DOT4 : SDNode<"AMDGPUISD::DOT4",
  SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
      SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
      SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
  []
>;

def COS_HW : SDNode<"AMDGPUISD::COS_HW",
  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
>;

def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
>;

def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;

def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;

// Maps a TEXTURE_FETCH node whose first operand equals TextureOp onto inst,
// passing the remaining 18 operands through in the same order.
multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
          (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
          (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
          (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
          (i32 imm:$DST_SEL_W),
          (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
          (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
          (i32 imm:$COORD_TYPE_W)),
          (inst R600_Reg128:$SRC_GPR,
          imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
          imm:$offsetx, imm:$offsety, imm:$offsetz,
          imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
          imm:$DST_SEL_W,
          imm:$RESOURCE_ID, imm:$SAMPLER_ID,
          imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
          imm:$COORD_TYPE_W)>;
}

//===----------------------------------------------------------------------===//
// Interpolation Instructions
//===----------------------------------------------------------------------===//

def INTERP_VEC_LOAD : AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src0),
  "INTERP_LOAD $src0 : $dst",
  [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;

def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
  let bank_swizzle = 5;
}

def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
  let bank_swizzle = 5;
}

def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;

//===----------------------------------------------------------------------===//
// Export Instructions
//===----------------------------------------------------------------------===//

def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
  [SDNPHasChain, SDNPSideEffect]>;

// First 32-bit word shared by both export instruction forms.
class ExportWord0 {
  field bits<32> Word0;

  bits<13> arraybase;
  bits<2> type;
  bits<7> gpr;
  bits<2> elem_size;

  let Word0{12-0} = arraybase;
  let Word0{14-13} = type;
  let Word0{21-15} = gpr;
  let Word0{22} = 0; // RW_REL
  let Word0{29-23} = 0; // INDEX_GPR
  let Word0{31-30} = elem_size;
}

// Second word of a swizzled export. Only the four swizzle selects are placed
// here; eop and inst are declared but not assigned to Word1 in this class.
class ExportSwzWord1 {
  field bits<32> Word1;

  bits<3> sw_x;
  bits<3> sw_y;
  bits<3> sw_z;
  bits<3> sw_w;
  bits<1> eop;
  bits<8> inst;

  let Word1{2-0} = sw_x;
  let Word1{5-3} = sw_y;
  let Word1{8-6} = sw_z;
  let Word1{11-9} = sw_w;
}

// Second word of a buffer export. Only arraySize and compMask are placed
// here; eop and inst are declared but not assigned to Word1 in this class.
class ExportBufWord1 {
  field bits<32> Word1;

  bits<12> arraySize;
  bits<4> compMask;
  bits<1> eop;
  bits<8> inst;

  let Word1{11-0} = arraySize;
  let Word1{15-12} = compMask;
}

// Selection patterns that lower the pixel depth / stencil / dummy store
// intrinsics and the EXPORT node onto ExportInst, using cf_inst as the
// instruction field.
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
  def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 0, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 7, 0, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_dummy (i32 imm:$type)),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_dummy 1),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
    (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
        (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
        imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
  >;

}

// Selection patterns for int_R600_store_stream_output, one per stream index
// (0-3), each using the matching bufNinst value.
// NOTE(review): "Steam" looks like a typo for "Stream"; the name is kept
// because users outside this view may reference it.
multiclass SteamOutputExportPattern<Instruction ExportInst,
    bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf0inst, 0)>;
// Stream1
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf1inst, 0)>;
// Stream2
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf2inst, 0)>;
// Stream3
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf3inst, 0)>;
}

// Export instructions should not be duplicated by the TailDuplication pass
// (which assumes that duplicable instructions are affected by exec mask).
let usesCustomInserter = 1, isNotDuplicable = 1 in {

class ExportSwzInst : InstR600ISA<(
    outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst,
    i32imm:$eop),
    !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"),
    []>, ExportWord0, ExportSwzWord1 {
  let elem_size = 3;
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
  let IsExport = 1;
}

} // End usesCustomInserter = 1, isNotDuplicable = 1

class ExportBufInst : InstR600ISA<(
    outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
    !strconcat("EXPORT", " $gpr"),
    []>, ExportWord0, ExportBufWord1 {
  let elem_size = 0;
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
  let IsExport = 1;
}

//===----------------------------------------------------------------------===//
// Control Flow Instructions
//===----------------------------------------------------------------------===//


def KCACHE : InstFlag<"printKCache">;

// CF instruction that starts an ALU clause; inst is the 4-bit CF_INST value.
class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
i32imm:$COUNT, i32imm:$Enabled),
!strconcat(OpName, " $COUNT, @$ADDR, "
"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
  field bits<64> Inst;

  let CF_INST = inst;
  let ALT_CONST = 0;
  let WHOLE_QUAD_MODE = 0;
  let BARRIER = 1;
  let UseNamedOperandTable = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// First CF word for R600: the whole word is the address.
class CF_WORD0_R600 {
  field bits<32> Word0;

  bits<32> ADDR;

  let Word0 = ADDR;
}

// R600 control-flow clause. The 4-bit CNT value is split into COUNT
// (bits 2-0) and COUNT_3 (bit 3).
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
  field bits<64> Inst;
  bits<4> CNT;

  let CF_INST = inst;
  let BARRIER = 1;
  let CF_CONST = 0;
  let VALID_PIXEL_MODE = 0;
  let COND = 0;
  let COUNT = CNT{2-0};
  let CALL_COUNT = 0;
  let COUNT_3 = CNT{3};
  let END_OF_PROGRAM = 0;
  let WHOLE_QUAD_MODE = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// Evergreen control-flow clause.
class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
  field bits<64> Inst;

  let CF_INST = inst;
  let BARRIER = 1;
  let JUMPTABLE_SEL = 0;
  let CF_CONST = 0;
  let VALID_PIXEL_MODE = 0;
  let COND = 0;
  let END_OF_PROGRAM = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;

def FETCH_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
  field bits<8> Inst;
  bits<8> num;
  let Inst = num;
}

def ALU_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
  field bits<8> Inst;
  bits<8> num;
  let Inst = num;
}

// Two 32-bit literals packed into one 64-bit word.
def LITERALS : AMDGPUInst <(outs),
(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
  field bits<64> Inst;
  bits<32> literal1;
  bits<32> literal2;

  let Inst{31-0} = literal1;
  let Inst{63-32} = literal2;
}

def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
  field bits<64> Inst;
}

let Predicates = [isR600toCayman] in {

//===----------------------------------------------------------------------===//
// Common Instructions R600, R700, Evergreen, Cayman
//===----------------------------------------------------------------------===//

def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;

// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
def SETE : R600_2OP <
  0x08, "SETE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
>;

def SGT : R600_2OP <
  0x09, "SETGT",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
>;

def SGE : R600_2OP <
  0xA, "SETGE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
>;

def SNE : R600_2OP <
  0xB, "SETNE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
>;

def SETE_DX10 : R600_2OP <
  0xC, "SETE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
>;

def SETGT_DX10 : R600_2OP <
  0xD, "SETGT_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
>;

def SETGE_DX10 : R600_2OP <
  0xE, "SETGE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
>;

def SETNE_DX10 : R600_2OP <
  0xF, "SETNE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
>;

def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;

def MOV : R600_1OP <0x19, "MOV", []>;

let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {

// Pseudo that materializes an immediate into an R600_Reg32. The vt argument
// is not referenced inside this class.
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
  (outs R600_Reg32:$dst),
  (ins immType:$imm),
  "",
  []
>;

} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1

def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat <
  (imm:$val),
  (MOV_IMM_I32 imm:$val)
>;

def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat <
  (fpimm:$val),
  (MOV_IMM_F32 fpimm:$val)
>;

def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;

let hasSideEffects = 1 in {

def KILLGT : R600_2OP <0x2D, "KILLGT", []>;

} // end hasSideEffects

def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;

def SETE_INT : R600_2OP <
  0x3A, "SETE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
>;

def SETGT_INT : R600_2OP <
  0x3B, "SETGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
>;

def SETGE_INT : R600_2OP <
  0x3C, "SETGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
>;

def SETNE_INT : R600_2OP <
  0x3D, "SETNE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
>;

def SETGT_UINT : R600_2OP <
  0x3E, "SETGT_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
>;

def SETGE_UINT : R600_2OP <
  0x3F, "SETGE_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
>;

def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
// The mnemonic was "PRED_SETGE_INT", a copy of the next definition's string;
// it now matches the def name.
def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;

def CNDE_INT : R600_3OP <
  0x1C, "CNDE_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
>;

def CNDGE_INT : R600_3OP <
  0x1E, "CNDGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))]
>;

def CNDGT_INT : R600_3OP <
  0x1D, "CNDGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))]
>;

//===----------------------------------------------------------------------===//
// Texture instructions
//===----------------------------------------------------------------------===//

let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {

// Texture instruction. Only the low five bits of inst are used, as TEX_INST.
class R600_TEX <bits<11> inst, string opName> :
  InstR600 <(outs R600_Reg128:$DST_GPR),
            (ins R600_Reg128:$SRC_GPR,
                 RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
                 i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
                 RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
                 i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
                 CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
                 CT:$COORD_TYPE_W),
            !strconcat(opName,
                       " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
                       "$SRC_GPR.$srcx$srcy$srcz$srcw "
                       "RID:$RESOURCE_ID SID:$SAMPLER_ID "
                       "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
            [],
            NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  let TEX_INST = inst{4-0};
  let SRC_REL = 0;
  let DST_REL = 0;
  let LOD_BIAS = 0;

  let INST_MOD = 0;
  let FETCH_WHOLE_QUAD = 0;
  let ALT_CONST = 0;
  let SAMPLER_INDEX_MODE = 0;
  let RESOURCE_INDEX_MODE = 0;

  let TEXInst = 1;
}

} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0



def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
def TEX_LD : R600_TEX <0x03, "TEX_LD">;
// Same opcode as TEX_LD; differs only in INST_MOD.
def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> {
  let INST_MOD = 1;
}
def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;

defm : TexPattern<0, TEX_SAMPLE>;
defm : TexPattern<1, TEX_SAMPLE_C>;
defm : TexPattern<2, TEX_SAMPLE_L>;
defm : TexPattern<3, TEX_SAMPLE_C_L>;
defm : TexPattern<4, TEX_SAMPLE_LB>;
defm : TexPattern<5, TEX_SAMPLE_C_LB>;
defm : TexPattern<6, TEX_LD, v4i32>;
defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
defm : TexPattern<10, TEX_LDPTR, v4i32>;

//===----------------------------------------------------------------------===//
// Helper classes for common instructions
//===----------------------------------------------------------------------===//

// Three-operand helpers; inst is the 5-bit opcode supplied per generation.
class MUL_LIT_Common<bits<5> inst>
  : R600_3OP<inst, "MUL_LIT", []>;

class MULADD_Common<bits<5> inst>
  : R600_3OP<inst, "MULADD", []>;

class MULADD_IEEE_Common<bits<5> inst>
  : R600_3OP<inst, "MULADD_IEEE",
             [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]>;

class CNDE_Common<bits<5> inst>
  : R600_3OP<inst, "CNDE",
             [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]>;

class CNDGT_Common<bits<5> inst>
  : R600_3OP<inst, "CNDGT",
             [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]> {
  let Itinerary = VecALU;
}

class CNDGE_Common<bits<5> inst>
  : R600_3OP<inst, "CNDGE",
             [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]> {
  let Itinerary = VecALU;
}


let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
// Pseudo carrying one two-operand ALU operand set per vector slot (X, Y, Z,
// W) followed by two literals; it has an empty asm string.
class R600_VEC2OP<list<dag> pattern>
  : InstR600<(outs R600_Reg32:$dst),
             (ins
// Slot X
                UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
                OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
                R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
                R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
                R600_Pred:$pred_sel_X,
// Slot Y
                UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
                OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
                R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
                R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
                R600_Pred:$pred_sel_Y,
// Slot Z
                UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
                OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
                R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
                R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
                R600_Pred:$pred_sel_Z,
// Slot W
                UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
                OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
                R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
                R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
                R600_Pred:$pred_sel_W,
                LITERAL:$literal0, LITERAL:$literal1),
             "",
             pattern,
             AnyALU> {

  let UseNamedOperandTable = 1;

}
}

// Selects the DOT4 node, pairing src0/src1 for each of the four slots.
def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst,
  (DOT4 R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
        R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
        R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
        R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;


class DOT4_Common<bits<11> inst> : R600_2OP<inst, "DOT4", []>;


let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// Emits a vector pseudo (_pseudo) that matches int_AMDGPU_cube and a
// two-operand instruction (_real) carrying the opcode.
multiclass CUBE_Common<bits<11> inst> {

  def _pseudo : InstR600<
    (outs R600_Reg128:$dst),
    (ins R600_Reg128:$src0),
    "CUBE $dst $src0",
    [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
    VecALU
  > {
    let isPseudo = 1;
    let UseNamedOperandTable = 1;
  }

  def _real : R600_2OP<inst, "CUBE", []>;
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

// One- and two-operand helpers; inst is the 11-bit opcode supplied per
// generation. Where present, the body selects the TransALU itinerary.
class EXP_IEEE_Common<bits<11> inst>
  : R600_1OP_Helper<inst, "EXP_IEEE", fexp2> {
  let Itinerary = TransALU;
}

class FLT_TO_INT_Common<bits<11> inst>
  : R600_1OP_Helper<inst, "FLT_TO_INT", fp_to_sint> {
  let Itinerary = TransALU;
}

class INT_TO_FLT_Common<bits<11> inst>
  : R600_1OP_Helper<inst, "INT_TO_FLT", sint_to_fp> {
  let Itinerary = TransALU;
}

class FLT_TO_UINT_Common<bits<11> inst>
  : R600_1OP_Helper<inst, "FLT_TO_UINT", fp_to_uint> {
  let Itinerary = TransALU;
}

class UINT_TO_FLT_Common<bits<11> inst>
  : R600_1OP_Helper<inst, "UINT_TO_FLT", uint_to_fp> {
  let Itinerary = TransALU;
}

class LOG_CLAMPED_Common<bits<11> inst>
  : R600_1OP<inst, "LOG_CLAMPED", []>;

class LOG_IEEE_Common<bits<11> inst>
  : R600_1OP_Helper<inst, "LOG_IEEE", flog2> {
  let Itinerary = TransALU;
}

class LSHL_Common<bits<11> inst> : R600_2OP_Helper<inst, "LSHL", shl>;
class LSHR_Common<bits<11> inst> : R600_2OP_Helper<inst, "LSHR", srl>;
class ASHR_Common<bits<11> inst> : R600_2OP_Helper<inst, "ASHR", sra>;

class MULHI_INT_Common<bits<11> inst>
  : R600_2OP_Helper<inst, "MULHI_INT", mulhs> {
  let Itinerary = TransALU;
}

// NOTE(review): the mnemonic is "MULHI" while the sibling classes spell out
// the signedness (MULHI_INT, MULLO_UINT) -- confirm this is intended.
class MULHI_UINT_Common<bits<11> inst>
  : R600_2OP_Helper<inst, "MULHI", mulhu> {
  let Itinerary = TransALU;
}

class MULLO_INT_Common<bits<11> inst>
  : R600_2OP_Helper<inst, "MULLO_INT", mul> {
  let Itinerary = TransALU;
}

class MULLO_UINT_Common<bits<11> inst>
  : R600_2OP<inst, "MULLO_UINT", []> {
  let Itinerary = TransALU;
}

class RECIP_CLAMPED_Common<bits<11> inst>
  : R600_1OP<inst, "RECIP_CLAMPED", []> {
  let Itinerary = TransALU;
}

class RECIP_IEEE_Common<bits<11> inst>
  : R600_1OP<inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]> {
  let Itinerary = TransALU;
}

class RECIP_UINT_Common<bits<11> inst>
  : R600_1OP_Helper<inst, "RECIP_UINT", AMDGPUurecip> {
  let Itinerary = TransALU;
}

class RECIPSQRT_CLAMPED_Common<bits<11> inst>
  : R600_1OP_Helper<inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq> {
  let Itinerary = TransALU;
}

class RECIPSQRT_IEEE_Common<bits<11> inst>
  : R600_1OP<inst, "RECIPSQRT_IEEE", []> {
  let Itinerary = TransALU;
}

class SIN_Common<bits<11> inst>
  : R600_1OP<inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]> {
  let Trig = 1;
  let Itinerary = TransALU;
}

class COS_Common<bits<11> inst>
  : R600_1OP<inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
  let Trig = 1;
  let Itinerary = TransALU;
}

def CLAMP_R600 : CLAMP<R600_Reg32>;
def FABS_R600 : FABS<R600_Reg32>;
def FNEG_R600 : FNEG<R600_Reg32>;

//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//

// Lowers both int_AMDGPU_div and fdiv to MUL_IEEE of $src0 with the
// reciprocal of $src1.
multiclass DIV_Common<InstR600 recip_ieee> {
def : Pat<
  (int_AMDGPU_div f32:$src0, f32:$src1),
  (MUL_IEEE $src0, (recip_ieee $src1))
>;

def : Pat<
  (fdiv f32:$src0, f32:$src1),
  (MUL_IEEE $src0, (recip_ieee $src1))
>;
}

class TGSI_LIT_Z_Common<InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
  : Pat<
  (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
  (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;

// FROUND pattern
class FROUNDPat<Instruction CNDGE> : Pat<
  (AMDGPUround f32:$x),
  (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
>;


//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR600] in {

  def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
  def MULADD_r600 : MULADD_Common<0x10>;
  def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
  def CNDE_r600 : CNDE_Common<0x18>;
  def CNDGT_r600 : CNDGT_Common<0x19>;
1157 def CNDGE_r600 : CNDGE_Common<0x1A>; 1158 def DOT4_r600 : DOT4_Common<0x50>; 1159 defm CUBE_r600 : CUBE_Common<0x52>; 1160 def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; 1161 def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; 1162 def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; 1163 def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>; 1164 def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>; 1165 def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>; 1166 def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>; 1167 def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>; 1168 def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; 1169 def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>; 1170 def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>; 1171 def SIN_r600 : SIN_Common<0x6E>; 1172 def COS_r600 : COS_Common<0x6F>; 1173 def ASHR_r600 : ASHR_Common<0x70>; 1174 def LSHR_r600 : LSHR_Common<0x71>; 1175 def LSHL_r600 : LSHL_Common<0x72>; 1176 def MULLO_INT_r600 : MULLO_INT_Common<0x73>; 1177 def MULHI_INT_r600 : MULHI_INT_Common<0x74>; 1178 def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>; 1179 def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>; 1180 def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; 1181 1182 defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; 1183 def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>; 1184 def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; 1185 1186 def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; 1187 def : FROUNDPat <CNDGE_r600>; 1188 1189 def R600_ExportSwz : ExportSwzInst { 1190 let Word1{20-17} = 0; // BURST_COUNT 1191 let Word1{21} = eop; 1192 let Word1{22} = 0; // VALID_PIXEL_MODE 1193 let Word1{30-23} = inst; 1194 let Word1{31} = 1; // BARRIER 1195 } 1196 defm : ExportPattern<R600_ExportSwz, 39>; 1197 1198 def R600_ExportBuf : ExportBufInst { 1199 let Word1{20-17} = 0; // BURST_COUNT 1200 let Word1{21} = eop; 1201 let Word1{22} = 0; // VALID_PIXEL_MODE 1202 let Word1{30-23} = inst; 1203 let Word1{31} = 1; // BARRIER 
1204 } 1205 defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; 1206 1207 def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT), 1208 "TEX $CNT @$ADDR"> { 1209 let POP_COUNT = 0; 1210 } 1211 def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT), 1212 "VTX $CNT @$ADDR"> { 1213 let POP_COUNT = 0; 1214 } 1215 def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), 1216 "LOOP_START_DX10 @$ADDR"> { 1217 let POP_COUNT = 0; 1218 let CNT = 0; 1219 } 1220 def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { 1221 let POP_COUNT = 0; 1222 let CNT = 0; 1223 } 1224 def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), 1225 "LOOP_BREAK @$ADDR"> { 1226 let POP_COUNT = 0; 1227 let CNT = 0; 1228 } 1229 def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), 1230 "CONTINUE @$ADDR"> { 1231 let POP_COUNT = 0; 1232 let CNT = 0; 1233 } 1234 def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), 1235 "JUMP @$ADDR POP:$POP_COUNT"> { 1236 let CNT = 0; 1237 } 1238 def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), 1239 "ELSE @$ADDR POP:$POP_COUNT"> { 1240 let CNT = 0; 1241 } 1242 def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> { 1243 let ADDR = 0; 1244 let CNT = 0; 1245 let POP_COUNT = 0; 1246 } 1247 def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), 1248 "POP @$ADDR POP:$POP_COUNT"> { 1249 let CNT = 0; 1250 } 1251 def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> { 1252 let CNT = 0; 1253 let POP_COUNT = 0; 1254 let ADDR = 0; 1255 let END_OF_PROGRAM = 1; 1256 } 1257 1258} 1259 1260//===----------------------------------------------------------------------===// 1261// R700 Only instructions 1262//===----------------------------------------------------------------------===// 1263 1264let Predicates = [isR700] in { 1265 def SIN_r700 : SIN_Common<0x6E>; 1266 def COS_r700 : COS_Common<0x6F>; 1267} 1268 
//===----------------------------------------------------------------------===//
// Evergreen / Cayman store instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEGorCayman] in {

// EG_CF_RAT specialisation whose first argument is fixed to 0x57 and whose
// asm string is prefixed with "MEM_RAT_CACHELESS ".  The component mask is
// supplied by the user of the class.
class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask,
                            dag ins, string name, list<dag> pattern>
    : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
                 "MEM_RAT_CACHELESS "#name, pattern>;

// EG_CF_RAT specialisation whose first argument is fixed to 0x56 and whose
// asm string is prefixed with "MEM_RAT ".  The mask is always 0xf.
class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
                  list<dag> pattern>
    : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
                 "MEM_RAT "#name, pattern>;

// Selected for mskor_global; takes a 128-bit data register and an index
// register restricted to the X channel.
def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
  "MSKOR $rw_gpr.XW, $index_gpr",
  [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
> {
  let eop = 0;
}

} // End Predicates = [isEGorCayman]


//===----------------------------------------------------------------------===//
// Evergreen Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEG] in {

def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;

def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
def MULHI_INT_eg : MULHI_INT_Common<0x90>;
def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;

def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;

// fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;

//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//

let usesCustomInserter = 1 in {

// 32-bit store
def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr, $index_gpr, $eop",
  [(global_store i32:$rw_gpr, i32:$index_gpr)]
>;

// 64-bit store
def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
  (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
  [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
>;

// 128-bit store
def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
  [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;

} // End usesCustomInserter = 1

// Common base of the Evergreen VTX_READ_* classes: combines VTX_WORD0_eg with
// VTX_READ and pins the fields that are the same for every read size.
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {

  // Static fields
  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let BUFFER_ID = buffer_id;
  let SRC_REL = 0;
  // XXX: We can infer this field based on the SRC_GPR.  This would allow us
  // to store vertex addresses in any channel, not just X.
  let SRC_SEL_X = 0;

  let Inst{31-0} = Word0;
}

class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 1;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7;   // Masked
  let DST_SEL_Z = 7;   // Masked
  let DST_SEL_W = 7;   // Masked
  let DATA_FORMAT = 1; // FMT_8
}

class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {
  let MEGA_FETCH_COUNT = 2;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7;   // Masked
  let DST_SEL_Z = 7;   // Masked
  let DST_SEL_W = 7;   // Masked
  let DATA_FORMAT = 5; // FMT_16

}

class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 4;
  let DST_SEL_X        = 0;
  let DST_SEL_Y        = 7;   // Masked
  let DST_SEL_Z        = 7;   // Masked
  let DST_SEL_W        = 7;   // Masked
  let DATA_FORMAT      = 0xD; // COLOR_32

  // This is not really necessary, but there were some GPU hangs that appeared
  // to be caused by ALU instructions in the next instruction group that wrote
  // to the $src_gpr registers of the VTX_READ.
  // e.g.
  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
  // %T2_X<def> = MOV %ZERO
  // Adding this constraint prevents this from happening.
  let Constraints = "$src_gpr.ptr = $dst_gpr";
}

class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
                   (outs R600_Reg64:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 8;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 7;
  let DST_SEL_W = 7;
  let DATA_FORMAT = 0x1D; // COLOR_32_32
}

class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
                   (outs R600_Reg128:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X        =  0;
  let DST_SEL_Y        =  1;
  let DST_SEL_Z        =  2;
  let DST_SEL_W        =  3;
  let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32

  // XXX: Need to force VTX_READ_128 instructions to write to the same register
  // that holds its buffer address to avoid potential hangs.  We can't use
  // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
  // registers are different sizes.
}

//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//

def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
  [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
  [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
  [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
  [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
  [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//

// 8-bit reads
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
  [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
>;

// 16-bit reads
def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1,
  [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
>;

// 32-bit reads
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
  [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 64-bit reads
def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1,
  [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 128-bit reads
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
  [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

} // End Predicates = [isEG]

//===----------------------------------------------------------------------===//
// Evergreen / Cayman Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEGorCayman] in {

  // BFE_UINT - bit_extract, an optimization for mask and shift
  // Src0 = Input
  // Src1 = Offset
  // Src2 = Width
  //
  // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
  //
  // Example Usage:
  // (Offset, Width)
  //
  // (0, 8)  = (Input << 24) >> 24 = (Input & 0xff)       >> 0
  // (8, 8)  = (Input << 16) >> 24 = (Input & 0xffff)     >> 8
  // (16,8)  = (Input << 8)  >> 24 = (Input & 0xffffff)   >> 16
  // (24,8)  = (Input << 0)  >> 24 = (Input & 0xffffffff) >> 24
  def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
    [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
                                               i32:$src2))],
    VecALU
  >;
// XXX: This pattern is broken, disabling for now.  See comment in
// AMDGPUInstructions.td for more info.
//  def : BFEPattern <BFE_UINT_eg>;

  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
  defm : BFIPatterns <BFI_INT_eg>;

  def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
    [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
  >;
  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
  def : ROTRPattern <BIT_ALIGN_INT_eg>;

  def MULADD_eg : MULADD_Common<0x14>;
  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
  def ASHR_eg : ASHR_Common<0x15>;
  def LSHR_eg : LSHR_Common<0x16>;
  def LSHL_eg : LSHL_Common<0x17>;
  def CNDE_eg : CNDE_Common<0x19>;
  def CNDGT_eg : CNDGT_Common<0x1A>;
  def CNDGE_eg : CNDGE_Common<0x1B>;
  def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
  def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
  def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
    [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU
  >;
  def DOT4_eg : DOT4_Common<0xBE>;
  defm CUBE_eg : CUBE_Common<0xC0>;

let hasSideEffects = 1 in {
  def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
}

  def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;

  // The selection pattern inherited from FLT_TO_INT_Common is cleared here;
  // fp_to_sint is matched by an explicit Pat further down that wraps the
  // source in TRUNC.
  def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
    let Pattern = [];
    let Itinerary = AnyALU;
  }

  def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;

  // As for FLT_TO_INT_eg: the inherited pattern is cleared and fp_to_uint is
  // matched by the explicit TRUNC-wrapping Pat further down.
  def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
    let Pattern = [];
  }

  def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;

// Operand-less instruction selected for int_AMDGPU_barrier_local.  Every
// encoding field is fixed to a constant because there are no operands to
// take the values from.
def GROUP_BARRIER : InstR600 <
    (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <0x54> {

  let dst = 0;
  let dst_rel = 0;
  let src0 = 0;
  let src0_rel = 0;
  let src0_neg = 0;
  let src0_abs = 0;
  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;
  let write = 0;
  let omod = 0;
  let clamp = 0;
  let last = 1;
  let bank_swizzle = 0;
  let pred_sel = 0;
  let update_exec_mask = 0;
  let update_pred = 0;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;

  let ALUInst = 1;
}

//===----------------------------------------------------------------------===//
// LDS Instructions
//===----------------------------------------------------------------------===//

// Base class of all LDS instructions.  The 6-bit offset field is not
// contiguous in the encoding: its bits are scattered over Word0 and Word1 as
// listed below.
class R600_LDS <bits<6> op, dag outs, dag ins, string asm,
                list<dag> pattern = []> :

    InstR600 <outs, ins, asm, pattern, XALU>,
    R600_ALU_LDS_Word0,
    R600LDS_Word1 {

  bits<6> offset = 0;
  let lds_op = op;

  let Word1{27} = offset{0};
  let Word1{12} = offset{1};
  let Word1{28} = offset{2};
  let Word1{31} = offset{3};
  let Word0{12} = offset{4};
  let Word0{25} = offset{5};


  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  let ALUInst = 1;
  let HasNativeOperands = 1;
  let UseNamedOperandTable = 1;
}

// LDS instruction with a single source operand ($src0) and a $dst result.
// $dst is excluded from the encoding (DisableEncoding); the asm string prints
// OQAP in its place.
class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
  lds_op,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       LAST:$last, R600_Pred:$pred_sel,
       BANK_SWIZZLE:$bank_swizzle),
  " "#name#" $last OQAP, $src0$src0_rel $pred_sel",
  pattern
  > {

  let src1 = 0;
  let src1_rel = 0;
  let src2 = 0;
  let src2_rel = 0;

  let usesCustomInserter = 1;
  let LDS_1A = 1;
  let DisableEncoding = "$dst";
}

// LDS instruction with two source operands ($src0, $src1).  The optional
// `dst` string is pasted into the asm string in front of $src0.
class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
                     string dst =""> :
    R600_LDS <
  lds_op, outs,
  (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
       LAST:$last, R600_Pred:$pred_sel,
       BANK_SWIZZLE:$bank_swizzle),
  " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
  pattern
  > {

  field string BaseOp;

  let src2 = 0;
  let src2_rel = 0;
  let LDS_1A1D = 1;
}

// Two-source LDS instruction without a result.
class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS_1A1D <lds_op, (outs), name, pattern> {
  let BaseOp = name;
}

// Two-source LDS instruction with a $dst result.  "_RET" is appended to the
// printed mnemonic while BaseOp keeps the plain name.
class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {

  let BaseOp = name;
  let usesCustomInserter = 1;
  let DisableEncoding = "$dst";
}

// LDS instruction with three source operands ($src0, $src1, $src2) and no
// result.
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS <
  lds_op,
  (outs),
  (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
       R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
       LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
  " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
  pattern> {
  let LDS_1A2D = 1;
}

def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
  [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
>;
def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
  [(truncstorei8_local i32:$src1, i32:$src0)]
>;
def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
  [(truncstorei16_local i32:$src1, i32:$src0)]
>;
def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
  [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
>;
def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
  [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
>;
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
  [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
  [(set i32:$dst, (sextloadi8_local i32:$src0))]
>;
def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
  [(set i32:$dst, (az_extloadi8_local i32:$src0))]
>;
def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
  [(set i32:$dst, (sextloadi16_local i32:$src0))]
>;
def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
  [(set i32:$dst, (az_extloadi16_local i32:$src0))]
>;

  // TRUNC is used for the FLT_TO_INT instructions to work around a
  // perceived problem where the rounding modes are applied differently
  // depending on the instruction and the slot they are in.
  // See:
  // https://bugs.freedesktop.org/show_bug.cgi?id=50232
  // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
  //
  // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
  // which do not need to be truncated since the fp values are 0.0f or 1.0f.
  // We should look into handling these cases separately.
  def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;

  def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;

  // SHA-256 Patterns
  def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;

  def : FROUNDPat <CNDGE_eg>;

  def EG_ExportSwz : ExportSwzInst {
    let Word1{19-16} = 0; // BURST_COUNT
    let Word1{20} = 0; // VALID_PIXEL_MODE
    let Word1{21} = eop;
    let Word1{29-22} = inst;
    let Word1{30} = 0; // MARK
    let Word1{31} = 1; // BARRIER
  }
  defm : ExportPattern<EG_ExportSwz, 83>;

  def EG_ExportBuf : ExportBufInst {
    let Word1{19-16} = 0; // BURST_COUNT
    let Word1{20} = 0; // VALID_PIXEL_MODE
    let Word1{21} = eop;
    let Word1{29-22} = inst;
    let Word1{30} = 0; // MARK
    let Word1{31} = 1; // BARRIER
  }
  defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;

  def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
  "TEX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
  "VTX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
  "LOOP_START_DX10 @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
  "LOOP_BREAK @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
  "CONTINUE @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "JUMP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "ELSE @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
    let ADDR = 0;
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "POP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
    let COUNT = 0;
    let POP_COUNT = 0;
    let ADDR = 0;
    let END_OF_PROGRAM = 1;
  }

} // End Predicates = [isEGorCayman]

//===----------------------------------------------------------------------===//
// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//

defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;

//===----------------------------------------------------------------------===//
// Cayman Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isCayman] in {

def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
  [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))], VecALU
>;
def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
  [(set i32:$dst, (mul I24:$src0, I24:$src1))], VecALU
>;

let isVector = 1 in {

def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;

def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
def MULHI_INT_cm : MULHI_INT_Common<0x90>;
def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1

def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;

defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;

// RECIP_UINT emulation for Cayman
// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
  (AMDGPUurecip i32:$src0),
  (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
                            (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>;

  def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
    let ADDR = 0;
    let POP_COUNT = 0;
    let COUNT = 0;
  }

// fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;

// STORE_DWORD for a given data register class, value type and component mask.
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
  CF_MEM_RAT_CACHELESS <0x14, 0, mask,
                        (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
                        "STORE_DWORD $rw_gpr, $index_gpr",
                        [(global_store vt:$rw_gpr, i32:$index_gpr)]> {
  let eop = 0; // This bit is not used on Cayman.
}

def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;

// Common base of the Cayman VTX_READ_* classes: combines VTX_WORD0_cm with
// VTX_READ and pins the fields that are the same for every read size.
class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {

  // Static fields
  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let BUFFER_ID = buffer_id;
  let SRC_REL = 0;
  // XXX: We can infer this field based on the SRC_GPR.  This would allow us
  // to store vertex addresses in any channel, not just X.
  let SRC_SEL_X = 0;
  let SRC_SEL_Y = 0;
  let STRUCTURED_READ = 0;
  let LDS_REQ = 0;
  let COALESCED_READ = 0;

  let Inst{31-0} = Word0;
}

class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 7;   // Masked
  let DST_SEL_Z = 7;   // Masked
  let DST_SEL_W = 7;   // Masked
  let DATA_FORMAT = 1; // FMT_8
}

class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7;   // Masked
  let DST_SEL_Z = 7;   // Masked
  let DST_SEL_W = 7;   // Masked
  let DATA_FORMAT = 5; // FMT_16

}

class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {

  let DST_SEL_X        = 0;
  let DST_SEL_Y        = 7;   // Masked
  let DST_SEL_Z        = 7;   // Masked
  let DST_SEL_W        = 7;   // Masked
  let DATA_FORMAT      = 0xD; // COLOR_32

  // This is not really necessary, but there were some GPU hangs that appeared
  // to be caused by ALU instructions in the next instruction group that wrote
  // to the $src_gpr registers of the VTX_READ.
  // e.g.
  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
  // %T2_X<def> = MOV %ZERO
  // Adding this constraint prevents this from happening.
  let Constraints = "$src_gpr.ptr = $dst_gpr";
}

// The destination is printed with an explicit .XY suffix, matching
// VTX_READ_64_eg and the .XYZW suffix used by the 128-bit reads.
class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_cm <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
                   (outs R600_Reg64:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 7;
  let DST_SEL_W = 7;
  let DATA_FORMAT = 0x1D; // COLOR_32_32
}

class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
                   (outs R600_Reg128:$dst_gpr), pattern> {

  let DST_SEL_X        =  0;
  let DST_SEL_Y        =  1;
  let DST_SEL_Z        =  2;
  let DST_SEL_W        =  3;
  let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32

  // XXX: Need to force VTX_READ_128 instructions to write to the same register
  // that holds its buffer address to avoid potential hangs.  We can't use
  // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
  // registers are different sizes.
}

//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
  [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
  [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
  [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0,
  [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
  [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//

// 8-bit reads
def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1,
  [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
>;

// 16-bit reads
def VTX_READ_GLOBAL_16_cm : VTX_READ_16_cm <1,
  [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
>;

// 32-bit reads
def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1,
  [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 64-bit reads
def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1,
  [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 128-bit reads
def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
  [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

} // End isCayman

//===----------------------------------------------------------------------===//
// Branch Instructions
2028//===----------------------------------------------------------------------===// 2029 2030 2031def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src), 2032 "IF_PREDICATE_SET $src", []>; 2033 2034//===----------------------------------------------------------------------===// 2035// Pseudo instructions 2036//===----------------------------------------------------------------------===// 2037 2038let isPseudo = 1 in { 2039 2040def PRED_X : InstR600 < 2041 (outs R600_Predicate_Bit:$dst), 2042 (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), 2043 "", [], NullALU> { 2044 let FlagOperandIdx = 3; 2045} 2046 2047let isTerminator = 1, isBranch = 1 in { 2048def JUMP_COND : InstR600 < 2049 (outs), 2050 (ins brtarget:$target, R600_Predicate_Bit:$p), 2051 "JUMP $target ($p)", 2052 [], AnyALU 2053 >; 2054 2055def JUMP : InstR600 < 2056 (outs), 2057 (ins brtarget:$target), 2058 "JUMP $target", 2059 [], AnyALU 2060 > 2061{ 2062 let isPredicable = 1; 2063 let isBarrier = 1; 2064} 2065 2066} // End isTerminator = 1, isBranch = 1 2067 2068let usesCustomInserter = 1 in { 2069 2070let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in { 2071 2072def MASK_WRITE : AMDGPUShaderInst < 2073 (outs), 2074 (ins R600_Reg32:$src), 2075 "MASK_WRITE $src", 2076 [] 2077>; 2078 2079} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 2080 2081 2082def TXD: InstR600 < 2083 (outs R600_Reg128:$dst), 2084 (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, 2085 i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), 2086 "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", 2087 [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, 2088 imm:$resourceId, imm:$samplerId, imm:$textureTarget))], 2089 NullALU > { 2090 let TEXInst = 1; 2091} 2092 2093def TXD_SHADOW: InstR600 < 2094 (outs R600_Reg128:$dst), 2095 (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, 2096 i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), 2097 
"TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", 2098 [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, 2099 imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], 2100 NullALU 2101> { 2102 let TEXInst = 1; 2103} 2104} // End isPseudo = 1 2105} // End usesCustomInserter = 1 2106 2107//===---------------------------------------------------------------------===// 2108// Return instruction 2109//===---------------------------------------------------------------------===// 2110let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, 2111 usesCustomInserter = 1 in { 2112 def RETURN : ILFormat<(outs), (ins variable_ops), 2113 "RETURN", [(IL_retflag)]>; 2114} 2115 2116 2117//===----------------------------------------------------------------------===// 2118// Constant Buffer Addressing Support 2119//===----------------------------------------------------------------------===// 2120 2121let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { 2122def CONST_COPY : Instruction { 2123 let OutOperandList = (outs R600_Reg32:$dst); 2124 let InOperandList = (ins i32imm:$src); 2125 let Pattern = 2126 [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; 2127 let AsmString = "CONST_COPY"; 2128 let neverHasSideEffects = 1; 2129 let isAsCheapAsAMove = 1; 2130 let Itinerary = NullALU; 2131} 2132} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" 2133 2134def TEX_VTX_CONSTBUF : 2135 InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr", 2136 [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, 2137 VTX_WORD1_GPR, VTX_WORD0_eg { 2138 2139 let VC_INST = 0; 2140 let FETCH_TYPE = 2; 2141 let FETCH_WHOLE_QUAD = 0; 2142 let SRC_REL = 0; 2143 let SRC_SEL_X = 0; 2144 let DST_REL = 0; 2145 let USE_CONST_FIELDS = 0; 2146 let NUM_FORMAT_ALL = 2; 2147 let FORMAT_COMP_ALL = 1; 2148 let 
SRF_MODE_ALL = 1; 2149 let MEGA_FETCH_COUNT = 16; 2150 let DST_SEL_X = 0; 2151 let DST_SEL_Y = 1; 2152 let DST_SEL_Z = 2; 2153 let DST_SEL_W = 3; 2154 let DATA_FORMAT = 35; 2155 2156 let Inst{31-0} = Word0; 2157 let Inst{63-32} = Word1; 2158 2159// LLVM can only encode 64-bit instructions, so these fields are manually 2160// encoded in R600CodeEmitter 2161// 2162// bits<16> OFFSET; 2163// bits<2> ENDIAN_SWAP = 0; 2164// bits<1> CONST_BUF_NO_STRIDE = 0; 2165// bits<1> MEGA_FETCH = 0; 2166// bits<1> ALT_CONST = 0; 2167// bits<2> BUFFER_INDEX_MODE = 0; 2168 2169 2170 2171// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding 2172// is done in R600CodeEmitter 2173// 2174// Inst{79-64} = OFFSET; 2175// Inst{81-80} = ENDIAN_SWAP; 2176// Inst{82} = CONST_BUF_NO_STRIDE; 2177// Inst{83} = MEGA_FETCH; 2178// Inst{84} = ALT_CONST; 2179// Inst{86-85} = BUFFER_INDEX_MODE; 2180// Inst{95-86} = 0; Reserved 2181 2182// VTX_WORD3 (Padding) 2183// 2184// Inst{127-96} = 0; 2185 let VTXInst = 1; 2186} 2187 2188def TEX_VTX_TEXBUF: 2189 InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", 2190 [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, 2191VTX_WORD1_GPR, VTX_WORD0_eg { 2192 2193let VC_INST = 0; 2194let FETCH_TYPE = 2; 2195let FETCH_WHOLE_QUAD = 0; 2196let SRC_REL = 0; 2197let SRC_SEL_X = 0; 2198let DST_REL = 0; 2199let USE_CONST_FIELDS = 1; 2200let NUM_FORMAT_ALL = 0; 2201let FORMAT_COMP_ALL = 0; 2202let SRF_MODE_ALL = 1; 2203let MEGA_FETCH_COUNT = 16; 2204let DST_SEL_X = 0; 2205let DST_SEL_Y = 1; 2206let DST_SEL_Z = 2; 2207let DST_SEL_W = 3; 2208let DATA_FORMAT = 0; 2209 2210let Inst{31-0} = Word0; 2211let Inst{63-32} = Word1; 2212 2213// LLVM can only encode 64-bit instructions, so these fields are manually 2214// encoded in R600CodeEmitter 2215// 2216// bits<16> OFFSET; 2217// bits<2> ENDIAN_SWAP = 0; 2218// bits<1> CONST_BUF_NO_STRIDE = 0; 2219// bits<1> MEGA_FETCH = 
0; 2220// bits<1> ALT_CONST = 0; 2221// bits<2> BUFFER_INDEX_MODE = 0; 2222 2223 2224 2225// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding 2226// is done in R600CodeEmitter 2227// 2228// Inst{79-64} = OFFSET; 2229// Inst{81-80} = ENDIAN_SWAP; 2230// Inst{82} = CONST_BUF_NO_STRIDE; 2231// Inst{83} = MEGA_FETCH; 2232// Inst{84} = ALT_CONST; 2233// Inst{86-85} = BUFFER_INDEX_MODE; 2234// Inst{95-86} = 0; Reserved 2235 2236// VTX_WORD3 (Padding) 2237// 2238// Inst{127-96} = 0; 2239 let VTXInst = 1; 2240} 2241 2242 2243 2244//===--------------------------------------------------------------------===// 2245// Instructions support 2246//===--------------------------------------------------------------------===// 2247//===---------------------------------------------------------------------===// 2248// Custom Inserter for Branches and returns, this eventually will be a 2249// seperate pass 2250//===---------------------------------------------------------------------===// 2251let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { 2252 def BRANCH : ILFormat<(outs), (ins brtarget:$target), 2253 "; Pseudo unconditional branch instruction", 2254 [(br bb:$target)]>; 2255 defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>; 2256} 2257 2258//===---------------------------------------------------------------------===// 2259// Flow and Program control Instructions 2260//===---------------------------------------------------------------------===// 2261let isTerminator=1 in { 2262 def SWITCH : ILFormat< (outs), (ins GPRI32:$src), 2263 !strconcat("SWITCH", " $src"), []>; 2264 def CASE : ILFormat< (outs), (ins GPRI32:$src), 2265 !strconcat("CASE", " $src"), []>; 2266 def BREAK : ILFormat< (outs), (ins), 2267 "BREAK", []>; 2268 def CONTINUE : ILFormat< (outs), (ins), 2269 "CONTINUE", []>; 2270 def DEFAULT : ILFormat< (outs), (ins), 2271 "DEFAULT", []>; 2272 def ELSE : ILFormat< (outs), (ins), 2273 "ELSE", []>; 2274 def 
ENDSWITCH : ILFormat< (outs), (ins), 2275 "ENDSWITCH", []>; 2276 def ENDMAIN : ILFormat< (outs), (ins), 2277 "ENDMAIN", []>; 2278 def END : ILFormat< (outs), (ins), 2279 "END", []>; 2280 def ENDFUNC : ILFormat< (outs), (ins), 2281 "ENDFUNC", []>; 2282 def ENDIF : ILFormat< (outs), (ins), 2283 "ENDIF", []>; 2284 def WHILELOOP : ILFormat< (outs), (ins), 2285 "WHILE", []>; 2286 def ENDLOOP : ILFormat< (outs), (ins), 2287 "ENDLOOP", []>; 2288 def FUNC : ILFormat< (outs), (ins), 2289 "FUNC", []>; 2290 def RETDYN : ILFormat< (outs), (ins), 2291 "RET_DYN", []>; 2292 // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2293 defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">; 2294 // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2295 defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">; 2296 // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2297 defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">; 2298 // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2299 defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">; 2300 // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2301 defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">; 2302 // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2303 defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">; 2304 defm IFC : BranchInstr2<"IFC">; 2305 defm BREAKC : BranchInstr2<"BREAKC">; 2306 defm CONTINUEC : BranchInstr2<"CONTINUEC">; 2307} 2308 2309//===----------------------------------------------------------------------===// 2310// ISel Patterns 2311//===----------------------------------------------------------------------===// 2312 2313// CND*_INT Pattterns for f32 True / False values 2314 2315class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat < 2316 (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc), 2317 (cnd $src0, $src1, $src2) 2318>; 2319 2320def : CND_INT_f32 <CNDE_INT, SETEQ>; 2321def : CND_INT_f32 <CNDGT_INT, SETGT>; 
def : CND_INT_f32 <CNDGE_INT, SETGE>;

// CNDGE_INT extra pattern: for signed integers `$src0 > -1` is the same test
// as `$src0 >= 0`, so it is selected to CNDGE_INT as well.
def : Pat <
  (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT),
  (CNDGE_INT $src0, $src1, $src2)
>;

// KIL Patterns

// int_AMDGPU_kilp takes no operand; it is lowered to a KILLGT comparing the
// constants ONE and ZERO (presumably an unconditional kill -- confirm against
// the KILLGT definition).
def KILP : Pat <
  (int_AMDGPU_kilp),
  (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
>;

// int_AMDGPU_kill is lowered to a KILLGT comparing ZERO against $src0.
def KIL : Pat <
  (int_AMDGPU_kill f32:$src0),
  (MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;

// Element access on 4-element vectors: element N lives in sub-register subN.
def : Extract_Element <f32, v4f32, 0, sub0>;
def : Extract_Element <f32, v4f32, 1, sub1>;
def : Extract_Element <f32, v4f32, 2, sub2>;
def : Extract_Element <f32, v4f32, 3, sub3>;

def : Insert_Element <f32, v4f32, 0, sub0>;
def : Insert_Element <f32, v4f32, 1, sub1>;
def : Insert_Element <f32, v4f32, 2, sub2>;
def : Insert_Element <f32, v4f32, 3, sub3>;

def : Extract_Element <i32, v4i32, 0, sub0>;
def : Extract_Element <i32, v4i32, 1, sub1>;
def : Extract_Element <i32, v4i32, 2, sub2>;
def : Extract_Element <i32, v4i32, 3, sub3>;

def : Insert_Element <i32, v4i32, 0, sub0>;
def : Insert_Element <i32, v4i32, 1, sub1>;
def : Insert_Element <i32, v4i32, 2, sub2>;
def : Insert_Element <i32, v4i32, 3, sub3>;

// Building a 4-element vector from scalars.
def : Vector4_Build <v4f32, f32>;
def : Vector4_Build <v4i32, i32>;

// Element access on 2-element vectors.
def : Extract_Element <f32, v2f32, 0, sub0>;
def : Extract_Element <f32, v2f32, 1, sub1>;

def : Insert_Element <f32, v2f32, 0, sub0>;
def : Insert_Element <f32, v2f32, 1, sub1>;

def : Extract_Element <i32, v2i32, 0, sub0>;
def : Extract_Element <i32, v2i32, 1, sub1>;

def : Insert_Element <i32, v2i32, 0, sub0>;
def : Insert_Element <i32, v2i32, 1, sub1>;

// bitconvert patterns: integer and float types of equal width share a
// register class.

def : BitConvert <i32, f32, R600_Reg32>;
def : BitConvert <f32, i32, R600_Reg32>;
def : BitConvert <v2f32, v2i32, R600_Reg64>;
def : BitConvert <v2i32, v2f32, R600_Reg64>;
def : BitConvert <v4f32, v4i32, R600_Reg128>;
def : BitConvert <v4i32, v4f32, R600_Reg128>;

// DWORDADDR pattern
def : DwordAddrPat <i32, R600_Reg32>;

} // End isR600toCayman Predicate

// Instruction mapping over the R600_LDS_1A1D instructions. Instructions that
// share a BaseOp form one row; the key column is the instruction whose
// DisableEncoding is "$dst" and the value column is the one whose
// DisableEncoding is the empty string.
// NOTE(review): judging by the name, this maps a returning LDS op to its
// no-return form -- confirm against the R600_LDS_1A1D definitions.
def getLDSNoRetOp : InstrMapping {
  let FilterClass = "R600_LDS_1A1D";
  let RowFields   = ["BaseOp"];
  let ColFields   = ["DisableEncoding"];
  let KeyCol      = ["$dst"];
  // `""""` is two adjacent empty string literals, which TableGen concatenates
  // into a single empty string.
  let ValueCols   = [[""""]];
}