R600Instructions.td revision 263508
//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//

include "R600Intrinsics.td"
include "R600InstrFormats.td"

// Base class for instructions in the "AMDGPU" namespace that use the NullALU
// itinerary.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
    InstR600 <outs, ins, asm, pattern, NullALU> {

  let Namespace = "AMDGPU";
}

// Memory operand made of an X-channel register pointer and an immediate index.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
  let PrintMethod = "printMemOperand";
}

// Memory operand made of a register pointer and a register index.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}

// Operands for non-registers

// An i32 operand with a default value (Default) and a configurable print
// method (PM).
class InstFlag<string PM = "printOperand", int Default = 0>
    : OperandWithDefaultOps <i32, (ops (i32 Default))> {
  let PrintMethod = PM;
}

// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
  let PrintMethod = "printSel";
}
def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
  let PrintMethod = "printBankSwizzle";
}

def LITERAL : InstFlag<"printLiteral">;

def WRITE : InstFlag <"printWrite", 1>;
def OMOD : InstFlag <"printOMOD">;
def REL : InstFlag <"printRel">;
def CLAMP : InstFlag <"printClamp">;
def NEG : InstFlag <"printNeg">;
def ABS : InstFlag <"printAbs">;
def UEM : InstFlag <"printUpdateExecMask">;
def UP : InstFlag <"printUpdatePred">;

// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
// Once we start using the packetizer in this backend we should have this
// default to 0.
def LAST : InstFlag<"printLast", 1>;
def RSel : Operand<i32> {
  let PrintMethod = "printRSel";
}
def CT: Operand<i32> {
  let PrintMethod = "printCT";
}

def FRAMEri : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}

def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;


def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
                                 (ops PRED_SEL_OFF)>;


let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {

// Class for instructions with only one source register.
// If you add new ins to this instruction, make sure they are listed before
// $literal, because the backend currently assumes that the last operand is
// a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
// and R600InstrInfo::getOperandIdx().
// NOTE(review): BANK_SWIZZLE:$bank_swizzle is listed after $literal below,
// which appears to contradict the "last operand is a literal" statement --
// confirm which one is current.
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName,
                   "$clamp $last $dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                   "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  // This class uses the R600ALU_Word1_OP2 word layout, so the fields that
  // only exist for a second source / predicate update are fixed to zero.
  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;
  let update_exec_mask = 0;
  let update_pred = 0;
  let HasNativeOperands = 1;
  let Op1 = 1;
  let ALUInst = 1;
  let DisableEncoding = "$literal";
  let UseNamedOperandTable = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// Convenience wrapper around R600_1OP that builds the selection pattern
// (set $dst, (node $src0)) from a single SDPatternOperator.  The itinerary
// argument is forwarded to R600_1OP (it was previously accepted but dropped).
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_1OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))],
              itin
>;

// If you add or change the operands for R600_2OP instructions, you must
// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
// ALU instruction with two source registers.  Each source carries its own
// neg/rel/abs/sel modifier operands; $literal is excluded from the generated
// encoding via DisableEncoding.
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                   OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName,
                   "$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                   "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
                   "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let HasNativeOperands = 1;
  let Op2 = 1;
  let ALUInst = 1;
  let DisableEncoding = "$literal";
  let UseNamedOperandTable = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// Convenience wrapper around R600_2OP that builds the selection pattern
// (set $dst, (node $src0, $src1)) from a single SDPatternOperator.  The
// itinerary argument is forwarded to R600_2OP (it was previously misspelled
// "itim" and never passed on, so a caller-supplied itinerary was ignored).
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_2OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
                                           R600_Reg32:$src1))],
              itin
>;

// If you add or change the operands for R600_3OP instructions, you must
// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and
// R600InstrInfo::getOperandIdx().
// ALU instruction with three source registers.  Unlike the one- and
// two-operand classes it has no write/omod/abs operands and uses the
// R600ALU_Word1_OP3 word layout with a 5-bit opcode.
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
                   R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName, "$clamp $last $dst$dst_rel, "
                                 "$src0_neg$src0$src0_rel, "
                                 "$src1_neg$src1$src1_rel, "
                                 "$src2_neg$src2$src2_rel, "
                                 "$pred_sel"
                                 "$bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP3<inst>{

  let HasNativeOperands = 1;
  let DisableEncoding = "$literal";
  let Op3 = 1;
  let UseNamedOperandTable = 1;
  let ALUInst = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// Instruction with a single R600_Reg32 result whose inputs and asm string are
// supplied by the user; defaults to the VecALU itinerary.
// NOTE(review): the `inst` template argument is not referenced in this class.
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU> :
    InstR600 <(outs R600_Reg32:$dst),
              ins,
              asm,
              pattern,
              itin>;



} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

// Immediate predicates on a texture-type value (TType).  Each PatLeaf matches
// the immediate when its numeric value falls in the listed set.

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

def TEX_MSAA : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 14;
  }]
>;

def TEX_ARRAY_MSAA : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 15;
  }]
>;

// Export-style CF instruction addressed by RAT id / RAT instruction, with a
// caller-supplied component mask.  All remaining word fields are fixed below.
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
                 dag outs, dag ins, string asm, list<dag> pattern> :
    InstR600ISA <outs, ins, asm, pattern>,
    CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {

  let rat_id = ratid;
  let rat_inst = ratinst;
  let rim = 0;
  // XXX: Have a separate instruction for non-indexed writes.
  let type = 1;
  let rw_rel = 0;
  let elem_size = 0;

  let array_size = 0;
  let comp_mask = mask;
  let burst_count = 0;
  let vpm = 0;
  let cf_inst = cfinst;
  let mark = 0;
  let barrier = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
  let IsExport = 1;

}

// Vertex-fetch read taking a single MEMxi source operand.
// NOTE(review): the `buffer_id` template argument is not referenced inside
// this class; presumably subclasses consume it -- confirm.
class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    : InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>,
      VTX_WORD1_GPR {

  // Static fields
  let DST_REL = 0;
  // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
  // however, based on my testing if USE_CONST_FIELDS is set, then all
  // these fields need to be set to 0.
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL = 1;
  let FORMAT_COMP_ALL = 0;
  let SRF_MODE_ALL = 0;

  let Inst{63-32} = Word1;
  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2>  ENDIAN_SWAP = 0;
  // bits<1>  CONST_BUF_NO_STRIDE = 0;
  // bits<1>  MEGA_FETCH = 0;
  // bits<1>  ALT_CONST = 0;
  // bits<2>  BUFFER_INDEX_MODE = 0;

  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter)
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82}    = CONST_BUF_NO_STRIDE;
  // Inst{83}    = MEGA_FETCH;
  // Inst{84}    = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;

  let VTXInst = 1;
}

// Load fragment that matches only when isConstantLoad(N, 0) holds for the
// load node.
class LoadParamFrag <PatFrag load_type> : PatFrag <
  (ops node:$ptr), (load_type node:$ptr),
  [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
>;

def load_param : LoadParamFrag<load>;
def load_param_exti8 : LoadParamFrag<az_extloadi8>;
def load_param_exti16 : LoadParamFrag<az_extloadi16>;

// Subtarget predicates, expressed in terms of the subtarget generation and
// hasCaymanISA().
def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">;
def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">;
def isEG : Predicate<
  "Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
  "Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
  "!Subtarget.hasCaymanISA()">;

def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
def isEGorCayman : Predicate<"Subtarget.getGeneration() == "
                             "AMDGPUSubtarget::EVERGREEN"
                             "|| Subtarget.getGeneration() =="
                             "AMDGPUSubtarget::NORTHERN_ISLANDS">;

def isR600toCayman : Predicate<
  "Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;

//===----------------------------------------------------------------------===//
// R600 SDNodes
//===----------------------------------------------------------------------===//

// The asm strings below reference both results as operands ($dst0 and $dst1);
// previously "dst1" was emitted as literal text because the '$' was missing.
def INTERP_PAIR_XY :  AMDGPUShaderInst <
  (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
  (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
  "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
  []>;

def INTERP_PAIR_ZW :  AMDGPUShaderInst <
  (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
  (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
  "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
  []>;

def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
  SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
  [SDNPVariadic]
>;

// One FP result, eight f32 operands.
def DOT4 : SDNode<"AMDGPUISD::DOT4",
  SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
      SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
      SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
  []
>;

def COS_HW : SDNode<"AMDGPUISD::COS_HW",
  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
>;

def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
>;

def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;

def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;

// Maps a TEXTURE_FETCH node whose first operand is the immediate TextureOp
// onto `inst`, passing the remaining 18 operands through in the same order.
multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
          (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
          (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
          (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
          (i32 imm:$DST_SEL_W),
          (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
          (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
          (i32 imm:$COORD_TYPE_W)),
          (inst R600_Reg128:$SRC_GPR,
          imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
          imm:$offsetx, imm:$offsety, imm:$offsetz,
          imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
          imm:$DST_SEL_W,
          imm:$RESOURCE_ID, imm:$SAMPLER_ID,
          imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
          imm:$COORD_TYPE_W)>;
}

//===----------------------------------------------------------------------===//
// Interpolation Instructions
//===----------------------------------------------------------------------===//

def INTERP_VEC_LOAD :  AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src0),
  "INTERP_LOAD $src0 : $dst",
  [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;

def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
  let bank_swizzle = 5;
}

def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
  let bank_swizzle = 5;
}

def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;

//===----------------------------------------------------------------------===//
// Export Instructions
//===----------------------------------------------------------------------===//

def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
  [SDNPHasChain, SDNPSideEffect]>;

// First 32-bit word shared by the export instruction classes below.
class ExportWord0 {
  field bits<32> Word0;

  bits<13> arraybase;
  bits<2> type;
  bits<7> gpr;
  bits<2> elem_size;

  let Word0{12-0} = arraybase;
  let Word0{14-13} = type;
  let Word0{21-15} = gpr;
  let Word0{22} = 0; // RW_REL
  let Word0{29-23} = 0; // INDEX_GPR
  let Word0{31-30} = elem_size;
}

// Second word for swizzled exports.  Only the four swizzle selectors are
// placed here; `eop` and `inst` are declared but not assigned to Word1 bits
// in this class.
class ExportSwzWord1 {
  field bits<32> Word1;

  bits<3> sw_x;
  bits<3> sw_y;
  bits<3> sw_z;
  bits<3> sw_w;
  bits<1> eop;
  bits<8> inst;

  let Word1{2-0} = sw_x;
  let Word1{5-3} = sw_y;
  let Word1{8-6} = sw_z;
  let Word1{11-9} = sw_w;
}

// Second word for buffer exports.  Only arraySize and compMask are placed
// here; `eop` and `inst` are declared but not assigned to Word1 bits in this
// class.
class ExportBufWord1 {
  field bits<32> Word1;

  bits<12> arraySize;
  bits<4> compMask;
  bits<1> eop;
  bits<8> inst;

  let Word1{11-0} = arraySize;
  let Word1{15-12} = compMask;
}

// Selection patterns that lower the R600 store intrinsics and the EXPORT node
// onto ExportInst, using cf_inst as the instruction's $inst operand.
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
  def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 0, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 7, 0, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_dummy (i32 imm:$type)),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_dummy 1),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
    (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
        (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
        imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
  >;

}

// One pattern per stream index (0-3); each selects the matching bufNinst as
// the instruction's $inst operand.
multiclass SteamOutputExportPattern<Instruction ExportInst,
    bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf0inst, 0)>;
// Stream1
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf1inst, 0)>;
// Stream2
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf2inst, 0)>;
// Stream3
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf3inst, 0)>;
}

// Export Instructions should not be duplicated by TailDuplication pass
// (which assumes that duplicable instruction are affected by exec mask)
let usesCustomInserter = 1, isNotDuplicable = 1 in {

class ExportSwzInst : InstR600ISA<(
    outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst,
    i32imm:$eop),
    !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"),
    []>, ExportWord0, ExportSwzWord1 {
  let elem_size = 3;
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
  let IsExport = 1;
}

} // End usesCustomInserter = 1, isNotDuplicable = 1

class ExportBufInst : InstR600ISA<(
    outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
    !strconcat("EXPORT", " $gpr"),
    []>, ExportWord0, ExportBufWord1 {
  let elem_size = 0;
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
  let IsExport = 1;
}

//===----------------------------------------------------------------------===//
// Control Flow Instructions
//===----------------------------------------------------------------------===//


def KCACHE : InstFlag<"printKCache">;

// CF instruction that starts an ALU clause.  Prints the instruction count,
// the clause address and both kcache modes.
class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
i32imm:$COUNT, i32imm:$Enabled),
!strconcat(OpName, " $COUNT, @$ADDR, "
"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
  field bits<64> Inst;

  let CF_INST = inst;
  let ALT_CONST = 0;
  let WHOLE_QUAD_MODE = 0;
  let BARRIER = 1;
  let UseNamedOperandTable = 1;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// First CF word for the R600 encoding: the whole word is the address.
class CF_WORD0_R600 {
  field bits<32> Word0;

  bits<32> ADDR;

  let Word0 = ADDR;
}

// Generic CF clause instruction using the R600 word layout.  The 4-bit CNT
// value is split across COUNT (low three bits) and COUNT_3 (bit 3).
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
  field bits<64> Inst;
  bits<4> CNT;

  let CF_INST = inst;
  let BARRIER = 1;
  let CF_CONST = 0;
  let VALID_PIXEL_MODE = 0;
  let COND = 0;
  let COUNT = CNT{2-0};
  let CALL_COUNT = 0;
  let COUNT_3 = CNT{3};
  let END_OF_PROGRAM = 0;
  let WHOLE_QUAD_MODE = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

// Generic CF clause instruction using the Evergreen word layout.
class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
  field bits<64> Inst;

  let CF_INST = inst;
  let BARRIER = 1;
  let JUMPTABLE_SEL = 0;
  let CF_CONST = 0;
  let VALID_PIXEL_MODE = 0;
  let COND = 0;
  let END_OF_PROGRAM = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;

def FETCH_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
  field bits<8> Inst;
  bits<8> num;
  let Inst = num;
}

def ALU_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
  field bits<8> Inst;
  bits<8> num;
  let Inst = num;
}

// Two 32-bit literals packed into one 64-bit encoding (literal1 in the low
// word, literal2 in the high word).
def LITERALS : AMDGPUInst <(outs),
(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
  field bits<64> Inst;
  bits<32> literal1;
  bits<32> literal2;

  let Inst{31-0} = literal1;
  let Inst{63-32} = literal2;
}

def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
  field bits<64> Inst;
}

let Predicates = [isR600toCayman] in {

//===----------------------------------------------------------------------===//
// Common Instructions R600, R700, Evergreen, Cayman
//===----------------------------------------------------------------------===//

def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;

// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
def SETE : R600_2OP <
  0x08, "SETE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
>;

def SGT : R600_2OP <
  0x09, "SETGT",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
>;

def SGE : R600_2OP <
  0xA, "SETGE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
>;

def SNE : R600_2OP <
  0xB, "SETNE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
>;

def SETE_DX10 : R600_2OP <
  0xC, "SETE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
>;

def SETGT_DX10 : R600_2OP <
  0xD, "SETGT_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
>;

def SETGE_DX10 : R600_2OP <
  0xE, "SETGE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
>;

def SETNE_DX10 : R600_2OP <
  0xF, "SETNE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
>;

def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;

def MOV : R600_1OP <0x19, "MOV", []>;

let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {

// Pseudo instruction that materializes an immediate into a 32-bit register;
// it has no asm string and is expanded by the custom inserter.
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
  (outs R600_Reg32:$dst),
  (ins immType:$imm),
  "",
  []
>;

} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1

def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat <
  (imm:$val),
  (MOV_IMM_I32 imm:$val)
>;

def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat <
  (fpimm:$val),
  (MOV_IMM_F32  fpimm:$val)
>;

def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;

let hasSideEffects = 1 in {

def KILLGT : R600_2OP <0x2D, "KILLGT", []>;

} // end hasSideEffects

def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;

def SETE_INT : R600_2OP <
  0x3A, "SETE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
>;

def SETGT_INT : R600_2OP <
  0x3B, "SETGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
>;

def SETGE_INT : R600_2OP <
  0x3C, "SETGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
>;

def SETNE_INT : R600_2OP <
  0x3D, "SETNE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
>;

def SETGT_UINT : R600_2OP <
  0x3E, "SETGT_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
>;

def SETGE_UINT : R600_2OP <
  0x3F, "SETGE_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
>;

// The asm name of each integer predicate-set instruction matches its def
// name.  (PRED_SETGT_INT previously printed as "PRED_SETGE_INT", making
// opcodes 0x43 and 0x44 indistinguishable in the assembly output.)
def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;

// Integer conditional moves: $src0 is compared against 0 and the result is
// $src1 when the condition holds, otherwise $src2.
def CNDE_INT : R600_3OP <
  0x1C, "CNDE_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
>;

def CNDGE_INT : R600_3OP <
  0x1E, "CNDGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))]
>;

def CNDGT_INT : R600_3OP <
  0x1D, "CNDGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))]
>;

//===----------------------------------------------------------------------===//
// Texture instructions
//===----------------------------------------------------------------------===//

let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {

// Common base for texture instructions. Operands are a 128-bit source and
// destination register, per-channel source/destination selects, three
// offsets, resource and sampler ids and per-channel coordinate types.
// Only the low five bits of `inst` are placed in TEX_INST.
class R600_TEX <bits<11> inst, string opName> :
  InstR600 <(outs R600_Reg128:$DST_GPR),
            (ins R600_Reg128:$SRC_GPR,
                 RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
                 i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
                 RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
                 i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
                 CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
                 CT:$COORD_TYPE_W),
            !strconcat(opName,
            " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
            "$SRC_GPR.$srcx$srcy$srcz$srcw "
            "RID:$RESOURCE_ID SID:$SAMPLER_ID "
            "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
            [],
            NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  let TEX_INST = inst{4-0};
  let SRC_REL = 0;
  let DST_REL = 0;
  let LOD_BIAS = 0;

  let INST_MOD = 0;
  let FETCH_WHOLE_QUAD = 0;
  let ALT_CONST = 0;
  let SAMPLER_INDEX_MODE = 0;
  let RESOURCE_INDEX_MODE = 0;

  let TEXInst = 1;
}

} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0



def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
def TEX_LD : R600_TEX <0x03, "TEX_LD">;
// Same instruction number as TEX_LD; differs only in INST_MOD.
def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> {
  let INST_MOD = 1;
}
def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;

// Selection patterns, keyed by the numeric texture-op id in the first
// template argument.
defm : TexPattern<0, TEX_SAMPLE>;
defm : TexPattern<1, TEX_SAMPLE_C>;
defm : TexPattern<2, TEX_SAMPLE_L>;
defm : TexPattern<3, TEX_SAMPLE_C_L>;
defm : TexPattern<4, TEX_SAMPLE_LB>;
defm : TexPattern<5, TEX_SAMPLE_C_LB>;
defm : TexPattern<6, TEX_LD, v4i32>;
defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
defm : TexPattern<10, TEX_LDPTR, v4i32>;

//===----------------------------------------------------------------------===//
// Helper classes for common instructions
//===----------------------------------------------------------------------===//

// Each *_Common class fixes the asm string and pattern of an instruction and
// leaves the instruction number to the instantiating def.

class MUL_LIT_Common <bits<5> inst> : R600_3OP <
  inst, "MUL_LIT",
  []
>;

class MULADD_Common <bits<5> inst> : R600_3OP <
  inst, "MULADD",
  []
>;

class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
  inst, "MULADD_IEEE",
  [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
>;

// Floating-point conditional moves: $src0 is compared against FP_ZERO.
class CNDE_Common <bits<5> inst> : R600_3OP <
  inst, "CNDE",
  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
>;

class CNDGT_Common <bits<5> inst> : R600_3OP <
  inst, "CNDGT",
  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
> {
  let Itinerary = VecALU;
}

class CNDGE_Common <bits<5> inst> : R600_3OP <
  inst, "CNDGE",
  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
> {
  let Itinerary = VecALU;
}


let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
// Pseudo instruction with one full set of two-source operands per slot
// (X, Y, Z, W) plus two literals, producing a single 32-bit result.
class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
// Slot X
   UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
   OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
   R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
   R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
   R600_Pred:$pred_sel_X,
// Slot Y
   UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
   OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
   R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
   R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
   R600_Pred:$pred_sel_Y,
// Slot Z
   UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
   OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
   R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
   R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
   R600_Pred:$pred_sel_Z,
// Slot W
   UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
   OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
   R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
   R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
   R600_Pred:$pred_sel_W,
   LITERAL:$literal0, LITERAL:$literal1),
  "",
  pattern,
  AnyALU> {

  let UseNamedOperandTable = 1;

}
}

// Four-component dot product pseudo, selected from the DOT4 node.
def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4
  R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
  R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
  R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
  R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;


class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>;


let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// Defines a `_pseudo` record that carries the int_AMDGPU_cube pattern on
// 128-bit registers, and a pattern-less `_real` two-operand instruction.
multiclass CUBE_Common <bits<11> inst> {

  def _pseudo : InstR600 <
    (outs R600_Reg128:$dst),
    (ins R600_Reg128:$src0),
    "CUBE $dst $src0",
    [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
    VecALU
  > {
    let isPseudo = 1;
    let UseNamedOperandTable = 1;
  }

  def _real : R600_2OP <inst, "CUBE", []>;
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "EXP_IEEE", fexp2
> {
  let Itinerary = TransALU;
}

class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "FLT_TO_INT", fp_to_sint
> {
  let Itinerary = TransALU;
}

class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "INT_TO_FLT", sint_to_fp
> {
  let Itinerary = TransALU;
}

class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "FLT_TO_UINT", fp_to_uint
> {
  let Itinerary = TransALU;
}

class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "UINT_TO_FLT", uint_to_fp
> {
  let Itinerary = TransALU;
}

class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
  inst, "LOG_CLAMPED", []
>;

class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "LOG_IEEE", flog2
> {
  let Itinerary = TransALU;
}

class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULHI_INT", mulhs
> {
  let Itinerary = TransALU;
}
// NOTE(review): the asm string is "MULHI" rather than "MULHI_UINT", unlike
// the sibling MULHI_INT / MULLO_UINT classes. Left unchanged because it
// alters printed output; confirm whether it is intentional.
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULHI", mulhu
> {
  let Itinerary = TransALU;
}
class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULLO_INT", mul
> {
  let Itinerary = TransALU;
}
class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
  let Itinerary = TransALU;
}

class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
  inst, "RECIP_CLAMPED", []
> {
  let Itinerary = TransALU;
}

// Selected for 1.0 / x.
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
  inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
> {
  let Itinerary = TransALU;
}

class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "RECIP_UINT", AMDGPUurecip
> {
  let Itinerary = TransALU;
}

class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
  inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
> {
  let Itinerary = TransALU;
}

class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
  inst, "RECIPSQRT_IEEE", []
> {
  let Itinerary = TransALU;
}

// SIN / COS are selected from the SIN_HW / COS_HW nodes and are flagged with
// Trig = 1.
class SIN_Common <bits<11> inst> : R600_1OP <
  inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]> {
  let Trig = 1;
  let Itinerary = TransALU;
}

class COS_Common <bits<11> inst> : R600_1OP <
  inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
  let Trig = 1;
  let Itinerary = TransALU;
}

def CLAMP_R600 : CLAMP <R600_Reg32>;
def FABS_R600 : FABS<R600_Reg32>;
def FNEG_R600 : FNEG<R600_Reg32>;

//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//

// Division is selected as src0 * recip(src1), for both the AMDGPU div
// intrinsic and the generic fdiv node.
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
  (int_AMDGPU_div f32:$src0, f32:$src1),
  (MUL_IEEE $src0, (recip_ieee $src1))
>;

def : Pat<
  (fdiv f32:$src0, f32:$src1),
  (MUL_IEEE $src0, (recip_ieee $src1))
>;
}

// int_TGSI_lit_z is selected as
//   exp_ieee(mul_lit(log_clamped(MAX(src_y, 0.0)), src_w, src_x)).
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
  : Pat <
  (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
  (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;

// FROUND pattern
// Selects CEIL(x) or FLOOR(x) through CNDGE on (FRACT(x) + (-HALF)).
class FROUNDPat<Instruction CNDGE> : Pat <
  (AMDGPUround f32:$x),
  (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
>;


//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR600] in {

  def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
  def MULADD_r600 : MULADD_Common<0x10>;
  def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
  def CNDE_r600 : CNDE_Common<0x18>;
  def CNDGT_r600 : CNDGT_Common<0x19>;
  def CNDGE_r600 : CNDGE_Common<0x1A>;
  def DOT4_r600 : DOT4_Common<0x50>;
  defm CUBE_r600 : CUBE_Common<0x52>;
  def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
  def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
  def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
  def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
  def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
  def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
  def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
  def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
  def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
  def SIN_r600 : SIN_Common<0x6E>;
  def COS_r600 : COS_Common<0x6F>;
  def ASHR_r600 : ASHR_Common<0x70>;
  def LSHR_r600 : LSHR_Common<0x71>;
  def LSHL_r600 : LSHL_Common<0x72>;
  def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
  def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
  def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
  def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
  def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;

  defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
  def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

  // sqrt(x) is selected as x * rsq_clamped(x).
  def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
  def : FROUNDPat <CNDGE_r600>;

  def R600_ExportSwz : ExportSwzInst {
    let Word1{20-17} = 0; // BURST_COUNT
    let Word1{21} = eop;
    let Word1{22} = 0; // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31} = 1; // BARRIER
  }
  defm : ExportPattern<R600_ExportSwz, 39>;

  def R600_ExportBuf : ExportBufInst {
    let Word1{20-17} = 0; // BURST_COUNT
    let Word1{21} = eop;
    let Word1{22} = 0; // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31} = 1; // BARRIER
  }
  // NOTE(review): "Steam" looks like a typo for "Stream", but the multiclass
  // is declared outside this file section under this spelling.
  defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;

  // Control-flow instructions. Fields that are not instruction operands
  // (ADDR, CNT, POP_COUNT) are pinned to 0 per instruction.
  def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
  "TEX $CNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
  "VTX $CNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
  "LOOP_START_DX10 @$ADDR"> {
    let POP_COUNT = 0;
    let CNT = 0;
  }
  def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
    let POP_COUNT = 0;
    let CNT = 0;
  }
  def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
  "LOOP_BREAK @$ADDR"> {
    let POP_COUNT = 0;
    let CNT = 0;
  }
  def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
  "CONTINUE @$ADDR"> {
    let POP_COUNT = 0;
    let CNT = 0;
  }
  def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "JUMP @$ADDR POP:$POP_COUNT"> {
    let CNT = 0;
  }
  def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "ELSE @$ADDR POP:$POP_COUNT"> {
    let CNT = 0;
  }
  def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
    let ADDR = 0;
    let CNT = 0;
    let POP_COUNT = 0;
  }
  def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
  "POP @$ADDR POP:$POP_COUNT"> {
    let CNT = 0;
  }
  def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
    let CNT = 0;
    let POP_COUNT = 0;
    let ADDR = 0;
    let END_OF_PROGRAM = 1;
  }

}

//===----------------------------------------------------------------------===//
// R700 Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR700] in {
  def SIN_r700 : SIN_Common<0x6E>;
  def COS_r700 : COS_Common<0x6F>;
}

//===----------------------------------------------------------------------===//
// Evergreen / Cayman store instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEGorCayman] in {

// RAT instruction built on EG_CF_RAT with first argument 0x57; the asm string
// is "MEM_RAT_CACHELESS " followed by `name`. The channel mask is supplied by
// the instantiating def.
class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
                           string name, list<dag> pattern>
    : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
                 "MEM_RAT_CACHELESS "#name, pattern>;

// RAT instruction built on EG_CF_RAT with first argument 0x56 and a fixed
// mask of 0xf; the asm string is "MEM_RAT " followed by `name`.
class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
                  list<dag> pattern>
    : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
                 "MEM_RAT "#name, pattern>;

def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
  "MSKOR $rw_gpr.XW, $index_gpr",
  [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
> {
  let eop = 0;
}

} // End Predicates = [isEGorCayman]


//===----------------------------------------------------------------------===//
// Evergreen Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEG] in {

def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;

def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
def MULHI_INT_eg : MULHI_INT_Common<0x90>;
def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;

def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
// sqrt(x) is selected as x * rsq_clamped(x).
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;

//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//

let usesCustomInserter = 1 in {

// 32-bit store
def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr, $index_gpr, $eop",
  [(global_store i32:$rw_gpr, i32:$index_gpr)]
>;

// 64-bit store
def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
  (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
  [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
>;

// 128-bit store
def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
  [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;

} // End usesCustomInserter = 1

// Common base for Evergreen vertex reads: fixes the word-0 fields that do not
// vary between read widths and maps Word0 onto the low 32 encoding bits.
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {

  // Static fields
  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let BUFFER_ID = buffer_id;
  let SRC_REL = 0;
  // XXX: We can infer this field based on the SRC_GPR. This would allow us
  // to store vertex addresses in any channel, not just X.
  let SRC_SEL_X = 0;

  let Inst{31-0} = Word0;
}

// Width-specific reads. Each class sets MEGA_FETCH_COUNT, the destination
// channel selects (7 = masked) and DATA_FORMAT.

class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 1;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 1; // FMT_8
}

class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {
  let MEGA_FETCH_COUNT = 2;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 5; // FMT_16

}

class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
                   (outs R600_TReg32_X:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 4;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 0xD; // COLOR_32

  // This is not really necessary, but there were some GPU hangs that appeared
  // to be caused by ALU instructions in the next instruction group that wrote
  // to the $src_gpr registers of the VTX_READ.
  // e.g.
  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
  // %T2_X<def> = MOV %ZERO
  // Adding this constraint prevents this from happening.
  let Constraints = "$src_gpr.ptr = $dst_gpr";
}

class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
                   (outs R600_Reg64:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 8;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 7;
  let DST_SEL_W = 7;
  let DATA_FORMAT = 0x1D; // COLOR_32_32
}

class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
                   (outs R600_Reg128:$dst_gpr), pattern> {

  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 0x22; // COLOR_32_32_32_32

  // XXX: Need to force VTX_READ_128 instructions to write to the same register
  // that holds its buffer address to avoid potential hangs. We can't use
  // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
  // registers are different sizes.
}

//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//

// All parameter reads use buffer id 0.

def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
  [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
  [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
  [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
  [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
  [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;

//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//

// All global reads use buffer id 1.

// 8-bit reads
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
  [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
>;

// 16-bit reads
def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1,
  [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
>;

// 32-bit reads
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
  [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
>;

// 64-bit reads
1483263508Sdimdef VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1, 1484263508Sdim [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] 1485263508Sdim>; 1486263508Sdim 1487263508Sdim// 128-bit reads 1488263508Sdimdef VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, 1489263508Sdim [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] 1490263508Sdim>; 1491263508Sdim 1492249259Sdim} // End Predicates = [isEG] 1493249259Sdim 1494249259Sdim//===----------------------------------------------------------------------===// 1495249259Sdim// Evergreen / Cayman Instructions 1496249259Sdim//===----------------------------------------------------------------------===// 1497249259Sdim 1498249259Sdimlet Predicates = [isEGorCayman] in { 1499249259Sdim 1500249259Sdim // BFE_UINT - bit_extract, an optimization for mask and shift 1501249259Sdim // Src0 = Input 1502249259Sdim // Src1 = Offset 1503249259Sdim // Src2 = Width 1504249259Sdim // 1505249259Sdim // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) 1506249259Sdim // 1507249259Sdim // Example Usage: 1508249259Sdim // (Offset, Width) 1509249259Sdim // 1510249259Sdim // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 1511249259Sdim // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 1512249259Sdim // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 1513249259Sdim // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 1514249259Sdim def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", 1515251662Sdim [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1, 1516251662Sdim i32:$src2))], 1517249259Sdim VecALU 1518249259Sdim >; 1519251662Sdim def : BFEPattern <BFE_UINT_eg>; 1520249259Sdim 1521251662Sdim def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>; 1522251662Sdim defm : BFIPatterns <BFI_INT_eg>; 1523251662Sdim 1524263508Sdim def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24", 1525263508Sdim [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU 1526249259Sdim >; 
// BIT_ALIGN_INT carries an empty pattern list; ROTRPattern is instantiated
// with it right below.
  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
  def : ROTRPattern <BIT_ALIGN_INT_eg>;

  // Evergreen/Cayman instantiations of the shared *_Common classes; the
  // template argument is the opcode passed to the common class.
  def MULADD_eg : MULADD_Common<0x14>;
  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
  def ASHR_eg : ASHR_Common<0x15>;
  def LSHR_eg : LSHR_Common<0x16>;
  def LSHL_eg : LSHL_Common<0x17>;
  def CNDE_eg : CNDE_Common<0x19>;
  def CNDGT_eg : CNDGT_Common<0x1A>;
  def CNDGE_eg : CNDGE_Common<0x1B>;
  def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
  def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
  // Multiply of two U24 operands.
  def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
    [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU
  >;
  def DOT4_eg : DOT4_Common<0xBE>;
  defm CUBE_eg : CUBE_Common<0xC0>;

  // MOVA_INT has no pattern and is flagged as having side effects.
  let hasSideEffects = 1 in {
    def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
  }

  def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;

  // The inherited selection pattern is cleared here; fp_to_sint is matched by
  // an explicit Pat further down in this Predicates block.
  def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
    let Pattern = [];
    let Itinerary = AnyALU;
  }

  def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;

  // Same as FLT_TO_INT_eg: the inherited pattern is cleared.
  def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
    let Pattern = [];
  }

  def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;

// GROUP_BARRIER - selected for int_AMDGPU_barrier_local.  It takes no
// operands, so every operand/modifier field of the two ALU words is fixed
// here instead of being filled in from operands.
def GROUP_BARRIER : InstR600 <
    (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <0x54> {

  // Destination fields
  let dst = 0;
  let dst_rel = 0;

  // Source 0 fields
  let src0 = 0;
  let src0_rel = 0;
  let src0_neg = 0;
  let src0_abs = 0;

  // Source 1 fields
  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;

  // Remaining modifier/control fields
  let write = 0;
  let omod = 0;
  let clamp = 0;
  let last = 1; // NOTE(review): presumably "last in instruction group" - confirm
  let bank_swizzle = 0;
  let pred_sel = 0;
  let update_exec_mask = 0;
  let update_pred = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  let ALUInst = 1;
}

//===----------------------------------------------------------------------===//
// LDS Instructions
//===----------------------------------------------------------------------===//

// R600_LDS - common base of all LDS instructions.
//   op      - value assigned to the lds_op field
//   outs    - output operand dag
//   ins     - input operand dag
//   asm     - assembly string
//   pattern - selection pattern (defaults to none)
class R600_LDS <bits<6> op, dag outs, dag ins, string asm,
                list<dag> pattern = []> :

    InstR600 <outs, ins, asm, pattern, XALU>,
    R600_ALU_LDS_Word0,
    R600LDS_Word1 {

  bits<6> offset = 0;
  let lds_op = op;

  // The six offset bits are scattered over both encoding words.
  let Word1{27} = offset{0};
  let Word1{12} = offset{1};
  let Word1{28} = offset{2};
  let Word1{31} = offset{3};
  let Word0{12} = offset{4};
  let Word0{25} = offset{5};


  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  let ALUInst = 1;
  let HasNativeOperands = 1;
  let UseNamedOperandTable = 1;
}

// R600_LDS_1A - LDS instruction with a single source operand ($src0) and a
// $dst result.  $dst is excluded from the encoding (DisableEncoding), OQAP is
// listed in Defs, and a custom inserter is requested.
class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
  lds_op,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       LAST:$last, R600_Pred:$pred_sel,
       BANK_SWIZZLE:$bank_swizzle),
  " "#name#" $last OQAP, $src0$src0_rel $pred_sel",
  pattern
  > {

  // Unused source slots
  let src1 = 0;
  let src1_rel = 0;
  let src2 = 0;
  let src2_rel = 0;

  let Defs = [OQAP];
  let usesCustomInserter = 1;
  let LDS_1A = 1;
  let DisableEncoding = "$dst";
}

// R600_LDS_1A1D - LDS instruction with two source operands ($src0, $src1).
//   outs - output dag, supplied by the NORET / RET subclasses
//   dst  - text inserted in the asm string in front of $src0 (default empty)
// BaseOp is declared here and assigned by the subclasses.
class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
                     string dst =""> :
    R600_LDS <
  lds_op, outs,
  (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
       LAST:$last, R600_Pred:$pred_sel,
       BANK_SWIZZLE:$bank_swizzle),
  " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
  pattern
  > {

  field string BaseOp;

  // Unused source slot
  let src2 = 0;
  let src2_rel = 0;
  let LDS_1A1D = 1;
}

// Two-source LDS instruction without a result operand.
class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS_1A1D <lds_op, (outs), name, pattern> {
  let BaseOp = name;
}

// Two-source LDS instruction with a $dst result.  The mnemonic is name with
// "_RET" appended while BaseOp stays the plain name, so a RET def and its
// NORET counterpart share the same BaseOp string.
class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {

  let BaseOp = name;
  let usesCustomInserter = 1;
  let DisableEncoding = "$dst";
  let Defs = [OQAP];
}

// R600_LDS_1A2D - LDS instruction with three source operands and no result.
// The asm string now puts a space between the mnemonic and $last, matching
// R600_LDS_1A and R600_LDS_1A1D (previously $last was glued to the name).
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
    R600_LDS <
  lds_op,
  (outs),
  (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
       R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
       R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
       LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
  " "#name#" $last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
  pattern> {
  let LDS_1A2D = 1;
}

// LDS operations without a result.  In the store patterns $src0 is the
// address operand and $src1 the value operand.
def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
  [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
>;
def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
  [(truncstorei8_local i32:$src1, i32:$src0)]
>;
def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
  [(truncstorei16_local i32:$src1, i32:$src0)]
>;

// LDS atomics that return a value.
def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
  [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
>;
def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
  [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
>;

// LDS loads: full dword, then sign- and zero/any-extending byte and short
// variants.
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
  [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
  [(set i32:$dst, (sextloadi8_local i32:$src0))]
>;
def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
  [(set i32:$dst, (az_extloadi8_local i32:$src0))]
>;
def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
  [(set i32:$dst, (sextloadi16_local i32:$src0))]
>;
def LDS_USHORT_READ_RET
  : R600_LDS_1A<0x39, "LDS_USHORT_READ_RET",
                [(set i32:$dst, (az_extloadi16_local i32:$src0))]>;

  // TRUNC is used for the FLT_TO_INT instructions to work around a
  // perceived problem where the rounding modes are applied differently
  // depending on the instruction and the slot they are in.
  // See:
  // https://bugs.freedesktop.org/show_bug.cgi?id=50232
  // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
  //
  // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
  // which do not need to be truncated since the fp values are 0.0f or 1.0f.
  // We should look into handling these cases separately.
  def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
  def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;

  // SHA-256 Patterns
  def : SHA256MaPattern<BFI_INT_eg, XOR_INT>;

  def : FROUNDPat<CNDGE_eg>;

  // Export instructions: only Word1 bits 16..31 are assigned here.
  def EG_ExportSwz : ExportSwzInst {
    let Word1{19-16} = 0;     // BURST_COUNT
    let Word1{20}    = 0;     // VALID_PIXEL_MODE
    let Word1{21}    = eop;
    let Word1{29-22} = inst;
    let Word1{30}    = 0;     // MARK
    let Word1{31}    = 1;     // BARRIER
  }
  defm : ExportPattern<EG_ExportSwz, 83>;

  def EG_ExportBuf : ExportBufInst {
    let Word1{19-16} = 0;     // BURST_COUNT
    let Word1{20}    = 0;     // VALID_PIXEL_MODE
    let Word1{21}    = eop;
    let Word1{29-22} = inst;
    let Word1{30}    = 0;     // MARK
    let Word1{31}    = 1;     // BARRIER
  }
  defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;

  // Control-flow clause instructions.  Each def pins to zero the
  // CF_CLAUSE_EG fields that are not supplied as operands.
  def CF_TC_EG
    : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
                   "TEX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def CF_VC_EG
    : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
                   "VTX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def WHILE_LOOP_EG
    : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def END_LOOP_EG
    : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def LOOP_BREAK_EG
    : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def CF_CONTINUE_EG
    : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def CF_JUMP_EG
    : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                   "JUMP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_ELSE_EG
    : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                   "ELSE @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
    let ADDR = 0;
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def POP_EG
    : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                   "POP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  // The only clause def in this block that sets END_OF_PROGRAM.
  def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
    let ADDR = 0;
    let COUNT = 0;
    let POP_COUNT = 0;
    let END_OF_PROGRAM = 1;
  }

} // End Predicates = [isEGorCayman]

//===----------------------------------------------------------------------===//
// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//

defm R600_ : RegisterLoadStore<R600_Reg32, FRAMEri, ADDRIndirect>;

//===----------------------------------------------------------------------===//
// Cayman Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isCayman] in {

// Multiply-add on I24 operands: $dst = ($src0 * $src1) + $src2.
def MULADD_INT24_cm
  : R600_3OP<0x08, "MULADD_INT24",
             [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))],
             VecALU>;

// Multiply of two I24 operands.
def MUL_INT24_cm
  : R600_2OP<0x5B, "MUL_INT24",
             [(set i32:$dst, (mul I24:$src0, I24:$src1))],
             VecALU>;

// Cayman instantiations of the shared *_Common classes, all marked isVector.
let isVector = 1 in {

def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;

def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
def MULHI_INT_cm : MULHI_INT_Common<0x90>;
def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
// Patterns built on the Cayman vector instructions defined above.
def : POW_Common<LOG_IEEE_cm, EXP_IEEE_cm, MUL>;

defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;

// RECIP_UINT emulation for Cayman
// The multiplication scales from [0,1] to the unsigned integer range
def : Pat<
  (AMDGPUurecip i32:$src0),
  (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
                            (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>;

  // Cayman CF_END clause: all three clause fields are pinned to zero.
  def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
    let ADDR = 0;
    let COUNT = 0;
    let POP_COUNT = 0;
  }

// fsqrt is selected as src * RECIPSQRT_CLAMPED(src).
def : Pat<(fsqrt f32:$src),
          (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;

// RAT_STORE_DWORD - global store of a value of type vt held in register
// class rc; mask is forwarded to CF_MEM_RAT_CACHELESS.
class RAT_STORE_DWORD<RegisterClass rc, ValueType vt, bits<4> mask>
  : CF_MEM_RAT_CACHELESS<0x14, 0, mask,
                         (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
                         "STORE_DWORD $rw_gpr, $index_gpr",
                         [(global_store vt:$rw_gpr, i32:$index_gpr)]> {
  let eop = 0; // This bit is not used on Cayman.
}

def RAT_STORE_DWORD32  : RAT_STORE_DWORD<R600_TReg32_X, i32,   0x1>;
def RAT_STORE_DWORD64  : RAT_STORE_DWORD<R600_Reg64,    v2i32, 0x3>;
def RAT_STORE_DWORD128 : RAT_STORE_DWORD<R600_Reg128,   v4i32, 0xf>;

// VTX_READ_cm - common base of the Cayman vertex-fetch reads.
//   name      - assembly string
//   buffer_id - value assigned to BUFFER_ID
//   outs      - output operand dag
//   pattern   - selection pattern
// Only Word0 is placed into Inst here.
class VTX_READ_cm<string name, bits<8> buffer_id, dag outs, list<dag> pattern>
  : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {

  // Static fields
  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let BUFFER_ID = buffer_id;
  let SRC_REL = 0;
  // XXX: We can infer this field based on the SRC_GPR.  This would allow us
  // to store vertex addresses in any channel, not just X.
  let SRC_SEL_X = 0;
  let SRC_SEL_Y = 0;
  let STRUCTURED_READ = 0;
  let LDS_REQ = 0;
  let COALESCED_READ = 0;

  let Inst{31-0} = Word0;
}

// 8-bit read into the X channel; Y, Z and W are masked.
class VTX_READ_8_cm<bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm<"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
                (outs R600_TReg32_X:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 1; // FMT_8
}

// 16-bit read into the X channel; Y, Z and W are masked.
class VTX_READ_16_cm<bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm<"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
                (outs R600_TReg32_X:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 5; // FMT_16

}

// 32-bit read into the X channel; Y, Z and W are masked.
class VTX_READ_32_cm<bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm<"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
                (outs R600_TReg32_X:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 7; // Masked
  let DST_SEL_Z = 7; // Masked
  let DST_SEL_W = 7; // Masked
  let DATA_FORMAT = 0xD; // COLOR_32

  // This is not really necessary, but there were some GPU hangs that appeared
  // to be caused by ALU instructions in the next instruction group that wrote
  // to the $src_gpr registers of the VTX_READ.
  // e.g.
  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
  // %T2_X<def> = MOV %ZERO
  // Adding this constraint prevents this from happening.
  let Constraints = "$src_gpr.ptr = $dst_gpr";
}

// 64-bit read into the X and Y channels; Z and W are masked.
class VTX_READ_64_cm<bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm<"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id,
                (outs R600_Reg64:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 7;
  let DST_SEL_W = 7;
  let DATA_FORMAT = 0x1D; // COLOR_32_32
}

// 128-bit read into all four channels.
class VTX_READ_128_cm<bits<8> buffer_id, list<dag> pattern>
  : VTX_READ_cm<"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
                (outs R600_Reg128:$dst_gpr), pattern> {

  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 0x22; // COLOR_32_32_32_32

  // XXX: Need to force VTX_READ_128 instructions to write to the same register
  // that holds its buffer address to avoid potential hangs.  We can't use
  // the same constraint as VTX_READ_32_cm, because the $src_gpr.ptr and $dst
  // registers are different sizes.
}

//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
def VTX_READ_PARAM_8_cm
  : VTX_READ_8_cm<0, [(set i32:$dst_gpr,
                           (load_param_exti8 ADDRVTX_READ:$src_gpr))]>;

def VTX_READ_PARAM_16_cm
  : VTX_READ_16_cm<0, [(set i32:$dst_gpr,
                            (load_param_exti16 ADDRVTX_READ:$src_gpr))]>;

def VTX_READ_PARAM_32_cm
  : VTX_READ_32_cm<0, [(set i32:$dst_gpr,
                            (load_param ADDRVTX_READ:$src_gpr))]>;

def VTX_READ_PARAM_64_cm
  : VTX_READ_64_cm<0, [(set v2i32:$dst_gpr,
                            (load_param ADDRVTX_READ:$src_gpr))]>;

def VTX_READ_PARAM_128_cm
  : VTX_READ_128_cm<0, [(set v4i32:$dst_gpr,
                             (load_param ADDRVTX_READ:$src_gpr))]>;

//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//

// 8-bit reads
def VTX_READ_GLOBAL_8_cm
  : VTX_READ_8_cm<1, [(set i32:$dst_gpr,
                           (az_extloadi8_global ADDRVTX_READ:$src_gpr))]>;

// 16-bit reads
def VTX_READ_GLOBAL_16_cm
  : VTX_READ_16_cm<1, [(set i32:$dst_gpr,
                            (az_extloadi16_global ADDRVTX_READ:$src_gpr))]>;

// 32-bit reads
def VTX_READ_GLOBAL_32_cm
  : VTX_READ_32_cm<1, [(set i32:$dst_gpr,
                            (global_load ADDRVTX_READ:$src_gpr))]>;

// 64-bit reads
def VTX_READ_GLOBAL_64_cm
  : VTX_READ_64_cm<1, [(set v2i32:$dst_gpr,
                            (global_load ADDRVTX_READ:$src_gpr))]>;

// 128-bit reads
def VTX_READ_GLOBAL_128_cm
  : VTX_READ_128_cm<1, [(set v4i32:$dst_gpr,
                             (global_load ADDRVTX_READ:$src_gpr))]>;

} // End Predicates = [isCayman]

//===----------------------------------------------------------------------===//
// Branch Instructions
//===----------------------------------------------------------------------===//


def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
                                "IF_PREDICATE_SET $src", []>;

//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//

let isPseudo = 1 in {

// Predicate-setting pseudo.  Counting the output first, operand index 3 is
// $flags, which is what FlagOperandIdx names.
def PRED_X : InstR600<
  (outs R600_Predicate_Bit:$dst),
  (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
  "", [], NullALU> {
  let FlagOperandIdx = 3;
}

let isTerminator = 1, isBranch = 1 in {

// Jump taken under predicate bit $p.
def JUMP_COND : InstR600<
  (outs),
  (ins brtarget:$target, R600_Predicate_Bit:$p),
  "JUMP $target ($p)",
  [], AnyALU
>;

// Unconditional jump; predicable and a barrier.
def JUMP : InstR600<
  (outs),
  (ins brtarget:$target),
  "JUMP $target",
  [], AnyALU
> {
  let isPredicable = 1;
  let isBarrier = 1;
}

} // End isTerminator = 1, isBranch = 1

let usesCustomInserter = 1 in {

let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {

def MASK_WRITE : AMDGPUShaderInst<
  (outs),
  (ins R600_Reg32:$src),
  "MASK_WRITE $src",
  []
>;

} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1


// TXD and TXD_SHADOW both match int_AMDGPU_txd; they differ only in the
// $textureTarget operand of the pattern (imm vs. TEX_SHADOW).
def TXD : InstR600<
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
  "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
                     imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
  NullALU> {
  let TEXInst = 1;
}

def TXD_SHADOW : InstR600<
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
  "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
                     imm:$resourceId, imm:$samplerId,
                     TEX_SHADOW:$textureTarget))],
  NullALU> {
  let TEXInst = 1;
}
} // End usesCustomInserter = 1
} // End isPseudo = 1

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
    usesCustomInserter = 1 in {
  def RETURN : ILFormat<(outs), (ins variable_ops),
                        "RETURN", [(IL_retflag)]>;
}


//===----------------------------------------------------------------------===//
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//

let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1,
    Namespace = "AMDGPU" in {
// Codegen-only pseudo matched from CONST_ADDRESS with a constant offset.
def CONST_COPY : Instruction {
  let OutOperandList = (outs R600_Reg32:$dst);
  let InOperandList = (ins i32imm:$src);
  let Pattern =
      [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
  let AsmString = "CONST_COPY";
  let neverHasSideEffects = 1;
  let isAsCheapAsAMove = 1;
  let Itinerary = NullALU;
}
} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"

// Constant-buffer read with a variable offset, encoded as a vertex fetch.
def TEX_VTX_CONSTBUF :
  InstR600ISA<(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID),
              "VTX_READ_eg $dst, $ptr",
              [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr,
                                               (i32 imm:$BUFFER_ID)))]>,
  VTX_WORD1_GPR, VTX_WORD0_eg {

  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let SRC_REL = 0;
  let SRC_SEL_X = 0;
  let DST_REL = 0;
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL = 2;
  let FORMAT_COMP_ALL = 1;
  let SRF_MODE_ALL = 1;
  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 35;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
// bits<16> OFFSET;
// bits<2>  ENDIAN_SWAP = 0;
// bits<1>  CONST_BUF_NO_STRIDE = 0;
// bits<1>  MEGA_FETCH = 0;
// bits<1>  ALT_CONST = 0;
// bits<2>  BUFFER_INDEX_MODE = 0;



// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter
//
// Inst{79-64} = OFFSET;
// Inst{81-80} = ENDIAN_SWAP;
// Inst{82}    = CONST_BUF_NO_STRIDE;
// Inst{83}    = MEGA_FETCH;
// Inst{84}    = ALT_CONST;
// Inst{86-85} = BUFFER_INDEX_MODE;
// Inst{95-87} = 0; Reserved

// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
  let VTXInst = 1;
}

// Texture-buffer read selected for int_R600_load_texbuf, encoded as a
// vertex fetch.
def TEX_VTX_TEXBUF :
  InstR600ISA<(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID),
              "TEX_VTX_EXPLICIT_READ $dst, $ptr",
              [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr,
                                                      imm:$BUFFER_ID))]>,
  VTX_WORD1_GPR, VTX_WORD0_eg {

  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let SRC_REL = 0;
  let SRC_SEL_X = 0;
  let DST_REL = 0;
  let USE_CONST_FIELDS = 1;
  let NUM_FORMAT_ALL = 0;
  let FORMAT_COMP_ALL = 0;
  let SRF_MODE_ALL = 1;
  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
// bits<16> OFFSET;
// bits<2>  ENDIAN_SWAP = 0;
// bits<1>  CONST_BUF_NO_STRIDE = 0;
// bits<1>  MEGA_FETCH = 0;
// bits<1>  ALT_CONST = 0;
// bits<2>  BUFFER_INDEX_MODE = 0;



// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter
//
// Inst{79-64} = OFFSET;
// Inst{81-80} = ENDIAN_SWAP;
// Inst{82}    = CONST_BUF_NO_STRIDE;
// Inst{83}    = MEGA_FETCH;
// Inst{84}    = ALT_CONST;
// Inst{86-85} = BUFFER_INDEX_MODE;
// Inst{95-87} = 0; Reserved

// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
  let VTXInst = 1;
}



//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
                        "; Pseudo unconditional branch instruction",
                        [(br bb:$target)]>;
  defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
}

//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
let isTerminator=1 in {
  // Structured control-flow pseudos; none of them has a selection pattern.
  def SWITCH    : ILFormat<(outs), (ins GPRI32:$src),
                           !strconcat("SWITCH", " $src"), []>;
  def CASE      : ILFormat<(outs), (ins GPRI32:$src),
                           !strconcat("CASE", " $src"), []>;
  def BREAK     : ILFormat<(outs), (ins), "BREAK", []>;
  def CONTINUE  : ILFormat<(outs), (ins), "CONTINUE", []>;
  def DEFAULT   : ILFormat<(outs), (ins), "DEFAULT", []>;
  def ELSE      : ILFormat<(outs), (ins), "ELSE", []>;
  def ENDSWITCH : ILFormat<(outs), (ins), "ENDSWITCH", []>;
  def ENDMAIN   : ILFormat<(outs), (ins), "ENDMAIN", []>;
  def END       : ILFormat<(outs), (ins), "END", []>;
  def ENDFUNC   : ILFormat<(outs), (ins), "ENDFUNC", []>;
  def ENDIF     : ILFormat<(outs), (ins), "ENDIF", []>;
  def WHILELOOP : ILFormat<(outs), (ins), "WHILE", []>;
  def ENDLOOP   : ILFormat<(outs), (ins), "ENDLOOP", []>;
  def FUNC      : ILFormat<(outs), (ins), "FUNC", []>;
  def RETDYN    : ILFormat<(outs), (ins), "RET_DYN", []>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2295249259Sdim defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">; 2296249259Sdim // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2297249259Sdim defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">; 2298249259Sdim // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2299249259Sdim defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">; 2300249259Sdim // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2301249259Sdim defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">; 2302249259Sdim // This opcode has custom swizzle pattern encoded in Swizzle Encoder 2303249259Sdim defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">; 2304249259Sdim defm IFC : BranchInstr2<"IFC">; 2305249259Sdim defm BREAKC : BranchInstr2<"BREAKC">; 2306249259Sdim defm CONTINUEC : BranchInstr2<"CONTINUEC">; 2307249259Sdim} 2308249259Sdim 2309249259Sdim//===----------------------------------------------------------------------===// 2310249259Sdim// ISel Patterns 2311249259Sdim//===----------------------------------------------------------------------===// 2312249259Sdim 2313249259Sdim// CND*_INT Pattterns for f32 True / False values 2314249259Sdim 2315249259Sdimclass CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat < 2316251662Sdim (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc), 2317251662Sdim (cnd $src0, $src1, $src2) 2318249259Sdim>; 2319249259Sdim 2320249259Sdimdef : CND_INT_f32 <CNDE_INT, SETEQ>; 2321249259Sdimdef : CND_INT_f32 <CNDGT_INT, SETGT>; 2322249259Sdimdef : CND_INT_f32 <CNDGE_INT, SETGE>; 2323249259Sdim 2324249259Sdim//CNDGE_INT extra pattern 2325249259Sdimdef : Pat < 2326263508Sdim (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT), 2327251662Sdim (CNDGE_INT $src0, $src1, $src2) 2328249259Sdim>; 2329249259Sdim 2330249259Sdim// KIL Patterns 2331249259Sdimdef KILP : Pat < 2332249259Sdim (int_AMDGPU_kilp), 2333249259Sdim (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) 2334249259Sdim>; 2335249259Sdim 2336249259Sdimdef KIL : Pat 
< 2337251662Sdim (int_AMDGPU_kill f32:$src0), 2338251662Sdim (MASK_WRITE (KILLGT (f32 ZERO), $src0)) 2339249259Sdim>; 2340249259Sdim 2341251662Sdimdef : Extract_Element <f32, v4f32, 0, sub0>; 2342251662Sdimdef : Extract_Element <f32, v4f32, 1, sub1>; 2343251662Sdimdef : Extract_Element <f32, v4f32, 2, sub2>; 2344251662Sdimdef : Extract_Element <f32, v4f32, 3, sub3>; 2345249259Sdim 2346251662Sdimdef : Insert_Element <f32, v4f32, 0, sub0>; 2347251662Sdimdef : Insert_Element <f32, v4f32, 1, sub1>; 2348251662Sdimdef : Insert_Element <f32, v4f32, 2, sub2>; 2349251662Sdimdef : Insert_Element <f32, v4f32, 3, sub3>; 2350249259Sdim 2351251662Sdimdef : Extract_Element <i32, v4i32, 0, sub0>; 2352251662Sdimdef : Extract_Element <i32, v4i32, 1, sub1>; 2353251662Sdimdef : Extract_Element <i32, v4i32, 2, sub2>; 2354251662Sdimdef : Extract_Element <i32, v4i32, 3, sub3>; 2355249259Sdim 2356251662Sdimdef : Insert_Element <i32, v4i32, 0, sub0>; 2357251662Sdimdef : Insert_Element <i32, v4i32, 1, sub1>; 2358251662Sdimdef : Insert_Element <i32, v4i32, 2, sub2>; 2359251662Sdimdef : Insert_Element <i32, v4i32, 3, sub3>; 2360249259Sdim 2361251662Sdimdef : Vector4_Build <v4f32, f32>; 2362251662Sdimdef : Vector4_Build <v4i32, i32>; 2363249259Sdim 2364263508Sdimdef : Extract_Element <f32, v2f32, 0, sub0>; 2365263508Sdimdef : Extract_Element <f32, v2f32, 1, sub1>; 2366263508Sdim 2367263508Sdimdef : Insert_Element <f32, v2f32, 0, sub0>; 2368263508Sdimdef : Insert_Element <f32, v2f32, 1, sub1>; 2369263508Sdim 2370263508Sdimdef : Extract_Element <i32, v2i32, 0, sub0>; 2371263508Sdimdef : Extract_Element <i32, v2i32, 1, sub1>; 2372263508Sdim 2373263508Sdimdef : Insert_Element <i32, v2i32, 0, sub0>; 2374263508Sdimdef : Insert_Element <i32, v2i32, 1, sub1>; 2375263508Sdim 2376249259Sdim// bitconvert patterns 2377249259Sdim 2378249259Sdimdef : BitConvert <i32, f32, R600_Reg32>; 2379249259Sdimdef : BitConvert <f32, i32, R600_Reg32>; 2380263508Sdimdef : BitConvert <v2f32, v2i32, R600_Reg64>; 
def : BitConvert <v2i32, v2f32, R600_Reg64>;
def : BitConvert <v4f32, v4i32, R600_Reg128>;
def : BitConvert <v4i32, v4f32, R600_Reg128>;

// DWORDADDR pattern
def : DwordAddrPat  <i32, R600_Reg32>;

} // End isR600toCayman Predicate

// Instruction mapping table over the instructions derived from R600_LDS_1A1D.
// Instructions that share the same BaseOp form one row; the column is chosen
// by the DisableEncoding field, with "$dst" as the key column.
// NOTE(review): judging by the name, this maps an LDS instruction to its
// variant that does not return a value -- confirm against the users of the
// generated getLDSNoRetOp() function.
def getLDSNoRetOp : InstrMapping {
  let FilterClass = "R600_LDS_1A1D";
  let RowFields = ["BaseOp"];
  let ColFields = ["DisableEncoding"];
  let KeyCol = ["$dst"];
  // NOTE(review): TableGen concatenates adjacent string literals, so """"
  // appears to denote a single empty string -- confirm.
  let ValueCols = [[""""]];
}