R600Instructions.td revision 251662
//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//

include "R600Intrinsics.td"

// Common base for the R600 instruction classes in this file.  It forwards the
// operand lists, asm string, selection pattern and itinerary into the record
// and packs the per-instruction flag bits declared below into TSFlags.
class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
                InstrItinClass itin>
    : AMDGPUInst <outs, ins, asm, pattern> {

  // 64-bit encoding; subclasses fill it from their Word0/Word1 fields.
  field bits<64> Inst;

  // Flag bits, all off unless a subclass overrides them.
  bit     TransOnly         = 0;
  bit     Trig              = 0;
  bit     Op3               = 0;
  bit     isVector          = 0;
  bits<2> FlagOperandIdx    = 0;
  bit     Op1               = 0;
  bit     Op2               = 0;
  bit     HasNativeOperands = 0;
  bit     VTXInst           = 0;
  bit     TEXInst           = 0;

  let Namespace      = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList  = ins;
  let AsmString      = asm;
  let Pattern        = pattern;
  let Itinerary      = itin;

  // TSFlags layout.  Bits 1-3 are not assigned here.
  let TSFlags{0}   = TransOnly;
  let TSFlags{4}   = Trig;
  let TSFlags{5}   = Op3;

  // Vector instructions are instructions that must fill all slots in an
  // instruction group
  let TSFlags{6}   = isVector;
  let TSFlags{8-7} = FlagOperandIdx;
  let TSFlags{9}   = HasNativeOperands;
  let TSFlags{10}  = Op1;
  let TSFlags{11}  = Op2;
  let TSFlags{12}  = VTXInst;
  let TSFlags{13}  = TEXInst;
}

// InstR600 with the itinerary fixed to NullALU.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
    InstR600 <outs, ins, asm, pattern, NullALU> {

  let Namespace = "AMDGPU";
}

// Memory operand made of a $ptr register and an immediate $index.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
  let PrintMethod = "printMemOperand";
}

// Memory operand made of a $ptr register and an $index register.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}

// Operands for non-registers

// i32 operand with a default value (Default) and a selectable print method
// (PM).
class InstFlag<string PM = "printOperand", int Default = 0>
    : OperandWithDefaultOps <i32, (ops (i32 Default))> {
  let PrintMethod = PM;
}

// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
  let PrintMethod = "printSel";
}
def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
  let PrintMethod = "printBankSwizzle";
}

def LITERAL : InstFlag<"printLiteral">;

def WRITE : InstFlag <"printWrite", 1>;
def OMOD  : InstFlag <"printOMOD">;
def REL   : InstFlag <"printRel">;
def CLAMP : InstFlag <"printClamp">;
def NEG   : InstFlag <"printNeg">;
def ABS   : InstFlag <"printAbs">;
def UEM   : InstFlag <"printUpdateExecMask">;
def UP    : InstFlag <"printUpdatePred">;

// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
// Once we start using the packetizer in this backend we should have this
// default to 0.
def LAST : InstFlag<"printLast", 1>;

// Operand made of a $ptr register and an immediate $index.
def FRAMEri : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}

// Address-matching complex patterns; the string names the selector routine
// each one dispatches to.
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

// First 32-bit word of an ALU instruction.  The 11-bit src operands are split
// into a 9-bit selector (bits 8-0) and a 2-bit channel (bits 10-9).
class R600ALU_Word0 {
  field bits<32> Word0;

  bits<11> src0;
  bits<1>  src0_neg;
  bits<1>  src0_rel;
  bits<11> src1;
  bits<1>  src1_rel;
  bits<1>  src1_neg;
  bits<3>  index_mode = 0;
  bits<2>  pred_sel;
  bits<1>  last;

  bits<9>  src0_sel  = src0{8-0};
  bits<2>  src0_chan = src0{10-9};
  bits<9>  src1_sel  = src1{8-0};
  bits<2>  src1_chan = src1{10-9};

  let Word0{8-0}   = src0_sel;
  let Word0{9}     = src0_rel;
  let Word0{11-10} = src0_chan;
  let Word0{12}    = src0_neg;
  let Word0{21-13} = src1_sel;
  let Word0{22}    = src1_rel;
  let Word0{24-23} = src1_chan;
  let Word0{25}    = src1_neg;
  let Word0{28-26} = index_mode;
  let Word0{30-29} = pred_sel;
  let Word0{31}    = last;
}

// Upper part (bits 31-18) of the second ALU word, shared by the OP2 and OP3
// forms below.  Only dst{6-0} and dst{10-9} are encoded.
class R600ALU_Word1 {
  field bits<32> Word1;

  bits<11> dst;
  bits<3>  bank_swizzle;
  bits<1>  dst_rel;
  bits<1>  clamp;

  bits<7>  dst_sel  = dst{6-0};
  bits<2>  dst_chan = dst{10-9};

  let Word1{20-18} = bank_swizzle;
  let Word1{27-21} = dst_sel;
  let Word1{28}    = dst_rel;
  let Word1{30-29} = dst_chan;
  let Word1{31}    = clamp;
}

// Second ALU word for the OP2 form: modifier bits plus an 11-bit opcode in
// bits 17-7.
class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{

  bits<1> src0_abs;
  bits<1> src1_abs;
  bits<1> update_exec_mask;
  bits<1> update_pred;
  bits<1> write;
  bits<2> omod;

  let Word1{0}    = src0_abs;
  let Word1{1}    = src1_abs;
  let Word1{2}    = update_exec_mask;
  let Word1{3}    = update_pred;
  let Word1{4}    = write;
  let Word1{6-5}  = omod;
  let Word1{17-7} = alu_inst;
}

// Second ALU word for the OP3 form: a third source operand plus a 5-bit
// opcode in bits 17-13.
class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{

  bits<11> src2;
  bits<1>  src2_rel;
  bits<1>  src2_neg;

  bits<9>  src2_sel  = src2{8-0};
  bits<2>  src2_chan = src2{10-9};

  let Word1{8-0}   = src2_sel;
  let Word1{9}     = src2_rel;
  let Word1{11-10} = src2_chan;
  let Word1{12}    = src2_neg;
  let Word1{17-13} = alu_inst;
}

// Bit layout of the first VTX word.
class VTX_WORD0 {
  field bits<32> Word0;
  bits<7> SRC_GPR;
  bits<5> VC_INST;
  bits<2> FETCH_TYPE;
  bits<1> FETCH_WHOLE_QUAD;
  bits<8> BUFFER_ID;
  bits<1> SRC_REL;
  bits<2> SRC_SEL_X;
  bits<6> MEGA_FETCH_COUNT;

  let Word0{4-0}   = VC_INST;
  let Word0{6-5}   = FETCH_TYPE;
  let Word0{7}     = FETCH_WHOLE_QUAD;
  let Word0{15-8}  = BUFFER_ID;
  let Word0{22-16} = SRC_GPR;
  let Word0{23}    = SRC_REL;
  let Word0{25-24} = SRC_SEL_X;
  let Word0{31-26} = MEGA_FETCH_COUNT;
}

// Bit layout of the second VTX word (GPR destination form).
class VTX_WORD1_GPR {
  field bits<32> Word1;
  bits<7> DST_GPR;
  bits<1> DST_REL;
  bits<3> DST_SEL_X;
  bits<3> DST_SEL_Y;
  bits<3> DST_SEL_Z;
  bits<3> DST_SEL_W;
  bits<1> USE_CONST_FIELDS;
  bits<6> DATA_FORMAT;
  bits<2> NUM_FORMAT_ALL;
  bits<1> FORMAT_COMP_ALL;
  bits<1> SRF_MODE_ALL;

  let Word1{6-0}   = DST_GPR;
  let Word1{7}     = DST_REL;
  let Word1{8}     = 0; // Reserved
  let Word1{11-9}  = DST_SEL_X;
  let Word1{14-12} = DST_SEL_Y;
  let Word1{17-15} = DST_SEL_Z;
  let Word1{20-18} = DST_SEL_W;
  let Word1{21}    = USE_CONST_FIELDS;
  let Word1{27-22} = DATA_FORMAT;
  let Word1{29-28} = NUM_FORMAT_ALL;
  let Word1{30}    = FORMAT_COMP_ALL;
  let Word1{31}    = SRF_MODE_ALL;
}

// Bit layout of the first TEX word.  Bits 31-29 are not assigned here.
class TEX_WORD0 {
  field bits<32> Word0;

  bits<5> TEX_INST;
  bits<2> INST_MOD;
  bits<1> FETCH_WHOLE_QUAD;
  bits<8> RESOURCE_ID;
  bits<7> SRC_GPR;
  bits<1> SRC_REL;
  bits<1> ALT_CONST;
  bits<2> RESOURCE_INDEX_MODE;
  bits<2> SAMPLER_INDEX_MODE;

  let Word0{4-0}   = TEX_INST;
  let Word0{6-5}   = INST_MOD;
  let Word0{7}     = FETCH_WHOLE_QUAD;
  let Word0{15-8}  = RESOURCE_ID;
  let Word0{22-16} = SRC_GPR;
  let Word0{23}    = SRC_REL;
  let Word0{24}    = ALT_CONST;
  let Word0{26-25} = RESOURCE_INDEX_MODE;
  let Word0{28-27} = SAMPLER_INDEX_MODE;
}

// Bit layout of the second TEX word.  Bit 8 is not assigned here.
class TEX_WORD1 {
  field bits<32> Word1;

  bits<7> DST_GPR;
  bits<1> DST_REL;
  bits<3> DST_SEL_X;
  bits<3> DST_SEL_Y;
  bits<3> DST_SEL_Z;
  bits<3> DST_SEL_W;
  bits<7> LOD_BIAS;
  bits<1> COORD_TYPE_X;
  bits<1> COORD_TYPE_Y;
  bits<1> COORD_TYPE_Z;
  bits<1> COORD_TYPE_W;

  let Word1{6-0}   = DST_GPR;
  let Word1{7}     = DST_REL;
  let Word1{11-9}  = DST_SEL_X;
  let Word1{14-12} = DST_SEL_Y;
  let Word1{17-15} = DST_SEL_Z;
  let Word1{20-18} = DST_SEL_W;
  let Word1{27-21} = LOD_BIAS;
  let Word1{28}    = COORD_TYPE_X;
  let Word1{29}    = COORD_TYPE_Y;
  let Word1{30}    = COORD_TYPE_Z;
  let Word1{31}    = COORD_TYPE_W;
}

// Bit layout of the third TEX word.
class TEX_WORD2 {
  field bits<32> Word2;

  bits<5> OFFSET_X;
  bits<5> OFFSET_Y;
  bits<5> OFFSET_Z;
  bits<5> SAMPLER_ID;
  bits<3> SRC_SEL_X;
  bits<3> SRC_SEL_Y;
  bits<3> SRC_SEL_Z;
  bits<3> SRC_SEL_W;

  let Word2{4-0}   = OFFSET_X;
  let Word2{9-5}   = OFFSET_Y;
  let Word2{14-10} = OFFSET_Z;
  let Word2{19-15} = SAMPLER_ID;
  let Word2{22-20} = SRC_SEL_X;
  let Word2{25-23} = SRC_SEL_Y;
  let Word2{28-26} = SRC_SEL_Z;
  let Word2{31-29} = SRC_SEL_W;
}

/*
XXX: R600 subtarget uses a slightly different encoding than the other
subtargets. We currently handle this in R600MCCodeEmitter, but we may
want to use these instruction classes in the future.

class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {

  bits<1> fog_merge;
  bits<10> alu_inst;

  let Inst{37} = fog_merge;
  let Inst{39-38} = omod;
  let Inst{49-40} = alu_inst;
}

class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {

  bits<11> alu_inst;

  let Inst{38-37} = omod;
  let Inst{49-39} = alu_inst;
}
*/

def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
                                      (ops PRED_SEL_OFF)>;


let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {

// Class for instructions with only one source register.
// If you add new ins to this instruction, make sure they are listed before
// $literal, because the backend currently assumes that the last operand is
// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
// and R600InstrInfo::getOperandIdx().
// NOTE(review): $bank_swizzle is currently listed after $literal in the ALU
// classes, which contradicts the rule above -- confirm which one is stale.
// One-source ALU instruction using the OP2 encoding.  The unused second
// source and the exec-mask/predicate update bits are forced to 0, and
// $literal is excluded from the generated encoding via DisableEncoding.
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName,
                   "$last$clamp $dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                   "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;
  let update_exec_mask = 0;
  let update_pred = 0;
  let HasNativeOperands = 1;
  let Op1 = 1;
  let DisableEncoding = "$literal";

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// R600_1OP whose selection pattern is simply (node $src0) -> $dst.
// The itinerary is forwarded to R600_1OP; previously the argument was
// accepted but dropped, so every helper-derived instruction silently got
// R600_1OP's default (AnyALU) regardless of what the caller passed.
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_1OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))],
              itin
>;

// If you add or change the operands for R600_2OP instructions, you must
// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
// Two-source ALU instruction using the OP2 encoding.  $literal is excluded
// from the generated encoding via DisableEncoding.
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                   OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              !strconcat(" ", opName,
                   "$last$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                   "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
                   "$pred_sel $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let HasNativeOperands = 1;
  let Op2 = 1;
  let DisableEncoding = "$literal";

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// R600_2OP whose selection pattern is simply (node $src0, $src1) -> $dst.
// The itinerary argument is forwarded to R600_2OP; previously it was accepted
// but dropped, so the base default (AnyALU) was always used.  The parameter
// keeps its historical spelling "itim" so the template interface is unchanged.
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itim = AnyALU> :
    R600_2OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
                                           R600_Reg32:$src1))],
              itim
>;

// If you add or change the operands for R600_3OP instructions, you must
// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and
// R600InstrInfo::getOperandIdx().
// Three-source ALU instruction using the OP3 encoding.  $literal is excluded
// from the generated encoding via DisableEncoding.
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg32:$dst),
              (ins REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
                   R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
                   BANK_SWIZZLE:$bank_swizzle),
              // A space now separates $pred_sel from $bank_swizzle, matching
              // R600_1OP and R600_2OP; the two operands used to be printed
              // fused together.
              !strconcat(" ", opName, "$last$clamp $dst$dst_rel, "
                   "$src0_neg$src0$src0_rel, "
                   "$src1_neg$src1$src1_rel, "
                   "$src2_neg$src2$src2_rel, "
                   "$pred_sel"
                   " $bank_swizzle"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP3<inst>{

  let HasNativeOperands = 1;
  let DisableEncoding = "$literal";
  let Op3 = 1;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// Instruction with a single R600_Reg32 result and caller-supplied inputs and
// asm string.  The inst argument is not used in this class body.
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU> :
    InstR600 <(outs R600_Reg32:$dst),
              ins,
              asm,
              pattern,
              itin>;

// Texture instruction.  Only Word0 and Word1 are placed into the 64-bit Inst
// field here; Word2 is declared through TEX_WORD2 but not copied into Inst.
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <(outs R600_Reg128:$DST_GPR),
              (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
              !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
              pattern,
              itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;

  // Only the low five bits of inst are encoded.
  let TEX_INST = inst{4-0};
  let SRC_REL = 0;
  let DST_REL = 0;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let LOD_BIAS = 0;

  let INST_MOD = 0;
  let FETCH_WHOLE_QUAD = 0;
  let ALT_CONST = 0;
  let SAMPLER_INDEX_MODE = 0;
  let RESOURCE_INDEX_MODE = 0;

  let COORD_TYPE_X = 0;
  let COORD_TYPE_Y = 0;
  let COORD_TYPE_Z = 0;
  let COORD_TYPE_W = 0;

  let TEXInst = 1;
}

} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

// Immediate predicates over a texture-type number.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 15 || TType == 16;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

// 64-bit encoding with the field layout listed below; cf_inst, rat_inst and
// rat_id are fixed by the template arguments.
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
                 dag ins, string asm, list<dag> pattern> :
    InstR600ISA <outs, ins, asm, pattern> {
  bits<7>  RW_GPR;
  bits<7>  INDEX_GPR;

  bits<2>  RIM;
  bits<2>  TYPE;
  bits<1>  RW_REL;
  bits<2>  ELEM_SIZE;

  bits<12> ARRAY_SIZE;
  bits<4>  COMP_MASK;
  bits<4>  BURST_COUNT;
  bits<1>  VPM;
  bits<1>  eop;
  bits<1>  MARK;
  bits<1>  BARRIER;

  // CF_ALLOC_EXPORT_WORD0_RAT
  let Inst{3-0}   = rat_id;
  let Inst{9-4}   = rat_inst;
  let Inst{10}    = 0; // Reserved
  let Inst{12-11} = RIM;
  let Inst{14-13} = TYPE;
  let Inst{21-15} = RW_GPR;
  let Inst{22}    = RW_REL;
  let Inst{29-23} = INDEX_GPR;
  let Inst{31-30} = ELEM_SIZE;

  // CF_ALLOC_EXPORT_WORD1_BUF
  let Inst{43-32} = ARRAY_SIZE;
  let Inst{47-44} = COMP_MASK;
  let Inst{51-48} = BURST_COUNT;
  let Inst{52}    = VPM;
  let Inst{53}    = eop;
  let Inst{61-54} = cf_inst;
  let Inst{62}    = MARK;
  let Inst{63}    = BARRIER;
}

// Wraps a load fragment so that it only matches when isParamLoad() accepts
// the node.
class LoadParamFrag <PatFrag load_type> : PatFrag <
  (ops node:$ptr), (load_type node:$ptr),
  [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
>;

def load_param : LoadParamFrag<load>;
def load_param_zexti8 : LoadParamFrag<zextloadi8>;
def load_param_zexti16 : LoadParamFrag<zextloadi16>;

// Subtarget predicates.  The adjacent string pieces are concatenated into a
// single C++ condition.
def isR600 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
def isR700 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
                            "Subtarget.device()->getDeviceFlag()"
                            ">= OCL_DEVICE_RV710">;
def isEG : Predicate<
  "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
  "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
  "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;

def isCayman : Predicate<"Subtarget.device()"
                            "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
def isEGorCayman : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
                            "|| Subtarget.device()->getGeneration() =="
                            "AMDGPUDeviceInfo::HD6XXX">;

def isR600toCayman : Predicate<
  "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;

//===----------------------------------------------------------------------===//
// R600 SDNodes
//===----------------------------------------------------------------------===//

// The asm strings of the two INTERP_PAIR instructions now reference the
// second result as the operand $dst1; they used to print the literal text
// "dst1".
def INTERP_PAIR_XY : AMDGPUShaderInst <
  (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
  []>;

def INTERP_PAIR_ZW : AMDGPUShaderInst <
  (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
  []>;

def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
  SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
  [SDNPVariadic]
>;

//===----------------------------------------------------------------------===//
// Interpolation Instructions
//===----------------------------------------------------------------------===//

def INTERP_VEC_LOAD : AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src0),
  "INTERP_LOAD $src0 : $dst",
  []>;

def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
  let bank_swizzle = 5;
}

def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
  let bank_swizzle = 5;
}

def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;

//===----------------------------------------------------------------------===//
// Export Instructions
//===----------------------------------------------------------------------===//

def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
  [SDNPHasChain, SDNPSideEffect]>;

// First export word.  Bit 22 (RW_REL) and bits 29-23 (INDEX_GPR) are
// hard-wired to 0.
class ExportWord0 {
  field bits<32> Word0;

  bits<13> arraybase;
  bits<2>  type;
  bits<7>  gpr;
  bits<2>  elem_size;

  let Word0{12-0}  = arraybase;
  let Word0{14-13} = type;
  let Word0{21-15} = gpr;
  let Word0{22}    = 0; // RW_REL
  let Word0{29-23} = 0; // INDEX_GPR
  let Word0{31-30} = elem_size;
}

// Second export word, swizzle form.  eop and inst are declared here but not
// placed into Word1 by this class.
class ExportSwzWord1 {
  field bits<32> Word1;

  bits<3> sw_x;
  bits<3> sw_y;
  bits<3> sw_z;
  bits<3> sw_w;
  bits<1> eop;
  bits<8> inst;

  let Word1{2-0}  = sw_x;
  let Word1{5-3}  = sw_y;
  let Word1{8-6}  = sw_z;
  let Word1{11-9} = sw_w;
}

// Second export word, buffer form.  eop and inst are declared here but not
// placed into Word1 by this class.
class ExportBufWord1 {
  field bits<32> Word1;

  bits<12> arraySize;
  bits<4>  compMask;
  bits<1>  eop;
  bits<8>  inst;

  let Word1{11-0}  = arraySize;
  let Word1{15-12} = compMask;
}

// Selection patterns that map the store_pixel_depth / store_pixel_stencil /
// store_dummy intrinsics and the EXPORT node onto ExportInst, using cf_inst
// as the instruction code operand.
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
  def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 0, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 7, 0, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_dummy (i32 imm:$type)),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_dummy 1),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
    (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
        (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
        imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
  >;

}

// One pattern per stream index (0-3), each selecting the matching
// buf<N>inst instruction code.
// NOTE(review): the multiclass name looks like a typo for "Stream..."; it is
// kept as-is because users elsewhere refer to it by this name.
multiclass SteamOutputExportPattern<Instruction ExportInst,
    bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf0inst, 0)>;
// Stream1
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf1inst, 0)>;
// Stream2
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf2inst, 0)>;
// Stream3
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf3inst, 0)>;
}

// Export Instructions should not be duplicated by TailDuplication pass
// (which assumes that duplicable instruction are affected by exec mask)
let usesCustomInserter = 1, isNotDuplicable = 1 in {

class ExportSwzInst : InstR600ISA<(
    outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
    i32imm:$eop),
    !strconcat("EXPORT", " $gpr"),
    []>, ExportWord0, ExportSwzWord1 {
  let elem_size = 3;
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

} // End usesCustomInserter = 1, isNotDuplicable = 1

class ExportBufInst : InstR600ISA<(
    outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
    !strconcat("EXPORT", " $gpr"),
    []>, ExportWord0, ExportBufWord1 {
  let elem_size = 0;
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

//===----------------------------------------------------------------------===//
// Control Flow Instructions
//===----------------------------------------------------------------------===//

class CF_ALU_WORD0 {
  field bits<32> Word0;

  bits<22> ADDR;
  bits<4>  KCACHE_BANK0;
  bits<4>  KCACHE_BANK1;
  bits<2>  KCACHE_MODE0;

  let Word0{21-0}  = ADDR;
  let Word0{25-22} = KCACHE_BANK0;
  let Word0{29-26} = KCACHE_BANK1;
  let Word0{31-30} = KCACHE_MODE0;
}

class CF_ALU_WORD1 {
  field bits<32> Word1;

  bits<2> KCACHE_MODE1;
  bits<8> KCACHE_ADDR0;
  bits<8> KCACHE_ADDR1;
  bits<7> COUNT;
  bits<1> ALT_CONST;
  bits<4> CF_INST;
  bits<1> WHOLE_QUAD_MODE;
  bits<1> BARRIER;

  let Word1{1-0}   = KCACHE_MODE1;
  let Word1{9-2}   = KCACHE_ADDR0;
  let Word1{17-10} = KCACHE_ADDR1;
  let Word1{24-18} = COUNT;
  let Word1{25}    = ALT_CONST;
  let Word1{29-26} = CF_INST;
  let Word1{30}    = WHOLE_QUAD_MODE;
  let Word1{31}    = BARRIER;
}

def KCACHE : InstFlag<"printKCache">;

// ALU clause instruction: CF_INST comes from the template argument, BARRIER
// is set, ALT_CONST and WHOLE_QUAD_MODE are cleared.
class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
i32imm:$COUNT),
!strconcat(OpName, " $COUNT, @$ADDR, "
"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
  field bits<64> Inst;

  let CF_INST = inst;
  let ALT_CONST = 0;
  let WHOLE_QUAD_MODE = 0;
  let BARRIER = 1;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

class CF_WORD0_R600 {
  field bits<32> Word0;

  bits<32> ADDR;

  let Word0 = ADDR;
}

// Bit 20 of Word1 is not assigned here.
class CF_WORD1_R600 {
  field bits<32> Word1;

  bits<3> POP_COUNT;
  bits<5> CF_CONST;
  bits<2> COND;
  bits<3> COUNT;
  bits<6> CALL_COUNT;
  bits<1> COUNT_3;
  bits<1> END_OF_PROGRAM;
  bits<1> VALID_PIXEL_MODE;
  bits<7> CF_INST;
  bits<1> WHOLE_QUAD_MODE;
  bits<1> BARRIER;

  let Word1{2-0}   = POP_COUNT;
  let Word1{7-3}   = CF_CONST;
  let Word1{9-8}   = COND;
  let Word1{12-10} = COUNT;
  let Word1{18-13} = CALL_COUNT;
  let Word1{19}    = COUNT_3;
  let Word1{21}    = END_OF_PROGRAM;
  let Word1{22}    = VALID_PIXEL_MODE;
  let Word1{29-23} = CF_INST;
  let Word1{30}    = WHOLE_QUAD_MODE;
  let Word1{31}    = BARRIER;
}

class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
  field bits<64> Inst;

  let CF_INST = inst;
  let BARRIER = 1;
  let CF_CONST = 0;
  let VALID_PIXEL_MODE = 0;
  let COND = 0;
  let CALL_COUNT = 0;
  let COUNT_3 = 0;
  let END_OF_PROGRAM = 0;
  let WHOLE_QUAD_MODE = 0;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// Bits 31-27 of Word0 are not assigned here.
class CF_WORD0_EG {
  field bits<32> Word0;

  bits<24> ADDR;
  bits<3>  JUMPTABLE_SEL;

  let Word0{23-0}  = ADDR;
  let Word0{26-24} = JUMPTABLE_SEL;
}

// Bits 19-16 and bit 30 of Word1 are not assigned here.
class CF_WORD1_EG {
  field bits<32> Word1;

  bits<3> POP_COUNT;
  bits<5> CF_CONST;
  bits<2> COND;
  bits<6> COUNT;
  bits<1> VALID_PIXEL_MODE;
  bits<1> END_OF_PROGRAM;
  bits<8> CF_INST;
  bits<1> BARRIER;

  let Word1{2-0}   = POP_COUNT;
  let Word1{7-3}   = CF_CONST;
  let Word1{9-8}   = COND;
  let Word1{15-10} = COUNT;
  let Word1{20}    = VALID_PIXEL_MODE;
  let Word1{21}    = END_OF_PROGRAM;
  let Word1{29-22} = CF_INST;
  let Word1{31}    = BARRIER;
}

class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
  field bits<64> Inst;

  let CF_INST = inst;
  let BARRIER = 1;
  let JUMPTABLE_SEL = 0;
  let CF_CONST = 0;
  let VALID_PIXEL_MODE = 0;
  let COND = 0;
  let END_OF_PROGRAM = 0;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;

def FETCH_CLAUSE : AMDGPUInst <(outs),
(ins 
i32imm:$addr), "Fetch clause starting at $addr:", [] > { 946251662Sdim field bits<8> Inst; 947251662Sdim bits<8> num; 948251662Sdim let Inst = num; 949249259Sdim} 950249259Sdim 951251662Sdimdef ALU_CLAUSE : AMDGPUInst <(outs), 952251662Sdim(ins i32imm:$addr), "ALU clause starting at $addr:", [] > { 953251662Sdim field bits<8> Inst; 954251662Sdim bits<8> num; 955251662Sdim let Inst = num; 956249259Sdim} 957249259Sdim 958251662Sdimdef LITERALS : AMDGPUInst <(outs), 959251662Sdim(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > { 960251662Sdim field bits<64> Inst; 961251662Sdim bits<32> literal1; 962251662Sdim bits<32> literal2; 963249259Sdim 964251662Sdim let Inst{31-0} = literal1; 965251662Sdim let Inst{63-32} = literal2; 966249259Sdim} 967249259Sdim 968251662Sdimdef PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { 969251662Sdim field bits<64> Inst; 970249259Sdim} 971249259Sdim 972249259Sdimlet Predicates = [isR600toCayman] in { 973249259Sdim 974249259Sdim//===----------------------------------------------------------------------===// 975249259Sdim// Common Instructions R600, R700, Evergreen, Cayman 976249259Sdim//===----------------------------------------------------------------------===// 977249259Sdim 978249259Sdimdef ADD : R600_2OP_Helper <0x0, "ADD", fadd>; 979249259Sdim// Non-IEEE MUL: 0 * anything = 0 980249259Sdimdef MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>; 981249259Sdimdef MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; 982249259Sdimdef MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>; 983249259Sdimdef MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; 984249259Sdim 985249259Sdim// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, 986249259Sdim// so some of the instruction names don't match the asm string. 987249259Sdim// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. 
// Floating-point compares: select FP_ONE / FP_ZERO (f32) from an f32 compare.
def SETE : R600_2OP <
  0x08, "SETE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
>;

def SGT : R600_2OP <
  0x09, "SETGT",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
>;

def SGE : R600_2OP <
  0xA, "SETGE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
>;

def SNE : R600_2OP <
  0xB, "SETNE",
  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
>;

// DX10 variants: same f32 compares, but the result is an i32 of -1 or 0.
def SETE_DX10 : R600_2OP <
  0xC, "SETE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
>;

def SETGT_DX10 : R600_2OP <
  0xD, "SETGT_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
>;

def SETGE_DX10 : R600_2OP <
  0xE, "SETGE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
>;

def SETNE_DX10 : R600_2OP <
  0xF, "SETNE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
>;

def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;

def MOV : R600_1OP <0x19, "MOV", []>;

let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {

// Pseudo instruction that materializes an immediate into a 32-bit register.
// It has no asm string and no pattern of its own; selection is done by the
// anonymous Pat records below and expansion by the custom inserter.
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
  (outs R600_Reg32:$dst),
  (ins immType:$imm),
  "",
  []
>;

} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1

def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat <
  (imm:$val),
  (MOV_IMM_I32 imm:$val)
>;

def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat <
  (fpimm:$val),
  (MOV_IMM_F32 fpimm:$val)
>;

def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;

let hasSideEffects = 1 in {

def KILLGT : R600_2OP <0x2D, "KILLGT", []>;

} // end hasSideEffects

def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;

// Integer compares: the result is an i32 of -1 or 0.
def SETE_INT : R600_2OP <
  0x3A, "SETE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
>;

def SETGT_INT : R600_2OP <
  0x3B, "SETGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
>;

def SETGE_INT : R600_2OP <
  0x3C, "SETGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
>;

def SETNE_INT : R600_2OP <
  0x3D, "SETNE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
>;

def SETGT_UINT : R600_2OP <
  0x3E, "SETGT_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
>;

def SETGE_UINT : R600_2OP <
  0x3F, "SETGE_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
>;

def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
// The asm string previously read "PRED_SETGE_INT" (copied from the 0x44 def
// below), so opcodes 0x43 and 0x44 printed identically. It now matches the
// record name, like every other PRED_SET* def in this file.
def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;

// Integer conditional moves: compare $src0 against 0 and select between
// $src1 and $src2.
def CNDE_INT : R600_3OP <
  0x1C, "CNDE_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
>;

def CNDGE_INT : R600_3OP <
  0x1E, "CNDGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]
>;

def CNDGT_INT : R600_3OP <
  0x1D, "CNDGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]
>;

//===----------------------------------------------------------------------===//
// Texture instructions
//===----------------------------------------------------------------------===//

// TEX_LD takes three extra offset immediates, so it overrides both the asm
// string and the input operand list it would otherwise get from R600_TEX.
// NOTE(review): the two AsmString pieces join with no space after the comma
// that follows $OFFSET_Z -- confirm whether that is intended.
def TEX_LD : R600_TEX <
  0x03, "TEX_LD",
  [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR,
      imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
      imm:$SAMPLER_ID, imm:$textureTarget))]
> {
let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
    "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
    i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
    i32imm:$textureTarget);
}

def TEX_GET_TEXTURE_RESINFO : R600_TEX <
  0x04, "TEX_GET_TEXTURE_RESINFO",
  [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;

def TEX_GET_GRADIENTS_H : R600_TEX <
  0x07, "TEX_GET_GRADIENTS_H",
  [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;

def TEX_GET_GRADIENTS_V : R600_TEX <
  0x08, "TEX_GET_GRADIENTS_V",
  [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;

def TEX_SET_GRADIENTS_H : R600_TEX <
  0x0B, "TEX_SET_GRADIENTS_H",
  []
>;

def TEX_SET_GRADIENTS_V : R600_TEX <
  0x0C, "TEX_SET_GRADIENTS_V",
  []
>;

// The *_C sample variants below match the same intrinsic as their plain
// counterpart, but only when the texture target operand matches TEX_SHADOW.
def TEX_SAMPLE : R600_TEX <
  0x10, "TEX_SAMPLE",
  [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;

def TEX_SAMPLE_C : R600_TEX <
  0x18, "TEX_SAMPLE_C",
  [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;

def TEX_SAMPLE_L : R600_TEX <
  0x11, "TEX_SAMPLE_L",
  [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;

def TEX_SAMPLE_C_L : R600_TEX <
  0x19, "TEX_SAMPLE_C_L",
  [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;

def TEX_SAMPLE_LB : R600_TEX <
  0x12, "TEX_SAMPLE_LB",
  [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;

def TEX_SAMPLE_C_LB : R600_TEX <
  0x1A, "TEX_SAMPLE_C_LB",
  [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
      imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;

def TEX_SAMPLE_G : R600_TEX <
  0x14, "TEX_SAMPLE_G",
  []
>;

def TEX_SAMPLE_C_G : R600_TEX <
  0x1C, "TEX_SAMPLE_C_G",
  []
>;

//===----------------------------------------------------------------------===//
// Helper classes for common instructions
//===----------------------------------------------------------------------===//
// Each *_Common class below fixes the asm name (and, where present, the
// selection pattern) of one operation and leaves the opcode `inst` to the
// per-generation defs.

class MUL_LIT_Common <bits<5> inst> : R600_3OP <inst, "MUL_LIT", []>;

class MULADD_Common <bits<5> inst> : R600_3OP <inst, "MULADD", []>;

class MULADD_IEEE_Common <bits<5> inst>
    : R600_3OP <inst, "MULADD_IEEE",
                [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]>;

// Floating-point conditional moves: compare $src0 against FP_ZERO and select
// between $src1 and $src2.
class CNDE_Common <bits<5> inst>
    : R600_3OP <inst, "CNDE",
                [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]>;

class CNDGT_Common <bits<5> inst>
    : R600_3OP <inst, "CNDGT",
                [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]>;

class CNDGE_Common <bits<5> inst>
    : R600_3OP <inst, "CNDGE",
                [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]>;

// DOT4 comes as a pair: a _pseudo record that carries the int_AMDGPU_dp4
// pattern on 128-bit registers, and a _real two-operand instruction that has
// no pattern.
multiclass DOT4_Common <bits<11> inst> {

  def _pseudo : R600_REDUCTION <
    inst,
    (ins R600_Reg128:$src0, R600_Reg128:$src1),
    "DOT4 $dst $src0, $src1",
    [(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))]
  >;

  def _real : R600_2OP <inst, "DOT4", []>;
}

let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// CUBE follows the same pseudo/real split as DOT4; the pseudo record carries
// the int_AMDGPU_cube pattern.
multiclass CUBE_Common <bits<11> inst> {

  def _pseudo : InstR600 <
    (outs R600_Reg128:$dst),
    (ins R600_Reg128:$src),
    "CUBE $dst $src",
    [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))],
    VecALU
  > {
    let isPseudo = 1;
  }

  def _real : R600_2OP <inst, "CUBE", []>;
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

// The classes below that set TransOnly = 1 also switch their itinerary to
// TransALU.

class EXP_IEEE_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "EXP_IEEE", fexp2> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class FLT_TO_INT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "FLT_TO_INT", fp_to_sint> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class INT_TO_FLT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "INT_TO_FLT", sint_to_fp> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class FLT_TO_UINT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "FLT_TO_UINT", fp_to_uint> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class UINT_TO_FLT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "UINT_TO_FLT", uint_to_fp> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class LOG_CLAMPED_Common <bits<11> inst>
    : R600_1OP <inst, "LOG_CLAMPED", []>;

class LOG_IEEE_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "LOG_IEEE", flog2> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;

class MULHI_INT_Common <bits<11> inst>
    : R600_2OP_Helper <inst, "MULHI_INT", mulhs> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// NOTE(review): the asm name here is "MULHI", while the sibling classes carry
// an _INT/_UINT suffix -- confirm this is intended before changing it.
class MULHI_UINT_Common <bits<11> inst>
    : R600_2OP_Helper <inst, "MULHI", mulhu> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class MULLO_INT_Common <bits<11> inst>
    : R600_2OP_Helper <inst, "MULLO_INT", mul> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class MULLO_UINT_Common <bits<11> inst>
    : R600_2OP <inst, "MULLO_UINT", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class RECIP_CLAMPED_Common <bits<11> inst>
    : R600_1OP <inst, "RECIP_CLAMPED", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class RECIP_IEEE_Common <bits<11> inst>
    : R600_1OP <inst, "RECIP_IEEE",
                [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class RECIP_UINT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "RECIP_UINT", AMDGPUurecip> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class RECIPSQRT_CLAMPED_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class RECIPSQRT_IEEE_Common <bits<11> inst>
    : R600_1OP <inst, "RECIPSQRT_IEEE", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// SIN and COS additionally set the Trig flag.
class SIN_Common <bits<11> inst> : R600_1OP <inst, "SIN", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
  let Trig = 1;
}

class COS_Common <bits<11> inst> : R600_1OP <inst, "COS", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
  let Trig = 1;
}

//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//

// Both int_AMDGPU_div and fdiv are selected as src0 * recip(src1), using
// MUL_IEEE and the reciprocal instruction passed in.
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
  (int_AMDGPU_div f32:$src0, f32:$src1),
  (MUL_IEEE $src0, (recip_ieee $src1))
>;

def : Pat<
  (fdiv f32:$src0, f32:$src1),
  (MUL_IEEE $src0, (recip_ieee $src1))
>;
}

// int_TGSI_lit_z is selected as
//   exp_ieee(mul_lit(log_clamped(MAX(src_y, 0)), src_w, src_x)).
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
  : Pat <
  (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
  (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;

//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR600] in {

  def MUL_LIT_r600           : MUL_LIT_Common<0x0C>;
  def MULADD_r600            : MULADD_Common<0x10>;
  def MULADD_IEEE_r600       : MULADD_IEEE_Common<0x14>;
  def CNDE_r600              : CNDE_Common<0x18>;
  def CNDGT_r600             : CNDGT_Common<0x19>;
  def CNDGE_r600             : CNDGE_Common<0x1A>;
  defm DOT4_r600             : DOT4_Common<0x50>;
  defm CUBE_r600             : CUBE_Common<0x52>;
  def EXP_IEEE_r600          : EXP_IEEE_Common<0x61>;
  def LOG_CLAMPED_r600       : LOG_CLAMPED_Common<0x62>;
  def LOG_IEEE_r600          : LOG_IEEE_Common<0x63>;
  def RECIP_CLAMPED_r600     : RECIP_CLAMPED_Common<0x64>;
  def RECIP_IEEE_r600        : RECIP_IEEE_Common<0x66>;
  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  def RECIPSQRT_IEEE_r600    : RECIPSQRT_IEEE_Common<0x69>;
  def FLT_TO_INT_r600        : FLT_TO_INT_Common<0x6b>;
  def INT_TO_FLT_r600        : INT_TO_FLT_Common<0x6c>;
  def FLT_TO_UINT_r600       : FLT_TO_UINT_Common<0x79>;
  def UINT_TO_FLT_r600       : UINT_TO_FLT_Common<0x6d>;
  def SIN_r600               : SIN_Common<0x6E>;
  def COS_r600               : COS_Common<0x6F>;
  def ASHR_r600              : ASHR_Common<0x70>;
  def LSHR_r600              : LSHR_Common<0x71>;
  def LSHL_r600              : LSHL_Common<0x72>;
  def MULLO_INT_r600         : MULLO_INT_Common<0x73>;
  def MULHI_INT_r600         : MULHI_INT_Common<0x74>;
  def MULLO_UINT_r600        : MULLO_UINT_Common<0x75>;
  def MULHI_UINT_r600        : MULHI_UINT_Common<0x76>;
  def RECIP_UINT_r600        : RECIP_UINT_Common <0x78>;

  defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
  def TGSI_LIT_Z_r600
      : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

  // fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x), using the non-IEEE MUL.
  def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;

  // R600 placement of the upper export-word fields.
  def R600_ExportSwz : ExportSwzInst {
    let Word1{20-17} = 0;    // BURST_COUNT
    let Word1{21}    = eop;
    let Word1{22}    = 1;    // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31}    = 1;    // BARRIER
  }
  defm : ExportPattern<R600_ExportSwz, 39>;

  def R600_ExportBuf : ExportBufInst {
    let Word1{20-17} = 0;    // BURST_COUNT
    let Word1{21}    = eop;
    let Word1{22}    = 1;    // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31}    = 1;    // BARRIER
  }
  defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;

  // R600 control-flow instructions. Each def zeroes the CF word fields that
  // it does not take as operands.
  def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
                                  "TEX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
                                  "VTX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
                                       "LOOP_START_DX10 @$ADDR"> {
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR),
                                     "END_LOOP @$ADDR"> {
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
                                       "LOOP_BREAK @$ADDR"> {
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
                                        "CONTINUE @$ADDR"> {
    let COUNT = 0;
    let POP_COUNT = 0;
  }
  def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                                    "JUMP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                                    "ELSE @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
    let POP_COUNT = 0;
    let COUNT = 0;
    let ADDR = 0;
  }
  def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                                "POP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
    let END_OF_PROGRAM = 1;
    let ADDR = 0;
    let POP_COUNT = 0;
    let COUNT = 0;
  }

}

// Helper patterns for normalizing inputs to trigonometric instructions for
// R700+ cards: the operand is multiplied by CONST.TWO_PI_INV before it is
// passed to `trig`.
class COS_PAT <InstR600 trig> : Pat<
  (fcos f32:$src),
  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
>;

class SIN_PAT <InstR600 trig> : Pat<
  (fsin f32:$src),
  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
>;

//===----------------------------------------------------------------------===//
// R700 Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR700] in {
  def SIN_r700 : SIN_Common<0x6E>;
  def COS_r700 : COS_Common<0x6F>;

  // R700 normalizes inputs to SIN/COS the same as EG
  def : SIN_PAT <SIN_r700>;
  def : COS_PAT <COS_r700>;
}

1565249259Sdim//===----------------------------------------------------------------------===// 1566249259Sdim// Evergreen Only instructions 1567249259Sdim//===----------------------------------------------------------------------===// 1568249259Sdim 1569249259Sdimlet Predicates = [isEG] in { 1570249259Sdim 1571249259Sdimdef RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; 1572249259Sdimdefm DIV_eg : DIV_Common<RECIP_IEEE_eg>; 1573249259Sdim 1574249259Sdimdef MULLO_INT_eg : MULLO_INT_Common<0x8F>; 1575249259Sdimdef MULHI_INT_eg : MULHI_INT_Common<0x90>; 1576249259Sdimdef MULLO_UINT_eg : MULLO_UINT_Common<0x91>; 1577249259Sdimdef MULHI_UINT_eg : MULHI_UINT_Common<0x92>; 1578249259Sdimdef RECIP_UINT_eg : RECIP_UINT_Common<0x94>; 1579249259Sdimdef RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; 1580249259Sdimdef EXP_IEEE_eg : EXP_IEEE_Common<0x81>; 1581249259Sdimdef LOG_IEEE_eg : LOG_IEEE_Common<0x83>; 1582249259Sdimdef RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; 1583249259Sdimdef RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; 1584249259Sdimdef SIN_eg : SIN_Common<0x8D>; 1585249259Sdimdef COS_eg : COS_Common<0x8E>; 1586249259Sdim 1587251662Sdimdef : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; 1588249259Sdimdef : SIN_PAT <SIN_eg>; 1589249259Sdimdef : COS_PAT <COS_eg>; 1590251662Sdimdef : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; 1591249259Sdim} // End Predicates = [isEG] 1592249259Sdim 1593249259Sdim//===----------------------------------------------------------------------===// 1594249259Sdim// Evergreen / Cayman Instructions 1595249259Sdim//===----------------------------------------------------------------------===// 1596249259Sdim 1597249259Sdimlet Predicates = [isEGorCayman] in { 1598249259Sdim 1599249259Sdim // BFE_UINT - bit_extract, an optimization for mask and shift 1600249259Sdim // Src0 = Input 1601249259Sdim // Src1 = Offset 1602249259Sdim // Src2 = Width 1603249259Sdim // 1604249259Sdim // bit_extract = (Input << (32 - Offset - 
Width)) >> (32 - Width) 1605249259Sdim // 1606249259Sdim // Example Usage: 1607249259Sdim // (Offset, Width) 1608249259Sdim // 1609249259Sdim // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 1610249259Sdim // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 1611249259Sdim // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 1612249259Sdim // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 1613249259Sdim def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", 1614251662Sdim [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1, 1615251662Sdim i32:$src2))], 1616249259Sdim VecALU 1617249259Sdim >; 1618251662Sdim def : BFEPattern <BFE_UINT_eg>; 1619249259Sdim 1620251662Sdim def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>; 1621251662Sdim defm : BFIPatterns <BFI_INT_eg>; 1622251662Sdim 1623249259Sdim def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", 1624251662Sdim [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))], 1625249259Sdim VecALU 1626249259Sdim >; 1627249259Sdim 1628249259Sdim def MULADD_eg : MULADD_Common<0x14>; 1629249259Sdim def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>; 1630249259Sdim def ASHR_eg : ASHR_Common<0x15>; 1631249259Sdim def LSHR_eg : LSHR_Common<0x16>; 1632249259Sdim def LSHL_eg : LSHL_Common<0x17>; 1633249259Sdim def CNDE_eg : CNDE_Common<0x19>; 1634249259Sdim def CNDGT_eg : CNDGT_Common<0x1A>; 1635249259Sdim def CNDGE_eg : CNDGE_Common<0x1B>; 1636249259Sdim def MUL_LIT_eg : MUL_LIT_Common<0x1F>; 1637249259Sdim def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; 1638249259Sdim defm DOT4_eg : DOT4_Common<0xBE>; 1639249259Sdim defm CUBE_eg : CUBE_Common<0xC0>; 1640249259Sdim 1641249259Sdimlet hasSideEffects = 1 in { 1642249259Sdim def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>; 1643249259Sdim} 1644249259Sdim 1645249259Sdim def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; 1646249259Sdim 1647249259Sdim def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { 1648249259Sdim let 
Pattern = []; 1649249259Sdim } 1650249259Sdim 1651249259Sdim def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; 1652249259Sdim 1653249259Sdim def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { 1654249259Sdim let Pattern = []; 1655249259Sdim } 1656249259Sdim 1657249259Sdim def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; 1658249259Sdim 1659249259Sdim // TRUNC is used for the FLT_TO_INT instructions to work around a 1660249259Sdim // perceived problem where the rounding modes are applied differently 1661249259Sdim // depending on the instruction and the slot they are in. 1662249259Sdim // See: 1663249259Sdim // https://bugs.freedesktop.org/show_bug.cgi?id=50232 1664249259Sdim // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c 1665249259Sdim // 1666249259Sdim // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, 1667249259Sdim // which do not need to be truncated since the fp values are 0.0f or 1.0f. 1668249259Sdim // We should look into handling these cases separately. 1669251662Sdim def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>; 1670249259Sdim 1671251662Sdim def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>; 1672249259Sdim 1673251662Sdim // SHA-256 Patterns 1674251662Sdim def : SHA256MaPattern <BFI_INT_eg, XOR_INT>; 1675251662Sdim 1676249259Sdim def EG_ExportSwz : ExportSwzInst { 1677251662Sdim let Word1{19-16} = 0; // BURST_COUNT 1678249259Sdim let Word1{20} = 1; // VALID_PIXEL_MODE 1679249259Sdim let Word1{21} = eop; 1680249259Sdim let Word1{29-22} = inst; 1681249259Sdim let Word1{30} = 0; // MARK 1682249259Sdim let Word1{31} = 1; // BARRIER 1683249259Sdim } 1684249259Sdim defm : ExportPattern<EG_ExportSwz, 83>; 1685249259Sdim 1686249259Sdim def EG_ExportBuf : ExportBufInst { 1687251662Sdim let Word1{19-16} = 0; // BURST_COUNT 1688249259Sdim let Word1{20} = 1; // VALID_PIXEL_MODE 1689249259Sdim let Word1{21} = eop; 1690249259Sdim let Word1{29-22} = inst; 1691249259Sdim let Word1{30} = 0; // MARK 1692249259Sdim let 
Word1{31} = 1; // BARRIER 1693249259Sdim } 1694249259Sdim defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; 1695249259Sdim 1696251662Sdim def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT), 1697251662Sdim "TEX $COUNT @$ADDR"> { 1698251662Sdim let POP_COUNT = 0; 1699251662Sdim } 1700251662Sdim def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT), 1701251662Sdim "VTX $COUNT @$ADDR"> { 1702251662Sdim let POP_COUNT = 0; 1703251662Sdim } 1704251662Sdim def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), 1705251662Sdim "LOOP_START_DX10 @$ADDR"> { 1706251662Sdim let POP_COUNT = 0; 1707251662Sdim let COUNT = 0; 1708251662Sdim } 1709251662Sdim def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { 1710251662Sdim let POP_COUNT = 0; 1711251662Sdim let COUNT = 0; 1712251662Sdim } 1713251662Sdim def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), 1714251662Sdim "LOOP_BREAK @$ADDR"> { 1715251662Sdim let POP_COUNT = 0; 1716251662Sdim let COUNT = 0; 1717251662Sdim } 1718251662Sdim def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), 1719251662Sdim "CONTINUE @$ADDR"> { 1720251662Sdim let POP_COUNT = 0; 1721251662Sdim let COUNT = 0; 1722251662Sdim } 1723251662Sdim def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), 1724251662Sdim "JUMP @$ADDR POP:$POP_COUNT"> { 1725251662Sdim let COUNT = 0; 1726251662Sdim } 1727251662Sdim def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), 1728251662Sdim "ELSE @$ADDR POP:$POP_COUNT"> { 1729251662Sdim let COUNT = 0; 1730251662Sdim } 1731251662Sdim def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> { 1732251662Sdim let ADDR = 0; 1733251662Sdim let COUNT = 0; 1734251662Sdim let POP_COUNT = 0; 1735251662Sdim } 1736251662Sdim def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), 1737251662Sdim "POP @$ADDR POP:$POP_COUNT"> { 1738251662Sdim let COUNT = 0; 1739251662Sdim } 1740251662Sdim def CF_END_EG : CF_CLAUSE_EG<0, 
(ins), "CF_END"> { 1741251662Sdim let COUNT = 0; 1742251662Sdim let POP_COUNT = 0; 1743251662Sdim let ADDR = 0; 1744251662Sdim let END_OF_PROGRAM = 1; 1745251662Sdim } 1746251662Sdim 1747249259Sdim//===----------------------------------------------------------------------===// 1748249259Sdim// Memory read/write instructions 1749249259Sdim//===----------------------------------------------------------------------===// 1750249259Sdimlet usesCustomInserter = 1 in { 1751249259Sdim 1752249259Sdimclass RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name, 1753249259Sdim list<dag> pattern> 1754249259Sdim : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, 1755249259Sdim !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> { 1756249259Sdim let RIM = 0; 1757249259Sdim // XXX: Have a separate instruction for non-indexed writes. 1758249259Sdim let TYPE = 1; 1759249259Sdim let RW_REL = 0; 1760249259Sdim let ELEM_SIZE = 0; 1761249259Sdim 1762249259Sdim let ARRAY_SIZE = 0; 1763249259Sdim let COMP_MASK = comp_mask; 1764249259Sdim let BURST_COUNT = 0; 1765249259Sdim let VPM = 0; 1766249259Sdim let MARK = 0; 1767249259Sdim let BARRIER = 1; 1768249259Sdim} 1769249259Sdim 1770249259Sdim} // End usesCustomInserter = 1 1771249259Sdim 1772249259Sdim// 32-bit store 1773249259Sdimdef RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < 1774249259Sdim (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), 1775249259Sdim 0x1, "RAT_WRITE_CACHELESS_32_eg", 1776251662Sdim [(global_store i32:$rw_gpr, i32:$index_gpr)] 1777249259Sdim>; 1778249259Sdim 1779249259Sdim//128-bit store 1780249259Sdimdef RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < 1781249259Sdim (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), 1782249259Sdim 0xf, "RAT_WRITE_CACHELESS_128", 1783251662Sdim [(global_store v4i32:$rw_gpr, i32:$index_gpr)] 1784249259Sdim>; 1785249259Sdim 1786249259Sdimclass VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> 1787249259Sdim : 
InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>, 1788249259Sdim VTX_WORD1_GPR, VTX_WORD0 { 1789249259Sdim 1790249259Sdim // Static fields 1791249259Sdim let VC_INST = 0; 1792249259Sdim let FETCH_TYPE = 2; 1793249259Sdim let FETCH_WHOLE_QUAD = 0; 1794249259Sdim let BUFFER_ID = buffer_id; 1795249259Sdim let SRC_REL = 0; 1796249259Sdim // XXX: We can infer this field based on the SRC_GPR. This would allow us 1797249259Sdim // to store vertex addresses in any channel, not just X. 1798249259Sdim let SRC_SEL_X = 0; 1799249259Sdim let DST_REL = 0; 1800249259Sdim // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, 1801249259Sdim // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, 1802249259Sdim // however, based on my testing if USE_CONST_FIELDS is set, then all 1803249259Sdim // these fields need to be set to 0. 1804249259Sdim let USE_CONST_FIELDS = 0; 1805249259Sdim let NUM_FORMAT_ALL = 1; 1806249259Sdim let FORMAT_COMP_ALL = 0; 1807249259Sdim let SRF_MODE_ALL = 0; 1808249259Sdim 1809249259Sdim let Inst{31-0} = Word0; 1810249259Sdim let Inst{63-32} = Word1; 1811249259Sdim // LLVM can only encode 64-bit instructions, so these fields are manually 1812249259Sdim // encoded in R600CodeEmitter 1813249259Sdim // 1814249259Sdim // bits<16> OFFSET; 1815249259Sdim // bits<2> ENDIAN_SWAP = 0; 1816249259Sdim // bits<1> CONST_BUF_NO_STRIDE = 0; 1817249259Sdim // bits<1> MEGA_FETCH = 0; 1818249259Sdim // bits<1> ALT_CONST = 0; 1819249259Sdim // bits<2> BUFFER_INDEX_MODE = 0; 1820249259Sdim 1821249259Sdim 1822249259Sdim 1823249259Sdim // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding 1824249259Sdim // is done in R600CodeEmitter 1825249259Sdim // 1826249259Sdim // Inst{79-64} = OFFSET; 1827249259Sdim // Inst{81-80} = ENDIAN_SWAP; 1828249259Sdim // Inst{82} = CONST_BUF_NO_STRIDE; 1829249259Sdim // Inst{83} = MEGA_FETCH; 1830249259Sdim // Inst{84} = ALT_CONST; 1831249259Sdim // Inst{86-85} = 
BUFFER_INDEX_MODE; 1832249259Sdim // Inst{95-86} = 0; Reserved 1833249259Sdim 1834249259Sdim // VTX_WORD3 (Padding) 1835249259Sdim // 1836249259Sdim // Inst{127-96} = 0; 1837251662Sdim 1838251662Sdim let VTXInst = 1; 1839249259Sdim} 1840249259Sdim 1841249259Sdimclass VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> 1842249259Sdim : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst), 1843249259Sdim pattern> { 1844249259Sdim 1845249259Sdim let MEGA_FETCH_COUNT = 1; 1846249259Sdim let DST_SEL_X = 0; 1847249259Sdim let DST_SEL_Y = 7; // Masked 1848249259Sdim let DST_SEL_Z = 7; // Masked 1849249259Sdim let DST_SEL_W = 7; // Masked 1850249259Sdim let DATA_FORMAT = 1; // FMT_8 1851249259Sdim} 1852249259Sdim 1853249259Sdimclass VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> 1854249259Sdim : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst), 1855249259Sdim pattern> { 1856249259Sdim let MEGA_FETCH_COUNT = 2; 1857249259Sdim let DST_SEL_X = 0; 1858249259Sdim let DST_SEL_Y = 7; // Masked 1859249259Sdim let DST_SEL_Z = 7; // Masked 1860249259Sdim let DST_SEL_W = 7; // Masked 1861249259Sdim let DATA_FORMAT = 5; // FMT_16 1862249259Sdim 1863249259Sdim} 1864249259Sdim 1865249259Sdimclass VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> 1866249259Sdim : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst), 1867249259Sdim pattern> { 1868249259Sdim 1869249259Sdim let MEGA_FETCH_COUNT = 4; 1870249259Sdim let DST_SEL_X = 0; 1871249259Sdim let DST_SEL_Y = 7; // Masked 1872249259Sdim let DST_SEL_Z = 7; // Masked 1873249259Sdim let DST_SEL_W = 7; // Masked 1874249259Sdim let DATA_FORMAT = 0xD; // COLOR_32 1875249259Sdim 1876249259Sdim // This is not really necessary, but there were some GPU hangs that appeared 1877249259Sdim // to be caused by ALU instructions in the next instruction group that wrote 1878249259Sdim // to the $ptr registers of the VTX_READ. 1879249259Sdim // e.g. 
1880249259Sdim // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24 1881249259Sdim // %T2_X<def> = MOV %ZERO 1882249259Sdim //Adding this constraint prevents this from happening. 1883249259Sdim let Constraints = "$ptr.ptr = $dst"; 1884249259Sdim} 1885249259Sdim 1886249259Sdimclass VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> 1887249259Sdim : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst), 1888249259Sdim pattern> { 1889249259Sdim 1890249259Sdim let MEGA_FETCH_COUNT = 16; 1891249259Sdim let DST_SEL_X = 0; 1892249259Sdim let DST_SEL_Y = 1; 1893249259Sdim let DST_SEL_Z = 2; 1894249259Sdim let DST_SEL_W = 3; 1895249259Sdim let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 1896249259Sdim 1897249259Sdim // XXX: Need to force VTX_READ_128 instructions to write to the same register 1898249259Sdim // that holds its buffer address to avoid potential hangs. We can't use 1899249259Sdim // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst 1900249259Sdim // registers are different sizes. 
1901249259Sdim} 1902249259Sdim 1903249259Sdim//===----------------------------------------------------------------------===// 1904249259Sdim// VTX Read from parameter memory space 1905249259Sdim//===----------------------------------------------------------------------===// 1906249259Sdim 1907249259Sdimdef VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, 1908251662Sdim [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))] 1909249259Sdim>; 1910249259Sdim 1911249259Sdimdef VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, 1912251662Sdim [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))] 1913249259Sdim>; 1914249259Sdim 1915249259Sdimdef VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, 1916251662Sdim [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))] 1917249259Sdim>; 1918249259Sdim 1919249259Sdimdef VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, 1920251662Sdim [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))] 1921249259Sdim>; 1922249259Sdim 1923249259Sdim//===----------------------------------------------------------------------===// 1924249259Sdim// VTX Read from global memory space 1925249259Sdim//===----------------------------------------------------------------------===// 1926249259Sdim 1927249259Sdim// 8-bit reads 1928249259Sdimdef VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, 1929251662Sdim [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))] 1930249259Sdim>; 1931249259Sdim 1932249259Sdim// 32-bit reads 1933249259Sdimdef VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, 1934251662Sdim [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))] 1935249259Sdim>; 1936249259Sdim 1937249259Sdim// 128-bit reads 1938249259Sdimdef VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, 1939251662Sdim [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))] 1940249259Sdim>; 1941249259Sdim 1942249259Sdim//===----------------------------------------------------------------------===// 1943249259Sdim// Constant Loads 1944249259Sdim// XXX: We are currently storing all constants in the global address space. 
1945249259Sdim//===----------------------------------------------------------------------===// 1946249259Sdim 1947249259Sdimdef CONSTANT_LOAD_eg : VTX_READ_32_eg <1, 1948251662Sdim [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))] 1949249259Sdim>; 1950249259Sdim 1951249259Sdim} 1952249259Sdim 1953249259Sdim//===----------------------------------------------------------------------===// 1954249259Sdim// Regist loads and stores - for indirect addressing 1955249259Sdim//===----------------------------------------------------------------------===// 1956249259Sdim 1957249259Sdimdefm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; 1958249259Sdim 1959249259Sdimlet Predicates = [isCayman] in { 1960249259Sdim 1961249259Sdimlet isVector = 1 in { 1962249259Sdim 1963249259Sdimdef RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; 1964249259Sdim 1965249259Sdimdef MULLO_INT_cm : MULLO_INT_Common<0x8F>; 1966249259Sdimdef MULHI_INT_cm : MULHI_INT_Common<0x90>; 1967249259Sdimdef MULLO_UINT_cm : MULLO_UINT_Common<0x91>; 1968249259Sdimdef MULHI_UINT_cm : MULHI_UINT_Common<0x92>; 1969249259Sdimdef RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; 1970249259Sdimdef EXP_IEEE_cm : EXP_IEEE_Common<0x81>; 1971249259Sdimdef LOG_IEEE_cm : LOG_IEEE_Common<0x83>; 1972249259Sdimdef RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; 1973249259Sdimdef RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; 1974249259Sdimdef SIN_cm : SIN_Common<0x8D>; 1975249259Sdimdef COS_cm : COS_Common<0x8E>; 1976249259Sdim} // End isVector = 1 1977249259Sdim 1978251662Sdimdef : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; 1979249259Sdimdef : SIN_PAT <SIN_cm>; 1980249259Sdimdef : COS_PAT <COS_cm>; 1981249259Sdim 1982249259Sdimdefm DIV_cm : DIV_Common<RECIP_IEEE_cm>; 1983249259Sdim 1984249259Sdim// RECIP_UINT emulation for Cayman 1985251662Sdim// The multiplication scales from [0,1] to the unsigned integer range 1986249259Sdimdef : Pat < 1987251662Sdim (AMDGPUurecip i32:$src0), 1988251662Sdim (FLT_TO_UINT_eg 
(MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), 1989251662Sdim (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) 1990249259Sdim>; 1991249259Sdim 1992251662Sdim def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { 1993251662Sdim let ADDR = 0; 1994251662Sdim let POP_COUNT = 0; 1995251662Sdim let COUNT = 0; 1996251662Sdim } 1997249259Sdim 1998251662Sdimdef : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; 1999249259Sdim 2000249259Sdim} // End isCayman 2001249259Sdim 2002249259Sdim//===----------------------------------------------------------------------===// 2003249259Sdim// Branch Instructions 2004249259Sdim//===----------------------------------------------------------------------===// 2005249259Sdim 2006249259Sdim 2007249259Sdimdef IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src), 2008249259Sdim "IF_PREDICATE_SET $src", []>; 2009249259Sdim 2010249259Sdimdef PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src), 2011249259Sdim "PREDICATED_BREAK $src", []>; 2012249259Sdim 2013249259Sdim//===----------------------------------------------------------------------===// 2014249259Sdim// Pseudo instructions 2015249259Sdim//===----------------------------------------------------------------------===// 2016249259Sdim 2017249259Sdimlet isPseudo = 1 in { 2018249259Sdim 2019249259Sdimdef PRED_X : InstR600 < 2020251662Sdim (outs R600_Predicate_Bit:$dst), 2021249259Sdim (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), 2022249259Sdim "", [], NullALU> { 2023249259Sdim let FlagOperandIdx = 3; 2024249259Sdim} 2025249259Sdim 2026249259Sdimlet isTerminator = 1, isBranch = 1 in { 2027251662Sdimdef JUMP_COND : InstR600 < 2028249259Sdim (outs), 2029249259Sdim (ins brtarget:$target, R600_Predicate_Bit:$p), 2030249259Sdim "JUMP $target ($p)", 2031249259Sdim [], AnyALU 2032249259Sdim >; 2033249259Sdim 2034251662Sdimdef JUMP : InstR600 < 2035249259Sdim (outs), 2036249259Sdim (ins brtarget:$target), 2037249259Sdim "JUMP $target", 2038249259Sdim [], AnyALU 
2039249259Sdim > 2040249259Sdim{ 2041249259Sdim let isPredicable = 1; 2042249259Sdim let isBarrier = 1; 2043249259Sdim} 2044249259Sdim 2045249259Sdim} // End isTerminator = 1, isBranch = 1 2046249259Sdim 2047249259Sdimlet usesCustomInserter = 1 in { 2048249259Sdim 2049249259Sdimlet mayLoad = 0, mayStore = 0, hasSideEffects = 1 in { 2050249259Sdim 2051249259Sdimdef MASK_WRITE : AMDGPUShaderInst < 2052249259Sdim (outs), 2053249259Sdim (ins R600_Reg32:$src), 2054249259Sdim "MASK_WRITE $src", 2055249259Sdim [] 2056249259Sdim>; 2057249259Sdim 2058249259Sdim} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 2059249259Sdim 2060249259Sdim 2061251662Sdimdef TXD: InstR600 < 2062249259Sdim (outs R600_Reg128:$dst), 2063251662Sdim (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, 2064251662Sdim i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), 2065249259Sdim "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", 2066251662Sdim [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, 2067251662Sdim imm:$resourceId, imm:$samplerId, imm:$textureTarget))], 2068251662Sdim NullALU > { 2069251662Sdim let TEXInst = 1; 2070251662Sdim} 2071249259Sdim 2072251662Sdimdef TXD_SHADOW: InstR600 < 2073249259Sdim (outs R600_Reg128:$dst), 2074251662Sdim (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, 2075251662Sdim i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), 2076249259Sdim "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", 2077251662Sdim [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, 2078251662Sdim imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], 2079251662Sdim NullALU 2080251662Sdim> { 2081251662Sdim let TEXInst = 1; 2082251662Sdim} 2083249259Sdim} // End isPseudo = 1 2084249259Sdim} // End usesCustomInserter = 1 2085249259Sdim 2086249259Sdimdef CLAMP_R600 : CLAMP <R600_Reg32>; 2087249259Sdimdef FABS_R600 : FABS<R600_Reg32>; 
def FNEG_R600 : FNEG<R600_Reg32>;

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
    usesCustomInserter = 1 in {
  def RETURN : ILFormat<(outs), (ins variable_ops),
                        "RETURN", [(IL_retflag)]>;
}


//===----------------------------------------------------------------------===//
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//

let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
def CONST_COPY : Instruction {
  let OutOperandList = (outs R600_Reg32:$dst);
  let InOperandList = (ins i32imm:$src);
  let Pattern =
      [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
  let AsmString = "CONST_COPY";
  let neverHasSideEffects = 1;
  let isAsCheapAsAMove = 1;
  let Itinerary = NullALU;
}
} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"

// Vertex fetch selected for CONST_ADDRESS with a variable offset; the buffer
// id is an immediate operand rather than a fixed field.
def TEX_VTX_CONSTBUF :
  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
      [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
  VTX_WORD1_GPR, VTX_WORD0 {

  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let SRC_REL = 0;
  let SRC_SEL_X = 0;
  let DST_REL = 0;
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL = 2;
  let FORMAT_COMP_ALL = 1;
  let SRF_MODE_ALL = 1;
  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 35;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2> ENDIAN_SWAP = 0;
  // bits<1> CONST_BUF_NO_STRIDE = 0;
  // bits<1> MEGA_FETCH = 0;
  // bits<1> ALT_CONST = 0;
  // bits<2> BUFFER_INDEX_MODE = 0;

  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter)
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82} = CONST_BUF_NO_STRIDE;
  // Inst{83} = MEGA_FETCH;
  // Inst{84} = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;
  let VTXInst = 1;
}

// Vertex fetch selected for the int_R600_load_texbuf intrinsic.
def TEX_VTX_TEXBUF:
  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
      [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
  VTX_WORD1_GPR, VTX_WORD0 {

  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let SRC_REL = 0;
  let SRC_SEL_X = 0;
  let DST_REL = 0;
  let USE_CONST_FIELDS = 1;
  let NUM_FORMAT_ALL = 0;
  let FORMAT_COMP_ALL = 0;
  let SRF_MODE_ALL = 1;
  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 1;
  let DST_SEL_Z = 2;
  let DST_SEL_W = 3;
  let DATA_FORMAT = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2> ENDIAN_SWAP = 0;
  // bits<1> CONST_BUF_NO_STRIDE = 0;
  // bits<1> MEGA_FETCH = 0;
  // bits<1> ALT_CONST = 0;
  // bits<2> BUFFER_INDEX_MODE = 0;

  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter)
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82} = CONST_BUF_NO_STRIDE;
  // Inst{83} = MEGA_FETCH;
  // Inst{84} = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;
  let VTXInst = 1;
}



//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
                        "; Pseudo unconditional branch instruction",
                        [(br bb:$target)]>;
  defm BRANCH_COND : BranchConditional<IL_brcond>;
}

//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
let isTerminator=1 in {
  def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
                         !strconcat("SWITCH", " $src"), []>;
  def CASE : ILFormat< (outs), (ins GPRI32:$src),
                       !strconcat("CASE", " $src"), []>;
  def BREAK : ILFormat< (outs), (ins),
                        "BREAK", []>;
  def CONTINUE : ILFormat< (outs), (ins),
                           "CONTINUE", []>;
  def DEFAULT : ILFormat< (outs), (ins),
                          "DEFAULT", []>;
  def ELSE : ILFormat< (outs), (ins),
                       "ELSE", []>;
  def ENDSWITCH : ILFormat< (outs), (ins),
                            "ENDSWITCH", []>;
  def ENDMAIN : ILFormat< (outs), (ins),
                          "ENDMAIN", []>;
  def END : ILFormat< (outs), (ins),
                      "END", []>;
  def ENDFUNC : ILFormat< (outs), (ins),
                          "ENDFUNC", []>;
  def ENDIF : ILFormat< (outs), (ins),
                        "ENDIF", []>;
  def WHILELOOP : ILFormat< (outs), (ins),
                            "WHILE", []>;
  def ENDLOOP : ILFormat< (outs), (ins),
                          "ENDLOOP", []>;
  def FUNC : ILFormat< (outs), (ins),
                       "FUNC", []>;
  def RETDYN : ILFormat< (outs), (ins),
                         "RET_DYN", []>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
  defm IFC : BranchInstr2<"IFC">;
  defm BREAKC : BranchInstr2<"BREAKC">;
  defm CONTINUEC : BranchInstr2<"CONTINUEC">;
}

//===----------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//

// CND*_INT Patterns for f32 True / False values

// Selects `cnd` for a selectcc that compares an i32 against 0 with condition
// `cc` and chooses between two f32 values.
class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
  (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
  (cnd $src0, $src1, $src2)
>;

def : CND_INT_f32 <CNDE_INT, SETEQ>;
def : CND_INT_f32 <CNDGT_INT, SETGT>;
def : CND_INT_f32 <CNDGE_INT, SETGE>;

// CNDGE_INT extra pattern: signed (src0 > -1) is the same test as
// (src0 >= 0).  The condition is spelled SETGT so that only the signed
// integer comparison matches, like the other integer selectcc patterns in
// this file.
// NOTE(review): this previously used the COND_GT leaf, which is defined
// outside this file section and is otherwise only used here for f32
// comparisons - confirm no caller relied on its wider match set.
def : Pat <
  (selectcc i32:$src0, -1, i32:$src1, i32:$src2, SETGT),
  (CNDGE_INT $src0, $src1, $src2)
>;

// KIL Patterns
def KILP : Pat <
  (int_AMDGPU_kilp),
  (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
>;

def KIL : Pat <
  (int_AMDGPU_kill f32:$src0),
  (MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;

// SGT Reverse args
def : Pat <
  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
  (SGT $src1, $src0)
>;

// SGE Reverse args
def : Pat <
  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
  (SGE $src1, $src0)
>;

// SETGT_DX10 reverse args
def : Pat <
  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
  (SETGT_DX10 $src1, $src0)
>;

// SETGE_DX10 reverse args
def : Pat <
  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
  (SETGE_DX10 $src1, $src0)
>;

// SETGT_INT reverse args
def : Pat <
  (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
  (SETGT_INT $src1, $src0)
>;

// SETGE_INT reverse args
def : Pat <
  (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
  (SETGE_INT $src1, $src0)
>;

// SETGT_UINT reverse args
def : Pat <
  (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
  (SETGT_UINT $src1, $src0)
>;

// SETGE_UINT reverse args
def : Pat <
  (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
  (SETGE_UINT $src1, $src0)
>;

// The next four patterns are special cases for handling 'true if ordered' and
// 'true if unordered' conditionals. The assumption here is that the behavior of
// SETE and SNE conforms to the Direct3D 10 rules for floating point values
// described here:
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
// We assume that SETE returns false when one of the operands is NAN and
// SNE returns true when one of the operands is NAN

// SETE - 'true if ordered'
def : Pat <
  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
  (SETE $src0, $src1)
>;

// SETE_DX10 - 'true if ordered'
def : Pat <
  (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
  (SETE_DX10 $src0, $src1)
>;

// SNE - 'true if unordered'
def : Pat <
  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
  (SNE $src0, $src1)
>;

// SETNE_DX10 - 'true if unordered'
def : Pat <
  (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
  (SETNE_DX10 $src0, $src1)
>;

// f32 lane access on v4f32: lane N is paired with sub-register index subN.
def : Extract_Element <f32, v4f32, 0, sub0>;
def : Extract_Element <f32, v4f32, 1, sub1>;
def : Extract_Element <f32, v4f32, 2, sub2>;
def : Extract_Element <f32, v4f32, 3, sub3>;

def : Insert_Element <f32, v4f32, 0, sub0>;
def : Insert_Element <f32, v4f32, 1, sub1>;
def : Insert_Element <f32, v4f32, 2, sub2>;
def : Insert_Element <f32, v4f32, 3, sub3>;

// i32 lane access on v4i32: lane N is paired with sub-register index subN.
def : Extract_Element<i32, v4i32, 0, sub0>;
def : Extract_Element<i32, v4i32, 1, sub1>;
def : Extract_Element<i32, v4i32, 2, sub2>;
def : Extract_Element<i32, v4i32, 3, sub3>;

def : Insert_Element<i32, v4i32, 0, sub0>;
def : Insert_Element<i32, v4i32, 1, sub1>;
def : Insert_Element<i32, v4i32, 2, sub2>;
def : Insert_Element<i32, v4i32, 3, sub3>;

// Four-element vector construction, one instantiation per element type.
def : Vector4_Build<v4f32, f32>;
def : Vector4_Build<v4i32, i32>;

// Bitcasts between the integer and floating-point views of the same
// register class, in both directions, for scalars and 4-vectors.
def : BitConvert<i32, f32, R600_Reg32>;
def : BitConvert<f32, i32, R600_Reg32>;
def : BitConvert<v4f32, v4i32, R600_Reg128>;
def : BitConvert<v4i32, v4f32, R600_Reg128>;

// DWORDADDR on i32 values held in R600_Reg32.
def : DwordAddrPat<i32, R600_Reg32>;

} // End isR600toCayman Predicate