R600Instructions.td revision 256281
1//===-- R600Instructions.td - R600 Instruction defs  -------*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 Tablegen instruction definitions
11//
12//===----------------------------------------------------------------------===//
13
14include "R600Intrinsics.td"
15
// Common base for the R600 instruction classes defined in this file.
// Forwards the operand lists, asm string, pattern and itinerary to the
// generic instruction fields and publishes the R600-specific flags declared
// below through TSFlags.
class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
                InstrItinClass itin>
    : AMDGPUInst <outs, ins, asm, pattern> {

  // 64-bit encoding; the derived classes assign its bits.
  field bits<64> Inst;

  // Per-instruction flags. Each one defaults to 0 and is overridden by the
  // derived classes that need it.
  bit     TransOnly         = 0;
  bit     Trig              = 0;
  bit     Op3               = 0;
  bit     isVector          = 0;
  bits<2> FlagOperandIdx    = 0;
  bit     Op1               = 0;
  bit     Op2               = 0;
  bit     HasNativeOperands = 0;
  bit     VTXInst           = 0;
  bit     TEXInst           = 0;

  let Namespace      = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList  = ins;
  let AsmString      = asm;
  let Pattern        = pattern;
  let Itinerary      = itin;

  // TSFlags layout. Bits 3-1 are not assigned by this class.
  let TSFlags{0}   = TransOnly;
  let TSFlags{4}   = Trig;
  let TSFlags{5}   = Op3;

  // Vector instructions are instructions that must fill all slots in an
  // instruction group
  let TSFlags{6}   = isVector;
  let TSFlags{8-7} = FlagOperandIdx;
  let TSFlags{9}   = HasNativeOperands;
  let TSFlags{10}  = Op1;
  let TSFlags{11}  = Op2;
  let TSFlags{12}  = VTXInst;
  let TSFlags{13}  = TEXInst;
}
53
// InstR600 variant that always uses the NullALU itinerary, so users only
// supply operands, asm string and pattern.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern>
    : InstR600 <outs, ins, asm, pattern, NullALU> {

  // Same value InstR600 already assigns; kept explicit here.
  let Namespace = "AMDGPU";
}
59
// Pointer operand made of an X-channel register plus an immediate index.
// Printed through printMemOperand.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
  let PrintMethod   = "printMemOperand";
}

// Pointer operand made of two registers: a base pointer and an index.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}
68
69// Operands for non-registers
70
// i32 operand with a default value, used for the instruction modifier flags.
//   PM      - name of the print method used for the operand
//             (defaults to "printOperand").
//   Default - value used when the operand is not given explicitly
//             (defaults to 0).
class InstFlag<string PM = "printOperand", int Default = 0>
    : OperandWithDefaultOps <i32, (ops (i32 Default))> {
  let PrintMethod = PM;
}
75
// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers.
// Defaults to -1 and is printed through printSel.
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
  let PrintMethod = "printSel";
}

// Bank swizzle operand; defaults to 0 and is printed through
// printBankSwizzle.
def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
  let PrintMethod = "printBankSwizzle";
}
83
// Flag operands. Each one is an InstFlag with its own print method; all
// default to 0 except WRITE and LAST, which default to 1.
def LITERAL : InstFlag <"printLiteral">;

def WRITE   : InstFlag <"printWrite", 1>;
def OMOD    : InstFlag <"printOMOD">;
def REL     : InstFlag <"printRel">;
def CLAMP   : InstFlag <"printClamp">;
def NEG     : InstFlag <"printNeg">;
def ABS     : InstFlag <"printAbs">;
def UEM     : InstFlag <"printUpdateExecMask">;
def UP      : InstFlag <"printUpdatePred">;

// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
// Once we start using the packetizer in this backend we should have this
// default to 0.
def LAST    : InstFlag <"printLast", 1>;
99
// Pointer operand made of a register plus an immediate index.
def FRAMEri : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}
103
// Addressing-mode complex patterns. Each names the selector function that
// performs the match and the number of operands that function produces.
def ADDRParam           : ComplexPattern<i32,  2, "SelectADDRParam", [], []>;
def ADDRDWord           : ComplexPattern<i32,  1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ        : ComplexPattern<i32,  2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32,  1,
                                         "SelectGlobalValueConstantOffset",
                                         [], []>;
def ADDRGA_VAR_OFFSET   : ComplexPattern<i32,  2,
                                         "SelectGlobalValueVariableOffset",
                                         [], []>;
def ADDRIndirect        : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
110
// Layout of the first 32-bit word of an ALU instruction.
class R600ALU_Word0 {
  field bits<32> Word0;

  // Raw fields. src0 and src1 are 11-bit values that are split below into a
  // selector and a channel.
  bits<11> src0;
  bits<1>  src0_neg;
  bits<1>  src0_rel;
  bits<11> src1;
  bits<1>  src1_rel;
  bits<1>  src1_neg;
  bits<3>  index_mode = 0;
  bits<2>  pred_sel;
  bits<1>  last;

  // Low 9 bits of a source are its selector, the top 2 bits its channel.
  bits<9>  src0_sel  = src0{8-0};
  bits<2>  src0_chan = src0{10-9};
  bits<9>  src1_sel  = src1{8-0};
  bits<2>  src1_chan = src1{10-9};

  // Source 0
  let Word0{8-0}   = src0_sel;
  let Word0{9}     = src0_rel;
  let Word0{11-10} = src0_chan;
  let Word0{12}    = src0_neg;
  // Source 1
  let Word0{21-13} = src1_sel;
  let Word0{22}    = src1_rel;
  let Word0{24-23} = src1_chan;
  let Word0{25}    = src1_neg;
  // Remaining fields
  let Word0{28-26} = index_mode;
  let Word0{30-29} = pred_sel;
  let Word0{31}    = last;
}
141
// Fields of the second ALU word that the OP2 and OP3 forms have in common
// (bits 31-18). Bits 17-0 are assigned by the derived classes.
class R600ALU_Word1 {
  field bits<32> Word1;

  bits<11> dst;
  bits<3>  bank_swizzle;
  bits<1>  dst_rel;
  bits<1>  clamp;

  // The destination selector is dst{6-0} and the channel is dst{10-9};
  // dst{8-7} is not encoded.
  bits<7>  dst_sel  = dst{6-0};
  bits<2>  dst_chan = dst{10-9};

  let Word1{20-18} = bank_swizzle;
  let Word1{27-21} = dst_sel;
  let Word1{28}    = dst_rel;
  let Word1{30-29} = dst_chan;
  let Word1{31}    = clamp;
}
159
// Second ALU word for the OP2 form: adds the modifier bits and an 11-bit
// opcode in bits 17-0 on top of the shared R600ALU_Word1 fields.
//   alu_inst - opcode placed in Word1{17-7}.
class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1 {

  bits<1> src0_abs;
  bits<1> src1_abs;
  bits<1> update_exec_mask;
  bits<1> update_pred;
  bits<1> write;
  bits<2> omod;

  let Word1{0}    = src0_abs;
  let Word1{1}    = src1_abs;
  let Word1{2}    = update_exec_mask;
  let Word1{3}    = update_pred;
  let Word1{4}    = write;
  let Word1{6-5}  = omod;
  let Word1{17-7} = alu_inst;
}
177
// Second ALU word for the OP3 form: bits 12-0 describe a third source and
// bits 17-13 carry a 5-bit opcode, on top of the shared R600ALU_Word1 fields.
//   alu_inst - opcode placed in Word1{17-13}.
class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1 {

  bits<11> src2;
  bits<1>  src2_rel;
  bits<1>  src2_neg;

  // Same selector/channel split as the sources in R600ALU_Word0.
  bits<9>  src2_sel  = src2{8-0};
  bits<2>  src2_chan = src2{10-9};

  let Word1{8-0}   = src2_sel;
  let Word1{9}     = src2_rel;
  let Word1{11-10} = src2_chan;
  let Word1{12}    = src2_neg;
  let Word1{17-13} = alu_inst;
}
193
// Layout of the first 32-bit word of a vertex fetch instruction. All 32 bits
// are assigned.
class VTX_WORD0 {
  field bits<32> Word0;

  bits<7> SRC_GPR;
  bits<5> VC_INST;
  bits<2> FETCH_TYPE;
  bits<1> FETCH_WHOLE_QUAD;
  bits<8> BUFFER_ID;
  bits<1> SRC_REL;
  bits<2> SRC_SEL_X;
  bits<6> MEGA_FETCH_COUNT;

  let Word0{4-0}   = VC_INST;
  let Word0{6-5}   = FETCH_TYPE;
  let Word0{7}     = FETCH_WHOLE_QUAD;
  let Word0{15-8}  = BUFFER_ID;
  let Word0{22-16} = SRC_GPR;
  let Word0{23}    = SRC_REL;
  let Word0{25-24} = SRC_SEL_X;
  let Word0{31-26} = MEGA_FETCH_COUNT;
}
214
// Layout of the second 32-bit word of a vertex fetch instruction, in the
// form whose destination is a GPR. Bit 8 is reserved and forced to 0.
class VTX_WORD1_GPR {
  field bits<32> Word1;

  bits<7> DST_GPR;
  bits<1> DST_REL;
  bits<3> DST_SEL_X;
  bits<3> DST_SEL_Y;
  bits<3> DST_SEL_Z;
  bits<3> DST_SEL_W;
  bits<1> USE_CONST_FIELDS;
  bits<6> DATA_FORMAT;
  bits<2> NUM_FORMAT_ALL;
  bits<1> FORMAT_COMP_ALL;
  bits<1> SRF_MODE_ALL;

  let Word1{6-0}   = DST_GPR;
  let Word1{7}     = DST_REL;
  let Word1{8}     = 0; // Reserved
  let Word1{11-9}  = DST_SEL_X;
  let Word1{14-12} = DST_SEL_Y;
  let Word1{17-15} = DST_SEL_Z;
  let Word1{20-18} = DST_SEL_W;
  let Word1{21}    = USE_CONST_FIELDS;
  let Word1{27-22} = DATA_FORMAT;
  let Word1{29-28} = NUM_FORMAT_ALL;
  let Word1{30}    = FORMAT_COMP_ALL;
  let Word1{31}    = SRF_MODE_ALL;
}
242
// Layout of the first 32-bit word of a texture instruction. Bits 31-29 are
// not assigned by this class.
class TEX_WORD0 {
  field bits<32> Word0;

  bits<5> TEX_INST;
  bits<2> INST_MOD;
  bits<1> FETCH_WHOLE_QUAD;
  bits<8> RESOURCE_ID;
  bits<7> SRC_GPR;
  bits<1> SRC_REL;
  bits<1> ALT_CONST;
  bits<2> RESOURCE_INDEX_MODE;
  bits<2> SAMPLER_INDEX_MODE;

  let Word0{4-0}   = TEX_INST;
  let Word0{6-5}   = INST_MOD;
  let Word0{7}     = FETCH_WHOLE_QUAD;
  let Word0{15-8}  = RESOURCE_ID;
  let Word0{22-16} = SRC_GPR;
  let Word0{23}    = SRC_REL;
  let Word0{24}    = ALT_CONST;
  let Word0{26-25} = RESOURCE_INDEX_MODE;
  let Word0{28-27} = SAMPLER_INDEX_MODE;
}
266
// Layout of the second 32-bit word of a texture instruction. Bit 8 is not
// assigned by this class.
class TEX_WORD1 {
  field bits<32> Word1;

  bits<7> DST_GPR;
  bits<1> DST_REL;
  bits<3> DST_SEL_X;
  bits<3> DST_SEL_Y;
  bits<3> DST_SEL_Z;
  bits<3> DST_SEL_W;
  bits<7> LOD_BIAS;
  bits<1> COORD_TYPE_X;
  bits<1> COORD_TYPE_Y;
  bits<1> COORD_TYPE_Z;
  bits<1> COORD_TYPE_W;

  let Word1{6-0}   = DST_GPR;
  let Word1{7}     = DST_REL;
  let Word1{11-9}  = DST_SEL_X;
  let Word1{14-12} = DST_SEL_Y;
  let Word1{17-15} = DST_SEL_Z;
  let Word1{20-18} = DST_SEL_W;
  let Word1{27-21} = LOD_BIAS;
  let Word1{28}    = COORD_TYPE_X;
  let Word1{29}    = COORD_TYPE_Y;
  let Word1{30}    = COORD_TYPE_Z;
  let Word1{31}    = COORD_TYPE_W;
}
294
// Layout of the third 32-bit word of a texture instruction. All 32 bits are
// assigned.
class TEX_WORD2 {
  field bits<32> Word2;

  bits<5> OFFSET_X;
  bits<5> OFFSET_Y;
  bits<5> OFFSET_Z;
  bits<5> SAMPLER_ID;
  bits<3> SRC_SEL_X;
  bits<3> SRC_SEL_Y;
  bits<3> SRC_SEL_Z;
  bits<3> SRC_SEL_W;

  let Word2{4-0}   = OFFSET_X;
  let Word2{9-5}   = OFFSET_Y;
  let Word2{14-10} = OFFSET_Z;
  let Word2{19-15} = SAMPLER_ID;
  let Word2{22-20} = SRC_SEL_X;
  let Word2{25-23} = SRC_SEL_Y;
  let Word2{28-26} = SRC_SEL_Z;
  let Word2{31-29} = SRC_SEL_W;
}
316
317/*
318XXX: R600 subtarget uses a slightly different encoding than the other
319subtargets.  We currently handle this in R600MCCodeEmitter, but we may
320want to use these instruction classes in the future.
321
322class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
323
324  bits<1>  fog_merge;
325  bits<10> alu_inst;
326
327  let Inst{37}    = fog_merge;
328  let Inst{39-38} = omod;
329  let Inst{49-40} = alu_inst;
330}
331
332class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
333
334  bits<11> alu_inst;
335
336  let Inst{38-37} = omod;
337  let Inst{49-39} = alu_inst;
338}
339*/
340
// Predicate operand for ALU instructions: an R600_Predicate register whose
// default value is PRED_SEL_OFF.
def R600_Pred : PredicateOperand<i32,
                                 (ops R600_Predicate),
                                 (ops PRED_SEL_OFF)>;
343
344
345let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
346
347// Class for instructions with only one source register.
348// If you add new ins to this instruction, make sure they are listed before
349// $literal, because the backend currently assumes that the last operand is
350// a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
351// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
352// and R600InstrInfo::getOperandIdx().
// ALU instruction with a single source register, encoded as
// R600ALU_Word0 + R600ALU_Word1_OP2.
//   inst    - 11-bit opcode forwarded to R600ALU_Word1_OP2.
//   opName  - mnemonic used at the start of the asm string.
//   pattern - selection pattern(s).
//   itin    - itinerary class (defaults to AnyALU).
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU>
    : InstR600 <
        (outs R600_Reg32:$dst),
        (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
             R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
             SEL:$src0_sel,
             LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
             BANK_SWIZZLE:$bank_swizzle),
        !strconcat("  ", opName,
                   "$last$clamp $dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                   "$pred_sel $bank_swizzle"),
        pattern,
        itin>,
      R600ALU_Word0,
      R600ALU_Word1_OP2 <inst> {

  // There is no second source, so every src1-related field is zeroed, and
  // the exec-mask / predicate update bits are zeroed as well.
  let src1             = 0;
  let src1_rel         = 0;
  let src1_neg         = 0;
  let src1_abs         = 0;
  let update_exec_mask = 0;
  let update_pred      = 0;

  let HasNativeOperands = 1;
  let Op1               = 1;
  // $literal is excluded from the generated encoding.
  let DisableEncoding   = "$literal";

  // Assemble the 64-bit encoding from the two ALU words.
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
382
// Convenience wrapper around R600_1OP that builds the usual
// "dst = node(src0)" selection pattern on 32-bit registers.
//   inst   - 11-bit opcode.
//   opName - mnemonic used in the asm string.
//   node   - SelectionDAG operator matched by the pattern.
//   itin   - itinerary class, forwarded to R600_1OP (defaults to AnyALU).
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                    InstrItinClass itin = AnyALU> :
    R600_1OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))],
              itin
>;
388
// If you add or change the operands for R600_2OP instructions, you must
// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
// ALU instruction with two source registers, encoded as
// R600ALU_Word0 + R600ALU_Word1_OP2.
//   inst    - 11-bit opcode forwarded to R600ALU_Word1_OP2.
//   opName  - mnemonic used at the start of the asm string.
//   pattern - selection pattern(s).
//   itin    - itinerary class (defaults to AnyALU).
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU>
    : InstR600 <
        (outs R600_Reg32:$dst),
        (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
             OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
             R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
             SEL:$src0_sel,
             R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
             SEL:$src1_sel,
             LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
             BANK_SWIZZLE:$bank_swizzle),
        !strconcat("  ", opName,
                   "$last$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                   "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
                   "$pred_sel $bank_swizzle"),
        pattern,
        itin>,
      R600ALU_Word0,
      R600ALU_Word1_OP2 <inst> {

  let HasNativeOperands = 1;
  let Op2               = 1;
  // $literal is excluded from the generated encoding.
  let DisableEncoding   = "$literal";

  // Assemble the 64-bit encoding from the two ALU words.
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
418
// Convenience wrapper around R600_2OP that builds the usual
// "dst = node(src0, src1)" selection pattern on 32-bit registers.
//   inst   - 11-bit opcode.
//   opName - mnemonic used in the asm string.
//   node   - SelectionDAG operator matched by the pattern.
//   itin   - itinerary class, forwarded to R600_2OP (defaults to AnyALU).
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_2OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
                                           R600_Reg32:$src1))],
              itin
>;
425
// If you add or change the operands for R600_3OP instructions, you must
// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and
// R600InstrInfo::getOperandIdx().
// ALU instruction with three source registers, encoded as
// R600ALU_Word0 + R600ALU_Word1_OP3.
//   inst    - 5-bit opcode forwarded to R600ALU_Word1_OP3.
//   opName  - mnemonic used at the start of the asm string.
//   pattern - selection pattern(s).
//   itin    - itinerary class (defaults to AnyALU).
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU>
    : InstR600 <
        (outs R600_Reg32:$dst),
        (ins REL:$dst_rel, CLAMP:$clamp,
             R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
             R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
             R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
             LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
             BANK_SWIZZLE:$bank_swizzle),
        // NOTE(review): unlike R600_1OP/R600_2OP there is no space between
        // $pred_sel and $bank_swizzle here.
        !strconcat("  ", opName, "$last$clamp $dst$dst_rel, "
                   "$src0_neg$src0$src0_rel, "
                   "$src1_neg$src1$src1_rel, "
                   "$src2_neg$src2$src2_rel, "
                   "$pred_sel"
                   "$bank_swizzle"),
        pattern,
        itin>,
      R600ALU_Word0,
      R600ALU_Word1_OP3 <inst> {

  let HasNativeOperands = 1;
  // $literal is excluded from the generated encoding.
  let DisableEncoding   = "$literal";
  let Op3               = 1;

  // Assemble the 64-bit encoding from the two ALU words.
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
457
// Base class for reduction instructions: one 32-bit register result, with
// inputs, asm string and pattern supplied by the user. The itinerary
// defaults to VecALU.
// NOTE(review): `inst` is accepted but not referenced by this class.
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU>
    : InstR600 <(outs R600_Reg32:$dst), ins, asm, pattern, itin>;
465
// Texture instruction: 128-bit source and destination registers plus
// resource id, sampler id and texture target immediates.
//   inst    - opcode; only its low 5 bits are used (TEX_INST).
//   opName  - mnemonic prefix of the asm string.
//   pattern - selection pattern(s).
//   itin    - itinerary class (defaults to AnyALU).
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU>
    : InstR600 <
        (outs R600_Reg128:$DST_GPR),
        (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
             i32imm:$textureTarget),
        !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
        pattern,
        itin>,
      TEX_WORD0, TEX_WORD1, TEX_WORD2 {

    // Only Word0 and Word1 are placed into the 64-bit Inst field here.
    let Inst{31-0}  = Word0;
    let Inst{63-32} = Word1;

    let TEX_INST  = inst{4-0};
    let SRC_REL   = 0;
    let DST_REL   = 0;
    // Destination selects are fixed to 0, 1, 2, 3 for X, Y, Z, W.
    let DST_SEL_X = 0;
    let DST_SEL_Y = 1;
    let DST_SEL_Z = 2;
    let DST_SEL_W = 3;
    let LOD_BIAS  = 0;

    let INST_MOD            = 0;
    let FETCH_WHOLE_QUAD    = 0;
    let ALT_CONST           = 0;
    let SAMPLER_INDEX_MODE  = 0;
    let RESOURCE_INDEX_MODE = 0;

    let COORD_TYPE_X = 0;
    let COORD_TYPE_Y = 0;
    let COORD_TYPE_Z = 0;
    let COORD_TYPE_W = 0;

    // Mark the instruction as a texture instruction in TSFlags.
    let TEXInst = 1;
  }
498
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
500
// Matches an immediate whose value is in [6, 8] or [11, 13].
def TEX_SHADOW : PatLeaf<
  (imm),
  [{
    uint32_t Target = (uint32_t)N->getZExtValue();
    if (Target >= 6 && Target <= 8)
      return true;
    return Target >= 11 && Target <= 13;
  }]
>;
507
// Matches an immediate whose value is 5.
def TEX_RECT : PatLeaf<
  (imm),
  [{
    uint32_t Target = (uint32_t)N->getZExtValue();
    return Target == 5;
  }]
>;
514
// Matches an immediate whose value is 9, 10, 15 or 16.
def TEX_ARRAY : PatLeaf<
  (imm),
  [{
    uint32_t Target = (uint32_t)N->getZExtValue();
    switch (Target) {
    case 9:
    case 10:
    case 15:
    case 16:
      return true;
    default:
      return false;
    }
  }]
>;
521
// Matches an immediate whose value is 11, 12 or 17.
def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{
    uint32_t Target = (uint32_t)N->getZExtValue();
    switch (Target) {
    case 11:
    case 12:
    case 17:
      return true;
    default:
      return false;
    }
  }]
>;
528
// 64-bit control-flow RAT instruction encoding. The low word follows
// CF_ALLOC_EXPORT_WORD0_RAT and the high word CF_ALLOC_EXPORT_WORD1_BUF.
//   cf_inst  - 8-bit CF opcode, placed in Inst{61-54}.
//   rat_inst - 6-bit RAT opcode, placed in Inst{9-4}.
//   rat_id   - 4-bit RAT id, placed in Inst{3-0}.
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
                 dag ins, string asm, list<dag> pattern>
    : InstR600ISA <outs, ins, asm, pattern> {

  bits<7>  RW_GPR;
  bits<7>  INDEX_GPR;

  bits<2>  RIM;
  bits<2>  TYPE;
  bits<1>  RW_REL;
  bits<2>  ELEM_SIZE;

  bits<12> ARRAY_SIZE;
  bits<4>  COMP_MASK;
  bits<4>  BURST_COUNT;
  bits<1>  VPM;
  bits<1>  eop;
  bits<1>  MARK;
  bits<1>  BARRIER;

  // CF_ALLOC_EXPORT_WORD0_RAT
  let Inst{3-0}   = rat_id;
  let Inst{9-4}   = rat_inst;
  let Inst{10}    = 0; // Reserved
  let Inst{12-11} = RIM;
  let Inst{14-13} = TYPE;
  let Inst{21-15} = RW_GPR;
  let Inst{22}    = RW_REL;
  let Inst{29-23} = INDEX_GPR;
  let Inst{31-30} = ELEM_SIZE;

  // CF_ALLOC_EXPORT_WORD1_BUF
  let Inst{43-32} = ARRAY_SIZE;
  let Inst{47-44} = COMP_MASK;
  let Inst{51-48} = BURST_COUNT;
  let Inst{52}    = VPM;
  let Inst{53}    = eop;
  let Inst{61-54} = cf_inst;
  let Inst{62}    = MARK;
  let Inst{63}    = BARRIER;
}
569
// Wraps a load fragment so that it only matches when isParamLoad() accepts
// the load node.
//   load_type - the underlying load fragment to restrict.
class LoadParamFrag <PatFrag load_type> : PatFrag <
  (ops node:$ptr),
  (load_type node:$ptr),
  [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
>;

// Parameter-load variants of the plain, zext-i8 and zext-i16 loads.
def load_param         : LoadParamFrag<load>;
def load_param_zexti8  : LoadParamFrag<zextloadi8>;
def load_param_zexti16 : LoadParamFrag<zextloadi16>;
578
// Subtarget predicates. Each string is the C++ condition evaluated against
// the subtarget's device; adjacent literals are concatenated.

// Generation is HD4XXX.
def isR600 : Predicate<
  "Subtarget.device()"
  "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;

// Generation is HD4XXX and the device flag is at least OCL_DEVICE_RV710.
def isR700 : Predicate<
  "Subtarget.device()"
  "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
  "Subtarget.device()->getDeviceFlag()"
  ">= OCL_DEVICE_RV710">;

// Generation is in [HD5XXX, HD7XXX) and the device is not Cayman.
def isEG : Predicate<
  "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
  "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
  "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;

// Device flag is OCL_DEVICE_CAYMAN.
def isCayman : Predicate<
  "Subtarget.device()"
  "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;

// Generation is HD5XXX or HD6XXX.
def isEGorCayman : Predicate<
  "Subtarget.device()"
  "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
  "|| Subtarget.device()->getGeneration() =="
  "AMDGPUDeviceInfo::HD6XXX">;

// Generation is HD6XXX or earlier.
def isR600toCayman : Predicate<
  "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
599
600//===----------------------------------------------------------------------===//
601// R600 SDNodes
602//===----------------------------------------------------------------------===//
603
// Shader instruction taking an immediate and two registers and defining a
// pair of results, one in an X-channel and one in a Y-channel register.
// It carries no selection pattern.
def INTERP_PAIR_XY :  AMDGPUShaderInst <
  (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  // Both results are printed as operands ($dst0 and $dst1).
  "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
  []>;
609
// Shader instruction taking an immediate and two registers and defining a
// pair of results, one in a Z-channel and one in a W-channel register.
// It carries no selection pattern.
def INTERP_PAIR_ZW :  AMDGPUShaderInst <
  (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  // Both results are printed as operands ($dst0 and $dst1).
  "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
  []>;
615
// SelectionDAG node for AMDGPUISD::CONST_ADDRESS: one integer result, a
// variadic operand list, and a pointer-typed value at type index 1.
def CONST_ADDRESS : SDNode<
  "AMDGPUISD::CONST_ADDRESS",
  SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
  [SDNPVariadic]
>;
620
621//===----------------------------------------------------------------------===//
622// Interpolation Instructions
623//===----------------------------------------------------------------------===//
624
// Shader instruction producing a 128-bit register from an immediate; it has
// no selection pattern.
def INTERP_VEC_LOAD : AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src0),
  "INTERP_LOAD $src0 : $dst",
  []
>;

// Two-operand interpolation instructions; both force bank_swizzle to 5.
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
  let bank_swizzle = 5;
}

def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
  let bank_swizzle = 5;
}

// One-operand interpolation load.
def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
640
641//===----------------------------------------------------------------------===//
642// Export Instructions
643//===----------------------------------------------------------------------===//
644
// Type profile of the export node: no results and seven operands, with
// operand 0 floating point and operand 1 integer.
def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

// AMDGPUISD::EXPORT node; it is chained and has side effects.
def EXPORT : SDNode<
  "AMDGPUISD::EXPORT",
  ExportType,
  [SDNPHasChain, SDNPSideEffect]
>;
649
// First 32-bit word of an export instruction. RW_REL and INDEX_GPR are
// hard-wired to 0.
class ExportWord0 {
  field bits<32> Word0;

  bits<13> arraybase;
  bits<2>  type;
  bits<7>  gpr;
  bits<2>  elem_size;

  let Word0{12-0}  = arraybase;
  let Word0{14-13} = type;
  let Word0{21-15} = gpr;
  let Word0{22}    = 0; // RW_REL
  let Word0{29-23} = 0; // INDEX_GPR
  let Word0{31-30} = elem_size;
}
665
// Second 32-bit word of a swizzled export. Only the four swizzle selectors
// (bits 11-0) are placed here; `eop` and `inst` are declared but not
// assigned to any Word1 bits by this class.
class ExportSwzWord1 {
  field bits<32> Word1;

  bits<3> sw_x;
  bits<3> sw_y;
  bits<3> sw_z;
  bits<3> sw_w;
  bits<1> eop;
  bits<8> inst;

  let Word1{2-0}  = sw_x;
  let Word1{5-3}  = sw_y;
  let Word1{8-6}  = sw_z;
  let Word1{11-9} = sw_w;
}
681
// Second 32-bit word of a buffer export. Only the array size and component
// mask (bits 15-0) are placed here; `eop` and `inst` are declared but not
// assigned to any Word1 bits by this class.
class ExportBufWord1 {
  field bits<32> Word1;

  bits<12> arraySize;
  bits<4>  compMask;
  bits<1>  eop;
  bits<8>  inst;

  let Word1{11-0}  = arraySize;
  let Word1{15-12} = compMask;
}
693
// Selection patterns that lower the pixel depth/stencil stores, the dummy
// store and the generic EXPORT node onto a swizzled export instruction.
//   ExportInst - instruction to emit.
//   cf_inst    - value passed as the instruction's $inst operand.
// The trailing 0 in every output pattern is the $eop operand.
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
  // Pixel depth: the scalar is inserted into sub0 of an undefined vector.
  def : Pat<
    (int_R600_store_pixel_depth R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 0, 7, 7, 7, cf_inst, 0)
  >;

  // Pixel stencil: same as depth but with the X and Y swizzles exchanged.
  def : Pat<
    (int_R600_store_pixel_stencil R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 7, 0, 7, 7, cf_inst, 0)
  >;

  // Dummy store with an arbitrary type immediate.
  def : Pat<
    (int_R600_store_dummy (i32 imm:$type)),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
  >;

  // Dummy store of type 1 uses array base 60.
  def : Pat<
    (int_R600_store_dummy 1),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
  >;

  // Generic export: operands are forwarded, with type and base swapped into
  // the instruction's operand order.
  def : Pat<
    (EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
            (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z),
            (i32 imm:$swz_w)),
    (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
                imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
  >;

}
724
// Selection patterns for int_R600_store_stream_output. The third intrinsic
// operand (0..3) picks which of the four buffer opcodes is emitted; every
// pattern uses type 0, array size 4095 and eop 0.
//   ExportInst        - buffer export instruction to emit.
//   buf0inst-buf3inst - opcode used for stream 0, 1, 2 and 3 respectively.
multiclass SteamOutputExportPattern<Instruction ExportInst,
    bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
  // Stream0
  def : Pat<
    (int_R600_store_stream_output (v4f32 R600_Reg128:$src),
        (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
    (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
        4095, imm:$mask, buf0inst, 0)>;

  // Stream1
  def : Pat<
    (int_R600_store_stream_output (v4f32 R600_Reg128:$src),
        (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
    (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
        4095, imm:$mask, buf1inst, 0)>;

  // Stream2
  def : Pat<
    (int_R600_store_stream_output (v4f32 R600_Reg128:$src),
        (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
    (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
        4095, imm:$mask, buf2inst, 0)>;

  // Stream3
  def : Pat<
    (int_R600_store_stream_output (v4f32 R600_Reg128:$src),
        (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
    (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
        4095, imm:$mask, buf3inst, 0)>;
}
748
// Export instructions should not be duplicated by the TailDuplication pass
// (which assumes that duplicable instructions are affected by the exec mask).
751let usesCustomInserter = 1, isNotDuplicable = 1 in {
752
// Swizzled export instruction: no results, a 128-bit source register and
// immediates for type, array base, the four swizzles, opcode and eop.
// elem_size is fixed to 3.
class ExportSwzInst : InstR600ISA<
    (outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
         i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w,
         i32imm:$inst, i32imm:$eop),
    !strconcat("EXPORT", " $gpr"),
    []>,
    ExportWord0, ExportSwzWord1 {
  let elem_size   = 3;
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
764
765} // End usesCustomInserter = 1
766
// Buffer export instruction: no results, a 128-bit source register and
// immediates for type, array base, array size, component mask, opcode and
// eop. elem_size is fixed to 0.
class ExportBufInst : InstR600ISA<
    (outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
         i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
    !strconcat("EXPORT", " $gpr"),
    []>,
    ExportWord0, ExportBufWord1 {
  let elem_size   = 0;
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
777
778//===----------------------------------------------------------------------===//
779// Control Flow Instructions
780//===----------------------------------------------------------------------===//
781
// First 32-bit word of a CF ALU clause instruction. All 32 bits are
// assigned.
class CF_ALU_WORD0 {
  field bits<32> Word0;

  bits<22> ADDR;
  bits<4>  KCACHE_BANK0;
  bits<4>  KCACHE_BANK1;
  bits<2>  KCACHE_MODE0;

  let Word0{21-0}  = ADDR;
  let Word0{25-22} = KCACHE_BANK0;
  let Word0{29-26} = KCACHE_BANK1;
  let Word0{31-30} = KCACHE_MODE0;
}
795
// Second 32-bit word of a CF ALU clause instruction. All 32 bits are
// assigned.
class CF_ALU_WORD1 {
  field bits<32> Word1;

  bits<2> KCACHE_MODE1;
  bits<8> KCACHE_ADDR0;
  bits<8> KCACHE_ADDR1;
  bits<7> COUNT;
  bits<1> ALT_CONST;
  bits<4> CF_INST;
  bits<1> WHOLE_QUAD_MODE;
  bits<1> BARRIER;

  let Word1{1-0}   = KCACHE_MODE1;
  let Word1{9-2}   = KCACHE_ADDR0;
  let Word1{17-10} = KCACHE_ADDR1;
  let Word1{24-18} = COUNT;
  let Word1{25}    = ALT_CONST;
  let Word1{29-26} = CF_INST;
  let Word1{30}    = WHOLE_QUAD_MODE;
  let Word1{31}    = BARRIER;
}
817
// Flag operand for the KCACHE mode fields; printed through printKCache and
// defaulting to 0.
def KCACHE : InstFlag <"printKCache">;
819
// CF instruction that introduces an ALU clause, encoded as
// CF_ALU_WORD0 + CF_ALU_WORD1.
//   inst   - 4-bit value assigned to CF_INST.
//   OpName - mnemonic used at the start of the asm string.
class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <
    (outs),
    (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
         KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
         i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
         i32imm:$COUNT),
    !strconcat(OpName, " $COUNT, @$ADDR, "
               "KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
    [] >,
    CF_ALU_WORD0, CF_ALU_WORD1 {
  field bits<64> Inst;

  // Fixed fields: BARRIER is always set, the other two always clear.
  let CF_INST         = inst;
  let ALT_CONST       = 0;
  let WHOLE_QUAD_MODE = 0;
  let BARRIER         = 1;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
838
// First 32-bit word of an R600 CF instruction: the whole word is the
// address.
class CF_WORD0_R600 {
  field bits<32> Word0;

  bits<32> ADDR;

  let Word0 = ADDR;
}
846
// Second 32-bit word of an R600 CF instruction. Bit 20 is not assigned by
// this class.
class CF_WORD1_R600 {
  field bits<32> Word1;

  bits<3> POP_COUNT;
  bits<5> CF_CONST;
  bits<2> COND;
  bits<3> COUNT;
  bits<6> CALL_COUNT;
  bits<1> COUNT_3;
  bits<1> END_OF_PROGRAM;
  bits<1> VALID_PIXEL_MODE;
  bits<7> CF_INST;
  bits<1> WHOLE_QUAD_MODE;
  bits<1> BARRIER;

  let Word1{2-0}   = POP_COUNT;
  let Word1{7-3}   = CF_CONST;
  let Word1{9-8}   = COND;
  let Word1{12-10} = COUNT;
  let Word1{18-13} = CALL_COUNT;
  let Word1{19}    = COUNT_3;
  let Word1{21}    = END_OF_PROGRAM;
  let Word1{22}    = VALID_PIXEL_MODE;
  let Word1{29-23} = CF_INST;
  let Word1{30}    = WHOLE_QUAD_MODE;
  let Word1{31}    = BARRIER;
}
874
// R600 CF instruction with no results, encoded as
// CF_WORD0_R600 + CF_WORD1_R600.
//   inst     - 7-bit value assigned to CF_INST.
//   ins      - input operand list.
//   AsmPrint - asm string.
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <
    (outs), ins, AsmPrint, [] >,
    CF_WORD0_R600, CF_WORD1_R600 {
  field bits<64> Inst;

  // Fixed fields: BARRIER is always set, everything else listed is cleared.
  let CF_INST          = inst;
  let BARRIER          = 1;
  let CF_CONST         = 0;
  let VALID_PIXEL_MODE = 0;
  let COND             = 0;
  let CALL_COUNT       = 0;
  let COUNT_3          = 0;
  let END_OF_PROGRAM   = 0;
  let WHOLE_QUAD_MODE  = 0;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
892
// First 32-bit word of an Evergreen CF instruction. Bits 31-27 are not
// assigned by this class.
class CF_WORD0_EG {
  field bits<32> Word0;

  bits<24> ADDR;
  bits<3>  JUMPTABLE_SEL;

  let Word0{23-0}  = ADDR;
  let Word0{26-24} = JUMPTABLE_SEL;
}
902
// Second 32-bit word of an Evergreen CF instruction. Bits 19-16 and bit 30
// are not assigned by this class.
class CF_WORD1_EG {
  field bits<32> Word1;

  bits<3> POP_COUNT;
  bits<5> CF_CONST;
  bits<2> COND;
  bits<6> COUNT;
  bits<1> VALID_PIXEL_MODE;
  bits<1> END_OF_PROGRAM;
  bits<8> CF_INST;
  bits<1> BARRIER;

  let Word1{2-0}   = POP_COUNT;
  let Word1{7-3}   = CF_CONST;
  let Word1{9-8}   = COND;
  let Word1{15-10} = COUNT;
  let Word1{20}    = VALID_PIXEL_MODE;
  let Word1{21}    = END_OF_PROGRAM;
  let Word1{29-22} = CF_INST;
  let Word1{31}    = BARRIER;
}
924
// Evergreen CF instruction with no results, encoded as
// CF_WORD0_EG + CF_WORD1_EG.
//   inst     - 8-bit value assigned to CF_INST.
//   ins      - input operand list.
//   AsmPrint - asm string.
class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <
    (outs), ins, AsmPrint, [] >,
    CF_WORD0_EG, CF_WORD1_EG {
  field bits<64> Inst;

  // Fixed fields: BARRIER is always set, everything else listed is cleared.
  let CF_INST          = inst;
  let BARRIER          = 1;
  let JUMPTABLE_SEL    = 0;
  let CF_CONST         = 0;
  let VALID_PIXEL_MODE = 0;
  let COND             = 0;
  let END_OF_PROGRAM   = 0;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
940
// ALU clause instructions with CF_INST 8 and 9.
def CF_ALU             : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
943
// Marker for the start of a fetch clause. It takes the clause address as an
// immediate and has an 8-bit encoding taken from the `num` field.
def FETCH_CLAUSE : AMDGPUInst <
    (outs),
    (ins i32imm:$addr),
    "Fetch clause starting at $addr:",
    [] > {
  field bits<8> Inst;
  bits<8> num;
  let Inst = num;
}

// Marker for the start of an ALU clause; same shape as FETCH_CLAUSE.
def ALU_CLAUSE : AMDGPUInst <
    (outs),
    (ins i32imm:$addr),
    "ALU clause starting at $addr:",
    [] > {
  field bits<8> Inst;
  bits<8> num;
  let Inst = num;
}
957
// A pair of 32-bit literals packed into one 64-bit encoding: literal1 in the
// low word and literal2 in the high word.
def LITERALS : AMDGPUInst <
    (outs),
    (ins LITERAL:$literal1, LITERAL:$literal2),
    "$literal1, $literal2",
    [] > {
  field bits<64> Inst;
  bits<32> literal1;
  bits<32> literal2;

  let Inst{31-0}  = literal1;
  let Inst{63-32} = literal2;
}

// Operand-less instruction with a 64-bit encoding field whose bits are not
// assigned here.
def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
  field bits<64> Inst;
}
971
972let Predicates = [isR600toCayman] in {
973
974//===----------------------------------------------------------------------===//
975// Common Instructions R600, R700, Evergreen, Cayman
976//===----------------------------------------------------------------------===//
977
// Floating-point two-operand ALU instructions (opcodes 0x0-0x4).
def ADD      : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL      : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
def MAX      : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
def MIN      : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
984
985// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
986// so some of the instruction names don't match the asm string.
987// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
// Float compares producing a float result: selectcc of FP_ONE / FP_ZERO on
// the given condition code.
def SETE : R600_2OP <0x08, "SETE",
  [(set f32:$dst,
        (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
>;

def SGT : R600_2OP <0x09, "SETGT",
  [(set f32:$dst,
        (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
>;

def SGE : R600_2OP <0xA, "SETGE",
  [(set f32:$dst,
        (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
>;

def SNE : R600_2OP <0xB, "SETNE",
  [(set f32:$dst,
        (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
>;
1007
// Float compares producing an integer result: selectcc of -1 / 0 on the
// given condition code.
def SETE_DX10 : R600_2OP <0xC, "SETE_DX10",
  [(set i32:$dst,
        (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
>;

def SETGT_DX10 : R600_2OP <0xD, "SETGT_DX10",
  [(set i32:$dst,
        (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
>;

def SETGE_DX10 : R600_2OP <0xE, "SETGE_DX10",
  [(set i32:$dst,
        (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
>;

def SETNE_DX10 : R600_2OP <0xF, "SETNE_DX10",
  [(set i32:$dst,
        (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
>;
1027
// One-operand float instructions (opcodes 0x10-0x14), each selected from the
// named node.
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL  : R600_1OP_Helper <0x12, "CEIL",  fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;

// Register move; it has no selection pattern.
def MOV : R600_1OP <0x19, "MOV", []>;
1035
// Pseudo instruction class taking a single immediate operand and writing an
// R600_Reg32 destination.  It has an empty asm string and no pattern of its
// own, and is flagged usesCustomInserter.
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {

class MOV_IMM <ValueType vt, Operand immType>
    : AMDGPUInst <(outs R600_Reg32:$dst), (ins immType:$imm), "", []>;

} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1

// Integer immediates select to MOV_IMM_I32.
def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat<(imm:$val), (MOV_IMM_I32 imm:$val)>;

// Floating-point immediates select to MOV_IMM_F32.
def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat<(fpimm:$val), (MOV_IMM_F32  fpimm:$val)>;
1058
// Predicate-set instructions; none of them has a selection pattern.
def PRED_SETE  : R600_2OP<0x20, "PRED_SETE", []>;
def PRED_SETGT : R600_2OP<0x21, "PRED_SETGT", []>;
def PRED_SETGE : R600_2OP<0x22, "PRED_SETGE", []>;
def PRED_SETNE : R600_2OP<0x23, "PRED_SETNE", []>;

// KILLGT is declared with hasSideEffects set and no pattern.
let hasSideEffects = 1 in {
  def KILLGT : R600_2OP<0x2D, "KILLGT", []>;
} // end hasSideEffects
1069
// Integer bitwise operations.
def AND_INT  : R600_2OP_Helper<0x30, "AND_INT", and>;
def OR_INT   : R600_2OP_Helper<0x31, "OR_INT", or>;
def XOR_INT  : R600_2OP_Helper<0x32, "XOR_INT", xor>;
def NOT_INT  : R600_1OP_Helper<0x33, "NOT_INT", not>;

// Integer add / subtract.
def ADD_INT  : R600_2OP_Helper<0x34, "ADD_INT", add>;
def SUB_INT  : R600_2OP_Helper<0x35, "SUB_INT", sub>;

// Signed and unsigned integer min / max.
def MAX_INT  : R600_2OP_Helper<0x36, "MAX_INT", AMDGPUsmax>;
def MIN_INT  : R600_2OP_Helper<0x37, "MIN_INT", AMDGPUsmin>;
def MAX_UINT : R600_2OP_Helper<0x38, "MAX_UINT", AMDGPUumax>;
def MIN_UINT : R600_2OP_Helper<0x39, "MIN_UINT", AMDGPUumin>;
1080
// Integer compares.  Every pattern is a selectcc over two i32 operands that
// yields -1 when the condition holds and 0 otherwise.
def SETE_INT : R600_2OP<0x3A, "SETE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]>;

def SETGT_INT : R600_2OP<0x3B, "SETGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]>;

def SETGE_INT : R600_2OP<0x3C, "SETGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]>;

def SETNE_INT : R600_2OP<0x3D, "SETNE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]>;

// Unsigned variants use the SETUGT / SETUGE condition codes.
def SETGT_UINT : R600_2OP<0x3E, "SETGT_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]>;

def SETGE_UINT : R600_2OP<0x3F, "SETGE_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]>;
1110
// Integer predicate-set instructions; none of them has a selection pattern.
// The assembly string of every record matches its def name.  PRED_SETGT_INT
// previously printed as "PRED_SETGE_INT", which made instruction values 0x43
// and 0x44 indistinguishable in printed assembly.
def PRED_SETE_INT  : R600_2OP <0x42, "PRED_SETE_INT", []>;
def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
1115
// Integer conditional moves: $src0 is compared against 0 and the result is
// $src1 when the condition holds, $src2 otherwise.
def CNDE_INT : R600_3OP<0x1C, "CNDE_INT",
  [(set i32:$dst,
        (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]>;

def CNDGE_INT : R600_3OP<0x1E, "CNDGE_INT",
  [(set i32:$dst,
        (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]>;

def CNDGT_INT : R600_3OP<0x1D, "CNDGT_INT",
  [(set i32:$dst,
        (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]>;
1130
1131//===----------------------------------------------------------------------===//
1132// Texture instructions
1133//===----------------------------------------------------------------------===//
1134
// TEX_LD is selected from int_AMDGPU_txf.  Unlike the other texture records
// it takes three extra immediate offsets, so it overrides both the asm string
// and the input operand list.
def TEX_LD : R600_TEX<0x03, "TEX_LD",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txf v4f32:$SRC_GPR,
                        imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z,
                        imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]> {
  let InOperandList = (ins R600_Reg128:$SRC_GPR,
                           i32imm:$OFFSET_X, i32imm:$OFFSET_Y, i32imm:$OFFSET_Z,
                           i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
                           i32imm:$textureTarget);
  let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
                  "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
}
1147
// Texture query instructions, selected from the txq / ddx / ddy intrinsics.
def TEX_GET_TEXTURE_RESINFO : R600_TEX<0x04, "TEX_GET_TEXTURE_RESINFO",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txq v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

def TEX_GET_GRADIENTS_H : R600_TEX<0x07, "TEX_GET_GRADIENTS_H",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_ddx v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

def TEX_GET_GRADIENTS_V : R600_TEX<0x08, "TEX_GET_GRADIENTS_V",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_ddy v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;
1165
// Gradient-setting texture instructions; declared without selection patterns.
def TEX_SET_GRADIENTS_H : R600_TEX<0x0B, "TEX_SET_GRADIENTS_H", []>;
def TEX_SET_GRADIENTS_V : R600_TEX<0x0C, "TEX_SET_GRADIENTS_V", []>;
1175
// Texture sampling instructions.  Each intrinsic (tex, txl, txb) has two
// records: the plain one matches any immediate texture target, the "_C" one
// matches only a TEX_SHADOW texture target.
def TEX_SAMPLE : R600_TEX<0x10, "TEX_SAMPLE",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_tex v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

def TEX_SAMPLE_C : R600_TEX<0x18, "TEX_SAMPLE_C",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_tex v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        TEX_SHADOW:$textureTarget))]>;

def TEX_SAMPLE_L : R600_TEX<0x11, "TEX_SAMPLE_L",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txl v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

def TEX_SAMPLE_C_L : R600_TEX<0x19, "TEX_SAMPLE_C_L",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txl v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        TEX_SHADOW:$textureTarget))]>;

def TEX_SAMPLE_LB : R600_TEX<0x12, "TEX_SAMPLE_LB",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txb v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

def TEX_SAMPLE_C_LB : R600_TEX<0x1A, "TEX_SAMPLE_C_LB",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txb v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        TEX_SHADOW:$textureTarget))]>;
1211
// TEX_SAMPLE_G and TEX_SAMPLE_C_G are declared without selection patterns.
def TEX_SAMPLE_G   : R600_TEX<0x14, "TEX_SAMPLE_G", []>;
def TEX_SAMPLE_C_G : R600_TEX<0x1C, "TEX_SAMPLE_C_G", []>;
1221
1222//===----------------------------------------------------------------------===//
1223// Helper classes for common instructions
1224//===----------------------------------------------------------------------===//
1225
// Three-operand helper classes.  The instruction value is a parameter so the
// same definition can be instantiated once per hardware generation.

// MUL_LIT and MULADD carry no selection pattern.
class MUL_LIT_Common <bits<5> inst> : R600_3OP<inst, "MUL_LIT", []>;

class MULADD_Common <bits<5> inst> : R600_3OP<inst, "MULADD", []>;

// MULADD_IEEE matches fadd(fmul(src0, src1), src2).
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP<inst, "MULADD_IEEE",
  [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]>;

// Float conditional moves: $src0 is compared against FP_ZERO; the result is
// $src1 when the condition holds and $src2 otherwise.
class CNDE_Common <bits<5> inst> : R600_3OP<inst, "CNDE",
  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]>;

class CNDGT_Common <bits<5> inst> : R600_3OP<inst, "CNDGT",
  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]>;

class CNDGE_Common <bits<5> inst> : R600_3OP<inst, "CNDGE",
  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]>;
1255
// DOT4 comes as a pair of records sharing one instruction value:
//   <name>_pseudo - an R600_REDUCTION over two 128-bit registers that carries
//                   the int_AMDGPU_dp4 selection pattern;
//   <name>_real   - a pattern-less R600_2OP.
multiclass DOT4_Common <bits<11> inst> {

  def _pseudo : R600_REDUCTION<
    inst,
    (ins R600_Reg128:$src0, R600_Reg128:$src1),
    "DOT4 $dst $src0, $src1",
    [(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))]>;

  def _real : R600_2OP<inst, "DOT4", []>;
}
1266
// CUBE comes as a pair of records:
//   <name>_pseudo - a VecALU pseudo over 128-bit registers that carries the
//                   int_AMDGPU_cube selection pattern;
//   <name>_real   - a pattern-less R600_2OP using the given instruction value.
// Both are declared as not loading, not storing and free of side effects.
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
multiclass CUBE_Common <bits<11> inst> {

  def _pseudo : InstR600<(outs R600_Reg128:$dst),
                         (ins R600_Reg128:$src),
                         "CUBE $dst $src",
                         [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))],
                         VecALU> {
    let isPseudo = 1;
  }

  def _real : R600_2OP<inst, "CUBE", []>;
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
1283
// One-operand helper classes that set TransOnly and use the TransALU
// itinerary.

// Selected from fexp2.
class EXP_IEEE_Common <bits<11> inst>
    : R600_1OP_Helper<inst, "EXP_IEEE", fexp2> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Float <-> signed integer conversions.
class FLT_TO_INT_Common <bits<11> inst>
    : R600_1OP_Helper<inst, "FLT_TO_INT", fp_to_sint> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class INT_TO_FLT_Common <bits<11> inst>
    : R600_1OP_Helper<inst, "INT_TO_FLT", sint_to_fp> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Float <-> unsigned integer conversions.
class FLT_TO_UINT_Common <bits<11> inst>
    : R600_1OP_Helper<inst, "FLT_TO_UINT", fp_to_uint> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class UINT_TO_FLT_Common <bits<11> inst>
    : R600_1OP_Helper<inst, "UINT_TO_FLT", uint_to_fp> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}
1318
// LOG_CLAMPED has no selection pattern and does not override TransOnly or
// the itinerary.
class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP<inst, "LOG_CLAMPED", []>;

// LOG_IEEE is selected from flog2; it sets TransOnly and uses TransALU.
class LOG_IEEE_Common <bits<11> inst>
    : R600_1OP_Helper<inst, "LOG_IEEE", flog2> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}
1329
// Shift helper classes, selected from shl / srl / sra.
class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;

// Integer multiply helper classes.  All of them set TransOnly and use the
// TransALU itinerary.
class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULHI_INT", mulhs
> {
  let TransOnly = 1;
  let Itinerary = TransALU;
}

// The assembly string is "MULHI_UINT" so that it matches the class name and
// the naming of the sibling MULHI_INT / MULLO_UINT records; it previously
// printed as the ambiguous "MULHI".
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULHI_UINT", mulhu
> {
  let TransOnly = 1;
  let Itinerary = TransALU;
}

class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULLO_INT", mul
> {
  let TransOnly = 1;
  let Itinerary = TransALU;
}

// MULLO_UINT carries no selection pattern.
class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
  let TransOnly = 1;
  let Itinerary = TransALU;
}
1355
// Reciprocal helper classes.  All of them set TransOnly and use the TransALU
// itinerary.

// No selection pattern.
class RECIP_CLAMPED_Common <bits<11> inst>
    : R600_1OP<inst, "RECIP_CLAMPED", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Matches fdiv(FP_ONE, src0).
class RECIP_IEEE_Common <bits<11> inst>
    : R600_1OP<inst, "RECIP_IEEE",
               [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected from AMDGPUurecip.
class RECIP_UINT_Common <bits<11> inst>
    : R600_1OP_Helper<inst, "RECIP_UINT", AMDGPUurecip> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected from int_AMDGPU_rsq.
class RECIPSQRT_CLAMPED_Common <bits<11> inst>
    : R600_1OP_Helper<inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// No selection pattern.
class RECIPSQRT_IEEE_Common <bits<11> inst>
    : R600_1OP<inst, "RECIPSQRT_IEEE", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}
1390
// SIN / COS helper classes.  They carry no selection pattern, set both the
// Trig and TransOnly flags, and use the TransALU itinerary.
class SIN_Common <bits<11> inst> : R600_1OP<inst, "SIN", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
  let Trig = 1;
}

class COS_Common <bits<11> inst> : R600_1OP<inst, "COS", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
  let Trig = 1;
}
1404
1405//===----------------------------------------------------------------------===//
1406// Helper patterns for complex intrinsics
1407//===----------------------------------------------------------------------===//
1408
// Division patterns, parameterized on the RECIP_IEEE instruction to use.
// Both int_AMDGPU_div and fdiv on f32 are rewritten as
// src0 * recip_ieee(src1) using MUL_IEEE.
multiclass DIV_Common <InstR600 recip_ieee> {
  def : Pat<(int_AMDGPU_div f32:$src0, f32:$src1),
            (MUL_IEEE $src0, (recip_ieee $src1))>;

  def : Pat<(fdiv f32:$src0, f32:$src1),
            (MUL_IEEE $src0, (recip_ieee $src1))>;
}
1420
// Expansion of int_TGSI_lit_z, parameterized on the MUL_LIT, LOG_CLAMPED and
// EXP_IEEE instructions to use:
//   exp_ieee(mul_lit(log_clamped(MAX(src_y, ZERO)), src_w, src_x))
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped,
                         InstR600 exp_ieee>
    : Pat<(int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
          (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))),
                             $src_w, $src_x))>;
1426
1427//===----------------------------------------------------------------------===//
1428// R600 / R700 Instructions
1429//===----------------------------------------------------------------------===//
1430
// Everything in this scope is guarded by the isR600 predicate.
let Predicates = [isR600] in {

  // ALU instructions instantiated from the *_Common helpers.
  def MUL_LIT_r600           : MUL_LIT_Common<0x0C>;
  def MULADD_r600            : MULADD_Common<0x10>;
  def MULADD_IEEE_r600       : MULADD_IEEE_Common<0x14>;
  def CNDE_r600              : CNDE_Common<0x18>;
  def CNDGT_r600             : CNDGT_Common<0x19>;
  def CNDGE_r600             : CNDGE_Common<0x1A>;
  defm DOT4_r600             : DOT4_Common<0x50>;
  defm CUBE_r600             : CUBE_Common<0x52>;
  def EXP_IEEE_r600          : EXP_IEEE_Common<0x61>;
  def LOG_CLAMPED_r600       : LOG_CLAMPED_Common<0x62>;
  def LOG_IEEE_r600          : LOG_IEEE_Common<0x63>;
  def RECIP_CLAMPED_r600     : RECIP_CLAMPED_Common<0x64>;
  def RECIP_IEEE_r600        : RECIP_IEEE_Common<0x66>;
  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  def RECIPSQRT_IEEE_r600    : RECIPSQRT_IEEE_Common<0x69>;
  def FLT_TO_INT_r600        : FLT_TO_INT_Common<0x6b>;
  def INT_TO_FLT_r600        : INT_TO_FLT_Common<0x6c>;
  def FLT_TO_UINT_r600       : FLT_TO_UINT_Common<0x79>;
  def UINT_TO_FLT_r600       : UINT_TO_FLT_Common<0x6d>;
  def SIN_r600               : SIN_Common<0x6E>;
  def COS_r600               : COS_Common<0x6F>;
  def ASHR_r600              : ASHR_Common<0x70>;
  def LSHR_r600              : LSHR_Common<0x71>;
  def LSHL_r600              : LSHL_Common<0x72>;
  def MULLO_INT_r600         : MULLO_INT_Common<0x73>;
  def MULHI_INT_r600         : MULHI_INT_Common<0x74>;
  def MULLO_UINT_r600        : MULLO_UINT_Common<0x75>;
  def MULHI_UINT_r600        : MULHI_UINT_Common<0x76>;
  def RECIP_UINT_r600        : RECIP_UINT_Common <0x78>;

  // Patterns built from the instructions above.
  defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
  def TGSI_LIT_Z_r600
      : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

  // fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
  def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;

  // Export instructions: Word1 field placement used under isR600.
  def R600_ExportSwz : ExportSwzInst {
    let Word1{20-17} = 0;    // BURST_COUNT
    let Word1{21}    = eop;
    let Word1{22}    = 1;    // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31}    = 1;    // BARRIER
  }
  defm : ExportPattern<R600_ExportSwz, 39>;

  def R600_ExportBuf : ExportBufInst {
    let Word1{20-17} = 0;    // BURST_COUNT
    let Word1{21}    = eop;
    let Word1{22}    = 1;    // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31}    = 1;    // BARRIER
  }
  defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;

  // Control-flow clause instructions.  Fields that are not operands of a
  // given record are fixed to 0.
  def CF_TC_R600
      : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
                       "TEX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }

  def CF_VC_R600
      : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
                       "VTX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }

  def WHILE_LOOP_R600
      : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }

  def END_LOOP_R600
      : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }

  def LOOP_BREAK_R600
      : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }

  def CF_CONTINUE_R600
      : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }

  def CF_JUMP_R600
      : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                       "JUMP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }

  def CF_ELSE_R600
      : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                       "ELSE @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }

  def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
    let ADDR = 0;
    let COUNT = 0;
    let POP_COUNT = 0;
  }

  def POP_R600
      : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                       "POP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }

  // CF_END additionally sets END_OF_PROGRAM.
  def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
    let COUNT = 0;
    let POP_COUNT = 0;
    let ADDR = 0;
    let END_OF_PROGRAM = 1;
  }

}
1539
// Helper patterns for normalizing inputs to trigonometric instructions for
// R700+ cards.  The operand is multiplied by CONST.TWO_PI_INV (materialized
// through MOV_IMM_I32) with MUL_IEEE before being handed to the given trig
// instruction.
class COS_PAT <InstR600 trig>
    : Pat<(fcos f32:$src),
          (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))>;

class SIN_PAT <InstR600 trig>
    : Pat<(fsin f32:$src),
          (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))>;
1551
1552//===----------------------------------------------------------------------===//
1553// R700 Only instructions
1554//===----------------------------------------------------------------------===//
1555
// Records guarded by the isR700 predicate.
let Predicates = [isR700] in {
  def SIN_r700 : SIN_Common<0x6E>;
  def COS_r700 : COS_Common<0x6F>;

  // R700 normalizes inputs to SIN/COS the same as EG
  def : SIN_PAT<SIN_r700>;
  def : COS_PAT<COS_r700>;
}
1564
1565//===----------------------------------------------------------------------===//
1566// Evergreen Only instructions
1567//===----------------------------------------------------------------------===//
1568
// Records guarded by the isEG predicate.
let Predicates = [isEG] in {

// Reciprocal and the division patterns built on it.
def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;

// Transcendental-unit instructions instantiated from the *_Common helpers.
def MULLO_INT_eg         : MULLO_INT_Common<0x8F>;
def MULHI_INT_eg         : MULHI_INT_Common<0x90>;
def MULLO_UINT_eg        : MULLO_UINT_Common<0x91>;
def MULHI_UINT_eg        : MULHI_UINT_Common<0x92>;
def RECIP_UINT_eg        : RECIP_UINT_Common<0x94>;
def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_eg          : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg          : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg     : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg    : RECIPSQRT_IEEE_Common<0x89>;
def SIN_eg               : SIN_Common<0x8D>;
def COS_eg               : COS_Common<0x8E>;

// Patterns built from the instructions above; fsqrt(x) is selected as
// x * RECIPSQRT_CLAMPED(x).
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
def : SIN_PAT <SIN_eg>;
def : COS_PAT <COS_eg>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
} // End Predicates = [isEG]
1592
1593//===----------------------------------------------------------------------===//
1594// Evergreen / Cayman Instructions
1595//===----------------------------------------------------------------------===//
1596
1597let Predicates = [isEGorCayman] in {
1598
// BFE_UINT - bit_extract, an optimization for mask and shift
// Src0 = Input
// Src1 = Offset
// Src2 = Width
//
// bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
//
// Example Usage:
// (Offset, Width)
//
// (0, 8)           = (Input << 24) >> 24  = (Input &  0xff)       >> 0
// (8, 8)           = (Input << 16) >> 24  = (Input &  0xffff)     >> 8
// (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16
// (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24
//
// Selected from int_AMDIL_bit_extract_u32 and additionally through BFEPattern.
def BFE_UINT_eg : R600_3OP<
  0x4, "BFE_UINT",
  [(set i32:$dst,
        (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1, i32:$src2))],
  VecALU>;
def : BFEPattern<BFE_UINT_eg>;
1619
// BFI_INT has no pattern of its own; its patterns come from BFIPatterns.
def BFI_INT_eg : R600_3OP<0x06, "BFI_INT", [], VecALU>;
defm : BFIPatterns<BFI_INT_eg>;

// BIT_ALIGN_INT is selected from AMDGPUbitalign.
def BIT_ALIGN_INT_eg : R600_3OP<
  0xC, "BIT_ALIGN_INT",
  [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))],
  VecALU>;
1627
// ALU instructions instantiated from the *_Common helpers.
def MULADD_eg      : MULADD_Common<0x14>;
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
def ASHR_eg        : ASHR_Common<0x15>;
def LSHR_eg        : LSHR_Common<0x16>;
def LSHL_eg        : LSHL_Common<0x17>;
def CNDE_eg        : CNDE_Common<0x19>;
def CNDGT_eg       : CNDGT_Common<0x1A>;
def CNDGE_eg       : CNDGE_Common<0x1B>;
def MUL_LIT_eg     : MUL_LIT_Common<0x1F>;
def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
defm DOT4_eg       : DOT4_Common<0xBE>;
defm CUBE_eg       : CUBE_Common<0xC0>;

// MOVA_INT has no pattern and is marked as having side effects.
let hasSideEffects = 1 in {
  def MOVA_INT_eg : R600_1OP<0xCC, "MOVA_INT", []>;
}

def TGSI_LIT_Z_eg
    : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
1646
// Conversion instructions.  The two float-to-integer records clear the
// inherited pattern; they are selected through the explicit TRUNC patterns
// further down instead.
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
  let Pattern = [];
}

def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;

def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
  let Pattern = [];
}

def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;

// TRUNC is used for the FLT_TO_INT instructions to work around a
// perceived problem where the rounding modes are applied differently
// depending on the instruction and the slot they are in.
// See:
// https://bugs.freedesktop.org/show_bug.cgi?id=50232
// Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
//
// XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
// which do not need to be truncated since the fp values are 0.0f or 1.0f.
// We should look into handling these cases separately.
def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;

// SHA-256 Patterns
def : SHA256MaPattern<BFI_INT_eg, XOR_INT>;
1675
// Export instructions: Word1 field placement used under isEGorCayman.
def EG_ExportSwz : ExportSwzInst {
  let Word1{19-16} = 0;    // BURST_COUNT
  let Word1{20}    = 1;    // VALID_PIXEL_MODE
  let Word1{21}    = eop;
  let Word1{29-22} = inst;
  let Word1{30}    = 0;    // MARK
  let Word1{31}    = 1;    // BARRIER
}
defm : ExportPattern<EG_ExportSwz, 83>;

def EG_ExportBuf : ExportBufInst {
  let Word1{19-16} = 0;    // BURST_COUNT
  let Word1{20}    = 1;    // VALID_PIXEL_MODE
  let Word1{21}    = eop;
  let Word1{29-22} = inst;
  let Word1{30}    = 0;    // MARK
  let Word1{31}    = 1;    // BARRIER
}
defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
1695
// Control-flow clause instructions.  Fields that are not operands of a given
// record are fixed to 0.
def CF_TC_EG
    : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
                   "TEX $COUNT @$ADDR"> {
  let POP_COUNT = 0;
}

def CF_VC_EG
    : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
                   "VTX $COUNT @$ADDR"> {
  let POP_COUNT = 0;
}

def WHILE_LOOP_EG
    : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
  let POP_COUNT = 0;
  let COUNT = 0;
}

def END_LOOP_EG
    : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
  let POP_COUNT = 0;
  let COUNT = 0;
}

def LOOP_BREAK_EG
    : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
  let POP_COUNT = 0;
  let COUNT = 0;
}

def CF_CONTINUE_EG
    : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
  let POP_COUNT = 0;
  let COUNT = 0;
}

def CF_JUMP_EG
    : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                   "JUMP @$ADDR POP:$POP_COUNT"> {
  let COUNT = 0;
}

def CF_ELSE_EG
    : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                   "ELSE @$ADDR POP:$POP_COUNT"> {
  let COUNT = 0;
}

def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
  let ADDR = 0;
  let COUNT = 0;
  let POP_COUNT = 0;
}

def POP_EG
    : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                   "POP @$ADDR POP:$POP_COUNT"> {
  let COUNT = 0;
}

// CF_END additionally sets END_OF_PROGRAM.
def CF_END_EG :  CF_CLAUSE_EG<0, (ins), "CF_END"> {
  let COUNT = 0;
  let POP_COUNT = 0;
  let ADDR = 0;
  let END_OF_PROGRAM = 1;
}
1746
1747//===----------------------------------------------------------------------===//
1748// Memory read/write instructions
1749//===----------------------------------------------------------------------===//
// Base class for the cacheless RAT write instructions.
//   ins       - input operand list
//   comp_mask - value assigned to COMP_MASK
//   name      - mnemonic; " $rw_gpr, $index_gpr, $eop" is appended to form
//               the asm string
//   pattern   - selection pattern
// The class is flagged usesCustomInserter and has no outputs.
let usesCustomInserter = 1 in {

class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
                              list<dag> pattern>
    : EG_CF_RAT<0x57, 0x2, 0, (outs), ins,
                !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
  let COMP_MASK   = comp_mask;

  // XXX: Have a separate instruction for non-indexed writes.
  let TYPE        = 1;

  // The remaining fields are fixed for every instance.
  let RIM         = 0;
  let RW_REL      = 0;
  let ELEM_SIZE   = 0;
  let ARRAY_SIZE  = 0;
  let BURST_COUNT = 0;
  let VPM         = 0;
  let MARK        = 0;
  let BARRIER     = 1;
}

} // End usesCustomInserter = 1
1771
// 32-bit store: matches a global_store of an i32 value, COMP_MASK = 0x1.
def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg<
  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  0x1,
  "RAT_WRITE_CACHELESS_32_eg",
  [(global_store i32:$rw_gpr, i32:$index_gpr)]>;

// 128-bit store: matches a global_store of a v4i32 value, COMP_MASK = 0xf.
def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg<
  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  0xf,
  "RAT_WRITE_CACHELESS_128",
  [(global_store v4i32:$rw_gpr, i32:$index_gpr)]>;
1785
// Common base class for the VTX_READ_* fetch instructions.
//   name      - mnemonic; the asm string is name#" $dst, $ptr"
//   buffer_id - value assigned to BUFFER_ID
//   outs      - output operand list
//   pattern   - selection pattern
// The single input operand is a MEMxi $ptr.
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    : InstR600ISA<outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
      VTX_WORD1_GPR, VTX_WORD0 {

  // Flag the record as a vertex-fetch instruction (TSFlags{12} in InstR600).
  let VTXInst = 1;

  // Static fields
  let VC_INST          = 0;
  let FETCH_TYPE       = 2;
  let FETCH_WHOLE_QUAD = 0;
  let BUFFER_ID        = buffer_id;
  let SRC_REL          = 0;
  // XXX: We can infer this field based on the SRC_GPR.  This would allow us
  // to store vertex addresses in any channel, not just X.
  let SRC_SEL_X        = 0;
  let DST_REL          = 0;
  // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
  // however, based on my testing if USE_CONST_FIELDS is set, then all
  // these fields need to be set to 0.
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL   = 1;
  let FORMAT_COMP_ALL  = 0;
  let SRF_MODE_ALL     = 0;

  // The low 64 bits of the encoding are Word0 followed by Word1.
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;

  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2>  ENDIAN_SWAP = 0;
  // bits<1>  CONST_BUF_NO_STRIDE = 0;
  // bits<1>  MEGA_FETCH = 0;
  // bits<1>  ALT_CONST = 0;
  // bits<2>  BUFFER_INDEX_MODE = 0;

  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter)
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82}    = CONST_BUF_NO_STRIDE;
  // Inst{83}    = MEGA_FETCH;
  // Inst{84}    = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;
}
1840
// Single-channel vertex reads.  Each class writes an R600_TReg32_X
// destination, selects channel 0 for X and masks Y, Z and W (selector 7).
// They differ in MEGA_FETCH_COUNT and DATA_FORMAT.

class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg<"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
                  pattern> {
  let DATA_FORMAT      = 1;   // FMT_8
  let MEGA_FETCH_COUNT = 1;
  let DST_SEL_X        = 0;
  let DST_SEL_Y        = 7;   // Masked
  let DST_SEL_Z        = 7;   // Masked
  let DST_SEL_W        = 7;   // Masked
}

class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg<"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
                  pattern> {
  let DATA_FORMAT      = 5;   // FMT_16
  let MEGA_FETCH_COUNT = 2;
  let DST_SEL_X        = 0;
  let DST_SEL_Y        = 7;   // Masked
  let DST_SEL_Z        = 7;   // Masked
  let DST_SEL_W        = 7;   // Masked
}

class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg<"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
                  pattern> {
  let DATA_FORMAT      = 0xD; // COLOR_32
  let MEGA_FETCH_COUNT = 4;
  let DST_SEL_X        = 0;
  let DST_SEL_Y        = 7;   // Masked
  let DST_SEL_Z        = 7;   // Masked
  let DST_SEL_W        = 7;   // Masked

  // This is not really necessary, but there were some GPU hangs that appeared
  // to be caused by ALU instructions in the next instruction group that wrote
  // to the $ptr registers of the VTX_READ.
  // e.g.
  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
  // %T2_X<def> = MOV %ZERO
  // Adding this constraint prevents this from happening.
  let Constraints = "$ptr.ptr = $dst";
}
1885
// Four-channel vertex read: writes an R600_Reg128 destination with the
// destination selectors set to 0, 1, 2, 3 for X, Y, Z, W.
class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg<"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
                  pattern> {
  let DATA_FORMAT      = 0x22; // COLOR_32_32_32_32
  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X        = 0;
  let DST_SEL_Y        = 1;
  let DST_SEL_Z        = 2;
  let DST_SEL_W        = 3;

  // XXX: Need to force VTX_READ_128 instructions to write to the same register
  // that holds its buffer address to avoid potential hangs.  We can't use
  // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
  // registers are different sizes.
}
1902
1903//===----------------------------------------------------------------------===//
1904// VTX Read from parameter memory space
1905//===----------------------------------------------------------------------===//
1906
// Parameter-space reads; all of them use buffer id 0.

// 8-bit and 16-bit reads are selected from the zero-extending param loads.
def VTX_READ_PARAM_8_eg
    : VTX_READ_8_eg<0, [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))]>;

def VTX_READ_PARAM_16_eg
    : VTX_READ_16_eg<0, [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))]>;

// 32-bit and 128-bit reads are selected from load_param.
def VTX_READ_PARAM_32_eg
    : VTX_READ_32_eg<0, [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))]>;

def VTX_READ_PARAM_128_eg
    : VTX_READ_128_eg<0, [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))]>;
1922
1923//===----------------------------------------------------------------------===//
1924// VTX Read from global memory space
1925//===----------------------------------------------------------------------===//
1926
1927// 8-bit reads
// Global-space reads; all of them use buffer id 1.

// 8-bit reads
def VTX_READ_GLOBAL_8_eg
    : VTX_READ_8_eg<1, [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))]>;

// 32-bit reads
def VTX_READ_GLOBAL_32_eg
    : VTX_READ_32_eg<1, [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))]>;

// 128-bit reads
def VTX_READ_GLOBAL_128_eg
    : VTX_READ_128_eg<1, [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))]>;
1941
1942//===----------------------------------------------------------------------===//
1943// Constant Loads
1944// XXX: We are currently storing all constants in the global address space.
1945//===----------------------------------------------------------------------===//
1946
// 32-bit constant load; uses buffer id 1, the same id as the global reads.
def CONSTANT_LOAD_eg
    : VTX_READ_32_eg<1, [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))]>;
1950
1951}
1952
1953//===----------------------------------------------------------------------===//
// Register loads and stores - for indirect addressing
1955//===----------------------------------------------------------------------===//
1956
// Instantiate the RegisterLoadStore multiclass for R600_Reg32 registers, with
// FRAMEri and ADDRIndirect as its remaining arguments.
defm R600_ : RegisterLoadStore<R600_Reg32, FRAMEri, ADDRIndirect>;
1958
// Records guarded by the isCayman predicate.
let Predicates = [isCayman] in {

  // These instructions are flagged isVector, i.e. they must fill all slots in
  // an instruction group (see the isVector comment in InstR600).
  let isVector = 1 in {

    def RECIP_IEEE_cm        : RECIP_IEEE_Common<0x86>;

    def MULLO_INT_cm         : MULLO_INT_Common<0x8F>;
    def MULHI_INT_cm         : MULHI_INT_Common<0x90>;
    def MULLO_UINT_cm        : MULLO_UINT_Common<0x91>;
    def MULHI_UINT_cm        : MULHI_UINT_Common<0x92>;
    def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
    def EXP_IEEE_cm          : EXP_IEEE_Common<0x81>;
    def LOG_IEEE_cm          : LOG_IEEE_Common<0x83>;
    def RECIP_CLAMPED_cm     : RECIP_CLAMPED_Common<0x84>;
    def RECIPSQRT_IEEE_cm    : RECIPSQRT_IEEE_Common<0x89>;
    def SIN_cm               : SIN_Common<0x8D>;
    def COS_cm               : COS_Common<0x8E>;
  } // End isVector = 1

  // Patterns built from the instructions above.
  def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
  def : SIN_PAT <SIN_cm>;
  def : COS_PAT <COS_cm>;

  defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;

  // RECIP_UINT emulation for Cayman
  // The multiplication scales from [0,1] to the unsigned integer range
  def : Pat<(AMDGPUurecip i32:$src0),
            (FLT_TO_UINT_eg
              (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
                        (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))>;

  // CF_END for Cayman is built on CF_CLAUSE_EG with instruction value 32 and
  // all remaining fields zeroed.
  def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
    let ADDR = 0;
    let POP_COUNT = 0;
    let COUNT = 0;
  }

  // fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
  def : Pat<(fsqrt f32:$src),
            (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;

} // End isCayman
2001
2002//===----------------------------------------------------------------------===//
2003// Branch Instructions
2004//===----------------------------------------------------------------------===//
2005
2006
// Both defs take a single GPRI32 source, produce no outputs, and carry an
// empty pattern list, so they are never chosen by pattern-based selection.
2007def IF_PREDICATE_SET  : ILFormat<(outs), (ins GPRI32:$src),
2008  "IF_PREDICATE_SET $src", []>;
2009
2010def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
2011  "PREDICATED_BREAK $src", []>;
2012
2013//===----------------------------------------------------------------------===//
2014// Pseudo instructions
2015//===----------------------------------------------------------------------===//
2016
// Every def in this scope is marked isPseudo = 1.
2017let isPseudo = 1 in {
2018
// PRED_X: writes a predicate bit computed from a register and two immediates.
// It has an empty asm string and no selection pattern.
2019def PRED_X : InstR600 <
2020  (outs R600_Predicate_Bit:$dst),
2021  (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
2022  "", [], NullALU> {
  // Counting $dst as operand 0, index 3 is the $flags operand (presumably the
  // intended target -- confirm against the users of FlagOperandIdx).
  // FlagOperandIdx is declared as bits<2> in InstR600, so 3 is its maximum.
2023  let FlagOperandIdx = 3;
2024}
2025
// Branch pseudos: both are terminators and branches.
2026let isTerminator = 1, isBranch = 1 in {
// Conditional jump: branches to $target under predicate bit $p.
2027def JUMP_COND : InstR600 <
2028          (outs),
2029          (ins brtarget:$target, R600_Predicate_Bit:$p),
2030          "JUMP $target ($p)",
2031          [], AnyALU
2032  >;
2033
// Unconditional jump: additionally predicable and a barrier.
2034def JUMP : InstR600 <
2035          (outs),
2036          (ins brtarget:$target),
2037          "JUMP $target",
2038          [], AnyALU
2039  >
2040{
2041  let isPredicable = 1;
2042  let isBarrier = 1;
2043}
2044
2045}  // End isTerminator = 1, isBranch = 1
2046
// The remaining pseudos are expanded by a custom inserter.
2047let usesCustomInserter = 1 in {
2048
// MASK_WRITE neither loads nor stores but is flagged as having side effects.
2049let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
2050
2051def MASK_WRITE : AMDGPUShaderInst <
2052    (outs),
2053    (ins R600_Reg32:$src),
2054    "MASK_WRITE $src",
2055    []
2056>;
2057
2058} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
2059
2060
// TXD: selected for int_AMDGPU_txd with three v4f32 sources and immediate
// resource id, sampler id and texture target. Flagged as a TEX instruction.
2061def TXD: InstR600 <
2062  (outs R600_Reg128:$dst),
2063  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
2064       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
2065  "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
2066  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
2067                     imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
2068  NullALU > {
2069  let TEXInst = 1;
2070}
2071
// TXD_SHADOW: same intrinsic and operands as TXD, but the pattern only
// matches when $textureTarget satisfies the TEX_SHADOW operand predicate.
2072def TXD_SHADOW: InstR600 <
2073  (outs R600_Reg128:$dst),
2074  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
2075       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
2076  "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
2077  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
2078        imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
2079   NullALU
2080> {
2081  let TEXInst = 1;
2082} // End usesCustomInserter = 1
2083} // End isPseudo = 1
2085
// R600 instantiations of the shared CLAMP / FABS / FNEG classes, each
// specialised for the R600_Reg32 register class.
2086def CLAMP_R600 :  CLAMP <R600_Reg32>;
2087def FABS_R600 : FABS<R600_Reg32>;
2088def FNEG_R600 : FNEG<R600_Reg32>;
2089
2090//===---------------------------------------------------------------------===//
2091// Return instruction
2092//===---------------------------------------------------------------------===//
// RETURN: terminator and return instruction with a control dependency,
// expanded by a custom inserter. It accepts a variable number of operands
// (variable_ops) and is selected for the IL_retflag node.
2093let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
2094    usesCustomInserter = 1 in {
2095  def RETURN          : ILFormat<(outs), (ins variable_ops),
2096      "RETURN", [(IL_retflag)]>;
2097}
2098
2099
2100//===----------------------------------------------------------------------===//
2101// Constant Buffer Addressing Support
2102//===----------------------------------------------------------------------===//
2103
// CONST_COPY: codegen-only pseudo, expanded by a custom inserter, that
// materialises a constant-buffer value into a 32-bit register.
// It derives directly from Instruction rather than InstR600, so the fields
// InstR600 would normally fill in (Namespace, Itinerary, operand lists,
// pattern, asm string) are all set explicitly here.
2104let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in {
2105def CONST_COPY : Instruction {
2106  let OutOperandList = (outs R600_Reg32:$dst);
2107  let InOperandList = (ins i32imm:$src);
  // Selected for CONST_ADDRESS when the address matches ADDRGA_CONST_OFFSET.
2108  let Pattern =
2109      [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
2110  let AsmString = "CONST_COPY";
2111  let neverHasSideEffects = 1;
2112  let isAsCheapAsAMove = 1;
2113  let Itinerary = NullALU;
2114}
2115} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
2116
// TEX_VTX_CONSTBUF: vertex-fetch read of a v4i32 value, selected for
// CONST_ADDRESS with a variable offset (ADDRGA_VAR_OFFSET) and an immediate
// buffer id. The asm string prints only $dst and $ptr; $BUFFER_ID is not
// printed. Only the low 64 bits (Word0 / Word1) are encoded by TableGen.
2117def TEX_VTX_CONSTBUF :
2118  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
2119      [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
2120  VTX_WORD1_GPR, VTX_WORD0 {
2121
  // Fixed values for the VTX_WORD0 / VTX_WORD1_GPR encoding fields.
2122  let VC_INST = 0;
2123  let FETCH_TYPE = 2;
2124  let FETCH_WHOLE_QUAD = 0;
2125  let SRC_REL = 0;
2126  let SRC_SEL_X = 0;
2127  let DST_REL = 0;
2128  let USE_CONST_FIELDS = 0;
2129  let NUM_FORMAT_ALL = 2;
2130  let FORMAT_COMP_ALL = 1;
2131  let SRF_MODE_ALL = 1;
2132  let MEGA_FETCH_COUNT = 16;
  // DST_SEL_X..W = 0..3 (presumably an identity X/Y/Z/W swizzle -- confirm).
2133  let DST_SEL_X        = 0;
2134  let DST_SEL_Y        = 1;
2135  let DST_SEL_Z        = 2;
2136  let DST_SEL_W        = 3;
2137  let DATA_FORMAT      = 35;
2138
2139  let Inst{31-0} = Word0;
2140  let Inst{63-32} = Word1;
2141
2142// LLVM can only encode 64-bit instructions, so these fields are manually
2143// encoded in R600CodeEmitter
2144//
2145// bits<16> OFFSET;
2146// bits<2>  ENDIAN_SWAP = 0;
2147// bits<1>  CONST_BUF_NO_STRIDE = 0;
2148// bits<1>  MEGA_FETCH = 0;
2149// bits<1>  ALT_CONST = 0;
2150// bits<2>  BUFFER_INDEX_MODE = 0;
2151
2152
2153
2154// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
2155// is done in R600CodeEmitter)
2156//
2157// Inst{79-64} = OFFSET;
2158// Inst{81-80} = ENDIAN_SWAP;
2159// Inst{82}    = CONST_BUF_NO_STRIDE;
2160// Inst{83}    = MEGA_FETCH;
2161// Inst{84}    = ALT_CONST;
2162// Inst{86-85} = BUFFER_INDEX_MODE;
2163// Inst{95-87} = 0; Reserved
2164
2165// VTX_WORD3 (Padding)
2166//
2167// Inst{127-96} = 0;
  // Sets the VTXInst flag, which InstR600 places in TSFlags{12}.
2168  let VTXInst = 1;
2169}
2170
// TEX_VTX_TEXBUF: vertex-fetch read of a v4f32 value, selected for the
// int_R600_load_texbuf intrinsic with a variable offset (ADDRGA_VAR_OFFSET)
// and an immediate buffer id. The asm string prints only $dst and $ptr;
// $BUFFER_ID is not printed. Only the low 64 bits (Word0 / Word1) are
// encoded by TableGen.
2171def TEX_VTX_TEXBUF:
2172  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
2173      [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
2174VTX_WORD1_GPR, VTX_WORD0 {
2175
// Fixed values for the VTX_WORD0 / VTX_WORD1_GPR encoding fields.
2176let VC_INST = 0;
2177let FETCH_TYPE = 2;
2178let FETCH_WHOLE_QUAD = 0;
2179let SRC_REL = 0;
2180let SRC_SEL_X = 0;
2181let DST_REL = 0;
// USE_CONST_FIELDS = 1 with NUM_FORMAT_ALL, FORMAT_COMP_ALL and DATA_FORMAT
// all 0 (presumably the format is then taken from the resource constants
// rather than from the instruction -- confirm against the ISA manual).
2182let USE_CONST_FIELDS = 1;
2183let NUM_FORMAT_ALL = 0;
2184let FORMAT_COMP_ALL = 0;
2185let SRF_MODE_ALL = 1;
2186let MEGA_FETCH_COUNT = 16;
// DST_SEL_X..W = 0..3 (presumably an identity X/Y/Z/W swizzle -- confirm).
2187let DST_SEL_X        = 0;
2188let DST_SEL_Y        = 1;
2189let DST_SEL_Z        = 2;
2190let DST_SEL_W        = 3;
2191let DATA_FORMAT      = 0;
2192
2193let Inst{31-0} = Word0;
2194let Inst{63-32} = Word1;
2195
2196// LLVM can only encode 64-bit instructions, so these fields are manually
2197// encoded in R600CodeEmitter
2198//
2199// bits<16> OFFSET;
2200// bits<2>  ENDIAN_SWAP = 0;
2201// bits<1>  CONST_BUF_NO_STRIDE = 0;
2202// bits<1>  MEGA_FETCH = 0;
2203// bits<1>  ALT_CONST = 0;
2204// bits<2>  BUFFER_INDEX_MODE = 0;
2205
2206
2207
2208// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
2209// is done in R600CodeEmitter)
2210//
2211// Inst{79-64} = OFFSET;
2212// Inst{81-80} = ENDIAN_SWAP;
2213// Inst{82}    = CONST_BUF_NO_STRIDE;
2214// Inst{83}    = MEGA_FETCH;
2215// Inst{84}    = ALT_CONST;
2216// Inst{86-85} = BUFFER_INDEX_MODE;
2217// Inst{95-87} = 0; Reserved
2218
2219// VTX_WORD3 (Padding)
2220//
2221// Inst{127-96} = 0;
  // Sets the VTXInst flag, which InstR600 places in TSFlags{12}.
2222  let VTXInst = 1;
2223}
2224
2225
2226
2227//===--------------------------------------------------------------------===//
2228// Instructions support
2229//===--------------------------------------------------------------------===//
2230//===---------------------------------------------------------------------===//
2231// Custom Inserter for Branches and returns, this eventually will be a
2232// separate pass
2233//===---------------------------------------------------------------------===//
// Branch pseudos expanded by a custom inserter. All defs in this scope are
// terminators, branches and barriers.
// NOTE(review): isBarrier = 1 is applied to the conditional BRANCH_COND
// variants as well as to the unconditional BRANCH -- confirm this is intended.
2234let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
  // Unconditional branch, selected for (br bb:$target).
2235  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
2236      "; Pseudo unconditional branch instruction",
2237      [(br bb:$target)]>;
  // Conditional branches generated by the BranchConditional multiclass for
  // the IL_brcond node.
2238  defm BRANCH_COND : BranchConditional<IL_brcond>;
2239}
2240
2241//===---------------------------------------------------------------------===//
2242// Flow and Program control Instructions
2243//===---------------------------------------------------------------------===//
// Structured control-flow instructions. Every def in this scope is a
// terminator. The ILFormat defs have empty pattern lists, so they are never
// chosen by pattern-based selection.
2244let isTerminator=1 in {
  // SWITCH and CASE take a single GPRI32 source operand.
2245  def SWITCH      : ILFormat< (outs), (ins GPRI32:$src),
2246  !strconcat("SWITCH", " $src"), []>;
2247  def CASE        : ILFormat< (outs), (ins GPRI32:$src),
2248      !strconcat("CASE", " $src"), []>;
  // The defs below take no operands at all.
2249  def BREAK       : ILFormat< (outs), (ins),
2250      "BREAK", []>;
2251  def CONTINUE    : ILFormat< (outs), (ins),
2252      "CONTINUE", []>;
2253  def DEFAULT     : ILFormat< (outs), (ins),
2254      "DEFAULT", []>;
2255  def ELSE        : ILFormat< (outs), (ins),
2256      "ELSE", []>;
2257  def ENDSWITCH   : ILFormat< (outs), (ins),
2258      "ENDSWITCH", []>;
2259  def ENDMAIN     : ILFormat< (outs), (ins),
2260      "ENDMAIN", []>;
2261  def END         : ILFormat< (outs), (ins),
2262      "END", []>;
2263  def ENDFUNC     : ILFormat< (outs), (ins),
2264      "ENDFUNC", []>;
2265  def ENDIF       : ILFormat< (outs), (ins),
2266      "ENDIF", []>;
  // Note: the def is named WHILELOOP but its asm string is "WHILE".
2267  def WHILELOOP   : ILFormat< (outs), (ins),
2268      "WHILE", []>;
2269  def ENDLOOP     : ILFormat< (outs), (ins),
2270      "ENDLOOP", []>;
2271  def FUNC        : ILFormat< (outs), (ins),
2272      "FUNC", []>;
  // Note: the def is named RETDYN but its asm string is "RET_DYN".
2273  def RETDYN      : ILFormat< (outs), (ins),
2274      "RET_DYN", []>;
2275  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2276  defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">;
2277  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2278  defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">;
2279  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2280  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
2281  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2282  defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
2283  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2284  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
2285  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2286  defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
  // Instantiations of the BranchInstr2 multiclass.
2287  defm IFC         : BranchInstr2<"IFC">;
2288  defm BREAKC      : BranchInstr2<"BREAKC">;
2289  defm CONTINUEC   : BranchInstr2<"CONTINUEC">;
2290}
2291
2292//===----------------------------------------------------------------------===//
2293// ISel Patterns
2294//===----------------------------------------------------------------------===//
2295
2296// CND*_INT Patterns for f32 True / False values
2297
// Pattern class: matches a selectcc that compares the i32 value $src0 against
// 0 under condition code `cc` and chooses between the f32 values $src1 (true)
// and $src2 (false). The match is emitted as `cnd $src0, $src1, $src2`.
//   cnd - the conditional-move instruction to emit
//   cc  - the condition code the selectcc must carry
2298class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
2299  (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
2300  (cnd $src0, $src1, $src2)
2301>;
2302
// One CND_INT_f32 pattern per supported comparison against zero:
// equal, greater-than and greater-or-equal.
2303def : CND_INT_f32 <CNDE_INT,  SETEQ>;
2304def : CND_INT_f32 <CNDGT_INT, SETGT>;
2305def : CND_INT_f32 <CNDGE_INT, SETGE>;
2306
2307//CNDGE_INT extra pattern
// For integers, "$src0 > -1" is the same test as "$src0 >= 0", so a
// greater-than compare against -1 is also selected as CNDGE_INT.
2308def : Pat <
2309  (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT),
2310  (CNDGE_INT $src0, $src1, $src2)
2311>;
2312
2313// KIL Patterns
// Both patterns expand to MASK_WRITE applied to the result of a KILLGT.
// KILP: the intrinsic takes no operand; the operands are the constants
// ONE and ZERO.
2314def KILP : Pat <
2315  (int_AMDGPU_kilp),
2316  (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
2317>;
2318
// KIL: the intrinsic's f32 operand is passed as the second KILLGT operand,
// with ZERO as the first.
2319def KIL : Pat <
2320  (int_AMDGPU_kill f32:$src0),
2321  (MASK_WRITE (KILLGT (f32 ZERO), $src0))
2322>;
2323
// Each pattern below matches a "less than" or "less or equal" selectcc and
// emits the corresponding "greater" instruction with $src0 and $src1 swapped.
// The SGT / SGE forms select between FP_ONE and FP_ZERO; all other forms
// select between -1 and 0.
2324// SGT Reverse args
2325def : Pat <
2326  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
2327  (SGT $src1, $src0)
2328>;
2329
2330// SGE Reverse args
2331def : Pat <
2332  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
2333  (SGE $src1, $src0)
2334>;
2335
2336// SETGT_DX10 reverse args
2337def : Pat <
2338  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
2339  (SETGT_DX10 $src1, $src0)
2340>;
2341
2342// SETGE_DX10 reverse args
2343def : Pat <
2344  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
2345  (SETGE_DX10 $src1, $src0)
2346>;
2347
2348// SETGT_INT reverse args
2349def : Pat <
2350  (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
2351  (SETGT_INT $src1, $src0)
2352>;
2353
2354// SETGE_INT reverse args
2355def : Pat <
2356  (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
2357  (SETGE_INT $src1, $src0)
2358>;
2359
2360// SETGT_UINT reverse args
2361def : Pat <
2362  (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
2363  (SETGT_UINT $src1, $src0)
2364>;
2365
2366// SETGE_UINT reverse args
2367def : Pat <
2368  (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
2369  (SETGE_UINT $src1, $src0)
2370>;
2371
2372// The next two patterns are special cases for handling 'true if ordered' and
2373// 'true if unordered' conditionals.  The assumption here is that the behavior of
2374// SETE and SNE conforms to the Direct3D 10 rules for floating point values
2375// described here:
2376// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
2377// We assume that SETE returns false when one of the operands is NAN and
2378// SNE returns true when one of the operands is NAN
2379
// SETO (ordered) compares are selected as "equal" instructions and SETUO
// (unordered) compares as "not equal" instructions, in both the
// FP_ONE / FP_ZERO and the -1 / 0 result flavours.
2380//SETE - 'true if ordered'
2381def : Pat <
2382  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
2383  (SETE $src0, $src1)
2384>;
2385
2386//SETE_DX10 - 'true if ordered'
2387def : Pat <
2388  (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
2389  (SETE_DX10 $src0, $src1)
2390>;
2391
2392//SNE - 'true if unordered'
2393def : Pat <
2394  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
2395  (SNE $src0, $src1)
2396>;
2397
2398//SETNE_DX10 - 'true if unordered'
2399def : Pat <
2400  (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
2401  (SETNE_DX10 $src0, $src1)
2402>;
2403
// Element extract / insert patterns for 4-element vectors: element index N
// is paired with sub-register index subN, for both v4f32 and v4i32.
2404def : Extract_Element <f32, v4f32, 0, sub0>;
2405def : Extract_Element <f32, v4f32, 1, sub1>;
2406def : Extract_Element <f32, v4f32, 2, sub2>;
2407def : Extract_Element <f32, v4f32, 3, sub3>;
2408
2409def : Insert_Element <f32, v4f32, 0, sub0>;
2410def : Insert_Element <f32, v4f32, 1, sub1>;
2411def : Insert_Element <f32, v4f32, 2, sub2>;
2412def : Insert_Element <f32, v4f32, 3, sub3>;
2413
2414def : Extract_Element <i32, v4i32, 0, sub0>;
2415def : Extract_Element <i32, v4i32, 1, sub1>;
2416def : Extract_Element <i32, v4i32, 2, sub2>;
2417def : Extract_Element <i32, v4i32, 3, sub3>;
2418
2419def : Insert_Element <i32, v4i32, 0, sub0>;
2420def : Insert_Element <i32, v4i32, 1, sub1>;
2421def : Insert_Element <i32, v4i32, 2, sub2>;
2422def : Insert_Element <i32, v4i32, 3, sub3>;
2423
// Whole-vector build patterns for the same two vector types.
2424def : Vector4_Build <v4f32, f32>;
2425def : Vector4_Build <v4i32, i32>;
2426
2427// bitconvert patterns
2428
// Bitcasts in both directions between i32 and f32 (in R600_Reg32) and between
// v4i32 and v4f32 (in R600_Reg128).
2429def : BitConvert <i32, f32, R600_Reg32>;
2430def : BitConvert <f32, i32, R600_Reg32>;
2431def : BitConvert <v4f32, v4i32, R600_Reg128>;
2432def : BitConvert <v4i32, v4f32, R600_Reg128>;
2433
2434// DWORDADDR pattern
// Instantiated for i32 values held in R600_Reg32.
2435def : DwordAddrPat  <i32, R600_Reg32>;
2436
2437} // End isR600toCayman Predicate
2438