R600Instructions.td revision 251662
1249259Sdim//===-- R600Instructions.td - R600 Instruction defs  -------*- tablegen -*-===//
2249259Sdim//
3249259Sdim//                     The LLVM Compiler Infrastructure
4249259Sdim//
5249259Sdim// This file is distributed under the University of Illinois Open Source
6249259Sdim// License. See LICENSE.TXT for details.
7249259Sdim//
8249259Sdim//===----------------------------------------------------------------------===//
9249259Sdim//
10249259Sdim// R600 Tablegen instruction definitions
11249259Sdim//
12249259Sdim//===----------------------------------------------------------------------===//
13249259Sdim
14249259Sdiminclude "R600Intrinsics.td"
15249259Sdim
// Common base class for R600 instructions.
//
// It forwards the operand lists, assembly string, selection pattern and
// itinerary to the generic instruction fields, declares the 64-bit encoding
// field, and packs a set of per-instruction properties into TSFlags using
// the layout documented next to the assignments below.
class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
                InstrItinClass itin>
    : AMDGPUInst <outs, ins, asm, pattern> {

  // Encoded instruction bits.
  field bits<64> Inst;

  // Per-instruction properties.  All of them default to 0 and are meant to
  // be overridden with `let` by subclasses or individual defs.
  bit TransOnly = 0;
  bit Trig = 0;
  bit Op3 = 0;
  bit isVector = 0;
  bits<2> FlagOperandIdx = 0;
  bit Op1 = 0;
  bit Op2 = 0;
  bit HasNativeOperands = 0;
  bit VTXInst = 0;
  bit TEXInst = 0;

  // Generic instruction fields taken from the template arguments.
  let Namespace      = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList  = ins;
  let AsmString      = asm;
  let Pattern        = pattern;
  let Itinerary      = itin;

  // TSFlags layout (bits 1-3 are not assigned by this class):
  //   bit  0     TransOnly
  //   bit  4     Trig
  //   bit  5     Op3
  //   bit  6     isVector
  //   bits 8-7   FlagOperandIdx
  //   bit  9     HasNativeOperands
  //   bit  10    Op1
  //   bit  11    Op2
  //   bit  12    VTXInst
  //   bit  13    TEXInst
  let TSFlags{0}   = TransOnly;
  let TSFlags{4}   = Trig;

  // Operand-count markers.
  let TSFlags{5}   = Op3;
  let TSFlags{10}  = Op1;
  let TSFlags{11}  = Op2;

  // Vector instructions are instructions that must fill all slots in an
  // instruction group
  let TSFlags{6}   = isVector;

  let TSFlags{8-7} = FlagOperandIdx;
  let TSFlags{9}   = HasNativeOperands;

  // Fetch-instruction markers.
  let TSFlags{12}  = VTXInst;
  let TSFlags{13}  = TEXInst;
}
53249259Sdim
// InstR600 variant that does not take an itinerary argument: the itinerary
// is always NullALU.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern>
    : InstR600 <outs, ins, asm, pattern, NullALU> {
  let Namespace = "AMDGPU";
}
59249259Sdim
60249259Sdimdef MEMxi : Operand<iPTR> {
61249259Sdim  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
62249259Sdim  let PrintMethod = "printMemOperand";
63249259Sdim}
64249259Sdim
65249259Sdimdef MEMrr : Operand<iPTR> {
66249259Sdim  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
67249259Sdim}
68249259Sdim
69249259Sdim// Operands for non-registers
70249259Sdim
71249259Sdimclass InstFlag<string PM = "printOperand", int Default = 0>
72249259Sdim    : OperandWithDefaultOps <i32, (ops (i32 Default))> {
73249259Sdim  let PrintMethod = PM;
74249259Sdim}
75249259Sdim
76249259Sdim// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
77249259Sdimdef SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
78249259Sdim  let PrintMethod = "printSel";
79249259Sdim}
80251662Sdimdef BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
81251662Sdim  let PrintMethod = "printBankSwizzle";
82251662Sdim}
83249259Sdim
84249259Sdimdef LITERAL : InstFlag<"printLiteral">;
85249259Sdim
86249259Sdimdef WRITE : InstFlag <"printWrite", 1>;
87249259Sdimdef OMOD : InstFlag <"printOMOD">;
88249259Sdimdef REL : InstFlag <"printRel">;
89249259Sdimdef CLAMP : InstFlag <"printClamp">;
90249259Sdimdef NEG : InstFlag <"printNeg">;
91249259Sdimdef ABS : InstFlag <"printAbs">;
92249259Sdimdef UEM : InstFlag <"printUpdateExecMask">;
93249259Sdimdef UP : InstFlag <"printUpdatePred">;
94249259Sdim
95249259Sdim// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
96249259Sdim// Once we start using the packetizer in this backend we should have this
97249259Sdim// default to 0.
98249259Sdimdef LAST : InstFlag<"printLast", 1>;
99249259Sdim
100249259Sdimdef FRAMEri : Operand<iPTR> {
101249259Sdim  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
102249259Sdim}
103249259Sdim
104249259Sdimdef ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
105249259Sdimdef ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
106249259Sdimdef ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
107249259Sdimdef ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
108249259Sdimdef ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
109249259Sdimdef ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
110249259Sdim
111249259Sdimclass R600ALU_Word0 {
112249259Sdim  field bits<32> Word0;
113249259Sdim
114249259Sdim  bits<11> src0;
115249259Sdim  bits<1>  src0_neg;
116249259Sdim  bits<1>  src0_rel;
117249259Sdim  bits<11> src1;
118249259Sdim  bits<1>  src1_rel;
119249259Sdim  bits<1>  src1_neg;
120249259Sdim  bits<3>  index_mode = 0;
121249259Sdim  bits<2>  pred_sel;
122249259Sdim  bits<1>  last;
123249259Sdim
124249259Sdim  bits<9>  src0_sel  = src0{8-0};
125249259Sdim  bits<2>  src0_chan = src0{10-9};
126249259Sdim  bits<9>  src1_sel  = src1{8-0};
127249259Sdim  bits<2>  src1_chan = src1{10-9};
128249259Sdim
129249259Sdim  let Word0{8-0}   = src0_sel;
130249259Sdim  let Word0{9}     = src0_rel;
131249259Sdim  let Word0{11-10} = src0_chan;
132249259Sdim  let Word0{12}    = src0_neg;
133249259Sdim  let Word0{21-13} = src1_sel;
134249259Sdim  let Word0{22}    = src1_rel;
135249259Sdim  let Word0{24-23} = src1_chan;
136249259Sdim  let Word0{25}    = src1_neg;
137249259Sdim  let Word0{28-26} = index_mode;
138249259Sdim  let Word0{30-29} = pred_sel;
139249259Sdim  let Word0{31}    = last;
140249259Sdim}
141249259Sdim
142249259Sdimclass R600ALU_Word1 {
143249259Sdim  field bits<32> Word1;
144249259Sdim
145249259Sdim  bits<11> dst;
146251662Sdim  bits<3>  bank_swizzle;
147249259Sdim  bits<1>  dst_rel;
148249259Sdim  bits<1>  clamp;
149249259Sdim
150249259Sdim  bits<7>  dst_sel  = dst{6-0};
151249259Sdim  bits<2>  dst_chan = dst{10-9};
152249259Sdim
153249259Sdim  let Word1{20-18} = bank_swizzle;
154249259Sdim  let Word1{27-21} = dst_sel;
155249259Sdim  let Word1{28}    = dst_rel;
156249259Sdim  let Word1{30-29} = dst_chan;
157249259Sdim  let Word1{31}    = clamp;
158249259Sdim}
159249259Sdim
160249259Sdimclass R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
161249259Sdim
162249259Sdim  bits<1>  src0_abs;
163249259Sdim  bits<1>  src1_abs;
164249259Sdim  bits<1>  update_exec_mask;
165249259Sdim  bits<1>  update_pred;
166249259Sdim  bits<1>  write;
167249259Sdim  bits<2>  omod;
168249259Sdim
169249259Sdim  let Word1{0}     = src0_abs;
170249259Sdim  let Word1{1}     = src1_abs;
171249259Sdim  let Word1{2}     = update_exec_mask;
172249259Sdim  let Word1{3}     = update_pred;
173249259Sdim  let Word1{4}     = write;
174249259Sdim  let Word1{6-5}   = omod;
175249259Sdim  let Word1{17-7}  = alu_inst;
176249259Sdim}
177249259Sdim
178249259Sdimclass R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
179249259Sdim
180249259Sdim  bits<11> src2;
181249259Sdim  bits<1>  src2_rel;
182249259Sdim  bits<1>  src2_neg;
183249259Sdim
184249259Sdim  bits<9>  src2_sel = src2{8-0};
185249259Sdim  bits<2>  src2_chan = src2{10-9};
186249259Sdim
187249259Sdim  let Word1{8-0}   = src2_sel;
188249259Sdim  let Word1{9}     = src2_rel;
189249259Sdim  let Word1{11-10} = src2_chan;
190249259Sdim  let Word1{12}    = src2_neg;
191249259Sdim  let Word1{17-13} = alu_inst;
192249259Sdim}
193249259Sdim
194249259Sdimclass VTX_WORD0 {
195249259Sdim  field bits<32> Word0;
196249259Sdim  bits<7> SRC_GPR;
197249259Sdim  bits<5> VC_INST;
198249259Sdim  bits<2> FETCH_TYPE;
199249259Sdim  bits<1> FETCH_WHOLE_QUAD;
200249259Sdim  bits<8> BUFFER_ID;
201249259Sdim  bits<1> SRC_REL;
202249259Sdim  bits<2> SRC_SEL_X;
203249259Sdim  bits<6> MEGA_FETCH_COUNT;
204249259Sdim
205249259Sdim  let Word0{4-0}   = VC_INST;
206249259Sdim  let Word0{6-5}   = FETCH_TYPE;
207249259Sdim  let Word0{7}     = FETCH_WHOLE_QUAD;
208249259Sdim  let Word0{15-8}  = BUFFER_ID;
209249259Sdim  let Word0{22-16} = SRC_GPR;
210249259Sdim  let Word0{23}    = SRC_REL;
211249259Sdim  let Word0{25-24} = SRC_SEL_X;
212249259Sdim  let Word0{31-26} = MEGA_FETCH_COUNT;
213249259Sdim}
214249259Sdim
215249259Sdimclass VTX_WORD1_GPR {
216249259Sdim  field bits<32> Word1;
217249259Sdim  bits<7> DST_GPR;
218249259Sdim  bits<1> DST_REL;
219249259Sdim  bits<3> DST_SEL_X;
220249259Sdim  bits<3> DST_SEL_Y;
221249259Sdim  bits<3> DST_SEL_Z;
222249259Sdim  bits<3> DST_SEL_W;
223249259Sdim  bits<1> USE_CONST_FIELDS;
224249259Sdim  bits<6> DATA_FORMAT;
225249259Sdim  bits<2> NUM_FORMAT_ALL;
226249259Sdim  bits<1> FORMAT_COMP_ALL;
227249259Sdim  bits<1> SRF_MODE_ALL;
228249259Sdim
229249259Sdim  let Word1{6-0} = DST_GPR;
230249259Sdim  let Word1{7}    = DST_REL;
231249259Sdim  let Word1{8}    = 0; // Reserved
232249259Sdim  let Word1{11-9} = DST_SEL_X;
233249259Sdim  let Word1{14-12} = DST_SEL_Y;
234249259Sdim  let Word1{17-15} = DST_SEL_Z;
235249259Sdim  let Word1{20-18} = DST_SEL_W;
236249259Sdim  let Word1{21}    = USE_CONST_FIELDS;
237249259Sdim  let Word1{27-22} = DATA_FORMAT;
238249259Sdim  let Word1{29-28} = NUM_FORMAT_ALL;
239249259Sdim  let Word1{30}    = FORMAT_COMP_ALL;
240249259Sdim  let Word1{31}    = SRF_MODE_ALL;
241249259Sdim}
242249259Sdim
243249259Sdimclass TEX_WORD0 {
244249259Sdim  field bits<32> Word0;
245249259Sdim
246249259Sdim  bits<5> TEX_INST;
247249259Sdim  bits<2> INST_MOD;
248249259Sdim  bits<1> FETCH_WHOLE_QUAD;
249249259Sdim  bits<8> RESOURCE_ID;
250249259Sdim  bits<7> SRC_GPR;
251249259Sdim  bits<1> SRC_REL;
252249259Sdim  bits<1> ALT_CONST;
253249259Sdim  bits<2> RESOURCE_INDEX_MODE;
254249259Sdim  bits<2> SAMPLER_INDEX_MODE;
255249259Sdim
256249259Sdim  let Word0{4-0} = TEX_INST;
257249259Sdim  let Word0{6-5} = INST_MOD;
258249259Sdim  let Word0{7} = FETCH_WHOLE_QUAD;
259249259Sdim  let Word0{15-8} = RESOURCE_ID;
260249259Sdim  let Word0{22-16} = SRC_GPR;
261249259Sdim  let Word0{23} = SRC_REL;
262249259Sdim  let Word0{24} = ALT_CONST;
263249259Sdim  let Word0{26-25} = RESOURCE_INDEX_MODE;
264249259Sdim  let Word0{28-27} = SAMPLER_INDEX_MODE;
265249259Sdim}
266249259Sdim
267249259Sdimclass TEX_WORD1 {
268249259Sdim  field bits<32> Word1;
269249259Sdim
270249259Sdim  bits<7> DST_GPR;
271249259Sdim  bits<1> DST_REL;
272249259Sdim  bits<3> DST_SEL_X;
273249259Sdim  bits<3> DST_SEL_Y;
274249259Sdim  bits<3> DST_SEL_Z;
275249259Sdim  bits<3> DST_SEL_W;
276249259Sdim  bits<7> LOD_BIAS;
277249259Sdim  bits<1> COORD_TYPE_X;
278249259Sdim  bits<1> COORD_TYPE_Y;
279249259Sdim  bits<1> COORD_TYPE_Z;
280249259Sdim  bits<1> COORD_TYPE_W;
281249259Sdim
282249259Sdim  let Word1{6-0} = DST_GPR;
283249259Sdim  let Word1{7} = DST_REL;
284249259Sdim  let Word1{11-9} = DST_SEL_X;
285249259Sdim  let Word1{14-12} = DST_SEL_Y;
286249259Sdim  let Word1{17-15} = DST_SEL_Z;
287249259Sdim  let Word1{20-18} = DST_SEL_W;
288249259Sdim  let Word1{27-21} = LOD_BIAS;
289249259Sdim  let Word1{28} = COORD_TYPE_X;
290249259Sdim  let Word1{29} = COORD_TYPE_Y;
291249259Sdim  let Word1{30} = COORD_TYPE_Z;
292249259Sdim  let Word1{31} = COORD_TYPE_W;
293249259Sdim}
294249259Sdim
295249259Sdimclass TEX_WORD2 {
296249259Sdim  field bits<32> Word2;
297249259Sdim
298249259Sdim  bits<5> OFFSET_X;
299249259Sdim  bits<5> OFFSET_Y;
300249259Sdim  bits<5> OFFSET_Z;
301249259Sdim  bits<5> SAMPLER_ID;
302249259Sdim  bits<3> SRC_SEL_X;
303249259Sdim  bits<3> SRC_SEL_Y;
304249259Sdim  bits<3> SRC_SEL_Z;
305249259Sdim  bits<3> SRC_SEL_W;
306249259Sdim
307249259Sdim  let Word2{4-0} = OFFSET_X;
308249259Sdim  let Word2{9-5} = OFFSET_Y;
309249259Sdim  let Word2{14-10} = OFFSET_Z;
310249259Sdim  let Word2{19-15} = SAMPLER_ID;
311249259Sdim  let Word2{22-20} = SRC_SEL_X;
312249259Sdim  let Word2{25-23} = SRC_SEL_Y;
313249259Sdim  let Word2{28-26} = SRC_SEL_Z;
314249259Sdim  let Word2{31-29} = SRC_SEL_W;
315249259Sdim}
316249259Sdim
317249259Sdim/*
318249259SdimXXX: R600 subtarget uses a slightly different encoding than the other
319249259Sdimsubtargets.  We currently handle this in R600MCCodeEmitter, but we may
320249259Sdimwant to use these instruction classes in the future.
321249259Sdim
322249259Sdimclass R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
323249259Sdim
324249259Sdim  bits<1>  fog_merge;
325249259Sdim  bits<10> alu_inst;
326249259Sdim
327249259Sdim  let Inst{37}    = fog_merge;
328249259Sdim  let Inst{39-38} = omod;
329249259Sdim  let Inst{49-40} = alu_inst;
330249259Sdim}
331249259Sdim
332249259Sdimclass R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
333249259Sdim
334249259Sdim  bits<11> alu_inst;
335249259Sdim
336249259Sdim  let Inst{38-37} = omod;
337249259Sdim  let Inst{49-39} = alu_inst;
338249259Sdim}
339249259Sdim*/
340249259Sdim
341249259Sdimdef R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
342249259Sdim                                     (ops PRED_SEL_OFF)>;
343249259Sdim
344249259Sdim
345249259Sdimlet mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
346249259Sdim
347249259Sdim// Class for instructions with only one source register.
348249259Sdim// If you add new ins to this instruction, make sure they are listed before
349249259Sdim// $literal, because the backend currently assumes that the last operand is
350249259Sdim// a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
351249259Sdim// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
352249259Sdim// and R600InstrInfo::getOperandIdx().
353249259Sdimclass R600_1OP <bits<11> inst, string opName, list<dag> pattern,
354249259Sdim                InstrItinClass itin = AnyALU> :
355251662Sdim    InstR600 <(outs R600_Reg32:$dst),
356249259Sdim              (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
357249259Sdim                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
358251662Sdim                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
359251662Sdim                   BANK_SWIZZLE:$bank_swizzle),
360249259Sdim              !strconcat("  ", opName,
361251662Sdim                   "$last$clamp $dst$write$dst_rel$omod, "
362249259Sdim                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
363251662Sdim                   "$pred_sel $bank_swizzle"),
364249259Sdim              pattern,
365249259Sdim              itin>,
366249259Sdim    R600ALU_Word0,
367249259Sdim    R600ALU_Word1_OP2 <inst> {
368249259Sdim
369249259Sdim  let src1 = 0;
370249259Sdim  let src1_rel = 0;
371249259Sdim  let src1_neg = 0;
372249259Sdim  let src1_abs = 0;
373249259Sdim  let update_exec_mask = 0;
374249259Sdim  let update_pred = 0;
375249259Sdim  let HasNativeOperands = 1;
376249259Sdim  let Op1 = 1;
377249259Sdim  let DisableEncoding = "$literal";
378249259Sdim
379249259Sdim  let Inst{31-0}  = Word0;
380249259Sdim  let Inst{63-32} = Word1;
381249259Sdim}
382249259Sdim
// Convenience wrapper around R600_1OP that supplies the standard selection
// pattern for a one-source node: $dst = node($src0).
//
//   inst   - 11-bit ALU opcode, forwarded to R600_1OP.
//   opName - mnemonic used in the assembly string.
//   node   - pattern operator matched by the generated pattern.
//   itin   - itinerary class; forwarded to R600_1OP so that a non-default
//            value given by a def actually takes effect (defaults to AnyALU).
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                    InstrItinClass itin = AnyALU> :
    R600_1OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))],
              itin
>;
388249259Sdim
389249259Sdim// If you add or change the operands for R600_2OP instructions, you must
390249259Sdim// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
391249259Sdim// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
392249259Sdimclass R600_2OP <bits<11> inst, string opName, list<dag> pattern,
393249259Sdim                InstrItinClass itin = AnyALU> :
394251662Sdim  InstR600 <(outs R600_Reg32:$dst),
395249259Sdim          (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
396249259Sdim               OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
397249259Sdim               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
398249259Sdim               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
399251662Sdim               LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
400251662Sdim               BANK_SWIZZLE:$bank_swizzle),
401249259Sdim          !strconcat("  ", opName,
402251662Sdim                "$last$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
403249259Sdim                "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
404249259Sdim                "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
405251662Sdim                "$pred_sel $bank_swizzle"),
406249259Sdim          pattern,
407249259Sdim          itin>,
408249259Sdim    R600ALU_Word0,
409249259Sdim    R600ALU_Word1_OP2 <inst> {
410249259Sdim
411249259Sdim  let HasNativeOperands = 1;
412249259Sdim  let Op2 = 1;
413249259Sdim  let DisableEncoding = "$literal";
414249259Sdim
415249259Sdim  let Inst{31-0}  = Word0;
416249259Sdim  let Inst{63-32} = Word1;
417249259Sdim}
418249259Sdim
// Convenience wrapper around R600_2OP that supplies the standard selection
// pattern for a two-source node: $dst = node($src0, $src1).
//
//   inst   - 11-bit ALU opcode, forwarded to R600_2OP.
//   opName - mnemonic used in the assembly string.
//   node   - pattern operator matched by the generated pattern.
//   itin   - itinerary class; forwarded to R600_2OP so that a non-default
//            value given by a def actually takes effect (defaults to AnyALU).
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_2OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
                                           R600_Reg32:$src1))],
              itin
>;
425249259Sdim
426249259Sdim// If you add or change the operands for R600_3OP instructions, you must
427249259Sdim// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
428249259Sdim// R600InstrInfo::buildDefaultInstruction(), and
429249259Sdim// R600InstrInfo::getOperandIdx().
430249259Sdimclass R600_3OP <bits<5> inst, string opName, list<dag> pattern,
431249259Sdim                InstrItinClass itin = AnyALU> :
432251662Sdim  InstR600 <(outs R600_Reg32:$dst),
433249259Sdim          (ins REL:$dst_rel, CLAMP:$clamp,
434249259Sdim               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
435249259Sdim               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
436249259Sdim               R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
437251662Sdim               LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
438251662Sdim               BANK_SWIZZLE:$bank_swizzle),
439251662Sdim          !strconcat("  ", opName, "$last$clamp $dst$dst_rel, "
440249259Sdim                             "$src0_neg$src0$src0_rel, "
441249259Sdim                             "$src1_neg$src1$src1_rel, "
442249259Sdim                             "$src2_neg$src2$src2_rel, "
443251662Sdim                             "$pred_sel"
444251662Sdim                             "$bank_swizzle"),
445249259Sdim          pattern,
446249259Sdim          itin>,
447249259Sdim    R600ALU_Word0,
448249259Sdim    R600ALU_Word1_OP3<inst>{
449249259Sdim
450249259Sdim  let HasNativeOperands = 1;
451249259Sdim  let DisableEncoding = "$literal";
452249259Sdim  let Op3 = 1;
453249259Sdim
454249259Sdim  let Inst{31-0}  = Word0;
455249259Sdim  let Inst{63-32} = Word1;
456249259Sdim}
457249259Sdim
458249259Sdimclass R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
459249259Sdim                      InstrItinClass itin = VecALU> :
460251662Sdim  InstR600 <(outs R600_Reg32:$dst),
461249259Sdim          ins,
462249259Sdim          asm,
463249259Sdim          pattern,
464249259Sdim          itin>;
465249259Sdim
// Texture instruction class: one 128-bit result ($DST_GPR), one 128-bit
// source ($SRC_GPR) and three immediate operands (resource id, sampler id,
// texture target).
//
// Only the low five bits of `inst` are used as TEX_INST.  Word0 and Word1
// are placed into Inst here; the Word2 field inherited from TEX_WORD2 is not
// assigned to Inst by this class.
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <(outs R600_Reg128:$DST_GPR),
          (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
          !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
          pattern,
          itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {

  // Mark the instruction as a texture instruction in TSFlags.
  let TEXInst = 1;

  // 64-bit encoding: Word0 in the low half, Word1 in the high half.
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;

  // Word0 fields that are fixed for every instruction of this class.
  let TEX_INST            = inst{4-0};
  let INST_MOD            = 0;
  let FETCH_WHOLE_QUAD    = 0;
  let SRC_REL             = 0;
  let ALT_CONST           = 0;
  let RESOURCE_INDEX_MODE = 0;
  let SAMPLER_INDEX_MODE  = 0;

  // Word1 fields that are fixed for every instruction of this class.
  let DST_REL      = 0;
  let DST_SEL_X    = 0;
  let DST_SEL_Y    = 1;
  let DST_SEL_Z    = 2;
  let DST_SEL_W    = 3;
  let LOD_BIAS     = 0;
  let COORD_TYPE_X = 0;
  let COORD_TYPE_Y = 0;
  let COORD_TYPE_Z = 0;
  let COORD_TYPE_W = 0;
}
498249259Sdim
499249259Sdim} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
500249259Sdim
// Matches an immediate whose value, truncated to 32 bits, lies in 6..8 or
// in 11..13.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{
    switch ((uint32_t)N->getZExtValue()) {
    case 6: case 7: case 8:
    case 11: case 12: case 13:
      return true;
    default:
      return false;
    }
  }]
>;
507249259Sdim
// Matches an immediate whose value, truncated to 32 bits, equals 5.
def TEX_RECT : PatLeaf<
  (imm),
  [{
    return (uint32_t)N->getZExtValue() == 5;
  }]
>;
514249259Sdim
// Matches an immediate whose value, truncated to 32 bits, is one of
// 9, 10, 15 or 16.
def TEX_ARRAY : PatLeaf<
  (imm),
  [{
    switch ((uint32_t)N->getZExtValue()) {
    case 9: case 10:
    case 15: case 16:
      return true;
    default:
      return false;
    }
  }]
>;
521249259Sdim
// Matches an immediate whose value, truncated to 32 bits, is one of
// 11, 12 or 17.
def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{
    switch ((uint32_t)N->getZExtValue()) {
    case 11: case 12:
    case 17:
      return true;
    default:
      return false;
    }
  }]
>;
528249259Sdim
529249259Sdimclass EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
530249259Sdim                 dag ins, string asm, list<dag> pattern> :
531249259Sdim    InstR600ISA <outs, ins, asm, pattern> {
532249259Sdim  bits<7>  RW_GPR;
533249259Sdim  bits<7>  INDEX_GPR;
534249259Sdim
535249259Sdim  bits<2>  RIM;
536249259Sdim  bits<2>  TYPE;
537249259Sdim  bits<1>  RW_REL;
538249259Sdim  bits<2>  ELEM_SIZE;
539249259Sdim
540249259Sdim  bits<12> ARRAY_SIZE;
541249259Sdim  bits<4>  COMP_MASK;
542249259Sdim  bits<4>  BURST_COUNT;
543249259Sdim  bits<1>  VPM;
544249259Sdim  bits<1>  eop;
545249259Sdim  bits<1>  MARK;
546249259Sdim  bits<1>  BARRIER;
547249259Sdim
548249259Sdim  // CF_ALLOC_EXPORT_WORD0_RAT
549249259Sdim  let Inst{3-0}   = rat_id;
550249259Sdim  let Inst{9-4}   = rat_inst;
551249259Sdim  let Inst{10}    = 0; // Reserved
552249259Sdim  let Inst{12-11} = RIM;
553249259Sdim  let Inst{14-13} = TYPE;
554249259Sdim  let Inst{21-15} = RW_GPR;
555249259Sdim  let Inst{22}    = RW_REL;
556249259Sdim  let Inst{29-23} = INDEX_GPR;
557249259Sdim  let Inst{31-30} = ELEM_SIZE;
558249259Sdim
559249259Sdim  // CF_ALLOC_EXPORT_WORD1_BUF
560249259Sdim  let Inst{43-32} = ARRAY_SIZE;
561249259Sdim  let Inst{47-44} = COMP_MASK;
562249259Sdim  let Inst{51-48} = BURST_COUNT;
563249259Sdim  let Inst{52}    = VPM;
564249259Sdim  let Inst{53}    = eop;
565249259Sdim  let Inst{61-54} = cf_inst;
566249259Sdim  let Inst{62}    = MARK;
567249259Sdim  let Inst{63}    = BARRIER;
568249259Sdim}
569249259Sdim
570249259Sdimclass LoadParamFrag <PatFrag load_type> : PatFrag <
571249259Sdim  (ops node:$ptr), (load_type node:$ptr),
572249259Sdim  [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
573249259Sdim>;
574249259Sdim
575249259Sdimdef load_param : LoadParamFrag<load>;
576249259Sdimdef load_param_zexti8 : LoadParamFrag<zextloadi8>;
577249259Sdimdef load_param_zexti16 : LoadParamFrag<zextloadi16>;
578249259Sdim
579249259Sdimdef isR600 : Predicate<"Subtarget.device()"
580249259Sdim                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
581249259Sdimdef isR700 : Predicate<"Subtarget.device()"
582249259Sdim                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
583249259Sdim                            "Subtarget.device()->getDeviceFlag()"
584249259Sdim                            ">= OCL_DEVICE_RV710">;
585249259Sdimdef isEG : Predicate<
586249259Sdim  "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
587249259Sdim  "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
588249259Sdim  "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
589249259Sdim
590249259Sdimdef isCayman : Predicate<"Subtarget.device()"
591249259Sdim                            "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
592249259Sdimdef isEGorCayman : Predicate<"Subtarget.device()"
593249259Sdim                            "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
594249259Sdim                            "|| Subtarget.device()->getGeneration() =="
595249259Sdim                            "AMDGPUDeviceInfo::HD6XXX">;
596249259Sdim
597249259Sdimdef isR600toCayman : Predicate<
598249259Sdim                     "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
599249259Sdim
600249259Sdim//===----------------------------------------------------------------------===//
601249259Sdim// R600 SDNodes
602249259Sdim//===----------------------------------------------------------------------===//
603249259Sdim
// Shader instruction with two results: $dst0 in R600_TReg32_X and $dst1 in
// R600_TReg32_Y.  It takes an immediate ($src0) and two R600_Reg32 sources
// and has no selection pattern.
//
// Both results are referenced in the assembly string with the `$` sigil so
// that the printer substitutes the operand instead of emitting the literal
// text "dst1".
def INTERP_PAIR_XY :  AMDGPUShaderInst <
  (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
  []>;
609249259Sdim
// Shader instruction with two results: $dst0 in R600_TReg32_Z and $dst1 in
// R600_TReg32_W.  It takes an immediate ($src0) and two R600_Reg32 sources
// and has no selection pattern.
//
// Both results are referenced in the assembly string with the `$` sigil so
// that the printer substitutes the operand instead of emitting the literal
// text "dst1".
def INTERP_PAIR_ZW :  AMDGPUShaderInst <
  (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
  []>;
615249259Sdim
616249259Sdimdef CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
617249259Sdim  SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
618249259Sdim  [SDNPVariadic]
619249259Sdim>;
620249259Sdim
621249259Sdim//===----------------------------------------------------------------------===//
622249259Sdim// Interpolation Instructions
623249259Sdim//===----------------------------------------------------------------------===//
624249259Sdim
625249259Sdimdef INTERP_VEC_LOAD :  AMDGPUShaderInst <
626249259Sdim  (outs R600_Reg128:$dst),
627249259Sdim  (ins i32imm:$src0),
628249259Sdim  "INTERP_LOAD $src0 : $dst",
629249259Sdim  []>;
630249259Sdim
631249259Sdimdef INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
632249259Sdim  let bank_swizzle = 5;
633249259Sdim}
634249259Sdim
635249259Sdimdef INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
636249259Sdim  let bank_swizzle = 5;
637249259Sdim}
638249259Sdim
639249259Sdimdef INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
640249259Sdim
641249259Sdim//===----------------------------------------------------------------------===//
642249259Sdim// Export Instructions
643249259Sdim//===----------------------------------------------------------------------===//
644249259Sdim
645249259Sdimdef ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
646249259Sdim
647249259Sdimdef EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
648249259Sdim  [SDNPHasChain, SDNPSideEffect]>;
649249259Sdim
// First 32-bit word of an export instruction encoding.
//
// Layout of Word0, most significant bits first:
//   31-30  elem_size
//   29-23  INDEX_GPR (always 0 here)
//   22     RW_REL    (always 0 here)
//   21-15  gpr
//   14-13  type
//   12-0   arraybase
class ExportWord0 {
  field bits<32> Word0;

  bits<13> arraybase;
  bits<2> type;
  bits<7> gpr;
  bits<2> elem_size;

  let Word0{31-30} = elem_size;
  let Word0{29-23} = 0; // INDEX_GPR
  let Word0{22}    = 0; // RW_REL
  let Word0{21-15} = gpr;
  let Word0{14-13} = type;
  let Word0{12-0}  = arraybase;
}
665249259Sdim
666249259Sdimclass ExportSwzWord1 {
667249259Sdim  field bits<32> Word1;
668249259Sdim
669249259Sdim  bits<3> sw_x;
670249259Sdim  bits<3> sw_y;
671249259Sdim  bits<3> sw_z;
672249259Sdim  bits<3> sw_w;
673249259Sdim  bits<1> eop;
674249259Sdim  bits<8> inst;
675249259Sdim
676249259Sdim  let Word1{2-0} = sw_x;
677249259Sdim  let Word1{5-3} = sw_y;
678249259Sdim  let Word1{8-6} = sw_z;
679249259Sdim  let Word1{11-9} = sw_w;
680249259Sdim}
681249259Sdim
682249259Sdimclass ExportBufWord1 {
683249259Sdim  field bits<32> Word1;
684249259Sdim
685249259Sdim  bits<12> arraySize;
686249259Sdim  bits<4> compMask;
687249259Sdim  bits<1> eop;
688249259Sdim  bits<8> inst;
689249259Sdim
690249259Sdim  let Word1{11-0} = arraySize;
691249259Sdim  let Word1{15-12} = compMask;
692249259Sdim}
693249259Sdim
694249259Sdimmulticlass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
695249259Sdim  def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
696249259Sdim    (ExportInst
697249259Sdim        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
698249259Sdim        0, 61, 0, 7, 7, 7, cf_inst, 0)
699249259Sdim  >;
700249259Sdim
701249259Sdim  def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
702249259Sdim    (ExportInst
703249259Sdim        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
704249259Sdim        0, 61, 7, 0, 7, 7, cf_inst, 0)
705249259Sdim  >;
706249259Sdim
707249259Sdim  def : Pat<(int_R600_store_dummy (i32 imm:$type)),
708249259Sdim    (ExportInst
709249259Sdim        (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
710249259Sdim  >;
711249259Sdim
712249259Sdim  def : Pat<(int_R600_store_dummy 1),
713249259Sdim    (ExportInst
714249259Sdim        (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
715249259Sdim  >;
716249259Sdim
717249259Sdim  def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
718249259Sdim    (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
719249259Sdim        (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
720249259Sdim        imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
721249259Sdim  >;
722249259Sdim
723249259Sdim}
724249259Sdim
725249259Sdimmulticlass SteamOutputExportPattern<Instruction ExportInst,
726249259Sdim    bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
727249259Sdim// Stream0
728249259Sdim  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
729249259Sdim      (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
730249259Sdim      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
731249259Sdim      4095, imm:$mask, buf0inst, 0)>;
732249259Sdim// Stream1
733249259Sdim  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
734249259Sdim      (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
735249259Sdim      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
736249259Sdim      4095, imm:$mask, buf1inst, 0)>;
737249259Sdim// Stream2
738249259Sdim  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
739249259Sdim      (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
740249259Sdim      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
741249259Sdim      4095, imm:$mask, buf2inst, 0)>;
742249259Sdim// Stream3
743249259Sdim  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
744249259Sdim      (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
745249259Sdim      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
746249259Sdim      4095, imm:$mask, buf3inst, 0)>;
747249259Sdim}
748249259Sdim
749251662Sdim// Export Instructions should not be duplicated by TailDuplication pass
750251662Sdim// (which assumes that duplicable instructions are affected by exec mask)
751251662Sdimlet usesCustomInserter = 1, isNotDuplicable = 1 in {
752249259Sdim
753249259Sdimclass ExportSwzInst : InstR600ISA<(
754249259Sdim    outs),
755249259Sdim    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
756249259Sdim    i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
757249259Sdim    i32imm:$eop),
758249259Sdim    !strconcat("EXPORT", " $gpr"),
759249259Sdim    []>, ExportWord0, ExportSwzWord1 {
760249259Sdim  let elem_size = 3;
761249259Sdim  let Inst{31-0} = Word0;
762249259Sdim  let Inst{63-32} = Word1;
763249259Sdim}
764249259Sdim
765249259Sdim} // End usesCustomInserter = 1
766249259Sdim
// Export instruction taking an array size and a component mask instead of
// per-channel swizzles.  It has no outputs, its 64-bit encoding is Word0
// (low half) followed by Word1 (high half), and elem_size is fixed to 0.
class ExportBufInst
    : InstR600ISA<(outs),
                  (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
                       i32imm:$arraySize, i32imm:$compMask, i32imm:$inst,
                       i32imm:$eop),
                  "EXPORT $gpr",
                  []>,
      ExportWord0, ExportBufWord1 {
  let elem_size = 0;
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}
777249259Sdim
778249259Sdim//===----------------------------------------------------------------------===//
779249259Sdim// Control Flow Instructions
780249259Sdim//===----------------------------------------------------------------------===//
781249259Sdim
// Low 32-bit word of a CF ALU-clause instruction.
//   [21:0]  ADDR
//   [25:22] KCACHE_BANK0
//   [29:26] KCACHE_BANK1
//   [31:30] KCACHE_MODE0
class CF_ALU_WORD0 {
  field bits<32> Word0;

  bits<22> ADDR;
  bits<4>  KCACHE_BANK0;
  bits<4>  KCACHE_BANK1;
  bits<2>  KCACHE_MODE0;

  let Word0{21-0}  = ADDR;
  let Word0{25-22} = KCACHE_BANK0;
  let Word0{29-26} = KCACHE_BANK1;
  let Word0{31-30} = KCACHE_MODE0;
}
795249259Sdim
// High 32-bit word of a CF ALU-clause instruction.
//   [1:0]   KCACHE_MODE1
//   [9:2]   KCACHE_ADDR0
//   [17:10] KCACHE_ADDR1
//   [24:18] COUNT
//   [25]    ALT_CONST
//   [29:26] CF_INST
//   [30]    WHOLE_QUAD_MODE
//   [31]    BARRIER
class CF_ALU_WORD1 {
  field bits<32> Word1;

  bits<2> KCACHE_MODE1;
  bits<8> KCACHE_ADDR0;
  bits<8> KCACHE_ADDR1;
  bits<7> COUNT;
  bits<1> ALT_CONST;
  bits<4> CF_INST;
  bits<1> WHOLE_QUAD_MODE;
  bits<1> BARRIER;

  let Word1{1-0}   = KCACHE_MODE1;
  let Word1{9-2}   = KCACHE_ADDR0;
  let Word1{17-10} = KCACHE_ADDR1;
  let Word1{24-18} = COUNT;
  let Word1{25}    = ALT_CONST;
  let Word1{29-26} = CF_INST;
  let Word1{30}    = WHOLE_QUAD_MODE;
  let Word1{31}    = BARRIER;
}
817249259Sdim
// Operand flag whose values are printed with printKCache.
def KCACHE : InstFlag<"printKCache">;

// CF instruction that starts an ALU clause.
//   inst   - value placed in the 4-bit CF_INST field.
//   OpName - mnemonic prepended to the operand list in the asm string.
// ALT_CONST and WHOLE_QUAD_MODE are cleared and BARRIER is set; all other
// fields of the two encoding words come from the operands of the same name.
class ALU_CLAUSE<bits<4> inst, string OpName>
    : AMDGPUInst<(outs),
                 (ins i32imm:$ADDR,
                      i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
                      KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
                      i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
                      i32imm:$COUNT),
                 !strconcat(OpName, " $COUNT, @$ADDR, KC0[$KCACHE_MODE0], "
                                    "KC1[$KCACHE_MODE1]"),
                 []>,
      CF_ALU_WORD0, CF_ALU_WORD1 {
  field bits<64> Inst;

  let CF_INST = inst;
  let ALT_CONST = 0;
  let WHOLE_QUAD_MODE = 0;
  let BARRIER = 1;

  // Word0 occupies the low half of the encoding, Word1 the high half.
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}
838249259Sdim
// Low 32-bit word of an R600 CF instruction: the whole word is ADDR.
class CF_WORD0_R600 {
  field bits<32> Word0;
  bits<32> ADDR;

  let Word0 = ADDR;
}
846251662Sdim
// High 32-bit word of an R600 CF instruction.
//   [2:0]   POP_COUNT
//   [7:3]   CF_CONST
//   [9:8]   COND
//   [12:10] COUNT
//   [18:13] CALL_COUNT
//   [19]    COUNT_3
//   [20]    (not assigned here)
//   [21]    END_OF_PROGRAM
//   [22]    VALID_PIXEL_MODE
//   [29:23] CF_INST
//   [30]    WHOLE_QUAD_MODE
//   [31]    BARRIER
class CF_WORD1_R600 {
  field bits<32> Word1;

  bits<3> POP_COUNT;
  bits<5> CF_CONST;
  bits<2> COND;
  bits<3> COUNT;
  bits<6> CALL_COUNT;
  bits<1> COUNT_3;
  bits<1> END_OF_PROGRAM;
  bits<1> VALID_PIXEL_MODE;
  bits<7> CF_INST;
  bits<1> WHOLE_QUAD_MODE;
  bits<1> BARRIER;

  let Word1{2-0}   = POP_COUNT;
  let Word1{7-3}   = CF_CONST;
  let Word1{9-8}   = COND;
  let Word1{12-10} = COUNT;
  let Word1{18-13} = CALL_COUNT;
  let Word1{19}    = COUNT_3;
  let Word1{21}    = END_OF_PROGRAM;
  let Word1{22}    = VALID_PIXEL_MODE;
  let Word1{29-23} = CF_INST;
  let Word1{30}    = WHOLE_QUAD_MODE;
  let Word1{31}    = BARRIER;
}
874251662Sdim
// Base class for R600 CF instructions.
//   inst     - value placed in the 7-bit CF_INST field.
//   ins      - input operand list of the concrete instruction.
//   AsmPrint - asm string of the concrete instruction.
// BARRIER is set; CF_CONST, VALID_PIXEL_MODE, COND, CALL_COUNT, COUNT_3,
// END_OF_PROGRAM and WHOLE_QUAD_MODE default to 0.  ADDR, COUNT and
// POP_COUNT are not given a value here.
class CF_CLAUSE_R600<bits<7> inst, dag ins, string AsmPrint>
    : AMDGPUInst<(outs), ins, AsmPrint, []>,
      CF_WORD0_R600, CF_WORD1_R600 {
  field bits<64> Inst;

  let CF_INST = inst;
  let BARRIER = 1;
  let CF_CONST = 0;
  let VALID_PIXEL_MODE = 0;
  let COND = 0;
  let CALL_COUNT = 0;
  let COUNT_3 = 0;
  let END_OF_PROGRAM = 0;
  let WHOLE_QUAD_MODE = 0;

  // Word0 occupies the low half of the encoding, Word1 the high half.
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}
892251662Sdim
// Low 32-bit word of an Evergreen CF instruction.
//   [23:0]  ADDR
//   [26:24] JUMPTABLE_SEL
class CF_WORD0_EG {
  field bits<32> Word0;

  bits<24> ADDR;
  bits<3>  JUMPTABLE_SEL;

  let Word0{23-0}  = ADDR;
  let Word0{26-24} = JUMPTABLE_SEL;
}
902249259Sdim
// High 32-bit word of an Evergreen CF instruction.
//   [2:0]   POP_COUNT
//   [7:3]   CF_CONST
//   [9:8]   COND
//   [15:10] COUNT
//   [20]    VALID_PIXEL_MODE
//   [21]    END_OF_PROGRAM
//   [29:22] CF_INST
//   [31]    BARRIER
// Bits 16-19 and 30 are not assigned here.
class CF_WORD1_EG {
  field bits<32> Word1;

  bits<3> POP_COUNT;
  bits<5> CF_CONST;
  bits<2> COND;
  bits<6> COUNT;
  bits<1> VALID_PIXEL_MODE;
  bits<1> END_OF_PROGRAM;
  bits<8> CF_INST;
  bits<1> BARRIER;

  let Word1{2-0}   = POP_COUNT;
  let Word1{7-3}   = CF_CONST;
  let Word1{9-8}   = COND;
  let Word1{15-10} = COUNT;
  let Word1{20}    = VALID_PIXEL_MODE;
  let Word1{21}    = END_OF_PROGRAM;
  let Word1{29-22} = CF_INST;
  let Word1{31}    = BARRIER;
}
924249259Sdim
// Base class for Evergreen CF instructions.
//   inst     - value placed in the 8-bit CF_INST field.
//   ins      - input operand list of the concrete instruction.
//   AsmPrint - asm string of the concrete instruction.
// BARRIER is set; JUMPTABLE_SEL, CF_CONST, VALID_PIXEL_MODE, COND and
// END_OF_PROGRAM default to 0.
class CF_CLAUSE_EG<bits<8> inst, dag ins, string AsmPrint>
    : AMDGPUInst<(outs), ins, AsmPrint, []>,
      CF_WORD0_EG, CF_WORD1_EG {
  field bits<64> Inst;

  let CF_INST = inst;
  let BARRIER = 1;
  let JUMPTABLE_SEL = 0;
  let CF_CONST = 0;
  let VALID_PIXEL_MODE = 0;
  let COND = 0;
  let END_OF_PROGRAM = 0;

  // Word0 occupies the low half of the encoding, Word1 the high half.
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}
940249259Sdim
// ALU clause starters: CF_INST 8 prints as "ALU", CF_INST 9 as
// "ALU_PUSH_BEFORE".
def CF_ALU             : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
943249259Sdim
// Marker printed as "Fetch clause starting at $addr:".  It takes the address
// as its only operand and has an 8-bit encoding equal to the `num` field.
def FETCH_CLAUSE
    : AMDGPUInst<(outs), (ins i32imm:$addr),
                 "Fetch clause starting at $addr:", []> {
  field bits<8> Inst;
  bits<8> num;

  let Inst = num;
}
950249259Sdim
// Marker printed as "ALU clause starting at $addr:".  It takes the address
// as its only operand and has an 8-bit encoding equal to the `num` field.
def ALU_CLAUSE
    : AMDGPUInst<(outs), (ins i32imm:$addr),
                 "ALU clause starting at $addr:", []> {
  field bits<8> Inst;
  bits<8> num;

  let Inst = num;
}
957249259Sdim
// Pair of 32-bit literals packed into one 64-bit word: literal1 in the low
// half, literal2 in the high half.
def LITERALS
    : AMDGPUInst<(outs), (ins LITERAL:$literal1, LITERAL:$literal2),
                 "$literal1, $literal2", []> {
  field bits<64> Inst;
  bits<32> literal1;
  bits<32> literal2;

  let Inst{31-0} = literal1;
  let Inst{63-32} = literal2;
}
967249259Sdim
// Operand-less instruction printed as "PAD"; declares a 64-bit encoding
// field without assigning any of its bits.
def PAD : AMDGPUInst<(outs), (ins), "PAD", []> {
  field bits<64> Inst;
}
971249259Sdim
972249259Sdimlet Predicates = [isR600toCayman] in {
973249259Sdim
974249259Sdim//===----------------------------------------------------------------------===//
975249259Sdim// Common Instructions R600, R700, Evergreen, Cayman
976249259Sdim//===----------------------------------------------------------------------===//
977249259Sdim
// Basic two-operand floating point ALU instructions.
def ADD      : R600_2OP_Helper<0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL      : R600_2OP_Helper<0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper<0x2, "MUL_IEEE", fmul>;
def MAX      : R600_2OP_Helper<0x3, "MAX", AMDGPUfmax>;
def MIN      : R600_2OP_Helper<0x4, "MIN", AMDGPUfmin>;
984249259Sdim
// For the SET* instructions there is a naming conflict in
// TargetSelectionDAG.td, so some of the instruction names don't match the
// asm string.
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
//
// Float compares producing a float result: FP_ONE when the condition holds,
// FP_ZERO otherwise.
def SETE : R600_2OP<0x08, "SETE",
  [(set f32:$dst,
        (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]>;

def SGT : R600_2OP<0x09, "SETGT",
  [(set f32:$dst,
        (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]>;

def SGE : R600_2OP<0xA, "SETGE",
  [(set f32:$dst,
        (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]>;

def SNE : R600_2OP<0xB, "SETNE",
  [(set f32:$dst,
        (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]>;
1007249259Sdim
// Float compares producing an integer result: -1 when the condition holds,
// 0 otherwise.
def SETE_DX10 : R600_2OP<0xC, "SETE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]>;

def SETGT_DX10 : R600_2OP<0xD, "SETGT_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]>;

def SETGE_DX10 : R600_2OP<0xE, "SETGE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]>;

def SETNE_DX10 : R600_2OP<0xF, "SETNE_DX10",
  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]>;
1027249259Sdim
// One-operand floating point ALU instructions.
def FRACT : R600_1OP_Helper<0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper<0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL  : R600_1OP_Helper<0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper<0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper<0x14, "FLOOR", ffloor>;

// MOV carries no selection pattern.
def MOV : R600_1OP<0x19, "MOV", []>;
1035249259Sdim
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {

// Pseudo instruction that places an immediate of operand type immType into a
// 32-bit register.  It has an empty asm string and no pattern of its own; the
// vt parameter is not referenced in the class body.
class MOV_IMM<ValueType vt, Operand immType>
    : AMDGPUInst<(outs R600_Reg32:$dst), (ins immType:$imm), "", []>;

} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
1046249259Sdim
// Integer immediates are selected to MOV_IMM_I32.
def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat<(imm:$val), (MOV_IMM_I32 imm:$val)>;

// Floating point immediates are selected to MOV_IMM_F32.
def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat<(fpimm:$val), (MOV_IMM_F32 fpimm:$val)>;
1058249259Sdim
// Predicate-setting float compares; none of them has a selection pattern.
def PRED_SETE  : R600_2OP<0x20, "PRED_SETE", []>;
def PRED_SETGT : R600_2OP<0x21, "PRED_SETGT", []>;
def PRED_SETGE : R600_2OP<0x22, "PRED_SETGE", []>;
def PRED_SETNE : R600_2OP<0x23, "PRED_SETNE", []>;

// KILLGT is marked as having side effects.
let hasSideEffects = 1 in {

def KILLGT : R600_2OP<0x2D, "KILLGT", []>;

} // end hasSideEffects
1069249259Sdim
// Integer bitwise, arithmetic and min/max instructions.
def AND_INT  : R600_2OP_Helper<0x30, "AND_INT", and>;
def OR_INT   : R600_2OP_Helper<0x31, "OR_INT", or>;
def XOR_INT  : R600_2OP_Helper<0x32, "XOR_INT", xor>;
def NOT_INT  : R600_1OP_Helper<0x33, "NOT_INT", not>;
def ADD_INT  : R600_2OP_Helper<0x34, "ADD_INT", add>;
def SUB_INT  : R600_2OP_Helper<0x35, "SUB_INT", sub>;
def MAX_INT  : R600_2OP_Helper<0x36, "MAX_INT", AMDGPUsmax>;
def MIN_INT  : R600_2OP_Helper<0x37, "MIN_INT", AMDGPUsmin>;
def MAX_UINT : R600_2OP_Helper<0x38, "MAX_UINT", AMDGPUumax>;
def MIN_UINT : R600_2OP_Helper<0x39, "MIN_UINT", AMDGPUumin>;
1080249259Sdim
// Integer compares producing -1 when the condition holds and 0 otherwise.
// The *_UINT variants use the unsigned condition codes.
def SETE_INT : R600_2OP<0x3A, "SETE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]>;

def SETGT_INT : R600_2OP<0x3B, "SETGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]>;

def SETGE_INT : R600_2OP<0x3C, "SETGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]>;

def SETNE_INT : R600_2OP<0x3D, "SETNE_INT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]>;

def SETGT_UINT : R600_2OP<0x3E, "SETGT_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]>;

def SETGE_UINT : R600_2OP<0x3F, "SETGE_UINT",
  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]>;
1110249259Sdim
// Predicate-setting integer compares; none of them has a selection pattern.
// Each mnemonic matches its record name so that opcodes 0x43 and 0x44 can be
// told apart in printed assembly.
def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
1115249259Sdim
// Integer conditional moves: compare $src0 against 0 and pick $src1 when the
// condition holds, $src2 otherwise.
def CNDE_INT : R600_3OP<0x1C, "CNDE_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]>;

def CNDGE_INT : R600_3OP<0x1E, "CNDGE_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]>;

def CNDGT_INT : R600_3OP<0x1D, "CNDGT_INT",
  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]>;
1130249259Sdim
1131249259Sdim//===----------------------------------------------------------------------===//
1132249259Sdim// Texture instructions
1133249259Sdim//===----------------------------------------------------------------------===//
1134249259Sdim
// Texel fetch selected from int_AMDGPU_txf.  Unlike the other texture
// instructions defined alongside it, it overrides AsmString and InOperandList
// to add the three offset operands.
// NOTE: the two string literals of AsmString are concatenated as written, so
// no space follows the comma after $OFFSET_Z in the printed text.
def TEX_LD : R600_TEX<0x03, "TEX_LD",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txf v4f32:$SRC_GPR,
                        imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z,
                        imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]> {
  let AsmString =
      "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
      "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
  let InOperandList =
      (ins R600_Reg128:$SRC_GPR,
           i32imm:$OFFSET_X, i32imm:$OFFSET_Y, i32imm:$OFFSET_Z,
           i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget);
}
1147249259Sdim
// Resource-info query, selected from int_AMDGPU_txq.
def TEX_GET_TEXTURE_RESINFO : R600_TEX<0x04, "TEX_GET_TEXTURE_RESINFO",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txq v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

// Horizontal gradient query, selected from int_AMDGPU_ddx.
def TEX_GET_GRADIENTS_H : R600_TEX<0x07, "TEX_GET_GRADIENTS_H",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_ddx v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

// Vertical gradient query, selected from int_AMDGPU_ddy.
def TEX_GET_GRADIENTS_V : R600_TEX<0x08, "TEX_GET_GRADIENTS_V",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_ddy v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;
1165249259Sdim
// Gradient setters; neither has a selection pattern.
def TEX_SET_GRADIENTS_H : R600_TEX<0x0B, "TEX_SET_GRADIENTS_H", []>;

def TEX_SET_GRADIENTS_V : R600_TEX<0x0C, "TEX_SET_GRADIENTS_V", []>;
1175249259Sdim
// Sampling instructions.  Each intrinsic (tex, txl, txb) selects to a plain
// variant when the texture target is matched with `imm`, and to the _C
// variant when it is matched with TEX_SHADOW.
def TEX_SAMPLE : R600_TEX<0x10, "TEX_SAMPLE",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_tex v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

def TEX_SAMPLE_C : R600_TEX<0x18, "TEX_SAMPLE_C",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_tex v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        TEX_SHADOW:$textureTarget))]>;

def TEX_SAMPLE_L : R600_TEX<0x11, "TEX_SAMPLE_L",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txl v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

def TEX_SAMPLE_C_L : R600_TEX<0x19, "TEX_SAMPLE_C_L",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txl v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        TEX_SHADOW:$textureTarget))]>;

def TEX_SAMPLE_LB : R600_TEX<0x12, "TEX_SAMPLE_LB",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txb v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        imm:$textureTarget))]>;

def TEX_SAMPLE_C_LB : R600_TEX<0x1A, "TEX_SAMPLE_C_LB",
  [(set v4f32:$DST_GPR,
        (int_AMDGPU_txb v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID,
                        TEX_SHADOW:$textureTarget))]>;
1211249259Sdim
// Gradient sampling variants; neither has a selection pattern.
def TEX_SAMPLE_G : R600_TEX<0x14, "TEX_SAMPLE_G", []>;

def TEX_SAMPLE_C_G : R600_TEX<0x1C, "TEX_SAMPLE_C_G", []>;
1221249259Sdim
1222249259Sdim//===----------------------------------------------------------------------===//
1223249259Sdim// Helper classes for common instructions
1224249259Sdim//===----------------------------------------------------------------------===//
1225249259Sdim
// Three-operand instruction classes shared between hardware generations.
// The opcode is supplied by the per-generation definition.

// MUL_LIT and MULADD have no selection pattern.
class MUL_LIT_Common<bits<5> inst> : R600_3OP<inst, "MUL_LIT", []>;

class MULADD_Common<bits<5> inst> : R600_3OP<inst, "MULADD", []>;

// Selected for (src0 * src1) + src2 built from fmul and fadd.
class MULADD_IEEE_Common<bits<5> inst> : R600_3OP<inst, "MULADD_IEEE",
  [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]>;

// Float conditional moves: compare $src0 against FP_ZERO and pick $src1 when
// the condition holds, $src2 otherwise.
class CNDE_Common<bits<5> inst> : R600_3OP<inst, "CNDE",
  [(set f32:$dst,
        (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]>;

class CNDGT_Common<bits<5> inst> : R600_3OP<inst, "CNDGT",
  [(set f32:$dst,
        (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]>;

class CNDGE_Common<bits<5> inst> : R600_3OP<inst, "CNDGE",
  [(set f32:$dst,
        (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]>;
1255249259Sdim
// DOT4 comes as a pair of records sharing one opcode:
//   _pseudo - reduction over two 128-bit registers, selected from
//             int_AMDGPU_dp4;
//   _real   - plain two-operand instruction with no pattern.
multiclass DOT4_Common<bits<11> inst> {

  def _pseudo : R600_REDUCTION<inst,
                               (ins R600_Reg128:$src0, R600_Reg128:$src1),
                               "DOT4 $dst $src0, $src1",
                               [(set f32:$dst,
                                     (int_AMDGPU_dp4 v4f32:$src0,
                                                     v4f32:$src1))]>;

  def _real : R600_2OP<inst, "DOT4", []>;
}
1266249259Sdim
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// CUBE comes as a pair of records:
//   _pseudo - pseudo instruction on a 128-bit register, selected from
//             int_AMDGPU_cube and using the VecALU itinerary;
//   _real   - plain two-operand instruction carrying the opcode, with no
//             pattern.
multiclass CUBE_Common<bits<11> inst> {

  def _pseudo : InstR600<(outs R600_Reg128:$dst),
                         (ins R600_Reg128:$src),
                         "CUBE $dst $src",
                         [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))],
                         VecALU> {
    let isPseudo = 1;
  }

  def _real : R600_2OP<inst, "CUBE", []>;
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
1283249259Sdim
// One-operand classes that are flagged TransOnly and use the TransALU
// itinerary.  The opcode is supplied by the per-generation definition.

// Selected from fexp2.
class EXP_IEEE_Common<bits<11> inst>
    : R600_1OP_Helper<inst, "EXP_IEEE", fexp2> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected from fp_to_sint.
class FLT_TO_INT_Common<bits<11> inst>
    : R600_1OP_Helper<inst, "FLT_TO_INT", fp_to_sint> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected from sint_to_fp.
class INT_TO_FLT_Common<bits<11> inst>
    : R600_1OP_Helper<inst, "INT_TO_FLT", sint_to_fp> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected from fp_to_uint.
class FLT_TO_UINT_Common<bits<11> inst>
    : R600_1OP_Helper<inst, "FLT_TO_UINT", fp_to_uint> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected from uint_to_fp.
class UINT_TO_FLT_Common<bits<11> inst>
    : R600_1OP_Helper<inst, "UINT_TO_FLT", uint_to_fp> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}
1318249259Sdim
// LOG_CLAMPED has no selection pattern and keeps the default TransOnly and
// itinerary settings of R600_1OP.
class LOG_CLAMPED_Common<bits<11> inst> : R600_1OP<inst, "LOG_CLAMPED", []>;

// LOG_IEEE is selected from flog2, is flagged TransOnly and uses the TransALU
// itinerary.
class LOG_IEEE_Common<bits<11> inst>
    : R600_1OP_Helper<inst, "LOG_IEEE", flog2> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}
1329249259Sdim
// Shifts, selected from shl / srl / sra.
class LSHL_Common<bits<11> inst> : R600_2OP_Helper<inst, "LSHL", shl>;
class LSHR_Common<bits<11> inst> : R600_2OP_Helper<inst, "LSHR", srl>;
class ASHR_Common<bits<11> inst> : R600_2OP_Helper<inst, "ASHR", sra>;

// Signed multiply-high, selected from mulhs; flagged TransOnly and using the
// TransALU itinerary.
class MULHI_INT_Common<bits<11> inst>
    : R600_2OP_Helper<inst, "MULHI_INT", mulhs> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}
// Unsigned multiply-high, selected from mulhu; flagged TransOnly and using
// the TransALU itinerary.  The mnemonic carries the _UINT suffix so that it
// agrees with the record name and with the sibling "MULHI_INT" and
// "MULLO_UINT" mnemonics.
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
  inst, "MULHI_UINT", mulhu
> {
  let TransOnly = 1;
  let Itinerary = TransALU;
}
// Multiply-low classes; both are flagged TransOnly and use the TransALU
// itinerary.

// Selected from mul.
class MULLO_INT_Common<bits<11> inst>
    : R600_2OP_Helper<inst, "MULLO_INT", mul> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// No selection pattern.
class MULLO_UINT_Common<bits<11> inst>
    : R600_2OP<inst, "MULLO_UINT", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}
1355249259Sdim
// Reciprocal and reciprocal-square-root classes.  All of them are flagged
// TransOnly and use the TransALU itinerary.

// No selection pattern.
class RECIP_CLAMPED_Common<bits<11> inst>
    : R600_1OP<inst, "RECIP_CLAMPED", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected for a division of FP_ONE by $src0.
class RECIP_IEEE_Common<bits<11> inst>
    : R600_1OP<inst, "RECIP_IEEE",
               [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected from AMDGPUurecip.
class RECIP_UINT_Common<bits<11> inst>
    : R600_1OP_Helper<inst, "RECIP_UINT", AMDGPUurecip> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// Selected from int_AMDGPU_rsq.
class RECIPSQRT_CLAMPED_Common<bits<11> inst>
    : R600_1OP_Helper<inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}

// No selection pattern.
class RECIPSQRT_IEEE_Common<bits<11> inst>
    : R600_1OP<inst, "RECIPSQRT_IEEE", []> {
  let Itinerary = TransALU;
  let TransOnly = 1;
}
1390249259Sdim
// SIN and COS carry no selection pattern of their own.  Both set the Trig
// and TransOnly flags and use the TransALU itinerary.
class SIN_Common<bits<11> inst> : R600_1OP<inst, "SIN", []> {
  let Trig = 1;
  let Itinerary = TransALU;
  let TransOnly = 1;
}

class COS_Common<bits<11> inst> : R600_1OP<inst, "COS", []> {
  let Trig = 1;
  let Itinerary = TransALU;
  let TransOnly = 1;
}
1404249259Sdim
1405249259Sdim//===----------------------------------------------------------------------===//
1406249259Sdim// Helper patterns for complex intrinsics
1407249259Sdim//===----------------------------------------------------------------------===//
1408249259Sdim
// Division patterns: both int_AMDGPU_div and fdiv on f32 are selected as
// MUL_IEEE of the numerator with recip_ieee applied to the denominator.
multiclass DIV_Common<InstR600 recip_ieee> {
  def : Pat<(int_AMDGPU_div f32:$src0, f32:$src1),
            (MUL_IEEE $src0, (recip_ieee $src1))>;

  def : Pat<(fdiv f32:$src0, f32:$src1),
            (MUL_IEEE $src0, (recip_ieee $src1))>;
}
1420249259Sdim
// Pattern for int_TGSI_lit_z, selected as
//   exp_ieee(mul_lit(log_clamped(MAX(src_y, ZERO)), src_w, src_x))
// using the three instructions supplied as parameters.
class TGSI_LIT_Z_Common<InstR600 mul_lit, InstR600 log_clamped,
                        InstR600 exp_ieee>
    : Pat<(int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
          (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))),
                             $src_w, $src_x))>;
1426249259Sdim
1427249259Sdim//===----------------------------------------------------------------------===//
1428249259Sdim// R600 / R700 Instructions
1429249259Sdim//===----------------------------------------------------------------------===//
1430249259Sdim
let Predicates = [isR600] in {

  // ALU instructions: each record binds a shared *_Common class to its
  // opcode for this predicate.
  def MUL_LIT_r600           : MUL_LIT_Common<0x0C>;
  def MULADD_r600            : MULADD_Common<0x10>;
  def MULADD_IEEE_r600       : MULADD_IEEE_Common<0x14>;
  def CNDE_r600              : CNDE_Common<0x18>;
  def CNDGT_r600             : CNDGT_Common<0x19>;
  def CNDGE_r600             : CNDGE_Common<0x1A>;
  defm DOT4_r600             : DOT4_Common<0x50>;
  defm CUBE_r600             : CUBE_Common<0x52>;
  def EXP_IEEE_r600          : EXP_IEEE_Common<0x61>;
  def LOG_CLAMPED_r600       : LOG_CLAMPED_Common<0x62>;
  def LOG_IEEE_r600          : LOG_IEEE_Common<0x63>;
  def RECIP_CLAMPED_r600     : RECIP_CLAMPED_Common<0x64>;
  def RECIP_IEEE_r600        : RECIP_IEEE_Common<0x66>;
  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  def RECIPSQRT_IEEE_r600    : RECIPSQRT_IEEE_Common<0x69>;
  def FLT_TO_INT_r600        : FLT_TO_INT_Common<0x6b>;
  def INT_TO_FLT_r600        : INT_TO_FLT_Common<0x6c>;
  def FLT_TO_UINT_r600       : FLT_TO_UINT_Common<0x79>;
  def UINT_TO_FLT_r600       : UINT_TO_FLT_Common<0x6d>;
  def SIN_r600               : SIN_Common<0x6E>;
  def COS_r600               : COS_Common<0x6F>;
  def ASHR_r600              : ASHR_Common<0x70>;
  def LSHR_r600              : LSHR_Common<0x71>;
  def LSHL_r600              : LSHL_Common<0x72>;
  def MULLO_INT_r600         : MULLO_INT_Common<0x73>;
  def MULHI_INT_r600         : MULHI_INT_Common<0x74>;
  def MULLO_UINT_r600        : MULLO_UINT_Common<0x75>;
  def MULHI_UINT_r600        : MULHI_UINT_Common<0x76>;
  def RECIP_UINT_r600        : RECIP_UINT_Common<0x78>;

  // Composite patterns built from the instructions above.
  defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  def : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
  def TGSI_LIT_Z_r600
      : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

  // fsqrt is selected as src * RECIPSQRT_CLAMPED(src).
  def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;

  // Export instructions: both fill in the upper bits of Word1 the same way.
  def R600_ExportSwz : ExportSwzInst {
    let Word1{20-17} = 0;    // BURST_COUNT
    let Word1{21}    = eop;
    let Word1{22}    = 1;    // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31}    = 1;    // BARRIER
  }
  defm : ExportPattern<R600_ExportSwz, 39>;

  def R600_ExportBuf : ExportBufInst {
    let Word1{20-17} = 0;    // BURST_COUNT
    let Word1{21}    = eop;
    let Word1{22}    = 1;    // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31}    = 1;    // BARRIER
  }
  defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;

  // Control flow instructions.  Every field that is not an operand of a
  // given instruction is set to a constant in its body.

  // Clause starters: ADDR and COUNT are operands.
  def CF_TC_R600
      : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
                       "TEX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }
  def CF_VC_R600
      : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
                       "VTX $COUNT @$ADDR"> {
    let POP_COUNT = 0;
  }

  // Loop control: only ADDR is an operand.
  def WHILE_LOOP_R600
      : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def END_LOOP_R600
      : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def LOOP_BREAK_R600
      : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }
  def CF_CONTINUE_R600
      : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
    let POP_COUNT = 0;
    let COUNT = 0;
  }

  // Branching: ADDR and POP_COUNT are operands.
  def CF_JUMP_R600
      : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                       "JUMP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }
  def CF_ELSE_R600
      : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                       "ELSE @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }

  // CALL_FS takes no operands.
  def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
    let ADDR = 0;
    let COUNT = 0;
    let POP_COUNT = 0;
  }

  def POP_R600
      : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
                       "POP @$ADDR POP:$POP_COUNT"> {
    let COUNT = 0;
  }

  // CF_END takes no operands and is the only record here that sets
  // END_OF_PROGRAM.
  def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
    let COUNT = 0;
    let POP_COUNT = 0;
    let ADDR = 0;
    let END_OF_PROGRAM = 1;
  }

}
1539249259Sdim
// Helper patterns for normalizing inputs to trigonometric instructions for
// R700+ cards: fcos / fsin are selected as `trig` applied to the source
// multiplied (MUL_IEEE) by CONST.TWO_PI_INV.
// NOTE(review): the constant is materialized with MOV_IMM_I32 although it
// feeds a float multiply; presumably CONST.TWO_PI_INV holds the raw bit
// pattern of the float value -- confirm against its definition.
class COS_PAT<InstR600 trig>
    : Pat<(fcos f32:$src),
          (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))>;

class SIN_PAT<InstR600 trig>
    : Pat<(fsin f32:$src),
          (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))>;
1551249259Sdim
1552249259Sdim//===----------------------------------------------------------------------===//
1553249259Sdim// R700 Only instructions
1554249259Sdim//===----------------------------------------------------------------------===//
1555249259Sdim
1556249259Sdimlet Predicates = [isR700] in {
1557249259Sdim  def SIN_r700 : SIN_Common<0x6E>;
1558249259Sdim  def COS_r700 : COS_Common<0x6F>;
1559249259Sdim
1560249259Sdim  // R700 normalizes inputs to SIN/COS the same as EG
1561249259Sdim  def : SIN_PAT <SIN_r700>;
1562249259Sdim  def : COS_PAT <COS_r700>;
1563249259Sdim}
1564249259Sdim
1565249259Sdim//===----------------------------------------------------------------------===//
1566249259Sdim// Evergreen Only instructions
1567249259Sdim//===----------------------------------------------------------------------===//
1568249259Sdim
1569249259Sdimlet Predicates = [isEG] in {
1570249259Sdim
1571249259Sdimdef RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
1572249259Sdimdefm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
1573249259Sdim
1574249259Sdimdef MULLO_INT_eg : MULLO_INT_Common<0x8F>;
1575249259Sdimdef MULHI_INT_eg : MULHI_INT_Common<0x90>;
1576249259Sdimdef MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
1577249259Sdimdef MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
1578249259Sdimdef RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
1579249259Sdimdef RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
1580249259Sdimdef EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
1581249259Sdimdef LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
1582249259Sdimdef RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
1583249259Sdimdef RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
1584249259Sdimdef SIN_eg : SIN_Common<0x8D>;
1585249259Sdimdef COS_eg : COS_Common<0x8E>;
1586249259Sdim
1587251662Sdimdef : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
1588249259Sdimdef : SIN_PAT <SIN_eg>;
1589249259Sdimdef : COS_PAT <COS_eg>;
1590251662Sdimdef : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
1591249259Sdim} // End Predicates = [isEG]
1592249259Sdim
1593249259Sdim//===----------------------------------------------------------------------===//
1594249259Sdim// Evergreen / Cayman Instructions
1595249259Sdim//===----------------------------------------------------------------------===//
1596249259Sdim
1597249259Sdimlet Predicates = [isEGorCayman] in {
1598249259Sdim
1599249259Sdim  // BFE_UINT - bit_extract, an optimization for mask and shift
1600249259Sdim  // Src0 = Input
1601249259Sdim  // Src1 = Offset
1602249259Sdim  // Src2 = Width
1603249259Sdim  //
1604249259Sdim  // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
1605249259Sdim  //
1606249259Sdim  // Example Usage:
1607249259Sdim  // (Offset, Width)
1608249259Sdim  //
1609249259Sdim  // (0, 8)           = (Input << 24) >> 24  = (Input &  0xff)       >> 0
1610249259Sdim  // (8, 8)           = (Input << 16) >> 24  = (Input &  0xffff)     >> 8
1611249259Sdim  // (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16
1612249259Sdim  // (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24
1613249259Sdim  def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
1614251662Sdim    [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
1615251662Sdim                                               i32:$src2))],
1616249259Sdim    VecALU
1617249259Sdim  >;
1618251662Sdim  def : BFEPattern <BFE_UINT_eg>;
1619249259Sdim
1620251662Sdim  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
1621251662Sdim  defm : BFIPatterns <BFI_INT_eg>;
1622251662Sdim
1623249259Sdim  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
1624251662Sdim    [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))],
1625249259Sdim    VecALU
1626249259Sdim  >;
1627249259Sdim
1628249259Sdim  def MULADD_eg : MULADD_Common<0x14>;
1629249259Sdim  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
1630249259Sdim  def ASHR_eg : ASHR_Common<0x15>;
1631249259Sdim  def LSHR_eg : LSHR_Common<0x16>;
1632249259Sdim  def LSHL_eg : LSHL_Common<0x17>;
1633249259Sdim  def CNDE_eg : CNDE_Common<0x19>;
1634249259Sdim  def CNDGT_eg : CNDGT_Common<0x1A>;
1635249259Sdim  def CNDGE_eg : CNDGE_Common<0x1B>;
1636249259Sdim  def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
1637249259Sdim  def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
1638249259Sdim  defm DOT4_eg : DOT4_Common<0xBE>;
1639249259Sdim  defm CUBE_eg : CUBE_Common<0xC0>;
1640249259Sdim
1641249259Sdimlet hasSideEffects = 1 in {
1642249259Sdim  def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
1643249259Sdim}
1644249259Sdim
1645249259Sdim  def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
1646249259Sdim
1647249259Sdim  def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
1648249259Sdim    let Pattern = [];
1649249259Sdim  }
1650249259Sdim
1651249259Sdim  def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
1652249259Sdim
1653249259Sdim  def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
1654249259Sdim    let Pattern = [];
1655249259Sdim  }
1656249259Sdim
1657249259Sdim  def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
1658249259Sdim
1659249259Sdim  // TRUNC is used for the FLT_TO_INT instructions to work around a
1660249259Sdim  // perceived problem where the rounding modes are applied differently
1661249259Sdim  // depending on the instruction and the slot they are in.
1662249259Sdim  // See:
1663249259Sdim  // https://bugs.freedesktop.org/show_bug.cgi?id=50232
1664249259Sdim  // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
1665249259Sdim  //
1666249259Sdim  // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
1667249259Sdim  // which do not need to be truncated since the fp values are 0.0f or 1.0f.
1668249259Sdim  // We should look into handling these cases separately.
1669251662Sdim  def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
1670249259Sdim
1671251662Sdim  def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
1672249259Sdim
1673251662Sdim  // SHA-256 Patterns
1674251662Sdim  def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
1675251662Sdim
1676249259Sdim  def EG_ExportSwz : ExportSwzInst {
1677251662Sdim    let Word1{19-16} = 0; // BURST_COUNT
1678249259Sdim    let Word1{20} = 1; // VALID_PIXEL_MODE
1679249259Sdim    let Word1{21} = eop;
1680249259Sdim    let Word1{29-22} = inst;
1681249259Sdim    let Word1{30} = 0; // MARK
1682249259Sdim    let Word1{31} = 1; // BARRIER
1683249259Sdim  }
1684249259Sdim  defm : ExportPattern<EG_ExportSwz, 83>;
1685249259Sdim
1686249259Sdim  def EG_ExportBuf : ExportBufInst {
1687251662Sdim    let Word1{19-16} = 0; // BURST_COUNT
1688249259Sdim    let Word1{20} = 1; // VALID_PIXEL_MODE
1689249259Sdim    let Word1{21} = eop;
1690249259Sdim    let Word1{29-22} = inst;
1691249259Sdim    let Word1{30} = 0; // MARK
1692249259Sdim    let Word1{31} = 1; // BARRIER
1693249259Sdim  }
1694249259Sdim  defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
1695249259Sdim
1696251662Sdim  def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
1697251662Sdim  "TEX $COUNT @$ADDR"> {
1698251662Sdim    let POP_COUNT = 0;
1699251662Sdim  }
1700251662Sdim  def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
1701251662Sdim  "VTX $COUNT @$ADDR"> {
1702251662Sdim    let POP_COUNT = 0;
1703251662Sdim  }
1704251662Sdim  def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
1705251662Sdim  "LOOP_START_DX10 @$ADDR"> {
1706251662Sdim    let POP_COUNT = 0;
1707251662Sdim    let COUNT = 0;
1708251662Sdim  }
1709251662Sdim  def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
1710251662Sdim    let POP_COUNT = 0;
1711251662Sdim    let COUNT = 0;
1712251662Sdim  }
1713251662Sdim  def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
1714251662Sdim  "LOOP_BREAK @$ADDR"> {
1715251662Sdim    let POP_COUNT = 0;
1716251662Sdim    let COUNT = 0;
1717251662Sdim  }
1718251662Sdim  def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
1719251662Sdim  "CONTINUE @$ADDR"> {
1720251662Sdim    let POP_COUNT = 0;
1721251662Sdim    let COUNT = 0;
1722251662Sdim  }
1723251662Sdim  def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1724251662Sdim  "JUMP @$ADDR POP:$POP_COUNT"> {
1725251662Sdim    let COUNT = 0;
1726251662Sdim  }
1727251662Sdim  def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1728251662Sdim  "ELSE @$ADDR POP:$POP_COUNT"> {
1729251662Sdim    let COUNT = 0;
1730251662Sdim  }
1731251662Sdim  def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
1732251662Sdim    let ADDR = 0;
1733251662Sdim    let COUNT = 0;
1734251662Sdim    let POP_COUNT = 0;
1735251662Sdim  }
1736251662Sdim  def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1737251662Sdim  "POP @$ADDR POP:$POP_COUNT"> {
1738251662Sdim    let COUNT = 0;
1739251662Sdim  }
1740251662Sdim  def CF_END_EG :  CF_CLAUSE_EG<0, (ins), "CF_END"> {
1741251662Sdim    let COUNT = 0;
1742251662Sdim    let POP_COUNT = 0;
1743251662Sdim    let ADDR = 0;
1744251662Sdim    let END_OF_PROGRAM = 1;
1745251662Sdim  }
1746251662Sdim
1747249259Sdim//===----------------------------------------------------------------------===//
1748249259Sdim// Memory read/write instructions
1749249259Sdim//===----------------------------------------------------------------------===//
1750249259Sdimlet usesCustomInserter = 1 in {
1751249259Sdim
1752249259Sdimclass RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
1753249259Sdim                              list<dag> pattern>
1754249259Sdim    : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
1755249259Sdim                 !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
1756249259Sdim  let RIM         = 0;
1757249259Sdim  // XXX: Have a separate instruction for non-indexed writes.
1758249259Sdim  let TYPE        = 1;
1759249259Sdim  let RW_REL      = 0;
1760249259Sdim  let ELEM_SIZE   = 0;
1761249259Sdim
1762249259Sdim  let ARRAY_SIZE  = 0;
1763249259Sdim  let COMP_MASK   = comp_mask;
1764249259Sdim  let BURST_COUNT = 0;
1765249259Sdim  let VPM         = 0;
1766249259Sdim  let MARK        = 0;
1767249259Sdim  let BARRIER     = 1;
1768249259Sdim}
1769249259Sdim
1770249259Sdim} // End usesCustomInserter = 1
1771249259Sdim
1772249259Sdim// 32-bit store
1773249259Sdimdef RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
1774249259Sdim  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
1775249259Sdim  0x1, "RAT_WRITE_CACHELESS_32_eg",
1776251662Sdim  [(global_store i32:$rw_gpr, i32:$index_gpr)]
1777249259Sdim>;
1778249259Sdim
1779249259Sdim// 128-bit store
1780249259Sdimdef RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
1781249259Sdim  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
1782249259Sdim  0xf, "RAT_WRITE_CACHELESS_128",
1783251662Sdim  [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
1784249259Sdim>;
1785249259Sdim
1786249259Sdimclass VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
1787249259Sdim    : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
1788249259Sdim      VTX_WORD1_GPR, VTX_WORD0 {
1789249259Sdim
1790249259Sdim  // Static fields
1791249259Sdim  let VC_INST = 0;
1792249259Sdim  let FETCH_TYPE = 2;
1793249259Sdim  let FETCH_WHOLE_QUAD = 0;
1794249259Sdim  let BUFFER_ID = buffer_id;
1795249259Sdim  let SRC_REL = 0;
1796249259Sdim  // XXX: We can infer this field based on the SRC_GPR.  This would allow us
1797249259Sdim  // to store vertex addresses in any channel, not just X.
1798249259Sdim  let SRC_SEL_X = 0;
1799249259Sdim  let DST_REL = 0;
1800249259Sdim  // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
1801249259Sdim  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
1802249259Sdim  // however, based on my testing if USE_CONST_FIELDS is set, then all
1803249259Sdim  // these fields need to be set to 0.
1804249259Sdim  let USE_CONST_FIELDS = 0;
1805249259Sdim  let NUM_FORMAT_ALL = 1;
1806249259Sdim  let FORMAT_COMP_ALL = 0;
1807249259Sdim  let SRF_MODE_ALL = 0;
1808249259Sdim
1809249259Sdim  let Inst{31-0} = Word0;
1810249259Sdim  let Inst{63-32} = Word1;
1811249259Sdim  // LLVM can only encode 64-bit instructions, so these fields are manually
1812249259Sdim  // encoded in R600CodeEmitter
1813249259Sdim  //
1814249259Sdim  // bits<16> OFFSET;
1815249259Sdim  // bits<2>  ENDIAN_SWAP = 0;
1816249259Sdim  // bits<1>  CONST_BUF_NO_STRIDE = 0;
1817249259Sdim  // bits<1>  MEGA_FETCH = 0;
1818249259Sdim  // bits<1>  ALT_CONST = 0;
1819249259Sdim  // bits<2>  BUFFER_INDEX_MODE = 0;
1820249259Sdim
1821249259Sdim
1822249259Sdim
1823249259Sdim  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
1824249259Sdim  // is done in R600CodeEmitter)
1825249259Sdim  //
1826249259Sdim  // Inst{79-64} = OFFSET;
1827249259Sdim  // Inst{81-80} = ENDIAN_SWAP;
1828249259Sdim  // Inst{82}    = CONST_BUF_NO_STRIDE;
1829249259Sdim  // Inst{83}    = MEGA_FETCH;
1830249259Sdim  // Inst{84}    = ALT_CONST;
1831249259Sdim  // Inst{86-85} = BUFFER_INDEX_MODE;
1832249259Sdim  // Inst{95-87} = 0; Reserved (bit 86 belongs to BUFFER_INDEX_MODE)
1833249259Sdim
1834249259Sdim  // VTX_WORD3 (Padding)
1835249259Sdim  //
1836249259Sdim  // Inst{127-96} = 0;
1837251662Sdim
1838251662Sdim  let VTXInst = 1;
1839249259Sdim}
1840249259Sdim
1841249259Sdimclass VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
1842249259Sdim    : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
1843249259Sdim                   pattern> {
1844249259Sdim
1845249259Sdim  let MEGA_FETCH_COUNT = 1;
1846249259Sdim  let DST_SEL_X = 0;
1847249259Sdim  let DST_SEL_Y = 7;   // Masked
1848249259Sdim  let DST_SEL_Z = 7;   // Masked
1849249259Sdim  let DST_SEL_W = 7;   // Masked
1850249259Sdim  let DATA_FORMAT = 1; // FMT_8
1851249259Sdim}
1852249259Sdim
1853249259Sdimclass VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
1854249259Sdim    : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
1855249259Sdim                    pattern> {
1856249259Sdim  let MEGA_FETCH_COUNT = 2;
1857249259Sdim  let DST_SEL_X = 0;
1858249259Sdim  let DST_SEL_Y = 7;   // Masked
1859249259Sdim  let DST_SEL_Z = 7;   // Masked
1860249259Sdim  let DST_SEL_W = 7;   // Masked
1861249259Sdim  let DATA_FORMAT = 5; // FMT_16
1862249259Sdim
1863249259Sdim}
1864249259Sdim
1865249259Sdimclass VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
1866249259Sdim    : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
1867249259Sdim                   pattern> {
1868249259Sdim
1869249259Sdim  let MEGA_FETCH_COUNT = 4;
1870249259Sdim  let DST_SEL_X        = 0;
1871249259Sdim  let DST_SEL_Y        = 7;   // Masked
1872249259Sdim  let DST_SEL_Z        = 7;   // Masked
1873249259Sdim  let DST_SEL_W        = 7;   // Masked
1874249259Sdim  let DATA_FORMAT      = 0xD; // COLOR_32
1875249259Sdim
1876249259Sdim  // This is not really necessary, but there were some GPU hangs that appeared
1877249259Sdim  // to be caused by ALU instructions in the next instruction group that wrote
1878249259Sdim  // to the $ptr registers of the VTX_READ.
1879249259Sdim  // e.g.
1880249259Sdim  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
1881249259Sdim  // %T2_X<def> = MOV %ZERO
1882249259Sdim  // Tying $dst to $ptr.ptr with this constraint prevents that from happening.
1883249259Sdim  let Constraints = "$ptr.ptr = $dst";
1884249259Sdim}
1885249259Sdim
1886249259Sdimclass VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
1887249259Sdim    : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
1888249259Sdim                   pattern> {
1889249259Sdim
1890249259Sdim  let MEGA_FETCH_COUNT = 16;
1891249259Sdim  let DST_SEL_X        =  0;
1892249259Sdim  let DST_SEL_Y        =  1;
1893249259Sdim  let DST_SEL_Z        =  2;
1894249259Sdim  let DST_SEL_W        =  3;
1895249259Sdim  let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32
1896249259Sdim
1897249259Sdim  // XXX: Need to force VTX_READ_128 instructions to write to the same register
1898249259Sdim  // that holds its buffer address to avoid potential hangs.  We can't use
1899249259Sdim  // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
1900249259Sdim  // registers are different sizes.
1901249259Sdim}
1902249259Sdim
1903249259Sdim//===----------------------------------------------------------------------===//
1904249259Sdim// VTX Read from parameter memory space
1905249259Sdim//===----------------------------------------------------------------------===//
1906249259Sdim
1907249259Sdimdef VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
1908251662Sdim  [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))]
1909249259Sdim>;
1910249259Sdim
1911249259Sdimdef VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
1912251662Sdim  [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))]
1913249259Sdim>;
1914249259Sdim
1915249259Sdimdef VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
1916251662Sdim  [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))]
1917249259Sdim>;
1918249259Sdim
1919249259Sdimdef VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
1920251662Sdim  [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))]
1921249259Sdim>;
1922249259Sdim
1923249259Sdim//===----------------------------------------------------------------------===//
1924249259Sdim// VTX Read from global memory space
1925249259Sdim//===----------------------------------------------------------------------===//
1926249259Sdim
1927249259Sdim// 8-bit reads
1928249259Sdimdef VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
1929251662Sdim  [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))]
1930249259Sdim>;
1931249259Sdim
1932249259Sdim// 32-bit reads
1933249259Sdimdef VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
1934251662Sdim  [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))]
1935249259Sdim>;
1936249259Sdim
1937249259Sdim// 128-bit reads
1938249259Sdimdef VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
1939251662Sdim  [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))]
1940249259Sdim>;
1941249259Sdim
1942249259Sdim//===----------------------------------------------------------------------===//
1943249259Sdim// Constant Loads
1944249259Sdim// XXX: We are currently storing all constants in the global address space.
1945249259Sdim//===----------------------------------------------------------------------===//
1946249259Sdim
1947249259Sdimdef CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
1948251662Sdim  [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))]
1949249259Sdim>;
1950249259Sdim
1951249259Sdim}
1952249259Sdim
1953249259Sdim//===----------------------------------------------------------------------===//
1954249259Sdim// Register loads and stores - for indirect addressing
1955249259Sdim//===----------------------------------------------------------------------===//
1956249259Sdim
1957249259Sdimdefm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
1958249259Sdim
1959249259Sdimlet Predicates = [isCayman] in {
1960249259Sdim
1961249259Sdimlet isVector = 1 in {
1962249259Sdim
1963249259Sdimdef RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
1964249259Sdim
1965249259Sdimdef MULLO_INT_cm : MULLO_INT_Common<0x8F>;
1966249259Sdimdef MULHI_INT_cm : MULHI_INT_Common<0x90>;
1967249259Sdimdef MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
1968249259Sdimdef MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
1969249259Sdimdef RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
1970249259Sdimdef EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
1971249259Sdimdef LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
1972249259Sdimdef RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
1973249259Sdimdef RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
1974249259Sdimdef SIN_cm : SIN_Common<0x8D>;
1975249259Sdimdef COS_cm : COS_Common<0x8E>;
1976249259Sdim} // End isVector = 1
1977249259Sdim
1978251662Sdimdef : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
1979249259Sdimdef : SIN_PAT <SIN_cm>;
1980249259Sdimdef : COS_PAT <COS_cm>;
1981249259Sdim
1982249259Sdimdefm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
1983249259Sdim
1984249259Sdim// RECIP_UINT emulation for Cayman
1985251662Sdim// The multiplication scales from [0,1] to the unsigned integer range
1986249259Sdimdef : Pat <
1987251662Sdim  (AMDGPUurecip i32:$src0),
1988251662Sdim  (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
1989251662Sdim                            (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
1990249259Sdim>;
1991249259Sdim
1992251662Sdim  def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
1993251662Sdim    let ADDR = 0;
1994251662Sdim    let POP_COUNT = 0;
1995251662Sdim    let COUNT = 0;
1996251662Sdim  }
1997249259Sdim
1998251662Sdimdef : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
1999249259Sdim
2000249259Sdim} // End isCayman
2001249259Sdim
2002249259Sdim//===----------------------------------------------------------------------===//
2003249259Sdim// Branch Instructions
2004249259Sdim//===----------------------------------------------------------------------===//
2005249259Sdim
2006249259Sdim
2007249259Sdimdef IF_PREDICATE_SET  : ILFormat<(outs), (ins GPRI32:$src),
2008249259Sdim  "IF_PREDICATE_SET $src", []>;
2009249259Sdim
2010249259Sdimdef PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
2011249259Sdim  "PREDICATED_BREAK $src", []>;
2012249259Sdim
2013249259Sdim//===----------------------------------------------------------------------===//
2014249259Sdim// Pseudo instructions
2015249259Sdim//===----------------------------------------------------------------------===//
2016249259Sdim
2017249259Sdimlet isPseudo = 1 in {
2018249259Sdim
2019249259Sdimdef PRED_X : InstR600 <
2020251662Sdim  (outs R600_Predicate_Bit:$dst),
2021249259Sdim  (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
2022249259Sdim  "", [], NullALU> {
2023249259Sdim  let FlagOperandIdx = 3;
2024249259Sdim}
2025249259Sdim
2026249259Sdimlet isTerminator = 1, isBranch = 1 in {
2027251662Sdimdef JUMP_COND : InstR600 <
2028249259Sdim          (outs),
2029249259Sdim          (ins brtarget:$target, R600_Predicate_Bit:$p),
2030249259Sdim          "JUMP $target ($p)",
2031249259Sdim          [], AnyALU
2032249259Sdim  >;
2033249259Sdim
2034251662Sdimdef JUMP : InstR600 <
2035249259Sdim          (outs),
2036249259Sdim          (ins brtarget:$target),
2037249259Sdim          "JUMP $target",
2038249259Sdim          [], AnyALU
2039249259Sdim  >
2040249259Sdim{
2041249259Sdim  let isPredicable = 1;
2042249259Sdim  let isBarrier = 1;
2043249259Sdim}
2044249259Sdim
2045249259Sdim}  // End isTerminator = 1, isBranch = 1
2046249259Sdim
2047249259Sdimlet usesCustomInserter = 1 in {
2048249259Sdim
2049249259Sdimlet mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
2050249259Sdim
2051249259Sdimdef MASK_WRITE : AMDGPUShaderInst <
2052249259Sdim    (outs),
2053249259Sdim    (ins R600_Reg32:$src),
2054249259Sdim    "MASK_WRITE $src",
2055249259Sdim    []
2056249259Sdim>;
2057249259Sdim
2058249259Sdim} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
2059249259Sdim
2060249259Sdim
2061251662Sdimdef TXD: InstR600 <
2062249259Sdim  (outs R600_Reg128:$dst),
2063251662Sdim  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
2064251662Sdim       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
2065249259Sdim  "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
2066251662Sdim  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
2067251662Sdim                     imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
2068251662Sdim  NullALU > {
2069251662Sdim  let TEXInst = 1;
2070251662Sdim}
2071249259Sdim
2072251662Sdimdef TXD_SHADOW: InstR600 <
2073249259Sdim  (outs R600_Reg128:$dst),
2074251662Sdim  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
2075251662Sdim       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
2076249259Sdim  "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
2077251662Sdim  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
2078251662Sdim        imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
2079251662Sdim   NullALU
2080251662Sdim> {
2081251662Sdim  let TEXInst = 1;
2082251662Sdim}
2083249259Sdim} // End usesCustomInserter = 1
2084249259Sdim} // End isPseudo = 1
2085249259Sdim
2086249259Sdimdef CLAMP_R600 :  CLAMP <R600_Reg32>;
2087249259Sdimdef FABS_R600 : FABS<R600_Reg32>;
2088249259Sdimdef FNEG_R600 : FNEG<R600_Reg32>;
2089249259Sdim
2090249259Sdim//===---------------------------------------------------------------------===//
2091249259Sdim// Return instruction
2092249259Sdim//===---------------------------------------------------------------------===//
2093249259Sdimlet isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
2094249259Sdim    usesCustomInserter = 1 in {
2095249259Sdim  def RETURN          : ILFormat<(outs), (ins variable_ops),
2096249259Sdim      "RETURN", [(IL_retflag)]>;
2097249259Sdim}
2098249259Sdim
2099249259Sdim
2100249259Sdim//===----------------------------------------------------------------------===//
2101249259Sdim// Constant Buffer Addressing Support
2102249259Sdim//===----------------------------------------------------------------------===//
2103249259Sdim
2104249259Sdimlet usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in {
2105249259Sdimdef CONST_COPY : Instruction {
2106249259Sdim  let OutOperandList = (outs R600_Reg32:$dst);
2107249259Sdim  let InOperandList = (ins i32imm:$src);
2108249259Sdim  let Pattern =
2109249259Sdim      [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
2110249259Sdim  let AsmString = "CONST_COPY";
2111249259Sdim  let neverHasSideEffects = 1;
2112249259Sdim  let isAsCheapAsAMove = 1;
2113249259Sdim  let Itinerary = NullALU;
2114249259Sdim}
2115249259Sdim} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
2116249259Sdim
2117249259Sdimdef TEX_VTX_CONSTBUF :
2118249259Sdim  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
2119251662Sdim      [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
2120249259Sdim  VTX_WORD1_GPR, VTX_WORD0 {
2121249259Sdim
2122249259Sdim  let VC_INST = 0;
2123249259Sdim  let FETCH_TYPE = 2;
2124249259Sdim  let FETCH_WHOLE_QUAD = 0;
2125249259Sdim  let SRC_REL = 0;
2126249259Sdim  let SRC_SEL_X = 0;
2127249259Sdim  let DST_REL = 0;
2128249259Sdim  let USE_CONST_FIELDS = 0;
2129249259Sdim  let NUM_FORMAT_ALL = 2;
2130249259Sdim  let FORMAT_COMP_ALL = 1;
2131249259Sdim  let SRF_MODE_ALL = 1;
2132249259Sdim  let MEGA_FETCH_COUNT = 16;
2133249259Sdim  let DST_SEL_X        = 0;
2134249259Sdim  let DST_SEL_Y        = 1;
2135249259Sdim  let DST_SEL_Z        = 2;
2136249259Sdim  let DST_SEL_W        = 3;
2137249259Sdim  let DATA_FORMAT      = 35;
2138249259Sdim
2139249259Sdim  let Inst{31-0} = Word0;
2140249259Sdim  let Inst{63-32} = Word1;
2141249259Sdim
2142249259Sdim// LLVM can only encode 64-bit instructions, so these fields are manually
2143249259Sdim// encoded in R600CodeEmitter
2144249259Sdim//
2145249259Sdim// bits<16> OFFSET;
2146249259Sdim// bits<2>  ENDIAN_SWAP = 0;
2147249259Sdim// bits<1>  CONST_BUF_NO_STRIDE = 0;
2148249259Sdim// bits<1>  MEGA_FETCH = 0;
2149249259Sdim// bits<1>  ALT_CONST = 0;
2150249259Sdim// bits<2>  BUFFER_INDEX_MODE = 0;
2151249259Sdim
2152249259Sdim
2153249259Sdim
2154249259Sdim// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
2155249259Sdim// is done in R600CodeEmitter)
2156249259Sdim//
2157249259Sdim// Inst{79-64} = OFFSET;
2158249259Sdim// Inst{81-80} = ENDIAN_SWAP;
2159249259Sdim// Inst{82}    = CONST_BUF_NO_STRIDE;
2160249259Sdim// Inst{83}    = MEGA_FETCH;
2161249259Sdim// Inst{84}    = ALT_CONST;
2162249259Sdim// Inst{86-85} = BUFFER_INDEX_MODE;
2163249259Sdim// Inst{95-87} = 0; Reserved (bit 86 belongs to BUFFER_INDEX_MODE)
2164249259Sdim
2165249259Sdim// VTX_WORD3 (Padding)
2166249259Sdim//
2167249259Sdim// Inst{127-96} = 0;
2168251662Sdim  let VTXInst = 1;
2169249259Sdim}
2170249259Sdim
2171249259Sdimdef TEX_VTX_TEXBUF:
2172249259Sdim  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
2173251662Sdim      [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
2174249259SdimVTX_WORD1_GPR, VTX_WORD0 {
2175249259Sdim
2176249259Sdimlet VC_INST = 0;
2177249259Sdimlet FETCH_TYPE = 2;
2178249259Sdimlet FETCH_WHOLE_QUAD = 0;
2179249259Sdimlet SRC_REL = 0;
2180249259Sdimlet SRC_SEL_X = 0;
2181249259Sdimlet DST_REL = 0;
2182249259Sdimlet USE_CONST_FIELDS = 1;
2183249259Sdimlet NUM_FORMAT_ALL = 0;
2184249259Sdimlet FORMAT_COMP_ALL = 0;
2185249259Sdimlet SRF_MODE_ALL = 1;
2186249259Sdimlet MEGA_FETCH_COUNT = 16;
2187249259Sdimlet DST_SEL_X        = 0;
2188249259Sdimlet DST_SEL_Y        = 1;
2189249259Sdimlet DST_SEL_Z        = 2;
2190249259Sdimlet DST_SEL_W        = 3;
2191249259Sdimlet DATA_FORMAT      = 0;
2192249259Sdim
2193249259Sdimlet Inst{31-0} = Word0;
2194249259Sdimlet Inst{63-32} = Word1;
2195249259Sdim
2196249259Sdim// LLVM can only encode 64-bit instructions, so these fields are manually
2197249259Sdim// encoded in R600CodeEmitter
2198249259Sdim//
2199249259Sdim// bits<16> OFFSET;
2200249259Sdim// bits<2>  ENDIAN_SWAP = 0;
2201249259Sdim// bits<1>  CONST_BUF_NO_STRIDE = 0;
2202249259Sdim// bits<1>  MEGA_FETCH = 0;
2203249259Sdim// bits<1>  ALT_CONST = 0;
2204249259Sdim// bits<2>  BUFFER_INDEX_MODE = 0;
2205249259Sdim
2206249259Sdim
2207249259Sdim
2208249259Sdim// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
2209249259Sdim// is done in R600CodeEmitter)
2210249259Sdim//
2211249259Sdim// Inst{79-64} = OFFSET;
2212249259Sdim// Inst{81-80} = ENDIAN_SWAP;
2213249259Sdim// Inst{82}    = CONST_BUF_NO_STRIDE;
2214249259Sdim// Inst{83}    = MEGA_FETCH;
2215249259Sdim// Inst{84}    = ALT_CONST;
2216249259Sdim// Inst{86-85} = BUFFER_INDEX_MODE;
2217249259Sdim// Inst{95-87} = 0; Reserved (bit 86 belongs to BUFFER_INDEX_MODE)
2218249259Sdim
2219249259Sdim// VTX_WORD3 (Padding)
2220249259Sdim//
2221249259Sdim// Inst{127-96} = 0;
2222251662Sdim  let VTXInst = 1;
2223249259Sdim}
2224249259Sdim
2225249259Sdim
2226249259Sdim
2227249259Sdim//===--------------------------------------------------------------------===//
2228249259Sdim// Instructions support
2229249259Sdim//===--------------------------------------------------------------------===//
2230249259Sdim//===---------------------------------------------------------------------===//
2231249259Sdim// Custom Inserter for Branches and returns, this eventually will be a
2232249259Sdim// separate pass
2233249259Sdim//===---------------------------------------------------------------------===//
2234249259Sdimlet isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
2235249259Sdim  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
2236249259Sdim      "; Pseudo unconditional branch instruction",
2237249259Sdim      [(br bb:$target)]>;
2238249259Sdim  defm BRANCH_COND : BranchConditional<IL_brcond>;
2239249259Sdim}
2240249259Sdim
2241249259Sdim//===---------------------------------------------------------------------===//
2242249259Sdim// Flow and Program control Instructions
2243249259Sdim//===---------------------------------------------------------------------===//
// Every record in this block is marked isTerminator. The plain ILFormat
// records have no outputs and an empty selection pattern ([]), so instruction
// selection never produces them from a DAG pattern. Only SWITCH and CASE take
// an operand (GPRI32:$src); the remaining ILFormat records take none.
// The conditional forms are instantiated from the BranchInstr and BranchInstr2
// multiclasses, which are defined outside this section.
2244249259Sdimlet isTerminator=1 in {
2245249259Sdim  def SWITCH      : ILFormat< (outs), (ins GPRI32:$src),
2246249259Sdim  !strconcat("SWITCH", " $src"), []>;
2247249259Sdim  def CASE        : ILFormat< (outs), (ins GPRI32:$src),
2248249259Sdim      !strconcat("CASE", " $src"), []>;
2249249259Sdim  def BREAK       : ILFormat< (outs), (ins),
2250249259Sdim      "BREAK", []>;
2251249259Sdim  def CONTINUE    : ILFormat< (outs), (ins),
2252249259Sdim      "CONTINUE", []>;
2253249259Sdim  def DEFAULT     : ILFormat< (outs), (ins),
2254249259Sdim      "DEFAULT", []>;
2255249259Sdim  def ELSE        : ILFormat< (outs), (ins),
2256249259Sdim      "ELSE", []>;
2257249259Sdim  def ENDSWITCH   : ILFormat< (outs), (ins),
2258249259Sdim      "ENDSWITCH", []>;
2259249259Sdim  def ENDMAIN     : ILFormat< (outs), (ins),
2260249259Sdim      "ENDMAIN", []>;
2261249259Sdim  def END         : ILFormat< (outs), (ins),
2262249259Sdim      "END", []>;
2263249259Sdim  def ENDFUNC     : ILFormat< (outs), (ins),
2264249259Sdim      "ENDFUNC", []>;
2265249259Sdim  def ENDIF       : ILFormat< (outs), (ins),
2266249259Sdim      "ENDIF", []>;
  // Note: the record is named WHILELOOP but its assembly string is "WHILE".
2267249259Sdim  def WHILELOOP   : ILFormat< (outs), (ins),
2268249259Sdim      "WHILE", []>;
2269249259Sdim  def ENDLOOP     : ILFormat< (outs), (ins),
2270249259Sdim      "ENDLOOP", []>;
2271249259Sdim  def FUNC        : ILFormat< (outs), (ins),
2272249259Sdim      "FUNC", []>;
  // Note: the record is named RETDYN but its assembly string is "RET_DYN".
2273249259Sdim  def RETDYN      : ILFormat< (outs), (ins),
2274249259Sdim      "RET_DYN", []>;
2275249259Sdim  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2276249259Sdim  defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">;
2277249259Sdim  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2278249259Sdim  defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">;
2279249259Sdim  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2280249259Sdim  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
2281249259Sdim  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2282249259Sdim  defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
2283249259Sdim  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2284249259Sdim  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
2285249259Sdim  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
2286249259Sdim  defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
2287249259Sdim  defm IFC         : BranchInstr2<"IFC">;
2288249259Sdim  defm BREAKC      : BranchInstr2<"BREAKC">;
2289249259Sdim  defm CONTINUEC   : BranchInstr2<"CONTINUEC">;
2290249259Sdim}
2291249259Sdim
2292249259Sdim//===----------------------------------------------------------------------===//
2293249259Sdim// ISel Patterns
2294249259Sdim//===----------------------------------------------------------------------===//
2295249259Sdim
2296249259Sdim// CND*_INT Patterns for f32 True / False values
//
// Matches a selectcc that compares an i32 value ($src0) against the constant 0
// with condition code `cc` and chooses between two f32 values ($src1, $src2),
// and selects it as the integer conditional-move instruction `cnd` with the
// same three operands in the same order.
2297249259Sdim
2298249259Sdimclass CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
2299251662Sdim  (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
2300251662Sdim  (cnd $src0, $src1, $src2)
2301249259Sdim>;
2302249259Sdim
// One CND_INT_f32 pattern per integer conditional move, each paired with the
// condition code used for the comparison against 0 (equal, signed greater-than,
// signed greater-or-equal).
2303249259Sdimdef : CND_INT_f32 <CNDE_INT,  SETEQ>;
2304249259Sdimdef : CND_INT_f32 <CNDGT_INT, SETGT>;
2305249259Sdimdef : CND_INT_f32 <CNDGE_INT, SETGE>;
2306249259Sdim
2307249259Sdim// CNDGE_INT extra pattern
//
// For a signed i32 comparison, (src0 > -1) is the same test as (src0 >= 0), so
// a select on "src0 > -1" is mapped to CNDGE_INT. The equivalence only holds
// for a signed compare (an unsigned "src0 > 0xFFFFFFFF" is never true), so the
// signed condition code SETGT is named explicitly, as the other integer
// patterns in this file do, rather than a generic greater-than leaf.
2308249259Sdimdef : Pat <
2309251662Sdim  (selectcc i32:$src0, -1, i32:$src1, i32:$src2, SETGT),
2310251662Sdim  (CNDGE_INT $src0, $src1, $src2)
2311249259Sdim>;
2312249259Sdim
2313249259Sdim// KIL Patterns
//
// KILP: the operand-less int_AMDGPU_kilp intrinsic is selected as a KILLGT
// whose operands are the f32 values ONE and ZERO, wrapped in MASK_WRITE.
// NOTE(review): presumably KILLGT kills when its first operand is greater than
// the second, which would make this an unconditional kill -- confirm against
// the KILLGT definition.
2314249259Sdimdef KILP : Pat <
2315249259Sdim  (int_AMDGPU_kilp),
2316249259Sdim  (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
2317249259Sdim>;
2318249259Sdim
// KIL: int_AMDGPU_kill with an f32 operand is selected as a KILLGT comparing
// the f32 value ZERO against $src0, wrapped in MASK_WRITE.
// NOTE(review): presumably this kills when 0 > $src0, i.e. for negative
// $src0 -- confirm against the KILLGT definition.
2319249259Sdimdef KIL : Pat <
2320251662Sdim  (int_AMDGPU_kill f32:$src0),
2321251662Sdim  (MASK_WRITE (KILLGT (f32 ZERO), $src0))
2322249259Sdim>;
2323249259Sdim
2324249259Sdim// SGT Reverse args: (src0 < src1) producing FP_ONE / FP_ZERO is selected as
// SGT with the two operands swapped.
2325249259Sdimdef : Pat <
2326251662Sdim  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
2327251662Sdim  (SGT $src1, $src0)
2328249259Sdim>;
2329249259Sdim
2330249259Sdim// SGE Reverse args: (src0 <= src1) producing FP_ONE / FP_ZERO is selected as
// SGE with the two operands swapped.
2331249259Sdimdef : Pat <
2332251662Sdim  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
2333251662Sdim  (SGE $src1, $src0)
2334249259Sdim>;
2335249259Sdim
2336249259Sdim// SETGT_DX10 reverse args: f32 (src0 < src1) producing -1 / 0 is selected as
// SETGT_DX10 with the two operands swapped.
2337249259Sdimdef : Pat <
2338251662Sdim  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
2339251662Sdim  (SETGT_DX10 $src1, $src0)
2340249259Sdim>;
2341249259Sdim
2342249259Sdim// SETGE_DX10 reverse args: f32 (src0 <= src1) producing -1 / 0 is selected as
// SETGE_DX10 with the two operands swapped.
2343249259Sdimdef : Pat <
2344251662Sdim  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
2345251662Sdim  (SETGE_DX10 $src1, $src0)
2346249259Sdim>;
2347249259Sdim
2348249259Sdim// SETGT_INT reverse args: signed i32 (src0 < src1) producing -1 / 0 is
// selected as SETGT_INT with the two operands swapped.
2349249259Sdimdef : Pat <
2350251662Sdim  (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
2351251662Sdim  (SETGT_INT $src1, $src0)
2352249259Sdim>;
2353249259Sdim
2354249259Sdim// SETGE_INT reverse args: signed i32 (src0 <= src1) producing -1 / 0 is
// selected as SETGE_INT with the two operands swapped.
2355249259Sdimdef : Pat <
2356251662Sdim  (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
2357251662Sdim  (SETGE_INT $src1, $src0)
2358249259Sdim>;
2359249259Sdim
2360249259Sdim// SETGT_UINT reverse args: unsigned i32 (src0 < src1) producing -1 / 0 is
// selected as SETGT_UINT with the two operands swapped.
2361249259Sdimdef : Pat <
2362251662Sdim  (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
2363251662Sdim  (SETGT_UINT $src1, $src0)
2364249259Sdim>;
2365249259Sdim
2366249259Sdim// SETGE_UINT reverse args: unsigned i32 (src0 <= src1) producing -1 / 0 is
// selected as SETGE_UINT with the two operands swapped.
2367249259Sdimdef : Pat <
2368251662Sdim  (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
2369251662Sdim  (SETGE_UINT $src1, $src0)
2370249259Sdim>;
2371249259Sdim
2372249259Sdim// The next two patterns are special cases for handling 'true if ordered' and
2373249259Sdim// 'true if unordered' conditionals.  The assumption here is that the behavior of
2374249259Sdim// SETE and SNE conforms to the Direct3D 10 rules for floating point values
2375249259Sdim// described here:
2376249259Sdim// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
2377249259Sdim// We assume that SETE returns false when one of the operands is NAN and
2378249259Sdim// SNE returns true when one of the operands is NAN.
2379249259Sdim
2380249259Sdim//SETE - 'true if ordered'
// Selects an f32 SETO selectcc producing FP_ONE / FP_ZERO as SETE on the same
// two operands.
// NOTE(review): the pattern does not require $src0 and $src1 to be the same
// value; presumably SETE tests equality, in which case this matches 'ordered'
// only when the two operands are equal -- confirm.
2381249259Sdimdef : Pat <
2382251662Sdim  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
2383251662Sdim  (SETE $src0, $src1)
2384249259Sdim>;
2385249259Sdim
2386249259Sdim//SETE_DX10 - 'true if ordered'
// Same as above, but for the variant producing -1 / 0.
2387249259Sdimdef : Pat <
2388251662Sdim  (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
2389251662Sdim  (SETE_DX10 $src0, $src1)
2390249259Sdim>;
2391249259Sdim
2392249259Sdim//SNE - 'true if unordered'
// Selects an f32 SETUO selectcc producing FP_ONE / FP_ZERO as SNE on the same
// two operands.
// NOTE(review): the pattern does not require $src0 and $src1 to be the same
// value; presumably SNE tests inequality, in which case this matches
// 'unordered' only when the two operands are equal or unordered -- confirm.
2393249259Sdimdef : Pat <
2394251662Sdim  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
2395251662Sdim  (SNE $src0, $src1)
2396249259Sdim>;
2397249259Sdim
2398249259Sdim//SETNE_DX10 - 'true if unordered'
// Same as above, but for the variant producing -1 / 0.
2399249259Sdimdef : Pat <
2400251662Sdim  (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
2401251662Sdim  (SETNE_DX10 $src0, $src1)
2402249259Sdim>;
2403249259Sdim
// Element access for v4f32: one Extract_Element and one Insert_Element pattern
// per lane, pairing element index N (0-3) with sub-register index subN.
2404251662Sdimdef : Extract_Element <f32, v4f32, 0, sub0>;
2405251662Sdimdef : Extract_Element <f32, v4f32, 1, sub1>;
2406251662Sdimdef : Extract_Element <f32, v4f32, 2, sub2>;
2407251662Sdimdef : Extract_Element <f32, v4f32, 3, sub3>;
2408249259Sdim
2409251662Sdimdef : Insert_Element <f32, v4f32, 0, sub0>;
2410251662Sdimdef : Insert_Element <f32, v4f32, 1, sub1>;
2411251662Sdimdef : Insert_Element <f32, v4f32, 2, sub2>;
2412251662Sdimdef : Insert_Element <f32, v4f32, 3, sub3>;
2413249259Sdim
// Element access for v4i32: one Extract_Element and one Insert_Element pattern
// per lane, pairing element index N (0-3) with sub-register index subN.
2414251662Sdimdef : Extract_Element <i32, v4i32, 0, sub0>;
2415251662Sdimdef : Extract_Element <i32, v4i32, 1, sub1>;
2416251662Sdimdef : Extract_Element <i32, v4i32, 2, sub2>;
2417251662Sdimdef : Extract_Element <i32, v4i32, 3, sub3>;
2418249259Sdim
2419251662Sdimdef : Insert_Element <i32, v4i32, 0, sub0>;
2420251662Sdimdef : Insert_Element <i32, v4i32, 1, sub1>;
2421251662Sdimdef : Insert_Element <i32, v4i32, 2, sub2>;
2422251662Sdimdef : Insert_Element <i32, v4i32, 3, sub3>;
2423249259Sdim
// Vector4_Build patterns for both 4-element vector types, each paired with its
// scalar element type.
2424251662Sdimdef : Vector4_Build <v4f32, f32>;
2425251662Sdimdef : Vector4_Build <v4i32, i32>;
2426249259Sdim
2427249259Sdim// bitconvert patterns
//
// Conversions are declared in both directions between the integer and float
// type of the same width: the scalar pairs use register class R600_Reg32 and
// the 4-element vector pairs use R600_Reg128.
2428249259Sdim
2429249259Sdimdef : BitConvert <i32, f32, R600_Reg32>;
2430249259Sdimdef : BitConvert <f32, i32, R600_Reg32>;
2431249259Sdimdef : BitConvert <v4f32, v4i32, R600_Reg128>;
2432249259Sdimdef : BitConvert <v4i32, v4f32, R600_Reg128>;
2433249259Sdim
2434249259Sdim// DWORDADDR pattern: instantiates DwordAddrPat for i32 values in R600_Reg32.
2435249259Sdimdef : DwordAddrPat  <i32, R600_Reg32>;
2436249259Sdim
2437249259Sdim} // End isR600toCayman Predicate
2438