// R600MCCodeEmitter.cpp revision 249259
//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This code emitter outputs bytecode that is understood by the r600g driver
/// in the Mesa [1] project.  The bytecode is very similar to the hardware's ISA,
/// but it still needs to be run through a finalizer in order to be executed
/// by the GPU.
///
/// [1] http://www.mesa3d.org/
//
//===----------------------------------------------------------------------===//
20
#include "R600Defines.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <stdio.h>
32
/// Sizes, in bytes, of the fixed-width operand records in the emitted stream.
///
/// These are enumerators rather than preprocessor macros so that they obey
/// normal C++ scoping and cannot be silently redefined by another header.
enum {
  /// Size of one source record: EmitSrc() writes 2 + 1 + 1 + 1 + 1 + 1 + 4
  /// bytes, and EmitFCInstr() pads with this many zero bytes when a
  /// flow-control instruction has no operand.
  SRC_BYTE_COUNT = 11,
  /// Presumably the size of one destination record; it is not referenced
  /// anywhere else in this file.
  DST_BYTE_COUNT = 5
};
35
36using namespace llvm;
37
38namespace {
39
40class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
41  R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
42  void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
43  const MCInstrInfo &MCII;
44  const MCRegisterInfo &MRI;
45  const MCSubtargetInfo &STI;
46  MCContext &Ctx;
47
48public:
49
50  R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
51                    const MCSubtargetInfo &sti, MCContext &ctx)
52    : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
53
54  /// \brief Encode the instruction and write it to the OS.
55  virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
56                         SmallVectorImpl<MCFixup> &Fixups) const;
57
58  /// \returns the encoding for an MCOperand.
59  virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
60                                     SmallVectorImpl<MCFixup> &Fixups) const;
61private:
62
63  void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
64                    raw_ostream &OS) const;
65  void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
66  void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
67                    raw_ostream &OS) const;
68  void EmitDst(const MCInst &MI, raw_ostream &OS) const;
69  void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
70
71  void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
72
73  void EmitByte(unsigned int byte, raw_ostream &OS) const;
74
75  void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
76
77  void Emit(uint32_t value, raw_ostream &OS) const;
78  void Emit(uint64_t value, raw_ostream &OS) const;
79
80  unsigned getHWRegChan(unsigned reg) const;
81  unsigned getHWReg(unsigned regNo) const;
82
83  bool isFCOp(unsigned opcode) const;
84  bool isTexOp(unsigned opcode) const;
85  bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
86
87};
88
89} // End anonymous namespace
90
/// Index of a vector component; used in this file to index the per-channel
/// coordinate-type and source-select arrays of texture instructions.
enum RegElement {
  ELEMENT_X = 0,
  ELEMENT_Y = 1,
  ELEMENT_Z = 2,
  ELEMENT_W = 3
};
97
/// One-byte tag written in front of an encoded record to identify its kind.
enum InstrTypes {
  INSTR_ALU    = 0,
  INSTR_TEX    = 1,
  INSTR_FC     = 2,
  INSTR_NATIVE = 3,
  INSTR_VTX    = 4,
  INSTR_EXPORT = 5,
  INSTR_CFALU  = 6
};
107
/// Flow-control operation code written as the last byte of an INSTR_FC record.
enum FCInstr {
  FC_IF_PREDICATE    = 0,
  FC_ELSE            = 1,
  FC_ENDIF           = 2,
  FC_BGNLOOP         = 3,
  FC_ENDLOOP         = 4,
  FC_BREAK_PREDICATE = 5,
  FC_CONTINUE        = 6
};
117
/// Texture target identifiers, compared against the texture-type immediate
/// operand of texture instructions.  Numbering starts at 1.
enum TextureTypes {
  TEXTURE_1D             = 1,
  TEXTURE_2D             = 2,
  TEXTURE_3D             = 3,
  TEXTURE_CUBE           = 4,
  TEXTURE_RECT           = 5,
  TEXTURE_SHADOW1D       = 6,
  TEXTURE_SHADOW2D       = 7,
  TEXTURE_SHADOWRECT     = 8,
  TEXTURE_1D_ARRAY       = 9,
  TEXTURE_2D_ARRAY       = 10,
  TEXTURE_SHADOW1D_ARRAY = 11,
  TEXTURE_SHADOW2D_ARRAY = 12
};
132
133MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
134                                           const MCRegisterInfo &MRI,
135                                           const MCSubtargetInfo &STI,
136                                           MCContext &Ctx) {
137  return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
138}
139
140void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
141                                       SmallVectorImpl<MCFixup> &Fixups) const {
142  if (isFCOp(MI.getOpcode())){
143    EmitFCInstr(MI, OS);
144  } else if (MI.getOpcode() == AMDGPU::RETURN ||
145    MI.getOpcode() == AMDGPU::BUNDLE ||
146    MI.getOpcode() == AMDGPU::KILL) {
147    return;
148  } else {
149    switch(MI.getOpcode()) {
150    case AMDGPU::STACK_SIZE: {
151      EmitByte(MI.getOperand(0).getImm(), OS);
152      break;
153    }
154    case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
155    case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
156      uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
157      EmitByte(INSTR_NATIVE, OS);
158      Emit(inst, OS);
159      break;
160    }
161    case AMDGPU::CONSTANT_LOAD_eg:
162    case AMDGPU::VTX_READ_PARAM_8_eg:
163    case AMDGPU::VTX_READ_PARAM_16_eg:
164    case AMDGPU::VTX_READ_PARAM_32_eg:
165    case AMDGPU::VTX_READ_PARAM_128_eg:
166    case AMDGPU::VTX_READ_GLOBAL_8_eg:
167    case AMDGPU::VTX_READ_GLOBAL_32_eg:
168    case AMDGPU::VTX_READ_GLOBAL_128_eg:
169    case AMDGPU::TEX_VTX_CONSTBUF:
170    case AMDGPU::TEX_VTX_TEXBUF : {
171      uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
172      uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
173
174      EmitByte(INSTR_VTX, OS);
175      Emit(InstWord01, OS);
176      Emit(InstWord2, OS);
177      break;
178    }
179    case AMDGPU::TEX_LD:
180    case AMDGPU::TEX_GET_TEXTURE_RESINFO:
181    case AMDGPU::TEX_SAMPLE:
182    case AMDGPU::TEX_SAMPLE_C:
183    case AMDGPU::TEX_SAMPLE_L:
184    case AMDGPU::TEX_SAMPLE_C_L:
185    case AMDGPU::TEX_SAMPLE_LB:
186    case AMDGPU::TEX_SAMPLE_C_LB:
187    case AMDGPU::TEX_SAMPLE_G:
188    case AMDGPU::TEX_SAMPLE_C_G:
189    case AMDGPU::TEX_GET_GRADIENTS_H:
190    case AMDGPU::TEX_GET_GRADIENTS_V:
191    case AMDGPU::TEX_SET_GRADIENTS_H:
192    case AMDGPU::TEX_SET_GRADIENTS_V: {
193      unsigned Opcode = MI.getOpcode();
194      bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
195      unsigned OpOffset = HasOffsets ? 3 : 0;
196      int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
197      int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
198
199      uint32_t SrcSelect[4] = {0, 1, 2, 3};
200      uint32_t Offsets[3] = {0, 0, 0};
201      uint64_t CoordType[4] = {1, 1, 1, 1};
202
203      if (HasOffsets)
204        for (unsigned i = 0; i < 3; i++) {
205          int SignedOffset = MI.getOperand(i + 2).getImm();
206          Offsets[i] = (SignedOffset & 0x1F);
207        }
208
209
210      if (TextureType == TEXTURE_RECT ||
211          TextureType == TEXTURE_SHADOWRECT) {
212        CoordType[ELEMENT_X] = 0;
213        CoordType[ELEMENT_Y] = 0;
214      }
215
216      if (TextureType == TEXTURE_1D_ARRAY ||
217          TextureType == TEXTURE_SHADOW1D_ARRAY) {
218        if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
219            Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
220          CoordType[ELEMENT_Y] = 0;
221        } else {
222          CoordType[ELEMENT_Z] = 0;
223          SrcSelect[ELEMENT_Z] = ELEMENT_Y;
224        }
225      } else if (TextureType == TEXTURE_2D_ARRAY ||
226          TextureType == TEXTURE_SHADOW2D_ARRAY) {
227        CoordType[ELEMENT_Z] = 0;
228      }
229
230
231      if ((TextureType == TEXTURE_SHADOW1D ||
232          TextureType == TEXTURE_SHADOW2D ||
233          TextureType == TEXTURE_SHADOWRECT ||
234          TextureType == TEXTURE_SHADOW1D_ARRAY) &&
235          Opcode != AMDGPU::TEX_SAMPLE_C_L &&
236          Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
237        SrcSelect[ELEMENT_W] = ELEMENT_Z;
238      }
239
240      uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
241          CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
242          CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
243      uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
244          SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
245          SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
246          Offsets[2] << 10;
247
248      EmitByte(INSTR_TEX, OS);
249      Emit(Word01, OS);
250      Emit(Word2, OS);
251      break;
252    }
253    case AMDGPU::EG_ExportSwz:
254    case AMDGPU::R600_ExportSwz:
255    case AMDGPU::EG_ExportBuf:
256    case AMDGPU::R600_ExportBuf: {
257      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
258      EmitByte(INSTR_EXPORT, OS);
259      Emit(Inst, OS);
260      break;
261    }
262    case AMDGPU::CF_ALU:
263    case AMDGPU::CF_ALU_PUSH_BEFORE: {
264      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
265      EmitByte(INSTR_CFALU, OS);
266      Emit(Inst, OS);
267      break;
268    }
269    case AMDGPU::CF_TC:
270    case AMDGPU::CF_VC:
271    case AMDGPU::CF_CALL_FS:
272      return;
273    case AMDGPU::WHILE_LOOP:
274    case AMDGPU::END_LOOP:
275    case AMDGPU::LOOP_BREAK:
276    case AMDGPU::CF_CONTINUE:
277    case AMDGPU::CF_JUMP:
278    case AMDGPU::CF_ELSE:
279    case AMDGPU::POP: {
280      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
281      EmitByte(INSTR_NATIVE, OS);
282      Emit(Inst, OS);
283      break;
284    }
285    default:
286      EmitALUInstr(MI, Fixups, OS);
287      break;
288    }
289  }
290}
291
292void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
293                                     SmallVectorImpl<MCFixup> &Fixups,
294                                     raw_ostream &OS) const {
295  const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
296
297  // Emit instruction type
298  EmitByte(INSTR_ALU, OS);
299
300  uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
301
302  //older alu have different encoding for instructions with one or two src
303  //parameters.
304  if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
305      !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
306    uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
307    InstWord01 &= ~(0x3FFULL << 39);
308    InstWord01 |= ISAOpCode << 1;
309  }
310
311  unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
312      MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
313
314  EmitByte(SrcNum, OS);
315
316  const unsigned SrcOps[3][2] = {
317      {R600Operands::SRC0, R600Operands::SRC0_SEL},
318      {R600Operands::SRC1, R600Operands::SRC1_SEL},
319      {R600Operands::SRC2, R600Operands::SRC2_SEL}
320  };
321
322  for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
323    unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
324    unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
325    EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
326  }
327
328  Emit(InstWord01, OS);
329  return;
330}
331
332void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
333                                raw_ostream &OS) const {
334  const MCOperand &MO = MI.getOperand(OpIdx);
335  union {
336    float f;
337    uint32_t i;
338  } Value;
339  Value.i = 0;
340  // Emit the source select (2 bytes).  For GPRs, this is the register index.
341  // For other potential instruction operands, (e.g. constant registers) the
342  // value of the source select is defined in the r600isa docs.
343  if (MO.isReg()) {
344    unsigned reg = MO.getReg();
345    EmitTwoBytes(getHWReg(reg), OS);
346    if (reg == AMDGPU::ALU_LITERAL_X) {
347      unsigned ImmOpIndex = MI.getNumOperands() - 1;
348      MCOperand ImmOp = MI.getOperand(ImmOpIndex);
349      if (ImmOp.isFPImm()) {
350        Value.f = ImmOp.getFPImm();
351      } else {
352        assert(ImmOp.isImm());
353        Value.i = ImmOp.getImm();
354      }
355    }
356  } else {
357    // XXX: Handle other operand types.
358    EmitTwoBytes(0, OS);
359  }
360
361  // Emit the source channel (1 byte)
362  if (MO.isReg()) {
363    EmitByte(getHWRegChan(MO.getReg()), OS);
364  } else {
365    EmitByte(0, OS);
366  }
367
368  // XXX: Emit isNegated (1 byte)
369  if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
370      && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
371     (MO.isReg() &&
372      (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
373    EmitByte(1, OS);
374  } else {
375    EmitByte(0, OS);
376  }
377
378  // Emit isAbsolute (1 byte)
379  if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
380    EmitByte(1, OS);
381  } else {
382    EmitByte(0, OS);
383  }
384
385  // XXX: Emit relative addressing mode (1 byte)
386  EmitByte(0, OS);
387
388  // Emit kc_bank, This will be adjusted later by r600_asm
389  EmitByte(0, OS);
390
391  // Emit the literal value, if applicable (4 bytes).
392  Emit(Value.i, OS);
393
394}
395
396void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
397                                   unsigned SelOpIdx, raw_ostream &OS) const {
398  const MCOperand &RegMO = MI.getOperand(RegOpIdx);
399  const MCOperand &SelMO = MI.getOperand(SelOpIdx);
400
401  union {
402    float f;
403    uint32_t i;
404  } InlineConstant;
405  InlineConstant.i = 0;
406  // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
407  // and select is 0 (GPR index is encoded in the instr encoding. For constants
408  // type is 1 and select is the original const select passed from the driver.
409  unsigned Reg = RegMO.getReg();
410  if (Reg == AMDGPU::ALU_CONST) {
411    EmitByte(1, OS);
412    uint32_t Sel = SelMO.getImm();
413    Emit(Sel, OS);
414  } else {
415    EmitByte(0, OS);
416    Emit((uint32_t)0, OS);
417  }
418
419  if (Reg == AMDGPU::ALU_LITERAL_X) {
420    unsigned ImmOpIndex = MI.getNumOperands() - 1;
421    MCOperand ImmOp = MI.getOperand(ImmOpIndex);
422    if (ImmOp.isFPImm()) {
423      InlineConstant.f = ImmOp.getFPImm();
424    } else {
425      assert(ImmOp.isImm());
426      InlineConstant.i = ImmOp.getImm();
427    }
428  }
429
430  // Emit the literal value, if applicable (4 bytes).
431  Emit(InlineConstant.i, OS);
432}
433
434void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
435
436  // Emit instruction type
437  EmitByte(INSTR_FC, OS);
438
439  // Emit SRC
440  unsigned NumOperands = MI.getNumOperands();
441  if (NumOperands > 0) {
442    assert(NumOperands == 1);
443    EmitSrc(MI, 0, OS);
444  } else {
445    EmitNullBytes(SRC_BYTE_COUNT, OS);
446  }
447
448  // Emit FC Instruction
449  enum FCInstr instr;
450  switch (MI.getOpcode()) {
451  case AMDGPU::PREDICATED_BREAK:
452    instr = FC_BREAK_PREDICATE;
453    break;
454  case AMDGPU::CONTINUE:
455    instr = FC_CONTINUE;
456    break;
457  case AMDGPU::IF_PREDICATE_SET:
458    instr = FC_IF_PREDICATE;
459    break;
460  case AMDGPU::ELSE:
461    instr = FC_ELSE;
462    break;
463  case AMDGPU::ENDIF:
464    instr = FC_ENDIF;
465    break;
466  case AMDGPU::ENDLOOP:
467    instr = FC_ENDLOOP;
468    break;
469  case AMDGPU::WHILELOOP:
470    instr = FC_BGNLOOP;
471    break;
472  default:
473    abort();
474    break;
475  }
476  EmitByte(instr, OS);
477}
478
479void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
480                                      raw_ostream &OS) const {
481
482  for (unsigned int i = 0; i < ByteCount; i++) {
483    EmitByte(0, OS);
484  }
485}
486
487void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
488  OS.write((uint8_t) Byte & 0xff);
489}
490
491void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
492                                     raw_ostream &OS) const {
493  OS.write((uint8_t) (Bytes & 0xff));
494  OS.write((uint8_t) ((Bytes >> 8) & 0xff));
495}
496
497void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
498  for (unsigned i = 0; i < 4; i++) {
499    OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
500  }
501}
502
503void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
504  for (unsigned i = 0; i < 8; i++) {
505    EmitByte((Value >> (8 * i)) & 0xff, OS);
506  }
507}
508
509unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
510  return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
511}
512
513unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
514  return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
515}
516
517uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
518                                              const MCOperand &MO,
519                                        SmallVectorImpl<MCFixup> &Fixup) const {
520  if (MO.isReg()) {
521    if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
522      return MRI.getEncodingValue(MO.getReg());
523    } else {
524      return getHWReg(MO.getReg());
525    }
526  } else if (MO.isImm()) {
527    return MO.getImm();
528  } else {
529    assert(0);
530    return 0;
531  }
532}
533
534//===----------------------------------------------------------------------===//
535// Encoding helper functions
536//===----------------------------------------------------------------------===//
537
538bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
539  switch(opcode) {
540  default: return false;
541  case AMDGPU::PREDICATED_BREAK:
542  case AMDGPU::CONTINUE:
543  case AMDGPU::IF_PREDICATE_SET:
544  case AMDGPU::ELSE:
545  case AMDGPU::ENDIF:
546  case AMDGPU::ENDLOOP:
547  case AMDGPU::WHILELOOP:
548    return true;
549  }
550}
551
552bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
553  switch(opcode) {
554  default: return false;
555  case AMDGPU::TEX_LD:
556  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
557  case AMDGPU::TEX_SAMPLE:
558  case AMDGPU::TEX_SAMPLE_C:
559  case AMDGPU::TEX_SAMPLE_L:
560  case AMDGPU::TEX_SAMPLE_C_L:
561  case AMDGPU::TEX_SAMPLE_LB:
562  case AMDGPU::TEX_SAMPLE_C_LB:
563  case AMDGPU::TEX_SAMPLE_G:
564  case AMDGPU::TEX_SAMPLE_C_G:
565  case AMDGPU::TEX_GET_GRADIENTS_H:
566  case AMDGPU::TEX_GET_GRADIENTS_V:
567  case AMDGPU::TEX_SET_GRADIENTS_H:
568  case AMDGPU::TEX_SET_GRADIENTS_V:
569    return true;
570  }
571}
572
573bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
574                                  unsigned Flag) const {
575  const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
576  unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
577  if (FlagIndex == 0) {
578    return false;
579  }
580  assert(MI.getOperand(FlagIndex).isImm());
581  return !!((MI.getOperand(FlagIndex).getImm() >>
582            (NUM_MO_FLAGS * Operand)) & Flag);
583}
584
585#include "AMDGPUGenMCCodeEmitter.inc"
586