1//===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// The SI code emitter produces machine code that can be executed
11/// directly on the GPU device.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
16#include "MCTargetDesc/AMDGPUFixupKinds.h"
17#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "SIDefines.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "llvm/MC/MCCodeEmitter.h"
22#include "llvm/MC/MCContext.h"
23#include "llvm/MC/MCExpr.h"
24#include "llvm/MC/MCFixup.h"
25#include "llvm/MC/MCInst.h"
26#include "llvm/MC/MCInstrDesc.h"
27#include "llvm/MC/MCInstrInfo.h"
28#include "llvm/MC/MCRegisterInfo.h"
29#include "llvm/MC/MCSubtargetInfo.h"
30#include "llvm/MC/MCSymbol.h"
31#include "llvm/Support/Casting.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/MathExtras.h"
34#include "llvm/Support/raw_ostream.h"
35#include <cassert>
36#include <cstdint>
37#include <cstdlib>
38
39using namespace llvm;
40
41namespace {
42
class SIMCCodeEmitter : public  AMDGPUMCCodeEmitter {
  const MCRegisterInfo &MRI;

  /// Encode an fp or int literal.
  /// \returns the 8-bit hardware source encoding: an inline constant, 255
  /// when a literal must follow the instruction, or ~0u when \p MO is
  /// neither an immediate nor an expression.
  uint32_t getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
                          const MCSubtargetInfo &STI) const;

public:
  // NOTE: \p ctx is unused; it is accepted so the constructor matches the
  // uniform signature used by createSIMCCodeEmitter.
  SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                  MCContext &ctx)
      : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
  SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
  SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete;

  /// Encode the instruction and write it to the OS.
  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups,
                         const MCSubtargetInfo &STI) const override;

  /// \returns the encoding for an MCOperand.
  uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const override;

  /// Use a fixup to encode the simm16 field for SOPP branch
  ///        instructions.
  unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const override;

  /// \returns the encoding of the offset field of an SMEM instruction.
  unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
                                 SmallVectorImpl<MCFixup> &Fixups,
                                 const MCSubtargetInfo &STI) const override;

  /// \returns the SDWA9 encoding of an SDWA source operand (register or
  /// inline-constant immediate).
  unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const override;

  /// \returns the SDWA9 encoding of an SDWA VOPC destination register.
  unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
                                  SmallVectorImpl<MCFixup> &Fixups,
                                  const MCSubtargetInfo &STI) const override;

  /// \returns the encoding of a VGPR-or-AGPR operand, with the extra acc
  /// bit (512) set when the register is an AGPR.
  unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
                                SmallVectorImpl<MCFixup> &Fixups,
                                const MCSubtargetInfo &STI) const override;
};
89
90} // end anonymous namespace
91
/// Factory entry point: create the SI MC code emitter. The caller owns the
/// returned object.
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
                                           const MCRegisterInfo &MRI,
                                           MCContext &Ctx) {
  return new SIMCCodeEmitter(MCII, MRI, Ctx);
}
97
// Returns the encoding value to use if the given integer is an integer inline
// immediate value, or 0 if it is not.
template <typename IntTy>
static uint32_t getIntInlineImmEncoding(IntTy Imm) {
  // Non-negative inline integers 0..64 encode as 128..192.
  if (Imm >= 0 && Imm <= 64)
    return 128 + Imm;

  // Negative inline integers -1..-16 encode as 193..208.
  if (Imm <= -1 && Imm >= -16)
    return 192 - Imm;

  // Anything else has no inline-integer encoding.
  return 0;
}
110
111static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
112  uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
113  return IntImm == 0 ? 255 : IntImm;
114}
115
116static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
117  uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
118  if (IntImm != 0)
119    return IntImm;
120
121  if (Val == 0x3800) // 0.5
122    return 240;
123
124  if (Val == 0xB800) // -0.5
125    return 241;
126
127  if (Val == 0x3C00) // 1.0
128    return 242;
129
130  if (Val == 0xBC00) // -1.0
131    return 243;
132
133  if (Val == 0x4000) // 2.0
134    return 244;
135
136  if (Val == 0xC000) // -2.0
137    return 245;
138
139  if (Val == 0x4400) // 4.0
140    return 246;
141
142  if (Val == 0xC400) // -4.0
143    return 247;
144
145  if (Val == 0x3118 && // 1.0 / (2.0 * pi)
146      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
147    return 248;
148
149  return 255;
150}
151
152static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
153  uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
154  if (IntImm != 0)
155    return IntImm;
156
157  if (Val == FloatToBits(0.5f))
158    return 240;
159
160  if (Val == FloatToBits(-0.5f))
161    return 241;
162
163  if (Val == FloatToBits(1.0f))
164    return 242;
165
166  if (Val == FloatToBits(-1.0f))
167    return 243;
168
169  if (Val == FloatToBits(2.0f))
170    return 244;
171
172  if (Val == FloatToBits(-2.0f))
173    return 245;
174
175  if (Val == FloatToBits(4.0f))
176    return 246;
177
178  if (Val == FloatToBits(-4.0f))
179    return 247;
180
181  if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
182      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
183    return 248;
184
185  return 255;
186}
187
188static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
189  uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
190  if (IntImm != 0)
191    return IntImm;
192
193  if (Val == DoubleToBits(0.5))
194    return 240;
195
196  if (Val == DoubleToBits(-0.5))
197    return 241;
198
199  if (Val == DoubleToBits(1.0))
200    return 242;
201
202  if (Val == DoubleToBits(-1.0))
203    return 243;
204
205  if (Val == DoubleToBits(2.0))
206    return 244;
207
208  if (Val == DoubleToBits(-2.0))
209    return 245;
210
211  if (Val == DoubleToBits(4.0))
212    return 246;
213
214  if (Val == DoubleToBits(-4.0))
215    return 247;
216
217  if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
218      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
219    return 248;
220
221  return 255;
222}
223
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
                                         const MCOperandInfo &OpInfo,
                                         const MCSubtargetInfo &STI) const {
  // Extract the raw immediate bits from either a constant expression or an
  // immediate operand.
  int64_t Imm;
  if (MO.isExpr()) {
    const auto *C = dyn_cast<MCConstantExpr>(MO.getExpr());
    if (!C)
      // Non-constant expression: emit as a trailing literal (255); the value
      // is filled in later via a fixup.
      return 255;

    Imm = C->getValue();
  } else {

    assert(!MO.isFPImm());

    // Neither immediate nor expression: no literal encoding exists.
    if (!MO.isImm())
      return ~0;

    Imm = MO.getImm();
  }

  // Dispatch on the operand's declared width/type to pick the matching
  // literal encoder.
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return getLit32Encoding(static_cast<uint32_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return getLit64Encoding(static_cast<uint64_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    // FIXME Is this correct? What do inline immediates do on SI for f16 src
    // which does not have f16 support?
    return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16: {
    // A packed value that does not fit in 16 bits can only be expressed as a
    // full 32-bit literal, which requires VOP3 literal support.
    if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
      return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
    if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
    // V2INT16 falls through to the integer-only 16-bit path below.
    LLVM_FALLTHROUGH;
  }
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // Only the low 16 bits participate in the inline-constant match.
    uint16_t Lo16 = static_cast<uint16_t>(Imm);
    uint32_t Encoding = getLit16Encoding(Lo16, STI);
    return Encoding;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
290
291void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
292                                       SmallVectorImpl<MCFixup> &Fixups,
293                                       const MCSubtargetInfo &STI) const {
294  verifyInstructionPredicates(MI,
295                              computeAvailableFeatures(STI.getFeatureBits()));
296
297  uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
298  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
299  unsigned bytes = Desc.getSize();
300
301  for (unsigned i = 0; i < bytes; i++) {
302    OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
303  }
304
305  // NSA encoding.
306  if (AMDGPU::isGFX10(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
307    int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
308                                            AMDGPU::OpName::vaddr0);
309    int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
310                                           AMDGPU::OpName::srsrc);
311    assert(vaddr0 >= 0 && srsrc > vaddr0);
312    unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
313    unsigned NumPadding = (-NumExtraAddrs) & 3;
314
315    for (unsigned i = 0; i < NumExtraAddrs; ++i)
316      OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
317                                          Fixups, STI));
318    for (unsigned i = 0; i < NumPadding; ++i)
319      OS.write(0);
320  }
321
322  if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
323      (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
324    return;
325
326  // Check for additional literals in SRC0/1/2 (Op 1/2/3)
327  for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
328
329    // Check if this operand should be encoded as [SV]Src
330    if (!AMDGPU::isSISrcOperand(Desc, i))
331      continue;
332
333    // Is this operand a literal immediate?
334    const MCOperand &Op = MI.getOperand(i);
335    if (getLitEncoding(Op, Desc.OpInfo[i], STI) != 255)
336      continue;
337
338    // Yes! Encode it
339    int64_t Imm = 0;
340
341    if (Op.isImm())
342      Imm = Op.getImm();
343    else if (Op.isExpr()) {
344      if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
345        Imm = C->getValue();
346
347    } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
348      llvm_unreachable("Must be immediate or expr");
349
350    for (unsigned j = 0; j < 4; j++) {
351      OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
352    }
353
354    // Only one literal value allowed
355    break;
356  }
357}
358
359unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
360                                            SmallVectorImpl<MCFixup> &Fixups,
361                                            const MCSubtargetInfo &STI) const {
362  const MCOperand &MO = MI.getOperand(OpNo);
363
364  if (MO.isExpr()) {
365    const MCExpr *Expr = MO.getExpr();
366    MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
367    Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
368    return 0;
369  }
370
371  return getMachineOpValue(MI, MO, Fixups, STI);
372}
373
374unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
375                                                SmallVectorImpl<MCFixup> &Fixups,
376                                                const MCSubtargetInfo &STI) const {
377  auto Offset = MI.getOperand(OpNo).getImm();
378  // VI only supports 20-bit unsigned offsets.
379  assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
380  return Offset;
381}
382
383unsigned
384SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
385                                    SmallVectorImpl<MCFixup> &Fixups,
386                                    const MCSubtargetInfo &STI) const {
387  using namespace AMDGPU::SDWA;
388
389  uint64_t RegEnc = 0;
390
391  const MCOperand &MO = MI.getOperand(OpNo);
392
393  if (MO.isReg()) {
394    unsigned Reg = MO.getReg();
395    RegEnc |= MRI.getEncodingValue(Reg);
396    RegEnc &= SDWA9EncValues::SRC_VGPR_MASK;
397    if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
398      RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
399    }
400    return RegEnc;
401  } else {
402    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
403    uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
404    if (Enc != ~0U && Enc != 255) {
405      return Enc | SDWA9EncValues::SRC_SGPR_MASK;
406    }
407  }
408
409  llvm_unreachable("Unsupported operand kind");
410  return 0;
411}
412
413unsigned
414SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
415                                        SmallVectorImpl<MCFixup> &Fixups,
416                                        const MCSubtargetInfo &STI) const {
417  using namespace AMDGPU::SDWA;
418
419  uint64_t RegEnc = 0;
420
421  const MCOperand &MO = MI.getOperand(OpNo);
422
423  unsigned Reg = MO.getReg();
424  if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
425    RegEnc |= MRI.getEncodingValue(Reg);
426    RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
427    RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
428  }
429  return RegEnc;
430}
431
432unsigned
433SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
434                                      SmallVectorImpl<MCFixup> &Fixups,
435                                      const MCSubtargetInfo &STI) const {
436  unsigned Reg = MI.getOperand(OpNo).getReg();
437  uint64_t Enc = MRI.getEncodingValue(Reg);
438
439  // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
440  // instructions use acc[0:1] modifier bits to distinguish. These bits are
441  // encoded as a virtual 9th bit of the register for these operands.
442  if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
443      MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
444      MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
445      MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
446      MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
447      MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
448      MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
449      MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
450    Enc |= 512;
451
452  return Enc;
453}
454
455static bool needsPCRel(const MCExpr *Expr) {
456  switch (Expr->getKind()) {
457  case MCExpr::SymbolRef: {
458    auto *SE = cast<MCSymbolRefExpr>(Expr);
459    MCSymbolRefExpr::VariantKind Kind = SE->getKind();
460    return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
461           Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
462  }
463  case MCExpr::Binary: {
464    auto *BE = cast<MCBinaryExpr>(Expr);
465    if (BE->getOpcode() == MCBinaryExpr::Sub)
466      return false;
467    return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
468  }
469  case MCExpr::Unary:
470    return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
471  case MCExpr::Target:
472  case MCExpr::Constant:
473    return false;
474  }
475  llvm_unreachable("invalid kind");
476}
477
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                            const MCOperand &MO,
                                       SmallVectorImpl<MCFixup> &Fixups,
                                       const MCSubtargetInfo &STI) const {
  // Registers encode directly through the register-encoding table.
  if (MO.isReg())
    return MRI.getEncodingValue(MO.getReg());

  // Non-constant expressions are resolved later: record a fixup at the
  // literal slot (immediately after the 4- or 8-byte base encoding).
  if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
    // FIXME: If this is expression is PCRel or not should not depend on what
    // the expression looks like. Given that this is just a general expression,
    // it should probably be FK_Data_4 and whatever is producing
    //
    //    s_add_u32 s2, s2, (extern_const_addrspace+16
    //
    // And expecting a PCRel should instead produce
    //
    // .Ltmp1:
    //   s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
    MCFixupKind Kind;
    if (needsPCRel(MO.getExpr()))
      Kind = FK_PCRel_4;
    else
      Kind = FK_Data_4;

    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
    uint32_t Offset = Desc.getSize();
    assert(Offset == 4 || Offset == 8);

    Fixups.push_back(
      MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
  }

  // Figure out the operand number, needed for isSrcOperand check
  unsigned OpNo = 0;
  for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
    if (&MO == &MI.getOperand(OpNo))
      break;
  }

  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
    uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
    // Accept an inline constant, or the literal marker (255) when the
    // encoding size (4 or 8 bytes) allows a trailing literal to be emitted.
    if (Enc != ~0U &&
        (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
      return Enc;

  } else if (MO.isImm())
    return MO.getImm();

  llvm_unreachable("Encoding of this operand type is not supported yet.");
  return 0;
}
530
531#define ENABLE_INSTR_PREDICATE_VERIFIER
532#include "AMDGPUGenMCCodeEmitter.inc"
533