R600MCCodeEmitter.cpp revision 249259
//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This code emitter outputs bytecode that is understood by the r600g driver
/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
/// but it still needs to be run through a finalizer in order to be executed
/// by the GPU.
///
/// [1] http://www.mesa3d.org/
//
//===----------------------------------------------------------------------===//

#include "R600Defines.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
#include <stdio.h>

// Number of bytes EmitSrc() writes per source operand:
// 2 (select) + 1 (chan) + 1 (neg) + 1 (abs) + 1 (rel) + 1 (kc_bank)
// + 4 (literal) = 11.
#define SRC_BYTE_COUNT 11
// NOTE(review): DST_BYTE_COUNT is not referenced anywhere in this file —
// presumably a leftover from a removed EmitDst() body; confirm before removing.
#define DST_BYTE_COUNT 5

using namespace llvm;

namespace {

/// \brief MCCodeEmitter that serializes R600/Evergreen/Cayman instructions
/// into the byte stream consumed by Mesa's r600g finalizer.
///
/// Most instructions are written as a one-byte InstrTypes tag followed by the
/// TableGen-produced binary encoding (see getBinaryCodeForInstr from
/// AMDGPUGenMCCodeEmitter.inc, included at the bottom of this file).
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
  R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
  void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
  const MCInstrInfo &MCII;
  const MCRegisterInfo &MRI;
  const MCSubtargetInfo &STI;
  MCContext &Ctx;

public:

  R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                    const MCSubtargetInfo &sti, MCContext &ctx)
    : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }

  /// \brief Encode the instruction and write it to the OS.
  virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                                 SmallVectorImpl<MCFixup> &Fixups) const;

  /// \returns the encoding for an MCOperand.
  virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                                     SmallVectorImpl<MCFixup> &Fixups) const;
private:

  /// \brief Emit an ALU instruction: INSTR_ALU tag, source count, per-source
  /// records (EmitSrcISA), then the 64-bit encoding word.
  void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                    raw_ostream &OS) const;
  /// \brief Emit the legacy SRC_BYTE_COUNT-byte source record for operand
  /// \p OpIdx (used only by EmitFCInstr).
  void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
  /// \brief Emit the ISA-style source record: type byte, 4-byte const select,
  /// 4-byte literal value.
  void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
                  raw_ostream &OS) const;
  // NOTE(review): EmitDst is declared but no definition is visible in this
  // file — it appears to be dead; confirm against the rest of the tree.
  void EmitDst(const MCInst &MI, raw_ostream &OS) const;
  /// \brief Emit a flow-control pseudo instruction (see enum FCInstr).
  void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;

  /// \brief Write \p byteCount zero bytes (placeholder/padding).
  void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;

  /// \brief Write the low byte of \p byte.
  void EmitByte(unsigned int byte, raw_ostream &OS) const;

  /// \brief Write \p bytes as two little-endian bytes.
  void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;

  /// \brief Write a 32-bit value, little-endian.
  void Emit(uint32_t value, raw_ostream &OS) const;
  /// \brief Write a 64-bit value, little-endian.
  void Emit(uint64_t value, raw_ostream &OS) const;

  /// \returns the channel (x/y/z/w) bits of \p reg's hardware encoding.
  unsigned getHWRegChan(unsigned reg) const;
  /// \returns the register-index bits of \p regNo's hardware encoding.
  unsigned getHWReg(unsigned regNo) const;

  /// \returns true if \p opcode is one of the flow-control pseudos handled
  /// by EmitFCInstr.
  bool isFCOp(unsigned opcode) const;
  // NOTE(review): isTexOp is not called anywhere visible in this file (the
  // TEX opcodes are listed explicitly in EncodeInstruction's switch).
  bool isTexOp(unsigned opcode) const;
  /// \returns true if flag \p Flag is set for operand \p Operand in MI's
  /// packed flag operand (see isFlagSet below).
  bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;

};

} // End anonymous namespace

// Index of each coordinate channel within SrcSelect/CoordType arrays.
enum RegElement {
  ELEMENT_X = 0,
  ELEMENT_Y,
  ELEMENT_Z,
  ELEMENT_W
};

// One-byte tag written before each instruction so the r600g finalizer knows
// how to parse the bytes that follow.
enum InstrTypes {
  INSTR_ALU = 0,
  INSTR_TEX,
  INSTR_FC,
  INSTR_NATIVE,
  INSTR_VTX,
  INSTR_EXPORT,
  INSTR_CFALU
};

// Opcode byte written by EmitFCInstr after the INSTR_FC tag and source record.
enum FCInstr {
  FC_IF_PREDICATE = 0,
  FC_ELSE,
  FC_ENDIF,
  FC_BGNLOOP,
  FC_ENDLOOP,
  FC_BREAK_PREDICATE,
  FC_CONTINUE
};

// Values carried by the TextureType immediate operand of TEX instructions;
// used below to fix up coordinate types and source swizzles.
enum TextureTypes {
  TEXTURE_1D = 1,
  TEXTURE_2D,
  TEXTURE_3D,
  TEXTURE_CUBE,
  TEXTURE_RECT,
  TEXTURE_SHADOW1D,
  TEXTURE_SHADOW2D,
  TEXTURE_SHADOWRECT,
  TEXTURE_1D_ARRAY,
  TEXTURE_2D_ARRAY,
  TEXTURE_SHADOW1D_ARRAY,
  TEXTURE_SHADOW2D_ARRAY
};

/// Factory hooked into the MC layer; ownership of the returned emitter
/// passes to the caller.
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
                                             const MCRegisterInfo &MRI,
                                             const MCSubtargetInfo &STI,
                                             MCContext &Ctx) {
  return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
}

/// Top-level dispatch: pick the serialization format from the opcode.
/// Pure pseudos (RETURN/BUNDLE/KILL and CF_TC/CF_VC/CF_CALL_FS) emit nothing;
/// everything not special-cased falls through to EmitALUInstr.
void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                                       SmallVectorImpl<MCFixup> &Fixups) const {
  if (isFCOp(MI.getOpcode())){
    EmitFCInstr(MI, OS);
  } else if (MI.getOpcode() == AMDGPU::RETURN ||
    MI.getOpcode() == AMDGPU::BUNDLE ||
    MI.getOpcode() == AMDGPU::KILL) {
    return;
  } else {
    switch(MI.getOpcode()) {
    case AMDGPU::STACK_SIZE: {
      // Single raw byte: the stack size immediate, no type tag.
      EmitByte(MI.getOperand(0).getImm(), OS);
      break;
    }
    case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
    case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
      // Emitted verbatim as the 64-bit TableGen encoding.
      uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
      EmitByte(INSTR_NATIVE, OS);
      Emit(inst, OS);
      break;
    }
    case AMDGPU::CONSTANT_LOAD_eg:
    case AMDGPU::VTX_READ_PARAM_8_eg:
    case AMDGPU::VTX_READ_PARAM_16_eg:
    case AMDGPU::VTX_READ_PARAM_32_eg:
    case AMDGPU::VTX_READ_PARAM_128_eg:
    case AMDGPU::VTX_READ_GLOBAL_8_eg:
    case AMDGPU::VTX_READ_GLOBAL_32_eg:
    case AMDGPU::VTX_READ_GLOBAL_128_eg:
    case AMDGPU::TEX_VTX_CONSTBUF:
    case AMDGPU::TEX_VTX_TEXBUF : {
      // Vertex fetch: 64-bit encoding followed by a 32-bit offset word.
      uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
      uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset

      EmitByte(INSTR_VTX, OS);
      Emit(InstWord01, OS);
      Emit(InstWord2, OS);
      break;
    }
    case AMDGPU::TEX_LD:
    case AMDGPU::TEX_GET_TEXTURE_RESINFO:
    case AMDGPU::TEX_SAMPLE:
    case AMDGPU::TEX_SAMPLE_C:
    case AMDGPU::TEX_SAMPLE_L:
    case AMDGPU::TEX_SAMPLE_C_L:
    case AMDGPU::TEX_SAMPLE_LB:
    case AMDGPU::TEX_SAMPLE_C_LB:
    case AMDGPU::TEX_SAMPLE_G:
    case AMDGPU::TEX_SAMPLE_C_G:
    case AMDGPU::TEX_GET_GRADIENTS_H:
    case AMDGPU::TEX_GET_GRADIENTS_V:
    case AMDGPU::TEX_SET_GRADIENTS_H:
    case AMDGPU::TEX_SET_GRADIENTS_V: {
      unsigned Opcode = MI.getOpcode();
      // TEX_LD carries three extra offset operands before Sampler/TextureType.
      bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
      unsigned OpOffset = HasOffsets ? 3 : 0;
      int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
      int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();

      // Defaults: identity swizzle, zero offsets, all coords normalized.
      uint32_t SrcSelect[4] = {0, 1, 2, 3};
      uint32_t Offsets[3] = {0, 0, 0};
      uint64_t CoordType[4] = {1, 1, 1, 1};

      if (HasOffsets)
        for (unsigned i = 0; i < 3; i++) {
          int SignedOffset = MI.getOperand(i + 2).getImm();
          // Offsets are encoded in 5 bits each.
          Offsets[i] = (SignedOffset & 0x1F);
        }

      // RECT targets use unnormalized x/y coordinates.
      if (TextureType == TEXTURE_RECT ||
          TextureType == TEXTURE_SHADOWRECT) {
        CoordType[ELEMENT_X] = 0;
        CoordType[ELEMENT_Y] = 0;
      }

      // Array targets: the layer index is an unnormalized integer; for 1D
      // arrays (except the _C_L/_C_LB compare-with-lod forms) it also moves
      // from Y into the Z slot.
      if (TextureType == TEXTURE_1D_ARRAY ||
          TextureType == TEXTURE_SHADOW1D_ARRAY) {
        if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
            Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
          CoordType[ELEMENT_Y] = 0;
        } else {
          CoordType[ELEMENT_Z] = 0;
          SrcSelect[ELEMENT_Z] = ELEMENT_Y;
        }
      } else if (TextureType == TEXTURE_2D_ARRAY ||
          TextureType == TEXTURE_SHADOW2D_ARRAY) {
        CoordType[ELEMENT_Z] = 0;
      }

      // Shadow targets (other than the compare-with-lod forms) take the
      // comparison value from Z via the W select.
      if ((TextureType == TEXTURE_SHADOW1D ||
          TextureType == TEXTURE_SHADOW2D ||
          TextureType == TEXTURE_SHADOWRECT ||
          TextureType == TEXTURE_SHADOW1D_ARRAY) &&
          Opcode != AMDGPU::TEX_SAMPLE_C_L &&
          Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
        SrcSelect[ELEMENT_W] = ELEMENT_Z;
      }

      // Coordinate-type bits occupy the top four bits of the 64-bit word.
      uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
          CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
          CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
      // Word2 packs offsets (5 bits each), sampler id, and source selects.
      uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
          SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
          SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
          Offsets[2] << 10;

      EmitByte(INSTR_TEX, OS);
      Emit(Word01, OS);
      Emit(Word2, OS);
      break;
    }
    case AMDGPU::EG_ExportSwz:
    case AMDGPU::R600_ExportSwz:
    case AMDGPU::EG_ExportBuf:
    case AMDGPU::R600_ExportBuf: {
      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
      EmitByte(INSTR_EXPORT, OS);
      Emit(Inst, OS);
      break;
    }
    case AMDGPU::CF_ALU:
    case AMDGPU::CF_ALU_PUSH_BEFORE: {
      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
      EmitByte(INSTR_CFALU, OS);
      Emit(Inst, OS);
      break;
    }
    case AMDGPU::CF_TC:
    case AMDGPU::CF_VC:
    case AMDGPU::CF_CALL_FS:
      // No bytes emitted for these control-flow markers.
      return;
    case AMDGPU::WHILE_LOOP:
    case AMDGPU::END_LOOP:
    case AMDGPU::LOOP_BREAK:
    case AMDGPU::CF_CONTINUE:
    case AMDGPU::CF_JUMP:
    case AMDGPU::CF_ELSE:
    case AMDGPU::POP: {
      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
      EmitByte(INSTR_NATIVE, OS);
      Emit(Inst, OS);
      break;
    }
    default:
      EmitALUInstr(MI, Fixups, OS);
      break;
    }
  }
}

/// Serialize an ALU instruction as: INSTR_ALU tag, source-operand count
/// (1 byte), one EmitSrcISA record per source, then the 64-bit encoding.
void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
                                     SmallVectorImpl<MCFixup> &Fixups,
                                     raw_ostream &OS) const {
  const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());

  // Emit instruction type
  EmitByte(INSTR_ALU, OS);

  uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);

  //older alu have different encoding for instructions with one or two src
  //parameters.
  if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
      !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
    // Relocate the 10-bit opcode field at bit 39 one bit to the left for
    // the pre-Evergreen OP2 encoding.
    uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
    InstWord01 &= ~(0x3FFULL << 39);
    InstWord01 |= ISAOpCode << 1;
  }

  // OP3 instructions have 3 sources, OP2 have 2, everything else 1.
  unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
      MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;

  EmitByte(SrcNum, OS);

  const unsigned SrcOps[3][2] = {
      {R600Operands::SRC0, R600Operands::SRC0_SEL},
      {R600Operands::SRC1, R600Operands::SRC1_SEL},
      {R600Operands::SRC2, R600Operands::SRC2_SEL}
  };

  for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
    // ALUOpTable maps logical source slots to actual operand indices for
    // this instruction's source count.
    unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
    unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
    EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
  }

  Emit(InstWord01, OS);
  return;
}

/// Emit the legacy 11-byte (SRC_BYTE_COUNT) source record for operand
/// \p OpIdx: select (2), channel (1), neg (1), abs (1), rel (1),
/// kc_bank (1), literal (4).
void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
                                raw_ostream &OS) const {
  const MCOperand &MO = MI.getOperand(OpIdx);
  // Union lets a float literal be emitted through its raw bit pattern.
  union {
    float f;
    uint32_t i;
  } Value;
  Value.i = 0;
  // Emit the source select (2 bytes).  For GPRs, this is the register index.
  // For other potential instruction operands, (e.g. constant registers) the
  // value of the source select is defined in the r600isa docs.
  if (MO.isReg()) {
    unsigned reg = MO.getReg();
    EmitTwoBytes(getHWReg(reg), OS);
    if (reg == AMDGPU::ALU_LITERAL_X) {
      // The literal's value is carried by the instruction's last operand.
      unsigned ImmOpIndex = MI.getNumOperands() - 1;
      MCOperand ImmOp = MI.getOperand(ImmOpIndex);
      if (ImmOp.isFPImm()) {
        Value.f = ImmOp.getFPImm();
      } else {
        assert(ImmOp.isImm());
        Value.i = ImmOp.getImm();
      }
    }
  } else {
    // XXX: Handle other operand types.
    EmitTwoBytes(0, OS);
  }

  // Emit the source channel (1 byte)
  if (MO.isReg()) {
    EmitByte(getHWRegChan(MO.getReg()), OS);
  } else {
    EmitByte(0, OS);
  }

  // XXX: Emit isNegated (1 byte)
  // Negation is suppressed when ABS is set; NEG_ONE/NEG_HALF registers are
  // inherently negated.
  if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
      && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
     (MO.isReg() &&
      (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
    EmitByte(1, OS);
  } else {
    EmitByte(0, OS);
  }

  // Emit isAbsolute (1 byte)
  if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
    EmitByte(1, OS);
  } else {
    EmitByte(0, OS);
  }

  // XXX: Emit relative addressing mode (1 byte)
  EmitByte(0, OS);

  // Emit kc_bank, This will be adjusted later by r600_asm
  EmitByte(0, OS);

  // Emit the literal value, if applicable (4 bytes).
  Emit(Value.i, OS);

}

/// Emit the ISA-style source record used by EmitALUInstr: a type byte
/// (0 = GPR, 1 = constant), a 4-byte select, and a 4-byte literal value
/// (zero unless the source is ALU_LITERAL_X).
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
                                   unsigned SelOpIdx, raw_ostream &OS) const {
  const MCOperand &RegMO = MI.getOperand(RegOpIdx);
  const MCOperand &SelMO = MI.getOperand(SelOpIdx);

  // Union lets a float literal be emitted through its raw bit pattern.
  union {
    float f;
    uint32_t i;
  } InlineConstant;
  InlineConstant.i = 0;
  // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
  // and select is 0 (GPR index is encoded in the instr encoding. For constants
  // type is 1 and select is the original const select passed from the driver.
  unsigned Reg = RegMO.getReg();
  if (Reg == AMDGPU::ALU_CONST) {
    EmitByte(1, OS);
    uint32_t Sel = SelMO.getImm();
    Emit(Sel, OS);
  } else {
    EmitByte(0, OS);
    Emit((uint32_t)0, OS);
  }

  if (Reg == AMDGPU::ALU_LITERAL_X) {
    // The literal's value is carried by the instruction's last operand.
    unsigned ImmOpIndex = MI.getNumOperands() - 1;
    MCOperand ImmOp = MI.getOperand(ImmOpIndex);
    if (ImmOp.isFPImm()) {
      InlineConstant.f = ImmOp.getFPImm();
    } else {
      assert(ImmOp.isImm());
      InlineConstant.i = ImmOp.getImm();
    }
  }

  // Emit the literal value, if applicable (4 bytes).
  Emit(InlineConstant.i, OS);
}

/// Emit a flow-control pseudo: INSTR_FC tag, one source record (or
/// SRC_BYTE_COUNT zero bytes when there is no operand), then the FCInstr
/// opcode byte.  Unknown opcodes abort — isFCOp() gates what reaches here.
void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {

  // Emit instruction type
  EmitByte(INSTR_FC, OS);

  // Emit SRC
  unsigned NumOperands = MI.getNumOperands();
  if (NumOperands > 0) {
    assert(NumOperands == 1);
    EmitSrc(MI, 0, OS);
  } else {
    EmitNullBytes(SRC_BYTE_COUNT, OS);
  }

  // Emit FC Instruction
  enum FCInstr instr;
  switch (MI.getOpcode()) {
  case AMDGPU::PREDICATED_BREAK:
    instr = FC_BREAK_PREDICATE;
    break;
  case AMDGPU::CONTINUE:
    instr = FC_CONTINUE;
    break;
  case AMDGPU::IF_PREDICATE_SET:
    instr = FC_IF_PREDICATE;
    break;
  case AMDGPU::ELSE:
    instr = FC_ELSE;
    break;
  case AMDGPU::ENDIF:
    instr = FC_ENDIF;
    break;
  case AMDGPU::ENDLOOP:
    instr = FC_ENDLOOP;
    break;
  case AMDGPU::WHILELOOP:
    instr = FC_BGNLOOP;
    break;
  default:
    abort();
    break;
  }
  EmitByte(instr, OS);
}

/// Write \p ByteCount zero bytes.
void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
                                      raw_ostream &OS) const {

  for (unsigned int i = 0; i < ByteCount; i++) {
    EmitByte(0, OS);
  }
}

/// Write the low byte of \p Byte to the stream.
void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
  OS.write((uint8_t) Byte & 0xff);
}

/// Write \p Bytes as two little-endian bytes.
// NOTE(review): declared with uint32_t in the class; unsigned int is the
// same type on all supported hosts but the spellings should match.
void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
                                     raw_ostream &OS) const {
  OS.write((uint8_t) (Bytes & 0xff));
  OS.write((uint8_t) ((Bytes >> 8) & 0xff));
}

/// Write a 32-bit value, little-endian.
void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
  for (unsigned i = 0; i < 4; i++) {
    OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
  }
}

/// Write a 64-bit value, little-endian.
void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
  for (unsigned i = 0; i < 8; i++) {
    EmitByte((Value >> (8 * i)) & 0xff, OS);
  }
}

/// The channel bits live above HW_CHAN_SHIFT in the register encoding.
unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
  return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
}

/// The register index occupies the HW_REG_MASK bits of the encoding.
unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
  return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
}

/// \returns the encoding for an MCOperand.  Registers in natively-encoded
/// instructions use the full MRI encoding (index + channel); otherwise only
/// the register index.  Immediates are returned verbatim; any other operand
/// kind is a bug.
uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                              const MCOperand &MO,
                                        SmallVectorImpl<MCFixup> &Fixup) const {
  if (MO.isReg()) {
    if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
      return MRI.getEncodingValue(MO.getReg());
    } else {
      return getHWReg(MO.getReg());
    }
  } else if (MO.isImm()) {
    return MO.getImm();
  } else {
    assert(0);
    return 0;
  }
}

//===----------------------------------------------------------------------===//
// Encoding helper functions
//===----------------------------------------------------------------------===//

/// \returns true for the flow-control pseudos serialized by EmitFCInstr.
/// Must stay in sync with EmitFCInstr's switch.
bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::PREDICATED_BREAK:
  case AMDGPU::CONTINUE:
  case AMDGPU::IF_PREDICATE_SET:
  case AMDGPU::ELSE:
  case AMDGPU::ENDIF:
  case AMDGPU::ENDLOOP:
  case AMDGPU::WHILELOOP:
    return true;
  }
}

/// \returns true for TEX opcodes.  Mirrors the TEX case list in
/// EncodeInstruction's switch (which does not call this helper).
bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
    return true;
  }
}

/// Test flag \p Flag for operand \p Operand.  All operand flags are packed
/// into a single immediate operand (located via GET_FLAG_OPERAND_IDX from the
/// instruction's TSFlags), NUM_MO_FLAGS bits per operand.  A flag-operand
/// index of 0 means the instruction carries no flags.
bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
                                  unsigned Flag) const {
  const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
  unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
  if (FlagIndex == 0) {
    return false;
  }
  assert(MI.getOperand(FlagIndex).isImm());
  return !!((MI.getOperand(FlagIndex).getImm() >>
            (NUM_MO_FLAGS * Operand)) & Flag);
}

#include "AMDGPUGenMCCodeEmitter.inc"