AsmWriterEmitter.cpp revision 203954
1//===- AsmWriterEmitter.cpp - Generate an assembly writer -----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend is emits an assembly printer for the current target. 11// Note that this is currently fairly skeletal, but will grow over time. 12// 13//===----------------------------------------------------------------------===// 14 15#include "AsmWriterEmitter.h" 16#include "AsmWriterInst.h" 17#include "CodeGenTarget.h" 18#include "Record.h" 19#include "StringToOffsetTable.h" 20#include "llvm/Support/Debug.h" 21#include "llvm/Support/MathExtras.h" 22#include <algorithm> 23using namespace llvm; 24 25static void PrintCases(std::vector<std::pair<std::string, 26 AsmWriterOperand> > &OpsToPrint, raw_ostream &O) { 27 O << " case " << OpsToPrint.back().first << ": "; 28 AsmWriterOperand TheOp = OpsToPrint.back().second; 29 OpsToPrint.pop_back(); 30 31 // Check to see if any other operands are identical in this list, and if so, 32 // emit a case label for them. 33 for (unsigned i = OpsToPrint.size(); i != 0; --i) 34 if (OpsToPrint[i-1].second == TheOp) { 35 O << "\n case " << OpsToPrint[i-1].first << ": "; 36 OpsToPrint.erase(OpsToPrint.begin()+i-1); 37 } 38 39 // Finally, emit the code. 40 O << TheOp.getCode(); 41 O << "break;\n"; 42} 43 44 45/// EmitInstructions - Emit the last instruction in the vector and any other 46/// instructions that are suitably similar to it. 47static void EmitInstructions(std::vector<AsmWriterInst> &Insts, 48 raw_ostream &O) { 49 AsmWriterInst FirstInst = Insts.back(); 50 Insts.pop_back(); 51 52 std::vector<AsmWriterInst> SimilarInsts; 53 unsigned DifferingOperand = ~0; 54 for (unsigned i = Insts.size(); i != 0; --i) { 55 unsigned DiffOp = Insts[i-1].MatchesAllButOneOp(FirstInst); 56 if (DiffOp != ~1U) { 57 if (DifferingOperand == ~0U) // First match! 58 DifferingOperand = DiffOp; 59 60 // If this differs in the same operand as the rest of the instructions in 61 // this class, move it to the SimilarInsts list. 62 if (DifferingOperand == DiffOp || DiffOp == ~0U) { 63 SimilarInsts.push_back(Insts[i-1]); 64 Insts.erase(Insts.begin()+i-1); 65 } 66 } 67 } 68 69 O << " case " << FirstInst.CGI->Namespace << "::" 70 << FirstInst.CGI->TheDef->getName() << ":\n"; 71 for (unsigned i = 0, e = SimilarInsts.size(); i != e; ++i) 72 O << " case " << SimilarInsts[i].CGI->Namespace << "::" 73 << SimilarInsts[i].CGI->TheDef->getName() << ":\n"; 74 for (unsigned i = 0, e = FirstInst.Operands.size(); i != e; ++i) { 75 if (i != DifferingOperand) { 76 // If the operand is the same for all instructions, just print it. 77 O << " " << FirstInst.Operands[i].getCode(); 78 } else { 79 // If this is the operand that varies between all of the instructions, 80 // emit a switch for just this operand now. 81 O << " switch (MI->getOpcode()) {\n"; 82 std::vector<std::pair<std::string, AsmWriterOperand> > OpsToPrint; 83 OpsToPrint.push_back(std::make_pair(FirstInst.CGI->Namespace + "::" + 84 FirstInst.CGI->TheDef->getName(), 85 FirstInst.Operands[i])); 86 87 for (unsigned si = 0, e = SimilarInsts.size(); si != e; ++si) { 88 AsmWriterInst &AWI = SimilarInsts[si]; 89 OpsToPrint.push_back(std::make_pair(AWI.CGI->Namespace+"::"+ 90 AWI.CGI->TheDef->getName(), 91 AWI.Operands[i])); 92 } 93 std::reverse(OpsToPrint.begin(), OpsToPrint.end()); 94 while (!OpsToPrint.empty()) 95 PrintCases(OpsToPrint, O); 96 O << " }"; 97 } 98 O << "\n"; 99 } 100 O << " break;\n"; 101} 102 103void AsmWriterEmitter:: 104FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands, 105 std::vector<unsigned> &InstIdxs, 106 std::vector<unsigned> &InstOpsUsed) const { 107 InstIdxs.assign(NumberedInstructions.size(), ~0U); 108 109 // This vector parallels UniqueOperandCommands, keeping track of which 110 // instructions each case are used for. It is a comma separated string of 111 // enums. 112 std::vector<std::string> InstrsForCase; 113 InstrsForCase.resize(UniqueOperandCommands.size()); 114 InstOpsUsed.assign(UniqueOperandCommands.size(), 0); 115 116 for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) { 117 const AsmWriterInst *Inst = getAsmWriterInstByID(i); 118 if (Inst == 0) continue; // PHI, INLINEASM, DBG_LABEL, etc. 119 120 std::string Command; 121 if (Inst->Operands.empty()) 122 continue; // Instruction already done. 123 124 Command = " " + Inst->Operands[0].getCode() + "\n"; 125 126 // Check to see if we already have 'Command' in UniqueOperandCommands. 127 // If not, add it. 128 bool FoundIt = false; 129 for (unsigned idx = 0, e = UniqueOperandCommands.size(); idx != e; ++idx) 130 if (UniqueOperandCommands[idx] == Command) { 131 InstIdxs[i] = idx; 132 InstrsForCase[idx] += ", "; 133 InstrsForCase[idx] += Inst->CGI->TheDef->getName(); 134 FoundIt = true; 135 break; 136 } 137 if (!FoundIt) { 138 InstIdxs[i] = UniqueOperandCommands.size(); 139 UniqueOperandCommands.push_back(Command); 140 InstrsForCase.push_back(Inst->CGI->TheDef->getName()); 141 142 // This command matches one operand so far. 143 InstOpsUsed.push_back(1); 144 } 145 } 146 147 // For each entry of UniqueOperandCommands, there is a set of instructions 148 // that uses it. If the next command of all instructions in the set are 149 // identical, fold it into the command. 150 for (unsigned CommandIdx = 0, e = UniqueOperandCommands.size(); 151 CommandIdx != e; ++CommandIdx) { 152 153 for (unsigned Op = 1; ; ++Op) { 154 // Scan for the first instruction in the set. 155 std::vector<unsigned>::iterator NIT = 156 std::find(InstIdxs.begin(), InstIdxs.end(), CommandIdx); 157 if (NIT == InstIdxs.end()) break; // No commonality. 158 159 // If this instruction has no more operands, we isn't anything to merge 160 // into this command. 161 const AsmWriterInst *FirstInst = 162 getAsmWriterInstByID(NIT-InstIdxs.begin()); 163 if (!FirstInst || FirstInst->Operands.size() == Op) 164 break; 165 166 // Otherwise, scan to see if all of the other instructions in this command 167 // set share the operand. 168 bool AllSame = true; 169 // Keep track of the maximum, number of operands or any 170 // instruction we see in the group. 171 size_t MaxSize = FirstInst->Operands.size(); 172 173 for (NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx); 174 NIT != InstIdxs.end(); 175 NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx)) { 176 // Okay, found another instruction in this command set. If the operand 177 // matches, we're ok, otherwise bail out. 178 const AsmWriterInst *OtherInst = 179 getAsmWriterInstByID(NIT-InstIdxs.begin()); 180 181 if (OtherInst && 182 OtherInst->Operands.size() > FirstInst->Operands.size()) 183 MaxSize = std::max(MaxSize, OtherInst->Operands.size()); 184 185 if (!OtherInst || OtherInst->Operands.size() == Op || 186 OtherInst->Operands[Op] != FirstInst->Operands[Op]) { 187 AllSame = false; 188 break; 189 } 190 } 191 if (!AllSame) break; 192 193 // Okay, everything in this command set has the same next operand. Add it 194 // to UniqueOperandCommands and remember that it was consumed. 195 std::string Command = " " + FirstInst->Operands[Op].getCode() + "\n"; 196 197 UniqueOperandCommands[CommandIdx] += Command; 198 InstOpsUsed[CommandIdx]++; 199 } 200 } 201 202 // Prepend some of the instructions each case is used for onto the case val. 203 for (unsigned i = 0, e = InstrsForCase.size(); i != e; ++i) { 204 std::string Instrs = InstrsForCase[i]; 205 if (Instrs.size() > 70) { 206 Instrs.erase(Instrs.begin()+70, Instrs.end()); 207 Instrs += "..."; 208 } 209 210 if (!Instrs.empty()) 211 UniqueOperandCommands[i] = " // " + Instrs + "\n" + 212 UniqueOperandCommands[i]; 213 } 214} 215 216 217static void UnescapeString(std::string &Str) { 218 for (unsigned i = 0; i != Str.size(); ++i) { 219 if (Str[i] == '\\' && i != Str.size()-1) { 220 switch (Str[i+1]) { 221 default: continue; // Don't execute the code after the switch. 222 case 'a': Str[i] = '\a'; break; 223 case 'b': Str[i] = '\b'; break; 224 case 'e': Str[i] = 27; break; 225 case 'f': Str[i] = '\f'; break; 226 case 'n': Str[i] = '\n'; break; 227 case 'r': Str[i] = '\r'; break; 228 case 't': Str[i] = '\t'; break; 229 case 'v': Str[i] = '\v'; break; 230 case '"': Str[i] = '\"'; break; 231 case '\'': Str[i] = '\''; break; 232 case '\\': Str[i] = '\\'; break; 233 } 234 // Nuke the second character. 235 Str.erase(Str.begin()+i+1); 236 } 237 } 238} 239 240/// EmitPrintInstruction - Generate the code for the "printInstruction" method 241/// implementation. 242void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { 243 CodeGenTarget Target; 244 Record *AsmWriter = Target.getAsmWriter(); 245 std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName"); 246 247 O << 248 "/// printInstruction - This method is automatically generated by tablegen\n" 249 "/// from the instruction set description.\n" 250 "void " << Target.getName() << ClassName 251 << "::printInstruction(const MachineInstr *MI) {\n"; 252 253 std::vector<AsmWriterInst> Instructions; 254 255 for (CodeGenTarget::inst_iterator I = Target.inst_begin(), 256 E = Target.inst_end(); I != E; ++I) 257 if (!I->second.AsmString.empty() && 258 I->second.TheDef->getName() != "PHI") 259 Instructions.push_back( 260 AsmWriterInst(I->second, 261 AsmWriter->getValueAsInt("Variant"), 262 AsmWriter->getValueAsInt("FirstOperandColumn"), 263 AsmWriter->getValueAsInt("OperandSpacing"))); 264 265 // Get the instruction numbering. 266 Target.getInstructionsByEnumValue(NumberedInstructions); 267 268 // Compute the CodeGenInstruction -> AsmWriterInst mapping. Note that not 269 // all machine instructions are necessarily being printed, so there may be 270 // target instructions not in this map. 271 for (unsigned i = 0, e = Instructions.size(); i != e; ++i) 272 CGIAWIMap.insert(std::make_pair(Instructions[i].CGI, &Instructions[i])); 273 274 // Build an aggregate string, and build a table of offsets into it. 275 StringToOffsetTable StringTable; 276 277 /// OpcodeInfo - This encodes the index of the string to use for the first 278 /// chunk of the output as well as indices used for operand printing. 279 std::vector<unsigned> OpcodeInfo; 280 281 unsigned MaxStringIdx = 0; 282 for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) { 283 AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions[i]]; 284 unsigned Idx; 285 if (AWI == 0) { 286 // Something not handled by the asmwriter printer. 287 Idx = ~0U; 288 } else if (AWI->Operands[0].OperandType != 289 AsmWriterOperand::isLiteralTextOperand || 290 AWI->Operands[0].Str.empty()) { 291 // Something handled by the asmwriter printer, but with no leading string. 292 Idx = StringTable.GetOrAddStringOffset(""); 293 } else { 294 std::string Str = AWI->Operands[0].Str; 295 UnescapeString(Str); 296 Idx = StringTable.GetOrAddStringOffset(Str); 297 MaxStringIdx = std::max(MaxStringIdx, Idx); 298 299 // Nuke the string from the operand list. It is now handled! 300 AWI->Operands.erase(AWI->Operands.begin()); 301 } 302 303 // Bias offset by one since we want 0 as a sentinel. 304 OpcodeInfo.push_back(Idx+1); 305 } 306 307 // Figure out how many bits we used for the string index. 308 unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx+2); 309 310 // To reduce code size, we compactify common instructions into a few bits 311 // in the opcode-indexed table. 312 unsigned BitsLeft = 32-AsmStrBits; 313 314 std::vector<std::vector<std::string> > TableDrivenOperandPrinters; 315 316 while (1) { 317 std::vector<std::string> UniqueOperandCommands; 318 std::vector<unsigned> InstIdxs; 319 std::vector<unsigned> NumInstOpsHandled; 320 FindUniqueOperandCommands(UniqueOperandCommands, InstIdxs, 321 NumInstOpsHandled); 322 323 // If we ran out of operands to print, we're done. 324 if (UniqueOperandCommands.empty()) break; 325 326 // Compute the number of bits we need to represent these cases, this is 327 // ceil(log2(numentries)). 328 unsigned NumBits = Log2_32_Ceil(UniqueOperandCommands.size()); 329 330 // If we don't have enough bits for this operand, don't include it. 331 if (NumBits > BitsLeft) { 332 DEBUG(errs() << "Not enough bits to densely encode " << NumBits 333 << " more bits\n"); 334 break; 335 } 336 337 // Otherwise, we can include this in the initial lookup table. Add it in. 338 BitsLeft -= NumBits; 339 for (unsigned i = 0, e = InstIdxs.size(); i != e; ++i) 340 if (InstIdxs[i] != ~0U) 341 OpcodeInfo[i] |= InstIdxs[i] << (BitsLeft+AsmStrBits); 342 343 // Remove the info about this operand. 344 for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) { 345 if (AsmWriterInst *Inst = getAsmWriterInstByID(i)) 346 if (!Inst->Operands.empty()) { 347 unsigned NumOps = NumInstOpsHandled[InstIdxs[i]]; 348 assert(NumOps <= Inst->Operands.size() && 349 "Can't remove this many ops!"); 350 Inst->Operands.erase(Inst->Operands.begin(), 351 Inst->Operands.begin()+NumOps); 352 } 353 } 354 355 // Remember the handlers for this set of operands. 356 TableDrivenOperandPrinters.push_back(UniqueOperandCommands); 357 } 358 359 360 361 O<<" static const unsigned OpInfo[] = {\n"; 362 for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) { 363 O << " " << OpcodeInfo[i] << "U,\t// " 364 << NumberedInstructions[i]->TheDef->getName() << "\n"; 365 } 366 // Add a dummy entry so the array init doesn't end with a comma. 367 O << " 0U\n"; 368 O << " };\n\n"; 369 370 // Emit the string itself. 371 O << " const char *AsmStrs = \n"; 372 StringTable.EmitString(O); 373 O << ";\n\n"; 374 375 O << " O << \"\\t\";\n\n"; 376 377 O << " // Emit the opcode for the instruction.\n" 378 << " unsigned Bits = OpInfo[MI->getOpcode()];\n" 379 << " assert(Bits != 0 && \"Cannot print this instruction.\");\n" 380 << " O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ")-1;\n\n"; 381 382 // Output the table driven operand information. 383 BitsLeft = 32-AsmStrBits; 384 for (unsigned i = 0, e = TableDrivenOperandPrinters.size(); i != e; ++i) { 385 std::vector<std::string> &Commands = TableDrivenOperandPrinters[i]; 386 387 // Compute the number of bits we need to represent these cases, this is 388 // ceil(log2(numentries)). 389 unsigned NumBits = Log2_32_Ceil(Commands.size()); 390 assert(NumBits <= BitsLeft && "consistency error"); 391 392 // Emit code to extract this field from Bits. 393 BitsLeft -= NumBits; 394 395 O << "\n // Fragment " << i << " encoded into " << NumBits 396 << " bits for " << Commands.size() << " unique commands.\n"; 397 398 if (Commands.size() == 2) { 399 // Emit two possibilitys with if/else. 400 O << " if ((Bits >> " << (BitsLeft+AsmStrBits) << ") & " 401 << ((1 << NumBits)-1) << ") {\n" 402 << Commands[1] 403 << " } else {\n" 404 << Commands[0] 405 << " }\n\n"; 406 } else { 407 O << " switch ((Bits >> " << (BitsLeft+AsmStrBits) << ") & " 408 << ((1 << NumBits)-1) << ") {\n" 409 << " default: // unreachable.\n"; 410 411 // Print out all the cases. 412 for (unsigned i = 0, e = Commands.size(); i != e; ++i) { 413 O << " case " << i << ":\n"; 414 O << Commands[i]; 415 O << " break;\n"; 416 } 417 O << " }\n\n"; 418 } 419 } 420 421 // Okay, delete instructions with no operand info left. 422 for (unsigned i = 0, e = Instructions.size(); i != e; ++i) { 423 // Entire instruction has been emitted? 424 AsmWriterInst &Inst = Instructions[i]; 425 if (Inst.Operands.empty()) { 426 Instructions.erase(Instructions.begin()+i); 427 --i; --e; 428 } 429 } 430 431 432 // Because this is a vector, we want to emit from the end. Reverse all of the 433 // elements in the vector. 434 std::reverse(Instructions.begin(), Instructions.end()); 435 436 437 // Now that we've emitted all of the operand info that fit into 32 bits, emit 438 // information for those instructions that are left. This is a less dense 439 // encoding, but we expect the main 32-bit table to handle the majority of 440 // instructions. 441 if (!Instructions.empty()) { 442 // Find the opcode # of inline asm. 443 O << " switch (MI->getOpcode()) {\n"; 444 while (!Instructions.empty()) 445 EmitInstructions(Instructions, O); 446 447 O << " }\n"; 448 O << " return;\n"; 449 } 450 451 O << "}\n"; 452} 453 454 455void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) { 456 CodeGenTarget Target; 457 Record *AsmWriter = Target.getAsmWriter(); 458 std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName"); 459 const std::vector<CodeGenRegister> &Registers = Target.getRegisters(); 460 461 StringToOffsetTable StringTable; 462 O << 463 "\n\n/// getRegisterName - This method is automatically generated by tblgen\n" 464 "/// from the register set description. This returns the assembler name\n" 465 "/// for the specified register.\n" 466 "const char *" << Target.getName() << ClassName 467 << "::getRegisterName(unsigned RegNo) {\n" 468 << " assert(RegNo && RegNo < " << (Registers.size()+1) 469 << " && \"Invalid register number!\");\n" 470 << "\n" 471 << " static const unsigned RegAsmOffset[] = {"; 472 for (unsigned i = 0, e = Registers.size(); i != e; ++i) { 473 const CodeGenRegister &Reg = Registers[i]; 474 475 std::string AsmName = Reg.TheDef->getValueAsString("AsmName"); 476 if (AsmName.empty()) 477 AsmName = Reg.getName(); 478 479 480 if ((i % 14) == 0) 481 O << "\n "; 482 483 O << StringTable.GetOrAddStringOffset(AsmName) << ", "; 484 } 485 O << "0\n" 486 << " };\n" 487 << "\n"; 488 489 O << " const char *AsmStrs =\n"; 490 StringTable.EmitString(O); 491 O << ";\n"; 492 493 O << " return AsmStrs+RegAsmOffset[RegNo-1];\n" 494 << "}\n"; 495} 496 497void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) { 498 CodeGenTarget Target; 499 Record *AsmWriter = Target.getAsmWriter(); 500 std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName"); 501 502 std::vector<const CodeGenInstruction*> NumberedInstructions; 503 Target.getInstructionsByEnumValue(NumberedInstructions); 504 505 StringToOffsetTable StringTable; 506 O << 507"\n\n#ifdef GET_INSTRUCTION_NAME\n" 508"#undef GET_INSTRUCTION_NAME\n\n" 509"/// getInstructionName: This method is automatically generated by tblgen\n" 510"/// from the instruction set description. This returns the enum name of the\n" 511"/// specified instruction.\n" 512 "const char *" << Target.getName() << ClassName 513 << "::getInstructionName(unsigned Opcode) {\n" 514 << " assert(Opcode < " << NumberedInstructions.size() 515 << " && \"Invalid instruction number!\");\n" 516 << "\n" 517 << " static const unsigned InstAsmOffset[] = {"; 518 for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) { 519 const CodeGenInstruction &Inst = *NumberedInstructions[i]; 520 521 std::string AsmName = Inst.TheDef->getName(); 522 if ((i % 14) == 0) 523 O << "\n "; 524 525 O << StringTable.GetOrAddStringOffset(AsmName) << ", "; 526 } 527 O << "0\n" 528 << " };\n" 529 << "\n"; 530 531 O << " const char *Strs =\n"; 532 StringTable.EmitString(O); 533 O << ";\n"; 534 535 O << " return Strs+InstAsmOffset[Opcode];\n" 536 << "}\n\n#endif\n"; 537} 538 539 540 541void AsmWriterEmitter::run(raw_ostream &O) { 542 EmitSourceFileHeader("Assembly Writer Source Fragment", O); 543 544 EmitPrintInstruction(O); 545 EmitGetRegisterName(O); 546 EmitGetInstructionName(O); 547} 548 549