AsmWriterEmitter.cpp revision 203954
1//===- AsmWriterEmitter.cpp - Generate an assembly writer -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is emits an assembly printer for the current target.
11// Note that this is currently fairly skeletal, but will grow over time.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AsmWriterEmitter.h"
16#include "AsmWriterInst.h"
17#include "CodeGenTarget.h"
18#include "Record.h"
19#include "StringToOffsetTable.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/MathExtras.h"
22#include <algorithm>
23using namespace llvm;
24
25static void PrintCases(std::vector<std::pair<std::string,
26                       AsmWriterOperand> > &OpsToPrint, raw_ostream &O) {
27  O << "    case " << OpsToPrint.back().first << ": ";
28  AsmWriterOperand TheOp = OpsToPrint.back().second;
29  OpsToPrint.pop_back();
30
31  // Check to see if any other operands are identical in this list, and if so,
32  // emit a case label for them.
33  for (unsigned i = OpsToPrint.size(); i != 0; --i)
34    if (OpsToPrint[i-1].second == TheOp) {
35      O << "\n    case " << OpsToPrint[i-1].first << ": ";
36      OpsToPrint.erase(OpsToPrint.begin()+i-1);
37    }
38
39  // Finally, emit the code.
40  O << TheOp.getCode();
41  O << "break;\n";
42}
43
44
45/// EmitInstructions - Emit the last instruction in the vector and any other
46/// instructions that are suitably similar to it.
47static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
48                             raw_ostream &O) {
49  AsmWriterInst FirstInst = Insts.back();
50  Insts.pop_back();
51
52  std::vector<AsmWriterInst> SimilarInsts;
53  unsigned DifferingOperand = ~0;
54  for (unsigned i = Insts.size(); i != 0; --i) {
55    unsigned DiffOp = Insts[i-1].MatchesAllButOneOp(FirstInst);
56    if (DiffOp != ~1U) {
57      if (DifferingOperand == ~0U)  // First match!
58        DifferingOperand = DiffOp;
59
60      // If this differs in the same operand as the rest of the instructions in
61      // this class, move it to the SimilarInsts list.
62      if (DifferingOperand == DiffOp || DiffOp == ~0U) {
63        SimilarInsts.push_back(Insts[i-1]);
64        Insts.erase(Insts.begin()+i-1);
65      }
66    }
67  }
68
69  O << "  case " << FirstInst.CGI->Namespace << "::"
70    << FirstInst.CGI->TheDef->getName() << ":\n";
71  for (unsigned i = 0, e = SimilarInsts.size(); i != e; ++i)
72    O << "  case " << SimilarInsts[i].CGI->Namespace << "::"
73      << SimilarInsts[i].CGI->TheDef->getName() << ":\n";
74  for (unsigned i = 0, e = FirstInst.Operands.size(); i != e; ++i) {
75    if (i != DifferingOperand) {
76      // If the operand is the same for all instructions, just print it.
77      O << "    " << FirstInst.Operands[i].getCode();
78    } else {
79      // If this is the operand that varies between all of the instructions,
80      // emit a switch for just this operand now.
81      O << "    switch (MI->getOpcode()) {\n";
82      std::vector<std::pair<std::string, AsmWriterOperand> > OpsToPrint;
83      OpsToPrint.push_back(std::make_pair(FirstInst.CGI->Namespace + "::" +
84                                          FirstInst.CGI->TheDef->getName(),
85                                          FirstInst.Operands[i]));
86
87      for (unsigned si = 0, e = SimilarInsts.size(); si != e; ++si) {
88        AsmWriterInst &AWI = SimilarInsts[si];
89        OpsToPrint.push_back(std::make_pair(AWI.CGI->Namespace+"::"+
90                                            AWI.CGI->TheDef->getName(),
91                                            AWI.Operands[i]));
92      }
93      std::reverse(OpsToPrint.begin(), OpsToPrint.end());
94      while (!OpsToPrint.empty())
95        PrintCases(OpsToPrint, O);
96      O << "    }";
97    }
98    O << "\n";
99  }
100  O << "    break;\n";
101}
102
103void AsmWriterEmitter::
104FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
105                          std::vector<unsigned> &InstIdxs,
106                          std::vector<unsigned> &InstOpsUsed) const {
107  InstIdxs.assign(NumberedInstructions.size(), ~0U);
108
109  // This vector parallels UniqueOperandCommands, keeping track of which
110  // instructions each case are used for.  It is a comma separated string of
111  // enums.
112  std::vector<std::string> InstrsForCase;
113  InstrsForCase.resize(UniqueOperandCommands.size());
114  InstOpsUsed.assign(UniqueOperandCommands.size(), 0);
115
116  for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
117    const AsmWriterInst *Inst = getAsmWriterInstByID(i);
118    if (Inst == 0) continue;  // PHI, INLINEASM, DBG_LABEL, etc.
119
120    std::string Command;
121    if (Inst->Operands.empty())
122      continue;   // Instruction already done.
123
124    Command = "    " + Inst->Operands[0].getCode() + "\n";
125
126    // Check to see if we already have 'Command' in UniqueOperandCommands.
127    // If not, add it.
128    bool FoundIt = false;
129    for (unsigned idx = 0, e = UniqueOperandCommands.size(); idx != e; ++idx)
130      if (UniqueOperandCommands[idx] == Command) {
131        InstIdxs[i] = idx;
132        InstrsForCase[idx] += ", ";
133        InstrsForCase[idx] += Inst->CGI->TheDef->getName();
134        FoundIt = true;
135        break;
136      }
137    if (!FoundIt) {
138      InstIdxs[i] = UniqueOperandCommands.size();
139      UniqueOperandCommands.push_back(Command);
140      InstrsForCase.push_back(Inst->CGI->TheDef->getName());
141
142      // This command matches one operand so far.
143      InstOpsUsed.push_back(1);
144    }
145  }
146
147  // For each entry of UniqueOperandCommands, there is a set of instructions
148  // that uses it.  If the next command of all instructions in the set are
149  // identical, fold it into the command.
150  for (unsigned CommandIdx = 0, e = UniqueOperandCommands.size();
151       CommandIdx != e; ++CommandIdx) {
152
153    for (unsigned Op = 1; ; ++Op) {
154      // Scan for the first instruction in the set.
155      std::vector<unsigned>::iterator NIT =
156        std::find(InstIdxs.begin(), InstIdxs.end(), CommandIdx);
157      if (NIT == InstIdxs.end()) break;  // No commonality.
158
159      // If this instruction has no more operands, we isn't anything to merge
160      // into this command.
161      const AsmWriterInst *FirstInst =
162        getAsmWriterInstByID(NIT-InstIdxs.begin());
163      if (!FirstInst || FirstInst->Operands.size() == Op)
164        break;
165
166      // Otherwise, scan to see if all of the other instructions in this command
167      // set share the operand.
168      bool AllSame = true;
169      // Keep track of the maximum, number of operands or any
170      // instruction we see in the group.
171      size_t MaxSize = FirstInst->Operands.size();
172
173      for (NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx);
174           NIT != InstIdxs.end();
175           NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx)) {
176        // Okay, found another instruction in this command set.  If the operand
177        // matches, we're ok, otherwise bail out.
178        const AsmWriterInst *OtherInst =
179          getAsmWriterInstByID(NIT-InstIdxs.begin());
180
181        if (OtherInst &&
182            OtherInst->Operands.size() > FirstInst->Operands.size())
183          MaxSize = std::max(MaxSize, OtherInst->Operands.size());
184
185        if (!OtherInst || OtherInst->Operands.size() == Op ||
186            OtherInst->Operands[Op] != FirstInst->Operands[Op]) {
187          AllSame = false;
188          break;
189        }
190      }
191      if (!AllSame) break;
192
193      // Okay, everything in this command set has the same next operand.  Add it
194      // to UniqueOperandCommands and remember that it was consumed.
195      std::string Command = "    " + FirstInst->Operands[Op].getCode() + "\n";
196
197      UniqueOperandCommands[CommandIdx] += Command;
198      InstOpsUsed[CommandIdx]++;
199    }
200  }
201
202  // Prepend some of the instructions each case is used for onto the case val.
203  for (unsigned i = 0, e = InstrsForCase.size(); i != e; ++i) {
204    std::string Instrs = InstrsForCase[i];
205    if (Instrs.size() > 70) {
206      Instrs.erase(Instrs.begin()+70, Instrs.end());
207      Instrs += "...";
208    }
209
210    if (!Instrs.empty())
211      UniqueOperandCommands[i] = "    // " + Instrs + "\n" +
212        UniqueOperandCommands[i];
213  }
214}
215
216
217static void UnescapeString(std::string &Str) {
218  for (unsigned i = 0; i != Str.size(); ++i) {
219    if (Str[i] == '\\' && i != Str.size()-1) {
220      switch (Str[i+1]) {
221      default: continue;  // Don't execute the code after the switch.
222      case 'a': Str[i] = '\a'; break;
223      case 'b': Str[i] = '\b'; break;
224      case 'e': Str[i] = 27; break;
225      case 'f': Str[i] = '\f'; break;
226      case 'n': Str[i] = '\n'; break;
227      case 'r': Str[i] = '\r'; break;
228      case 't': Str[i] = '\t'; break;
229      case 'v': Str[i] = '\v'; break;
230      case '"': Str[i] = '\"'; break;
231      case '\'': Str[i] = '\''; break;
232      case '\\': Str[i] = '\\'; break;
233      }
234      // Nuke the second character.
235      Str.erase(Str.begin()+i+1);
236    }
237  }
238}
239
240/// EmitPrintInstruction - Generate the code for the "printInstruction" method
241/// implementation.
242void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
243  CodeGenTarget Target;
244  Record *AsmWriter = Target.getAsmWriter();
245  std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
246
247  O <<
248  "/// printInstruction - This method is automatically generated by tablegen\n"
249  "/// from the instruction set description.\n"
250    "void " << Target.getName() << ClassName
251            << "::printInstruction(const MachineInstr *MI) {\n";
252
253  std::vector<AsmWriterInst> Instructions;
254
255  for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
256         E = Target.inst_end(); I != E; ++I)
257    if (!I->second.AsmString.empty() &&
258        I->second.TheDef->getName() != "PHI")
259      Instructions.push_back(
260        AsmWriterInst(I->second,
261                      AsmWriter->getValueAsInt("Variant"),
262                      AsmWriter->getValueAsInt("FirstOperandColumn"),
263                      AsmWriter->getValueAsInt("OperandSpacing")));
264
265  // Get the instruction numbering.
266  Target.getInstructionsByEnumValue(NumberedInstructions);
267
268  // Compute the CodeGenInstruction -> AsmWriterInst mapping.  Note that not
269  // all machine instructions are necessarily being printed, so there may be
270  // target instructions not in this map.
271  for (unsigned i = 0, e = Instructions.size(); i != e; ++i)
272    CGIAWIMap.insert(std::make_pair(Instructions[i].CGI, &Instructions[i]));
273
274  // Build an aggregate string, and build a table of offsets into it.
275  StringToOffsetTable StringTable;
276
277  /// OpcodeInfo - This encodes the index of the string to use for the first
278  /// chunk of the output as well as indices used for operand printing.
279  std::vector<unsigned> OpcodeInfo;
280
281  unsigned MaxStringIdx = 0;
282  for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
283    AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions[i]];
284    unsigned Idx;
285    if (AWI == 0) {
286      // Something not handled by the asmwriter printer.
287      Idx = ~0U;
288    } else if (AWI->Operands[0].OperandType !=
289                        AsmWriterOperand::isLiteralTextOperand ||
290               AWI->Operands[0].Str.empty()) {
291      // Something handled by the asmwriter printer, but with no leading string.
292      Idx = StringTable.GetOrAddStringOffset("");
293    } else {
294      std::string Str = AWI->Operands[0].Str;
295      UnescapeString(Str);
296      Idx = StringTable.GetOrAddStringOffset(Str);
297      MaxStringIdx = std::max(MaxStringIdx, Idx);
298
299      // Nuke the string from the operand list.  It is now handled!
300      AWI->Operands.erase(AWI->Operands.begin());
301    }
302
303    // Bias offset by one since we want 0 as a sentinel.
304    OpcodeInfo.push_back(Idx+1);
305  }
306
307  // Figure out how many bits we used for the string index.
308  unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx+2);
309
310  // To reduce code size, we compactify common instructions into a few bits
311  // in the opcode-indexed table.
312  unsigned BitsLeft = 32-AsmStrBits;
313
314  std::vector<std::vector<std::string> > TableDrivenOperandPrinters;
315
316  while (1) {
317    std::vector<std::string> UniqueOperandCommands;
318    std::vector<unsigned> InstIdxs;
319    std::vector<unsigned> NumInstOpsHandled;
320    FindUniqueOperandCommands(UniqueOperandCommands, InstIdxs,
321                              NumInstOpsHandled);
322
323    // If we ran out of operands to print, we're done.
324    if (UniqueOperandCommands.empty()) break;
325
326    // Compute the number of bits we need to represent these cases, this is
327    // ceil(log2(numentries)).
328    unsigned NumBits = Log2_32_Ceil(UniqueOperandCommands.size());
329
330    // If we don't have enough bits for this operand, don't include it.
331    if (NumBits > BitsLeft) {
332      DEBUG(errs() << "Not enough bits to densely encode " << NumBits
333                   << " more bits\n");
334      break;
335    }
336
337    // Otherwise, we can include this in the initial lookup table.  Add it in.
338    BitsLeft -= NumBits;
339    for (unsigned i = 0, e = InstIdxs.size(); i != e; ++i)
340      if (InstIdxs[i] != ~0U)
341        OpcodeInfo[i] |= InstIdxs[i] << (BitsLeft+AsmStrBits);
342
343    // Remove the info about this operand.
344    for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
345      if (AsmWriterInst *Inst = getAsmWriterInstByID(i))
346        if (!Inst->Operands.empty()) {
347          unsigned NumOps = NumInstOpsHandled[InstIdxs[i]];
348          assert(NumOps <= Inst->Operands.size() &&
349                 "Can't remove this many ops!");
350          Inst->Operands.erase(Inst->Operands.begin(),
351                               Inst->Operands.begin()+NumOps);
352        }
353    }
354
355    // Remember the handlers for this set of operands.
356    TableDrivenOperandPrinters.push_back(UniqueOperandCommands);
357  }
358
359
360
361  O<<"  static const unsigned OpInfo[] = {\n";
362  for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
363    O << "    " << OpcodeInfo[i] << "U,\t// "
364      << NumberedInstructions[i]->TheDef->getName() << "\n";
365  }
366  // Add a dummy entry so the array init doesn't end with a comma.
367  O << "    0U\n";
368  O << "  };\n\n";
369
370  // Emit the string itself.
371  O << "  const char *AsmStrs = \n";
372  StringTable.EmitString(O);
373  O << ";\n\n";
374
375  O << "  O << \"\\t\";\n\n";
376
377  O << "  // Emit the opcode for the instruction.\n"
378    << "  unsigned Bits = OpInfo[MI->getOpcode()];\n"
379    << "  assert(Bits != 0 && \"Cannot print this instruction.\");\n"
380    << "  O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ")-1;\n\n";
381
382  // Output the table driven operand information.
383  BitsLeft = 32-AsmStrBits;
384  for (unsigned i = 0, e = TableDrivenOperandPrinters.size(); i != e; ++i) {
385    std::vector<std::string> &Commands = TableDrivenOperandPrinters[i];
386
387    // Compute the number of bits we need to represent these cases, this is
388    // ceil(log2(numentries)).
389    unsigned NumBits = Log2_32_Ceil(Commands.size());
390    assert(NumBits <= BitsLeft && "consistency error");
391
392    // Emit code to extract this field from Bits.
393    BitsLeft -= NumBits;
394
395    O << "\n  // Fragment " << i << " encoded into " << NumBits
396      << " bits for " << Commands.size() << " unique commands.\n";
397
398    if (Commands.size() == 2) {
399      // Emit two possibilitys with if/else.
400      O << "  if ((Bits >> " << (BitsLeft+AsmStrBits) << ") & "
401        << ((1 << NumBits)-1) << ") {\n"
402        << Commands[1]
403        << "  } else {\n"
404        << Commands[0]
405        << "  }\n\n";
406    } else {
407      O << "  switch ((Bits >> " << (BitsLeft+AsmStrBits) << ") & "
408        << ((1 << NumBits)-1) << ") {\n"
409        << "  default:   // unreachable.\n";
410
411      // Print out all the cases.
412      for (unsigned i = 0, e = Commands.size(); i != e; ++i) {
413        O << "  case " << i << ":\n";
414        O << Commands[i];
415        O << "    break;\n";
416      }
417      O << "  }\n\n";
418    }
419  }
420
421  // Okay, delete instructions with no operand info left.
422  for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
423    // Entire instruction has been emitted?
424    AsmWriterInst &Inst = Instructions[i];
425    if (Inst.Operands.empty()) {
426      Instructions.erase(Instructions.begin()+i);
427      --i; --e;
428    }
429  }
430
431
432  // Because this is a vector, we want to emit from the end.  Reverse all of the
433  // elements in the vector.
434  std::reverse(Instructions.begin(), Instructions.end());
435
436
437  // Now that we've emitted all of the operand info that fit into 32 bits, emit
438  // information for those instructions that are left.  This is a less dense
439  // encoding, but we expect the main 32-bit table to handle the majority of
440  // instructions.
441  if (!Instructions.empty()) {
442    // Find the opcode # of inline asm.
443    O << "  switch (MI->getOpcode()) {\n";
444    while (!Instructions.empty())
445      EmitInstructions(Instructions, O);
446
447    O << "  }\n";
448    O << "  return;\n";
449  }
450
451  O << "}\n";
452}
453
454
455void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
456  CodeGenTarget Target;
457  Record *AsmWriter = Target.getAsmWriter();
458  std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
459  const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
460
461  StringToOffsetTable StringTable;
462  O <<
463  "\n\n/// getRegisterName - This method is automatically generated by tblgen\n"
464  "/// from the register set description.  This returns the assembler name\n"
465  "/// for the specified register.\n"
466  "const char *" << Target.getName() << ClassName
467  << "::getRegisterName(unsigned RegNo) {\n"
468  << "  assert(RegNo && RegNo < " << (Registers.size()+1)
469  << " && \"Invalid register number!\");\n"
470  << "\n"
471  << "  static const unsigned RegAsmOffset[] = {";
472  for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
473    const CodeGenRegister &Reg = Registers[i];
474
475    std::string AsmName = Reg.TheDef->getValueAsString("AsmName");
476    if (AsmName.empty())
477      AsmName = Reg.getName();
478
479
480    if ((i % 14) == 0)
481      O << "\n    ";
482
483    O << StringTable.GetOrAddStringOffset(AsmName) << ", ";
484  }
485  O << "0\n"
486    << "  };\n"
487    << "\n";
488
489  O << "  const char *AsmStrs =\n";
490  StringTable.EmitString(O);
491  O << ";\n";
492
493  O << "  return AsmStrs+RegAsmOffset[RegNo-1];\n"
494    << "}\n";
495}
496
497void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) {
498  CodeGenTarget Target;
499  Record *AsmWriter = Target.getAsmWriter();
500  std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
501
502  std::vector<const CodeGenInstruction*> NumberedInstructions;
503  Target.getInstructionsByEnumValue(NumberedInstructions);
504
505  StringToOffsetTable StringTable;
506  O <<
507"\n\n#ifdef GET_INSTRUCTION_NAME\n"
508"#undef GET_INSTRUCTION_NAME\n\n"
509"/// getInstructionName: This method is automatically generated by tblgen\n"
510"/// from the instruction set description.  This returns the enum name of the\n"
511"/// specified instruction.\n"
512  "const char *" << Target.getName() << ClassName
513  << "::getInstructionName(unsigned Opcode) {\n"
514  << "  assert(Opcode < " << NumberedInstructions.size()
515  << " && \"Invalid instruction number!\");\n"
516  << "\n"
517  << "  static const unsigned InstAsmOffset[] = {";
518  for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
519    const CodeGenInstruction &Inst = *NumberedInstructions[i];
520
521    std::string AsmName = Inst.TheDef->getName();
522    if ((i % 14) == 0)
523      O << "\n    ";
524
525    O << StringTable.GetOrAddStringOffset(AsmName) << ", ";
526  }
527  O << "0\n"
528  << "  };\n"
529  << "\n";
530
531  O << "  const char *Strs =\n";
532  StringTable.EmitString(O);
533  O << ";\n";
534
535  O << "  return Strs+InstAsmOffset[Opcode];\n"
536  << "}\n\n#endif\n";
537}
538
539
540
541void AsmWriterEmitter::run(raw_ostream &O) {
542  EmitSourceFileHeader("Assembly Writer Source Fragment", O);
543
544  EmitPrintInstruction(O);
545  EmitGetRegisterName(O);
546  EmitGetInstructionName(O);
547}
548
549