LLParser.h revision 208954
1//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the parser class for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ASMPARSER_LLPARSER_H
15#define LLVM_ASMPARSER_LLPARSER_H
16
17#include "LLLexer.h"
18#include "llvm/Module.h"
19#include "llvm/Type.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/Support/ValueHandle.h"
22#include <map>
23
24namespace llvm {
25  class Module;
26  class OpaqueType;
27  class Function;
28  class Value;
29  class BasicBlock;
30  class Instruction;
31  class Constant;
32  class GlobalValue;
33  class MDString;
34  class MDNode;
35  class UnionType;
36
37  /// ValID - Represents a reference of a definition of some sort with no type.
38  /// There are several cases where we have to parse the value but where the
39  /// type can depend on later context.  This may either be a numeric reference
40  /// or a symbolic (%var) reference.  This is just a discriminated union.
41  struct ValID {
42    enum {
43      t_LocalID, t_GlobalID,      // ID in UIntVal.
44      t_LocalName, t_GlobalName,  // Name in StrVal.
45      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
46      t_Null, t_Undef, t_Zero,    // No value.
47      t_EmptyArray,               // No value:  []
48      t_Constant,                 // Value in ConstantVal.
49      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
50      t_MDNode,                   // Value in MDNodeVal.
51      t_MDString                  // Value in MDStringVal.
52    } Kind;
53
54    LLLexer::LocTy Loc;
55    unsigned UIntVal;
56    std::string StrVal, StrVal2;
57    APSInt APSIntVal;
58    APFloat APFloatVal;
59    Constant *ConstantVal;
60    MDNode *MDNodeVal;
61    MDString *MDStringVal;
62    ValID() : APFloatVal(0.0) {}
63
64    bool operator<(const ValID &RHS) const {
65      if (Kind == t_LocalID || Kind == t_GlobalID)
66        return UIntVal < RHS.UIntVal;
67      assert((Kind == t_LocalName || Kind == t_GlobalName) &&
68             "Ordering not defined for this ValID kind yet");
69      return StrVal < RHS.StrVal;
70    }
71  };
72
73  class LLParser {
74  public:
75    typedef LLLexer::LocTy LocTy;
76  private:
77    LLVMContext &Context;
78    LLLexer Lex;
79    Module *M;
80
81    // Instruction metadata resolution.  Each instruction can have a list of
82    // MDRef info associated with them.
83    struct MDRef {
84      SMLoc Loc;
85      unsigned MDKind, MDSlot;
86    };
87    DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
88
89    // Type resolution handling data structures.
90    std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes;
91    std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
92    std::vector<PATypeHolder> NumberedTypes;
93    std::vector<TrackingVH<MDNode> > NumberedMetadata;
94    std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
95    struct UpRefRecord {
96      /// Loc - This is the location of the upref.
97      LocTy Loc;
98
99      /// NestingLevel - The number of nesting levels that need to be popped
100      /// before this type is resolved.
101      unsigned NestingLevel;
102
103      /// LastContainedTy - This is the type at the current binding level for
104      /// the type.  Every time we reduce the nesting level, this gets updated.
105      const Type *LastContainedTy;
106
107      /// UpRefTy - This is the actual opaque type that the upreference is
108      /// represented with.
109      OpaqueType *UpRefTy;
110
111      UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy)
112        : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy),
113          UpRefTy(URTy) {}
114    };
115    std::vector<UpRefRecord> UpRefs;
116
117    // Global Value reference information.
118    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
119    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
120    std::vector<GlobalValue*> NumberedVals;
121
122    // References to blockaddress.  The key is the function ValID, the value is
123    // a list of references to blocks in that function.
124    std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
125      ForwardRefBlockAddresses;
126
127    Function *MallocF;
128  public:
129    LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
130      Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
131      M(m), MallocF(NULL) {}
132    bool Run();
133
134    LLVMContext& getContext() { return Context; }
135
136  private:
137
138    bool Error(LocTy L, const std::string &Msg) const {
139      return Lex.Error(L, Msg);
140    }
141    bool TokError(const std::string &Msg) const {
142      return Error(Lex.getLoc(), Msg);
143    }
144
145    /// GetGlobalVal - Get a value with the specified name or ID, creating a
146    /// forward reference record if needed.  This can return null if the value
147    /// exists but does not have the right type.
148    GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc);
149    GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc);
150
151    // Helper Routines.
152    bool ParseToken(lltok::Kind T, const char *ErrMsg);
153    bool EatIfPresent(lltok::Kind T) {
154      if (Lex.getKind() != T) return false;
155      Lex.Lex();
156      return true;
157    }
158    bool ParseOptionalToken(lltok::Kind T, bool &Present) {
159      if (Lex.getKind() != T) {
160        Present = false;
161      } else {
162        Lex.Lex();
163        Present = true;
164      }
165      return false;
166    }
167    bool ParseStringConstant(std::string &Result);
168    bool ParseUInt32(unsigned &Val);
169    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
170      Loc = Lex.getLoc();
171      return ParseUInt32(Val);
172    }
173    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
174    bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
175    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
176    bool ParseOptionalLinkage(unsigned &Linkage) {
177      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
178    }
179    bool ParseOptionalVisibility(unsigned &Visibility);
180    bool ParseOptionalCallingConv(CallingConv::ID &CC);
181    bool ParseOptionalAlignment(unsigned &Alignment);
182    bool ParseOptionalStackAlignment(unsigned &Alignment);
183    bool ParseInstructionMetadata(Instruction *Inst);
184    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
185    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
186    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
187      bool AteExtraComma;
188      if (ParseIndexList(Indices, AteExtraComma)) return true;
189      if (AteExtraComma)
190        return TokError("expected index");
191      return false;
192    }
193
194    // Top-Level Entities
195    bool ParseTopLevelEntities();
196    bool ValidateEndOfModule();
197    bool ParseTargetDefinition();
198    bool ParseDepLibs();
199    bool ParseModuleAsm();
200    bool ParseUnnamedType();
201    bool ParseNamedType();
202    bool ParseDeclare();
203    bool ParseDefine();
204
205    bool ParseGlobalType(bool &IsConstant);
206    bool ParseUnnamedGlobal();
207    bool ParseNamedGlobal();
208    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
209                     bool HasLinkage, unsigned Visibility);
210    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
211    bool ParseStandaloneMetadata();
212    bool ParseNamedMetadata();
213    bool ParseMDString(MDString *&Result);
214    bool ParseMDNodeID(MDNode *&Result);
215    bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
216
217    // Type Parsing.
218    bool ParseType(PATypeHolder &Result, bool AllowVoid = false);
219    bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) {
220      Loc = Lex.getLoc();
221      return ParseType(Result, AllowVoid);
222    }
223    bool ParseTypeRec(PATypeHolder &H);
224    bool ParseStructType(PATypeHolder &H, bool Packed);
225    bool ParseUnionType(PATypeHolder &H);
226    bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
227    bool ParseFunctionType(PATypeHolder &Result);
228    PATypeHolder HandleUpRefs(const Type *Ty);
229
230    // Function Semantic Analysis.
231    class PerFunctionState {
232      LLParser &P;
233      Function &F;
234      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
235      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
236      std::vector<Value*> NumberedVals;
237
238      /// FunctionNumber - If this is an unnamed function, this is the slot
239      /// number of it, otherwise it is -1.
240      int FunctionNumber;
241    public:
242      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
243      ~PerFunctionState();
244
245      Function &getFunction() const { return F; }
246
247      bool FinishFunction();
248
249      /// GetVal - Get a value with the specified name or ID, creating a
250      /// forward reference record if needed.  This can return null if the value
251      /// exists but does not have the right type.
252      Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc);
253      Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc);
254
255      /// SetInstName - After an instruction is parsed and inserted into its
256      /// basic block, this installs its name.
257      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
258                       Instruction *Inst);
259
260      /// GetBB - Get a basic block with the specified name or ID, creating a
261      /// forward reference record if needed.  This can return null if the value
262      /// is not a BasicBlock.
263      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
264      BasicBlock *GetBB(unsigned ID, LocTy Loc);
265
266      /// DefineBB - Define the specified basic block, which is either named or
267      /// unnamed.  If there is an error, this returns null otherwise it returns
268      /// the block being defined.
269      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
270    };
271
272    bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
273                             PerFunctionState *PFS);
274
275    bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS);
276    bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
277                    PerFunctionState &PFS) {
278      Loc = Lex.getLoc();
279      return ParseValue(Ty, V, PFS);
280    }
281
282    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS);
283    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
284      Loc = Lex.getLoc();
285      return ParseTypeAndValue(V, PFS);
286    }
287    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
288                                PerFunctionState &PFS);
289    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
290      LocTy Loc;
291      return ParseTypeAndBasicBlock(BB, Loc, PFS);
292    }
293
294    bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V);
295
296    struct ParamInfo {
297      LocTy Loc;
298      Value *V;
299      unsigned Attrs;
300      ParamInfo(LocTy loc, Value *v, unsigned attrs)
301        : Loc(loc), V(v), Attrs(attrs) {}
302    };
303    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
304                            PerFunctionState &PFS);
305
306    // Constant Parsing.
307    bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
308    bool ParseGlobalValue(const Type *Ty, Constant *&V);
309    bool ParseGlobalTypeAndValue(Constant *&V);
310    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
311    bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
312
313    // Function Parsing.
314    struct ArgInfo {
315      LocTy Loc;
316      PATypeHolder Type;
317      unsigned Attrs;
318      std::string Name;
319      ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N)
320        : Loc(L), Type(Ty), Attrs(Attr), Name(N) {}
321    };
322    bool ParseArgumentList(std::vector<ArgInfo> &ArgList,
323                           bool &isVarArg, bool inType);
324    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
325    bool ParseFunctionBody(Function &Fn);
326    bool ParseBasicBlock(PerFunctionState &PFS);
327
328    // Instruction Parsing.  Each instruction parsing routine can return with a
329    // normal result, an error result, or return having eaten an extra comma.
330    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
331    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
332                         PerFunctionState &PFS);
333    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
334
335    int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
336    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
337    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
338    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
339    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
340
341    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
342                         unsigned OperandType);
343    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
344    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
345    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
346    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
347    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
348    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
349    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
350    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
351    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
352    bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
353    int ParseAlloc(Instruction *&I, PerFunctionState &PFS,
354                    BasicBlock *BB = 0, bool isAlloca = true);
355    bool ParseFree(Instruction *&I, PerFunctionState &PFS, BasicBlock *BB);
356    int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
357    int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
358    bool ParseGetResult(Instruction *&I, PerFunctionState &PFS);
359    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
360    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
361    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
362
363    bool ResolveForwardRefBlockAddresses(Function *TheFn,
364                             std::vector<std::pair<ValID, GlobalValue*> > &Refs,
365                                         PerFunctionState *PFS);
366  };
367} // End llvm namespace
368
369#endif
370