LLParser.h revision 226633
//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file defines the parser class for .ll files.
//
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ASMPARSER_LLPARSER_H
15#define LLVM_ASMPARSER_LLPARSER_H
16
17#include "LLLexer.h"
18#include "llvm/Instructions.h"
19#include "llvm/Module.h"
20#include "llvm/Type.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/StringMap.h"
23#include "llvm/Support/ValueHandle.h"
24#include <map>
25
26namespace llvm {
27  class Module;
28  class OpaqueType;
29  class Function;
30  class Value;
31  class BasicBlock;
32  class Instruction;
33  class Constant;
34  class GlobalValue;
35  class MDString;
36  class MDNode;
37  class StructType;
38
39  /// ValID - Represents a reference of a definition of some sort with no type.
40  /// There are several cases where we have to parse the value but where the
41  /// type can depend on later context.  This may either be a numeric reference
42  /// or a symbolic (%var) reference.  This is just a discriminated union.
43  struct ValID {
44    enum {
45      t_LocalID, t_GlobalID,      // ID in UIntVal.
46      t_LocalName, t_GlobalName,  // Name in StrVal.
47      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
48      t_Null, t_Undef, t_Zero,    // No value.
49      t_EmptyArray,               // No value:  []
50      t_Constant,                 // Value in ConstantVal.
51      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
52      t_MDNode,                   // Value in MDNodeVal.
53      t_MDString,                 // Value in MDStringVal.
54      t_ConstantStruct,           // Value in ConstantStructElts.
55      t_PackedConstantStruct      // Value in ConstantStructElts.
56    } Kind;
57
58    LLLexer::LocTy Loc;
59    unsigned UIntVal;
60    std::string StrVal, StrVal2;
61    APSInt APSIntVal;
62    APFloat APFloatVal;
63    Constant *ConstantVal;
64    MDNode *MDNodeVal;
65    MDString *MDStringVal;
66    Constant **ConstantStructElts;
67
68    ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
69    ~ValID() {
70      if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
71        delete [] ConstantStructElts;
72    }
73
74    bool operator<(const ValID &RHS) const {
75      if (Kind == t_LocalID || Kind == t_GlobalID)
76        return UIntVal < RHS.UIntVal;
77      assert((Kind == t_LocalName || Kind == t_GlobalName ||
78              Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
79             "Ordering not defined for this ValID kind yet");
80      return StrVal < RHS.StrVal;
81    }
82  };
83
84  class LLParser {
85  public:
86    typedef LLLexer::LocTy LocTy;
87  private:
88    LLVMContext &Context;
89    LLLexer Lex;
90    Module *M;
91
92    // Instruction metadata resolution.  Each instruction can have a list of
93    // MDRef info associated with them.
94    //
95    // The simpler approach of just creating temporary MDNodes and then calling
96    // RAUW on them when the definition is processed doesn't work because some
97    // instruction metadata kinds, such as dbg, get stored in the IR in an
98    // "optimized" format which doesn't participate in the normal value use
99    // lists. This means that RAUW doesn't work, even on temporary MDNodes
100    // which otherwise support RAUW. Instead, we defer resolving MDNode
101    // references until the definitions have been processed.
102    struct MDRef {
103      SMLoc Loc;
104      unsigned MDKind, MDSlot;
105    };
106    DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
107
108    // Type resolution handling data structures.  The location is set when we
109    // have processed a use of the type but not a definition yet.
110    StringMap<std::pair<Type*, LocTy> > NamedTypes;
111    std::vector<std::pair<Type*, LocTy> > NumberedTypes;
112
113    std::vector<TrackingVH<MDNode> > NumberedMetadata;
114    std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
115
116    // Global Value reference information.
117    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
118    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
119    std::vector<GlobalValue*> NumberedVals;
120
121    // References to blockaddress.  The key is the function ValID, the value is
122    // a list of references to blocks in that function.
123    std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
124      ForwardRefBlockAddresses;
125
126  public:
127    LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
128      Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
129      M(m) {}
130    bool Run();
131
132    LLVMContext &getContext() { return Context; }
133
134  private:
135
136    bool Error(LocTy L, const Twine &Msg) const {
137      return Lex.Error(L, Msg);
138    }
139    bool TokError(const Twine &Msg) const {
140      return Error(Lex.getLoc(), Msg);
141    }
142
143    /// GetGlobalVal - Get a value with the specified name or ID, creating a
144    /// forward reference record if needed.  This can return null if the value
145    /// exists but does not have the right type.
146    GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
147    GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
148
149    // Helper Routines.
150    bool ParseToken(lltok::Kind T, const char *ErrMsg);
151    bool EatIfPresent(lltok::Kind T) {
152      if (Lex.getKind() != T) return false;
153      Lex.Lex();
154      return true;
155    }
156    bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
157      if (Lex.getKind() != T) {
158        Present = false;
159      } else {
160        if (Loc)
161          *Loc = Lex.getLoc();
162        Lex.Lex();
163        Present = true;
164      }
165      return false;
166    }
167    bool ParseStringConstant(std::string &Result);
168    bool ParseUInt32(unsigned &Val);
169    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
170      Loc = Lex.getLoc();
171      return ParseUInt32(Val);
172    }
173    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
174    bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
175    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
176    bool ParseOptionalLinkage(unsigned &Linkage) {
177      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
178    }
179    bool ParseOptionalVisibility(unsigned &Visibility);
180    bool ParseOptionalCallingConv(CallingConv::ID &CC);
181    bool ParseOptionalAlignment(unsigned &Alignment);
182    bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
183                               AtomicOrdering &Ordering);
184    bool ParseOptionalStackAlignment(unsigned &Alignment);
185    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
186    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
187    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
188      bool AteExtraComma;
189      if (ParseIndexList(Indices, AteExtraComma)) return true;
190      if (AteExtraComma)
191        return TokError("expected index");
192      return false;
193    }
194
195    // Top-Level Entities
196    bool ParseTopLevelEntities();
197    bool ValidateEndOfModule();
198    bool ParseTargetDefinition();
199    bool ParseDepLibs();
200    bool ParseModuleAsm();
201    bool ParseUnnamedType();
202    bool ParseNamedType();
203    bool ParseDeclare();
204    bool ParseDefine();
205
206    bool ParseGlobalType(bool &IsConstant);
207    bool ParseUnnamedGlobal();
208    bool ParseNamedGlobal();
209    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
210                     bool HasLinkage, unsigned Visibility);
211    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
212    bool ParseStandaloneMetadata();
213    bool ParseNamedMetadata();
214    bool ParseMDString(MDString *&Result);
215    bool ParseMDNodeID(MDNode *&Result);
216    bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
217
218    // Type Parsing.
219    bool ParseType(Type *&Result, bool AllowVoid = false);
220    bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
221      Loc = Lex.getLoc();
222      return ParseType(Result, AllowVoid);
223    }
224    bool ParseAnonStructType(Type *&Result, bool Packed);
225    bool ParseStructBody(SmallVectorImpl<Type*> &Body);
226    bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
227                               std::pair<Type*, LocTy> &Entry,
228                               Type *&ResultTy);
229
230    bool ParseArrayVectorType(Type *&Result, bool isVector);
231    bool ParseFunctionType(Type *&Result);
232
233    // Function Semantic Analysis.
234    class PerFunctionState {
235      LLParser &P;
236      Function &F;
237      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
238      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
239      std::vector<Value*> NumberedVals;
240
241      /// FunctionNumber - If this is an unnamed function, this is the slot
242      /// number of it, otherwise it is -1.
243      int FunctionNumber;
244    public:
245      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
246      ~PerFunctionState();
247
248      Function &getFunction() const { return F; }
249
250      bool FinishFunction();
251
252      /// GetVal - Get a value with the specified name or ID, creating a
253      /// forward reference record if needed.  This can return null if the value
254      /// exists but does not have the right type.
255      Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
256      Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
257
258      /// SetInstName - After an instruction is parsed and inserted into its
259      /// basic block, this installs its name.
260      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
261                       Instruction *Inst);
262
263      /// GetBB - Get a basic block with the specified name or ID, creating a
264      /// forward reference record if needed.  This can return null if the value
265      /// is not a BasicBlock.
266      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
267      BasicBlock *GetBB(unsigned ID, LocTy Loc);
268
269      /// DefineBB - Define the specified basic block, which is either named or
270      /// unnamed.  If there is an error, this returns null otherwise it returns
271      /// the block being defined.
272      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
273    };
274
275    bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
276                             PerFunctionState *PFS);
277
278    bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
279    bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
280      return ParseValue(Ty, V, &PFS);
281    }
282    bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
283                    PerFunctionState &PFS) {
284      Loc = Lex.getLoc();
285      return ParseValue(Ty, V, &PFS);
286    }
287
288    bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
289    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
290      return ParseTypeAndValue(V, &PFS);
291    }
292    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
293      Loc = Lex.getLoc();
294      return ParseTypeAndValue(V, PFS);
295    }
296    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
297                                PerFunctionState &PFS);
298    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
299      LocTy Loc;
300      return ParseTypeAndBasicBlock(BB, Loc, PFS);
301    }
302
303
304    struct ParamInfo {
305      LocTy Loc;
306      Value *V;
307      unsigned Attrs;
308      ParamInfo(LocTy loc, Value *v, unsigned attrs)
309        : Loc(loc), V(v), Attrs(attrs) {}
310    };
311    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
312                            PerFunctionState &PFS);
313
314    // Constant Parsing.
315    bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
316    bool ParseGlobalValue(Type *Ty, Constant *&V);
317    bool ParseGlobalTypeAndValue(Constant *&V);
318    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
319    bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
320    bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
321    bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
322    bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
323
324    // Function Parsing.
325    struct ArgInfo {
326      LocTy Loc;
327      Type *Ty;
328      unsigned Attrs;
329      std::string Name;
330      ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N)
331        : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
332    };
333    bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
334    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
335    bool ParseFunctionBody(Function &Fn);
336    bool ParseBasicBlock(PerFunctionState &PFS);
337
338    // Instruction Parsing.  Each instruction parsing routine can return with a
339    // normal result, an error result, or return having eaten an extra comma.
340    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
341    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
342                         PerFunctionState &PFS);
343    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
344
345    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
346    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
347    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
348    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
349    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
350    bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
351
352    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
353                         unsigned OperandType);
354    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
355    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
356    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
357    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
358    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
359    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
360    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
361    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
362    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
363    bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
364    bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
365    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
366    int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
367    int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
368    int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
369    int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
370    int ParseFence(Instruction *&I, PerFunctionState &PFS);
371    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
372    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
373    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
374
375    bool ResolveForwardRefBlockAddresses(Function *TheFn,
376                             std::vector<std::pair<ValID, GlobalValue*> > &Refs,
377                                         PerFunctionState *PFS);
378  };
379} // End llvm namespace
380
381#endif
382