LLParser.h revision 234353
1//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the parser class for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ASMPARSER_LLPARSER_H
15#define LLVM_ASMPARSER_LLPARSER_H
16
17#include "LLLexer.h"
18#include "llvm/Attributes.h"
19#include "llvm/Instructions.h"
20#include "llvm/Module.h"
21#include "llvm/Type.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/StringMap.h"
24#include "llvm/Support/ValueHandle.h"
25#include <map>
26
27namespace llvm {
28  class Module;
29  class OpaqueType;
30  class Function;
31  class Value;
32  class BasicBlock;
33  class Instruction;
34  class Constant;
35  class GlobalValue;
36  class MDString;
37  class MDNode;
38  class StructType;
39
40  /// ValID - Represents a reference of a definition of some sort with no type.
41  /// There are several cases where we have to parse the value but where the
42  /// type can depend on later context.  This may either be a numeric reference
43  /// or a symbolic (%var) reference.  This is just a discriminated union.
44  struct ValID {
45    enum {
46      t_LocalID, t_GlobalID,      // ID in UIntVal.
47      t_LocalName, t_GlobalName,  // Name in StrVal.
48      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
49      t_Null, t_Undef, t_Zero,    // No value.
50      t_EmptyArray,               // No value:  []
51      t_Constant,                 // Value in ConstantVal.
52      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
53      t_MDNode,                   // Value in MDNodeVal.
54      t_MDString,                 // Value in MDStringVal.
55      t_ConstantStruct,           // Value in ConstantStructElts.
56      t_PackedConstantStruct      // Value in ConstantStructElts.
57    } Kind;
58
59    LLLexer::LocTy Loc;
60    unsigned UIntVal;
61    std::string StrVal, StrVal2;
62    APSInt APSIntVal;
63    APFloat APFloatVal;
64    Constant *ConstantVal;
65    MDNode *MDNodeVal;
66    MDString *MDStringVal;
67    Constant **ConstantStructElts;
68
69    ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
70    ~ValID() {
71      if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
72        delete [] ConstantStructElts;
73    }
74
75    bool operator<(const ValID &RHS) const {
76      if (Kind == t_LocalID || Kind == t_GlobalID)
77        return UIntVal < RHS.UIntVal;
78      assert((Kind == t_LocalName || Kind == t_GlobalName ||
79              Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
80             "Ordering not defined for this ValID kind yet");
81      return StrVal < RHS.StrVal;
82    }
83  };
84
85  class LLParser {
86  public:
87    typedef LLLexer::LocTy LocTy;
88  private:
89    LLVMContext &Context;
90    LLLexer Lex;
91    Module *M;
92
93    // Instruction metadata resolution.  Each instruction can have a list of
94    // MDRef info associated with them.
95    //
96    // The simpler approach of just creating temporary MDNodes and then calling
97    // RAUW on them when the definition is processed doesn't work because some
98    // instruction metadata kinds, such as dbg, get stored in the IR in an
99    // "optimized" format which doesn't participate in the normal value use
100    // lists. This means that RAUW doesn't work, even on temporary MDNodes
101    // which otherwise support RAUW. Instead, we defer resolving MDNode
102    // references until the definitions have been processed.
103    struct MDRef {
104      SMLoc Loc;
105      unsigned MDKind, MDSlot;
106    };
107    DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
108
109    // Type resolution handling data structures.  The location is set when we
110    // have processed a use of the type but not a definition yet.
111    StringMap<std::pair<Type*, LocTy> > NamedTypes;
112    std::vector<std::pair<Type*, LocTy> > NumberedTypes;
113
114    std::vector<TrackingVH<MDNode> > NumberedMetadata;
115    std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
116
117    // Global Value reference information.
118    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
119    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
120    std::vector<GlobalValue*> NumberedVals;
121
122    // References to blockaddress.  The key is the function ValID, the value is
123    // a list of references to blocks in that function.
124    std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
125      ForwardRefBlockAddresses;
126
127  public:
128    LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
129      Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
130      M(m) {}
131    bool Run();
132
133    LLVMContext &getContext() { return Context; }
134
135  private:
136
137    bool Error(LocTy L, const Twine &Msg) const {
138      return Lex.Error(L, Msg);
139    }
140    bool TokError(const Twine &Msg) const {
141      return Error(Lex.getLoc(), Msg);
142    }
143
144    /// GetGlobalVal - Get a value with the specified name or ID, creating a
145    /// forward reference record if needed.  This can return null if the value
146    /// exists but does not have the right type.
147    GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
148    GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
149
150    // Helper Routines.
151    bool ParseToken(lltok::Kind T, const char *ErrMsg);
152    bool EatIfPresent(lltok::Kind T) {
153      if (Lex.getKind() != T) return false;
154      Lex.Lex();
155      return true;
156    }
157    bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
158      if (Lex.getKind() != T) {
159        Present = false;
160      } else {
161        if (Loc)
162          *Loc = Lex.getLoc();
163        Lex.Lex();
164        Present = true;
165      }
166      return false;
167    }
168    bool ParseStringConstant(std::string &Result);
169    bool ParseUInt32(unsigned &Val);
170    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
171      Loc = Lex.getLoc();
172      return ParseUInt32(Val);
173    }
174    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
175    bool ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind);
176    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
177    bool ParseOptionalLinkage(unsigned &Linkage) {
178      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
179    }
180    bool ParseOptionalVisibility(unsigned &Visibility);
181    bool ParseOptionalCallingConv(CallingConv::ID &CC);
182    bool ParseOptionalAlignment(unsigned &Alignment);
183    bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
184                               AtomicOrdering &Ordering);
185    bool ParseOptionalStackAlignment(unsigned &Alignment);
186    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
187    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
188    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
189      bool AteExtraComma;
190      if (ParseIndexList(Indices, AteExtraComma)) return true;
191      if (AteExtraComma)
192        return TokError("expected index");
193      return false;
194    }
195
196    // Top-Level Entities
197    bool ParseTopLevelEntities();
198    bool ValidateEndOfModule();
199    bool ParseTargetDefinition();
200    bool ParseDepLibs();
201    bool ParseModuleAsm();
202    bool ParseUnnamedType();
203    bool ParseNamedType();
204    bool ParseDeclare();
205    bool ParseDefine();
206
207    bool ParseGlobalType(bool &IsConstant);
208    bool ParseUnnamedGlobal();
209    bool ParseNamedGlobal();
210    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
211                     bool HasLinkage, unsigned Visibility);
212    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
213    bool ParseStandaloneMetadata();
214    bool ParseNamedMetadata();
215    bool ParseMDString(MDString *&Result);
216    bool ParseMDNodeID(MDNode *&Result);
217    bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
218
219    // Type Parsing.
220    bool ParseType(Type *&Result, bool AllowVoid = false);
221    bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
222      Loc = Lex.getLoc();
223      return ParseType(Result, AllowVoid);
224    }
225    bool ParseAnonStructType(Type *&Result, bool Packed);
226    bool ParseStructBody(SmallVectorImpl<Type*> &Body);
227    bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
228                               std::pair<Type*, LocTy> &Entry,
229                               Type *&ResultTy);
230
231    bool ParseArrayVectorType(Type *&Result, bool isVector);
232    bool ParseFunctionType(Type *&Result);
233
234    // Function Semantic Analysis.
235    class PerFunctionState {
236      LLParser &P;
237      Function &F;
238      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
239      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
240      std::vector<Value*> NumberedVals;
241
242      /// FunctionNumber - If this is an unnamed function, this is the slot
243      /// number of it, otherwise it is -1.
244      int FunctionNumber;
245    public:
246      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
247      ~PerFunctionState();
248
249      Function &getFunction() const { return F; }
250
251      bool FinishFunction();
252
253      /// GetVal - Get a value with the specified name or ID, creating a
254      /// forward reference record if needed.  This can return null if the value
255      /// exists but does not have the right type.
256      Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
257      Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
258
259      /// SetInstName - After an instruction is parsed and inserted into its
260      /// basic block, this installs its name.
261      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
262                       Instruction *Inst);
263
264      /// GetBB - Get a basic block with the specified name or ID, creating a
265      /// forward reference record if needed.  This can return null if the value
266      /// is not a BasicBlock.
267      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
268      BasicBlock *GetBB(unsigned ID, LocTy Loc);
269
270      /// DefineBB - Define the specified basic block, which is either named or
271      /// unnamed.  If there is an error, this returns null otherwise it returns
272      /// the block being defined.
273      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
274    };
275
276    bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
277                             PerFunctionState *PFS);
278
279    bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
280    bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
281      return ParseValue(Ty, V, &PFS);
282    }
283    bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
284                    PerFunctionState &PFS) {
285      Loc = Lex.getLoc();
286      return ParseValue(Ty, V, &PFS);
287    }
288
289    bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
290    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
291      return ParseTypeAndValue(V, &PFS);
292    }
293    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
294      Loc = Lex.getLoc();
295      return ParseTypeAndValue(V, PFS);
296    }
297    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
298                                PerFunctionState &PFS);
299    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
300      LocTy Loc;
301      return ParseTypeAndBasicBlock(BB, Loc, PFS);
302    }
303
304
305    struct ParamInfo {
306      LocTy Loc;
307      Value *V;
308      Attributes Attrs;
309      ParamInfo(LocTy loc, Value *v, Attributes attrs)
310        : Loc(loc), V(v), Attrs(attrs) {}
311    };
312    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
313                            PerFunctionState &PFS);
314
315    // Constant Parsing.
316    bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
317    bool ParseGlobalValue(Type *Ty, Constant *&V);
318    bool ParseGlobalTypeAndValue(Constant *&V);
319    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
320    bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
321    bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
322    bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
323    bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
324
325    // Function Parsing.
326    struct ArgInfo {
327      LocTy Loc;
328      Type *Ty;
329      Attributes Attrs;
330      std::string Name;
331      ArgInfo(LocTy L, Type *ty, Attributes Attr, const std::string &N)
332        : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
333    };
334    bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
335    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
336    bool ParseFunctionBody(Function &Fn);
337    bool ParseBasicBlock(PerFunctionState &PFS);
338
339    // Instruction Parsing.  Each instruction parsing routine can return with a
340    // normal result, an error result, or return having eaten an extra comma.
341    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
342    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
343                         PerFunctionState &PFS);
344    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
345
346    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
347    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
348    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
349    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
350    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
351    bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
352
353    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
354                         unsigned OperandType);
355    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
356    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
357    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
358    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
359    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
360    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
361    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
362    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
363    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
364    bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
365    bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
366    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
367    int ParseLoad(Instruction *&I, PerFunctionState &PFS);
368    int ParseStore(Instruction *&I, PerFunctionState &PFS);
369    int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
370    int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
371    int ParseFence(Instruction *&I, PerFunctionState &PFS);
372    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
373    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
374    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
375
376    bool ResolveForwardRefBlockAddresses(Function *TheFn,
377                             std::vector<std::pair<ValID, GlobalValue*> > &Refs,
378                                         PerFunctionState *PFS);
379  };
380} // End llvm namespace
381
382#endif
383