LLParser.h revision 226633
1264790Sbapt//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
2264790Sbapt//
3264790Sbapt//                     The LLVM Compiler Infrastructure
4264790Sbapt//
5264790Sbapt// This file is distributed under the University of Illinois Open Source
6264790Sbapt// License. See LICENSE.TXT for details.
7264790Sbapt//
8264790Sbapt//===----------------------------------------------------------------------===//
9264790Sbapt//
10264790Sbapt//  This file defines the parser class for .ll files.
11264790Sbapt//
12264790Sbapt//===----------------------------------------------------------------------===//
13264790Sbapt
14264790Sbapt#ifndef LLVM_ASMPARSER_LLPARSER_H
15264790Sbapt#define LLVM_ASMPARSER_LLPARSER_H
16264790Sbapt
17264790Sbapt#include "LLLexer.h"
18264790Sbapt#include "llvm/Instructions.h"
19264790Sbapt#include "llvm/Module.h"
20264790Sbapt#include "llvm/Type.h"
21264790Sbapt#include "llvm/ADT/DenseMap.h"
22264790Sbapt#include "llvm/ADT/StringMap.h"
23264790Sbapt#include "llvm/Support/ValueHandle.h"
24264790Sbapt#include <map>
25264790Sbapt
26264790Sbaptnamespace llvm {
27264790Sbapt  class Module;
28264790Sbapt  class OpaqueType;
29264790Sbapt  class Function;
30264790Sbapt  class Value;
31264790Sbapt  class BasicBlock;
32264790Sbapt  class Instruction;
33264790Sbapt  class Constant;
34264790Sbapt  class GlobalValue;
35264790Sbapt  class MDString;
36264790Sbapt  class MDNode;
37264790Sbapt  class StructType;
38264790Sbapt
39264790Sbapt  /// ValID - Represents a reference of a definition of some sort with no type.
40264790Sbapt  /// There are several cases where we have to parse the value but where the
41264790Sbapt  /// type can depend on later context.  This may either be a numeric reference
42264790Sbapt  /// or a symbolic (%var) reference.  This is just a discriminated union.
43264790Sbapt  struct ValID {
44264790Sbapt    enum {
45264790Sbapt      t_LocalID, t_GlobalID,      // ID in UIntVal.
46264790Sbapt      t_LocalName, t_GlobalName,  // Name in StrVal.
47264790Sbapt      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
48264790Sbapt      t_Null, t_Undef, t_Zero,    // No value.
49264790Sbapt      t_EmptyArray,               // No value:  []
50264790Sbapt      t_Constant,                 // Value in ConstantVal.
51264790Sbapt      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
52264790Sbapt      t_MDNode,                   // Value in MDNodeVal.
53264790Sbapt      t_MDString,                 // Value in MDStringVal.
54264790Sbapt      t_ConstantStruct,           // Value in ConstantStructElts.
55264790Sbapt      t_PackedConstantStruct      // Value in ConstantStructElts.
56264790Sbapt    } Kind;
57264790Sbapt
58264790Sbapt    LLLexer::LocTy Loc;
59264790Sbapt    unsigned UIntVal;
60264790Sbapt    std::string StrVal, StrVal2;
61264790Sbapt    APSInt APSIntVal;
62264790Sbapt    APFloat APFloatVal;
63264790Sbapt    Constant *ConstantVal;
64264790Sbapt    MDNode *MDNodeVal;
65264790Sbapt    MDString *MDStringVal;
66264790Sbapt    Constant **ConstantStructElts;
67264790Sbapt
68264790Sbapt    ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
69264790Sbapt    ~ValID() {
70264790Sbapt      if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
71264790Sbapt        delete [] ConstantStructElts;
72264790Sbapt    }
73264790Sbapt
74264790Sbapt    bool operator<(const ValID &RHS) const {
75264790Sbapt      if (Kind == t_LocalID || Kind == t_GlobalID)
76264790Sbapt        return UIntVal < RHS.UIntVal;
77264790Sbapt      assert((Kind == t_LocalName || Kind == t_GlobalName ||
78264790Sbapt              Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
79264790Sbapt             "Ordering not defined for this ValID kind yet");
80264790Sbapt      return StrVal < RHS.StrVal;
81264790Sbapt    }
82264790Sbapt  };
83264790Sbapt
84264790Sbapt  class LLParser {
85264790Sbapt  public:
86264790Sbapt    typedef LLLexer::LocTy LocTy;
87264790Sbapt  private:
88264790Sbapt    LLVMContext &Context;
89264790Sbapt    LLLexer Lex;
90264790Sbapt    Module *M;
91264790Sbapt
92264790Sbapt    // Instruction metadata resolution.  Each instruction can have a list of
93264790Sbapt    // MDRef info associated with them.
94264790Sbapt    //
95264790Sbapt    // The simpler approach of just creating temporary MDNodes and then calling
96264790Sbapt    // RAUW on them when the definition is processed doesn't work because some
97264790Sbapt    // instruction metadata kinds, such as dbg, get stored in the IR in an
98264790Sbapt    // "optimized" format which doesn't participate in the normal value use
99264790Sbapt    // lists. This means that RAUW doesn't work, even on temporary MDNodes
100264790Sbapt    // which otherwise support RAUW. Instead, we defer resolving MDNode
101264790Sbapt    // references until the definitions have been processed.
102264790Sbapt    struct MDRef {
103264790Sbapt      SMLoc Loc;
104264790Sbapt      unsigned MDKind, MDSlot;
105264790Sbapt    };
106264790Sbapt    DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
107264790Sbapt
108264790Sbapt    // Type resolution handling data structures.  The location is set when we
109264790Sbapt    // have processed a use of the type but not a definition yet.
110264790Sbapt    StringMap<std::pair<Type*, LocTy> > NamedTypes;
111264790Sbapt    std::vector<std::pair<Type*, LocTy> > NumberedTypes;
112264790Sbapt
113264790Sbapt    std::vector<TrackingVH<MDNode> > NumberedMetadata;
114264790Sbapt    std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
115264790Sbapt
116264790Sbapt    // Global Value reference information.
117264790Sbapt    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
118264790Sbapt    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
119264790Sbapt    std::vector<GlobalValue*> NumberedVals;
120264790Sbapt
121264790Sbapt    // References to blockaddress.  The key is the function ValID, the value is
122264790Sbapt    // a list of references to blocks in that function.
123264790Sbapt    std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
124264790Sbapt      ForwardRefBlockAddresses;
125264790Sbapt
126264790Sbapt  public:
127264790Sbapt    LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
128264790Sbapt      Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
129264790Sbapt      M(m) {}
130264790Sbapt    bool Run();
131264790Sbapt
132264790Sbapt    LLVMContext &getContext() { return Context; }
133264790Sbapt
134264790Sbapt  private:
135264790Sbapt
136264790Sbapt    bool Error(LocTy L, const Twine &Msg) const {
137264790Sbapt      return Lex.Error(L, Msg);
138264790Sbapt    }
139264790Sbapt    bool TokError(const Twine &Msg) const {
140264790Sbapt      return Error(Lex.getLoc(), Msg);
141264790Sbapt    }
142264790Sbapt
143264790Sbapt    /// GetGlobalVal - Get a value with the specified name or ID, creating a
144264790Sbapt    /// forward reference record if needed.  This can return null if the value
145264790Sbapt    /// exists but does not have the right type.
146264790Sbapt    GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
147264790Sbapt    GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
148264790Sbapt
149264790Sbapt    // Helper Routines.
150264790Sbapt    bool ParseToken(lltok::Kind T, const char *ErrMsg);
151264790Sbapt    bool EatIfPresent(lltok::Kind T) {
152264790Sbapt      if (Lex.getKind() != T) return false;
153264790Sbapt      Lex.Lex();
154264790Sbapt      return true;
155264790Sbapt    }
156264790Sbapt    bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
157264790Sbapt      if (Lex.getKind() != T) {
158264790Sbapt        Present = false;
159264790Sbapt      } else {
160264790Sbapt        if (Loc)
161264790Sbapt          *Loc = Lex.getLoc();
162264790Sbapt        Lex.Lex();
163264790Sbapt        Present = true;
164264790Sbapt      }
165264790Sbapt      return false;
166264790Sbapt    }
167264790Sbapt    bool ParseStringConstant(std::string &Result);
168264790Sbapt    bool ParseUInt32(unsigned &Val);
169264790Sbapt    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
170264790Sbapt      Loc = Lex.getLoc();
171264790Sbapt      return ParseUInt32(Val);
172264790Sbapt    }
173264790Sbapt    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
174264790Sbapt    bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
175264790Sbapt    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
176264790Sbapt    bool ParseOptionalLinkage(unsigned &Linkage) {
177264790Sbapt      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
178264790Sbapt    }
179264790Sbapt    bool ParseOptionalVisibility(unsigned &Visibility);
180264790Sbapt    bool ParseOptionalCallingConv(CallingConv::ID &CC);
181264790Sbapt    bool ParseOptionalAlignment(unsigned &Alignment);
182264790Sbapt    bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
183264790Sbapt                               AtomicOrdering &Ordering);
184264790Sbapt    bool ParseOptionalStackAlignment(unsigned &Alignment);
185264790Sbapt    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
186264790Sbapt    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
187264790Sbapt    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
188264790Sbapt      bool AteExtraComma;
189264790Sbapt      if (ParseIndexList(Indices, AteExtraComma)) return true;
190264790Sbapt      if (AteExtraComma)
191264790Sbapt        return TokError("expected index");
192264790Sbapt      return false;
193264790Sbapt    }
194264790Sbapt
195264790Sbapt    // Top-Level Entities
196264790Sbapt    bool ParseTopLevelEntities();
197264790Sbapt    bool ValidateEndOfModule();
198264790Sbapt    bool ParseTargetDefinition();
199264790Sbapt    bool ParseDepLibs();
200264790Sbapt    bool ParseModuleAsm();
201264790Sbapt    bool ParseUnnamedType();
202264790Sbapt    bool ParseNamedType();
203264790Sbapt    bool ParseDeclare();
204264790Sbapt    bool ParseDefine();
205264790Sbapt
206264790Sbapt    bool ParseGlobalType(bool &IsConstant);
207264790Sbapt    bool ParseUnnamedGlobal();
208264790Sbapt    bool ParseNamedGlobal();
209264790Sbapt    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
210264790Sbapt                     bool HasLinkage, unsigned Visibility);
211264790Sbapt    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
212264790Sbapt    bool ParseStandaloneMetadata();
213264790Sbapt    bool ParseNamedMetadata();
214264790Sbapt    bool ParseMDString(MDString *&Result);
215264790Sbapt    bool ParseMDNodeID(MDNode *&Result);
216264790Sbapt    bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
217264790Sbapt
218264790Sbapt    // Type Parsing.
219264790Sbapt    bool ParseType(Type *&Result, bool AllowVoid = false);
220264790Sbapt    bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
221264790Sbapt      Loc = Lex.getLoc();
222264790Sbapt      return ParseType(Result, AllowVoid);
223264790Sbapt    }
224264790Sbapt    bool ParseAnonStructType(Type *&Result, bool Packed);
225264790Sbapt    bool ParseStructBody(SmallVectorImpl<Type*> &Body);
226264790Sbapt    bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
227264790Sbapt                               std::pair<Type*, LocTy> &Entry,
228264790Sbapt                               Type *&ResultTy);
229264790Sbapt
230264790Sbapt    bool ParseArrayVectorType(Type *&Result, bool isVector);
231264790Sbapt    bool ParseFunctionType(Type *&Result);
232264790Sbapt
233264790Sbapt    // Function Semantic Analysis.
234264790Sbapt    class PerFunctionState {
235264790Sbapt      LLParser &P;
236264790Sbapt      Function &F;
237264790Sbapt      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
238264790Sbapt      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
239264790Sbapt      std::vector<Value*> NumberedVals;
240264790Sbapt
241264790Sbapt      /// FunctionNumber - If this is an unnamed function, this is the slot
242264790Sbapt      /// number of it, otherwise it is -1.
243264790Sbapt      int FunctionNumber;
244264790Sbapt    public:
245264790Sbapt      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
246264790Sbapt      ~PerFunctionState();
247264790Sbapt
248264790Sbapt      Function &getFunction() const { return F; }
249264790Sbapt
250264790Sbapt      bool FinishFunction();
251264790Sbapt
252264790Sbapt      /// GetVal - Get a value with the specified name or ID, creating a
253264790Sbapt      /// forward reference record if needed.  This can return null if the value
254264790Sbapt      /// exists but does not have the right type.
255264790Sbapt      Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
256264790Sbapt      Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
257264790Sbapt
258264790Sbapt      /// SetInstName - After an instruction is parsed and inserted into its
259264790Sbapt      /// basic block, this installs its name.
260264790Sbapt      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
261264790Sbapt                       Instruction *Inst);
262264790Sbapt
263264790Sbapt      /// GetBB - Get a basic block with the specified name or ID, creating a
264264790Sbapt      /// forward reference record if needed.  This can return null if the value
265264790Sbapt      /// is not a BasicBlock.
266264790Sbapt      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
267264790Sbapt      BasicBlock *GetBB(unsigned ID, LocTy Loc);
268264790Sbapt
269264790Sbapt      /// DefineBB - Define the specified basic block, which is either named or
270264790Sbapt      /// unnamed.  If there is an error, this returns null otherwise it returns
271264790Sbapt      /// the block being defined.
272264790Sbapt      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
273264790Sbapt    };
274264790Sbapt
275264790Sbapt    bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
276264790Sbapt                             PerFunctionState *PFS);
277264790Sbapt
278264790Sbapt    bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
279264790Sbapt    bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
280264790Sbapt      return ParseValue(Ty, V, &PFS);
281264790Sbapt    }
282264790Sbapt    bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
283264790Sbapt                    PerFunctionState &PFS) {
284264790Sbapt      Loc = Lex.getLoc();
285264790Sbapt      return ParseValue(Ty, V, &PFS);
286264790Sbapt    }
287264790Sbapt
288264790Sbapt    bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
289264790Sbapt    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
290264790Sbapt      return ParseTypeAndValue(V, &PFS);
291264790Sbapt    }
292264790Sbapt    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
293264790Sbapt      Loc = Lex.getLoc();
294264790Sbapt      return ParseTypeAndValue(V, PFS);
295264790Sbapt    }
296264790Sbapt    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
297264790Sbapt                                PerFunctionState &PFS);
298264790Sbapt    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
299264790Sbapt      LocTy Loc;
300264790Sbapt      return ParseTypeAndBasicBlock(BB, Loc, PFS);
301264790Sbapt    }
302264790Sbapt
303264790Sbapt
304264790Sbapt    struct ParamInfo {
305264790Sbapt      LocTy Loc;
306264790Sbapt      Value *V;
307264790Sbapt      unsigned Attrs;
308264790Sbapt      ParamInfo(LocTy loc, Value *v, unsigned attrs)
309264790Sbapt        : Loc(loc), V(v), Attrs(attrs) {}
310264790Sbapt    };
311264790Sbapt    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
312264790Sbapt                            PerFunctionState &PFS);
313264790Sbapt
314264790Sbapt    // Constant Parsing.
315264790Sbapt    bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
316264790Sbapt    bool ParseGlobalValue(Type *Ty, Constant *&V);
317264790Sbapt    bool ParseGlobalTypeAndValue(Constant *&V);
318264790Sbapt    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
319264790Sbapt    bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
320264790Sbapt    bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
321264790Sbapt    bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
322264790Sbapt    bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
323264790Sbapt
324264790Sbapt    // Function Parsing.
325264790Sbapt    struct ArgInfo {
326264790Sbapt      LocTy Loc;
327264790Sbapt      Type *Ty;
328264790Sbapt      unsigned Attrs;
329264790Sbapt      std::string Name;
330264790Sbapt      ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N)
331264790Sbapt        : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
332264790Sbapt    };
333264790Sbapt    bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
334264790Sbapt    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
335264790Sbapt    bool ParseFunctionBody(Function &Fn);
336264790Sbapt    bool ParseBasicBlock(PerFunctionState &PFS);
337264790Sbapt
338264790Sbapt    // Instruction Parsing.  Each instruction parsing routine can return with a
339264790Sbapt    // normal result, an error result, or return having eaten an extra comma.
340264790Sbapt    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
341264790Sbapt    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
342264790Sbapt                         PerFunctionState &PFS);
343264790Sbapt    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
344264790Sbapt
345264790Sbapt    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
346264790Sbapt    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
347264790Sbapt    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
348264790Sbapt    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
349264790Sbapt    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
350264790Sbapt    bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
351264790Sbapt
352264790Sbapt    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
353264790Sbapt                         unsigned OperandType);
354264790Sbapt    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
355264790Sbapt    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
356264790Sbapt    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
357264790Sbapt    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
358264790Sbapt    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
359264790Sbapt    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
360264790Sbapt    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
361264790Sbapt    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
362264790Sbapt    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
363264790Sbapt    bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
364264790Sbapt    bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
365264790Sbapt    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
366264790Sbapt    int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
367264790Sbapt    int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
368264790Sbapt    int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
369264790Sbapt    int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
370264790Sbapt    int ParseFence(Instruction *&I, PerFunctionState &PFS);
371264790Sbapt    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
372264790Sbapt    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
373264790Sbapt    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
374264790Sbapt
375264790Sbapt    bool ResolveForwardRefBlockAddresses(Function *TheFn,
376264790Sbapt                             std::vector<std::pair<ValID, GlobalValue*> > &Refs,
377264790Sbapt                                         PerFunctionState *PFS);
378264790Sbapt  };
379264790Sbapt} // End llvm namespace
380264790Sbapt
381264790Sbapt#endif
382264790Sbapt