1//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the parser class for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H
15#define LLVM_LIB_ASMPARSER_LLPARSER_H
16
17#include "LLLexer.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/StringMap.h"
20#include "llvm/IR/Attributes.h"
21#include "llvm/IR/Instructions.h"
22#include "llvm/IR/Module.h"
23#include "llvm/IR/Operator.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/ValueHandle.h"
26#include <map>
27
28namespace llvm {
29  class Module;
30  class OpaqueType;
31  class Function;
32  class Value;
33  class BasicBlock;
34  class Instruction;
35  class Constant;
36  class GlobalValue;
37  class Comdat;
38  class MDString;
39  class MDNode;
40  struct SlotMapping;
41  class StructType;
42
43  /// ValID - Represents a reference of a definition of some sort with no type.
44  /// There are several cases where we have to parse the value but where the
45  /// type can depend on later context.  This may either be a numeric reference
46  /// or a symbolic (%var) reference.  This is just a discriminated union.
47  struct ValID {
48    enum {
49      t_LocalID, t_GlobalID,           // ID in UIntVal.
50      t_LocalName, t_GlobalName,       // Name in StrVal.
51      t_APSInt, t_APFloat,             // Value in APSIntVal/APFloatVal.
52      t_Null, t_Undef, t_Zero, t_None, // No value.
53      t_EmptyArray,                    // No value:  []
54      t_Constant,                      // Value in ConstantVal.
55      t_InlineAsm,                     // Value in FTy/StrVal/StrVal2/UIntVal.
56      t_ConstantStruct,                // Value in ConstantStructElts.
57      t_PackedConstantStruct           // Value in ConstantStructElts.
58    } Kind = t_LocalID;
59
60    LLLexer::LocTy Loc;
61    unsigned UIntVal;
62    FunctionType *FTy = nullptr;
63    std::string StrVal, StrVal2;
64    APSInt APSIntVal;
65    APFloat APFloatVal{0.0};
66    Constant *ConstantVal;
67    std::unique_ptr<Constant *[]> ConstantStructElts;
68
69    ValID() = default;
70    ValID(const ValID &RHS)
71        : Kind(RHS.Kind), Loc(RHS.Loc), UIntVal(RHS.UIntVal), FTy(RHS.FTy),
72          StrVal(RHS.StrVal), StrVal2(RHS.StrVal2), APSIntVal(RHS.APSIntVal),
73          APFloatVal(RHS.APFloatVal), ConstantVal(RHS.ConstantVal) {
74      assert(!RHS.ConstantStructElts);
75    }
76
77    bool operator<(const ValID &RHS) const {
78      if (Kind == t_LocalID || Kind == t_GlobalID)
79        return UIntVal < RHS.UIntVal;
80      assert((Kind == t_LocalName || Kind == t_GlobalName ||
81              Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
82             "Ordering not defined for this ValID kind yet");
83      return StrVal < RHS.StrVal;
84    }
85  };
86
87  class LLParser {
88  public:
89    typedef LLLexer::LocTy LocTy;
90  private:
91    LLVMContext &Context;
92    LLLexer Lex;
93    Module *M;
94    SlotMapping *Slots;
95
96    // Instruction metadata resolution.  Each instruction can have a list of
97    // MDRef info associated with them.
98    //
99    // The simpler approach of just creating temporary MDNodes and then calling
100    // RAUW on them when the definition is processed doesn't work because some
101    // instruction metadata kinds, such as dbg, get stored in the IR in an
102    // "optimized" format which doesn't participate in the normal value use
103    // lists. This means that RAUW doesn't work, even on temporary MDNodes
104    // which otherwise support RAUW. Instead, we defer resolving MDNode
105    // references until the definitions have been processed.
106    struct MDRef {
107      SMLoc Loc;
108      unsigned MDKind, MDSlot;
109    };
110
111    SmallVector<Instruction*, 64> InstsWithTBAATag;
112
113    // Type resolution handling data structures.  The location is set when we
114    // have processed a use of the type but not a definition yet.
115    StringMap<std::pair<Type*, LocTy> > NamedTypes;
116    std::map<unsigned, std::pair<Type*, LocTy> > NumberedTypes;
117
118    std::map<unsigned, TrackingMDNodeRef> NumberedMetadata;
119    std::map<unsigned, std::pair<TempMDTuple, LocTy>> ForwardRefMDNodes;
120
121    // Global Value reference information.
122    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
123    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
124    std::vector<GlobalValue*> NumberedVals;
125
126    // Comdat forward reference information.
127    std::map<std::string, LocTy> ForwardRefComdats;
128
129    // References to blockaddress.  The key is the function ValID, the value is
130    // a list of references to blocks in that function.
131    std::map<ValID, std::map<ValID, GlobalValue *>> ForwardRefBlockAddresses;
132    class PerFunctionState;
133    /// Reference to per-function state to allow basic blocks to be
134    /// forward-referenced by blockaddress instructions within the same
135    /// function.
136    PerFunctionState *BlockAddressPFS;
137
138    // Attribute builder reference information.
139    std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups;
140    std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
141
142  public:
143    LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M,
144             SlotMapping *Slots = nullptr)
145        : Context(M->getContext()), Lex(F, SM, Err, M->getContext()), M(M),
146          Slots(Slots), BlockAddressPFS(nullptr) {}
147    bool Run();
148
149    bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots);
150
151    LLVMContext &getContext() { return Context; }
152
153  private:
154
155    bool Error(LocTy L, const Twine &Msg) const {
156      return Lex.Error(L, Msg);
157    }
158    bool TokError(const Twine &Msg) const {
159      return Error(Lex.getLoc(), Msg);
160    }
161
162    /// Restore the internal name and slot mappings using the mappings that
163    /// were created at an earlier parsing stage.
164    void restoreParsingState(const SlotMapping *Slots);
165
166    /// GetGlobalVal - Get a value with the specified name or ID, creating a
167    /// forward reference record if needed.  This can return null if the value
168    /// exists but does not have the right type.
169    GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
170    GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
171
172    /// Get a Comdat with the specified name, creating a forward reference
173    /// record if needed.
174    Comdat *getComdat(const std::string &N, LocTy Loc);
175
176    // Helper Routines.
177    bool ParseToken(lltok::Kind T, const char *ErrMsg);
178    bool EatIfPresent(lltok::Kind T) {
179      if (Lex.getKind() != T) return false;
180      Lex.Lex();
181      return true;
182    }
183
184    FastMathFlags EatFastMathFlagsIfPresent() {
185      FastMathFlags FMF;
186      while (true)
187        switch (Lex.getKind()) {
188        case lltok::kw_fast: FMF.setUnsafeAlgebra();   Lex.Lex(); continue;
189        case lltok::kw_nnan: FMF.setNoNaNs();          Lex.Lex(); continue;
190        case lltok::kw_ninf: FMF.setNoInfs();          Lex.Lex(); continue;
191        case lltok::kw_nsz:  FMF.setNoSignedZeros();   Lex.Lex(); continue;
192        case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
193        default: return FMF;
194        }
195      return FMF;
196    }
197
198    bool ParseOptionalToken(lltok::Kind T, bool &Present,
199                            LocTy *Loc = nullptr) {
200      if (Lex.getKind() != T) {
201        Present = false;
202      } else {
203        if (Loc)
204          *Loc = Lex.getLoc();
205        Lex.Lex();
206        Present = true;
207      }
208      return false;
209    }
210    bool ParseStringConstant(std::string &Result);
211    bool ParseUInt32(unsigned &Val);
212    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
213      Loc = Lex.getLoc();
214      return ParseUInt32(Val);
215    }
216    bool ParseUInt64(uint64_t &Val);
217    bool ParseUInt64(uint64_t &Val, LocTy &Loc) {
218      Loc = Lex.getLoc();
219      return ParseUInt64(Val);
220    }
221
222    bool ParseStringAttribute(AttrBuilder &B);
223
224    bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
225    bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
226    bool parseOptionalUnnamedAddr(bool &UnnamedAddr) {
227      return ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr);
228    }
229    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
230    bool ParseOptionalParamAttrs(AttrBuilder &B);
231    bool ParseOptionalReturnAttrs(AttrBuilder &B);
232    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
233    bool ParseOptionalLinkage(unsigned &Linkage) {
234      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
235    }
236    bool ParseOptionalVisibility(unsigned &Visibility);
237    bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass);
238    bool ParseOptionalCallingConv(unsigned &CC);
239    bool ParseOptionalAlignment(unsigned &Alignment);
240    bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
241    bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
242                               AtomicOrdering &Ordering);
243    bool ParseOrdering(AtomicOrdering &Ordering);
244    bool ParseOptionalStackAlignment(unsigned &Alignment);
245    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
246    bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
247    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
248    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
249      bool AteExtraComma;
250      if (ParseIndexList(Indices, AteExtraComma)) return true;
251      if (AteExtraComma)
252        return TokError("expected index");
253      return false;
254    }
255
256    // Top-Level Entities
257    bool ParseTopLevelEntities();
258    bool ValidateEndOfModule();
259    bool ParseTargetDefinition();
260    bool ParseModuleAsm();
261    bool ParseDepLibs();        // FIXME: Remove in 4.0.
262    bool ParseUnnamedType();
263    bool ParseNamedType();
264    bool ParseDeclare();
265    bool ParseDefine();
266
267    bool ParseGlobalType(bool &IsConstant);
268    bool ParseUnnamedGlobal();
269    bool ParseNamedGlobal();
270    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
271                     bool HasLinkage, unsigned Visibility,
272                     unsigned DLLStorageClass,
273                     GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
274    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Linkage,
275                    unsigned Visibility, unsigned DLLStorageClass,
276                    GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
277    bool parseComdat();
278    bool ParseStandaloneMetadata();
279    bool ParseNamedMetadata();
280    bool ParseMDString(MDString *&Result);
281    bool ParseMDNodeID(MDNode *&Result);
282    bool ParseUnnamedAttrGrp();
283    bool ParseFnAttributeValuePairs(AttrBuilder &B,
284                                    std::vector<unsigned> &FwdRefAttrGrps,
285                                    bool inAttrGrp, LocTy &BuiltinLoc);
286
287    // Type Parsing.
288    bool ParseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);
289    bool ParseType(Type *&Result, bool AllowVoid = false) {
290      return ParseType(Result, "expected type", AllowVoid);
291    }
292    bool ParseType(Type *&Result, const Twine &Msg, LocTy &Loc,
293                   bool AllowVoid = false) {
294      Loc = Lex.getLoc();
295      return ParseType(Result, Msg, AllowVoid);
296    }
297    bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
298      Loc = Lex.getLoc();
299      return ParseType(Result, AllowVoid);
300    }
301    bool ParseAnonStructType(Type *&Result, bool Packed);
302    bool ParseStructBody(SmallVectorImpl<Type*> &Body);
303    bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
304                               std::pair<Type*, LocTy> &Entry,
305                               Type *&ResultTy);
306
307    bool ParseArrayVectorType(Type *&Result, bool isVector);
308    bool ParseFunctionType(Type *&Result);
309
310    // Function Semantic Analysis.
311    class PerFunctionState {
312      LLParser &P;
313      Function &F;
314      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
315      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
316      std::vector<Value*> NumberedVals;
317
318      /// FunctionNumber - If this is an unnamed function, this is the slot
319      /// number of it, otherwise it is -1.
320      int FunctionNumber;
321    public:
322      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
323      ~PerFunctionState();
324
325      Function &getFunction() const { return F; }
326
327      bool FinishFunction();
328
329      /// GetVal - Get a value with the specified name or ID, creating a
330      /// forward reference record if needed.  This can return null if the value
331      /// exists but does not have the right type.
332      Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
333      Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
334
335      /// SetInstName - After an instruction is parsed and inserted into its
336      /// basic block, this installs its name.
337      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
338                       Instruction *Inst);
339
340      /// GetBB - Get a basic block with the specified name or ID, creating a
341      /// forward reference record if needed.  This can return null if the value
342      /// is not a BasicBlock.
343      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
344      BasicBlock *GetBB(unsigned ID, LocTy Loc);
345
346      /// DefineBB - Define the specified basic block, which is either named or
347      /// unnamed.  If there is an error, this returns null otherwise it returns
348      /// the block being defined.
349      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
350
351      bool resolveForwardRefBlockAddresses();
352    };
353
354    bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
355                             PerFunctionState *PFS);
356
357    bool parseConstantValue(Type *Ty, Constant *&C);
358    bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
359    bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
360      return ParseValue(Ty, V, &PFS);
361    }
362
363    bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
364                    PerFunctionState &PFS) {
365      Loc = Lex.getLoc();
366      return ParseValue(Ty, V, &PFS);
367    }
368
369    bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
370    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
371      return ParseTypeAndValue(V, &PFS);
372    }
373    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
374      Loc = Lex.getLoc();
375      return ParseTypeAndValue(V, PFS);
376    }
377    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
378                                PerFunctionState &PFS);
379    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
380      LocTy Loc;
381      return ParseTypeAndBasicBlock(BB, Loc, PFS);
382    }
383
384
385    struct ParamInfo {
386      LocTy Loc;
387      Value *V;
388      AttributeSet Attrs;
389      ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
390        : Loc(loc), V(v), Attrs(attrs) {}
391    };
392    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
393                            PerFunctionState &PFS,
394                            bool IsMustTailCall = false,
395                            bool InVarArgsFunc = false);
396
397    bool
398    ParseOptionalOperandBundles(SmallVectorImpl<OperandBundleDef> &BundleList,
399                                PerFunctionState &PFS);
400
401    bool ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
402                            PerFunctionState &PFS);
403
404    // Constant Parsing.
405    bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
406    bool ParseGlobalValue(Type *Ty, Constant *&V);
407    bool ParseGlobalTypeAndValue(Constant *&V);
408    bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts);
409    bool parseOptionalComdat(StringRef GlobalName, Comdat *&C);
410    bool ParseMetadataAsValue(Value *&V, PerFunctionState &PFS);
411    bool ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
412                              PerFunctionState *PFS);
413    bool ParseMetadata(Metadata *&MD, PerFunctionState *PFS);
414    bool ParseMDTuple(MDNode *&MD, bool IsDistinct = false);
415    bool ParseMDNode(MDNode *&MD);
416    bool ParseMDNodeTail(MDNode *&MD);
417    bool ParseMDNodeVector(SmallVectorImpl<Metadata *> &MDs);
418    bool ParseMetadataAttachment(unsigned &Kind, MDNode *&MD);
419    bool ParseInstructionMetadata(Instruction &Inst);
420    bool ParseOptionalFunctionMetadata(Function &F);
421
422    template <class FieldTy>
423    bool ParseMDField(LocTy Loc, StringRef Name, FieldTy &Result);
424    template <class FieldTy> bool ParseMDField(StringRef Name, FieldTy &Result);
425    template <class ParserTy>
426    bool ParseMDFieldsImplBody(ParserTy parseField);
427    template <class ParserTy>
428    bool ParseMDFieldsImpl(ParserTy parseField, LocTy &ClosingLoc);
429    bool ParseSpecializedMDNode(MDNode *&N, bool IsDistinct = false);
430
431#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS)                                  \
432  bool Parse##CLASS(MDNode *&Result, bool IsDistinct);
433#include "llvm/IR/Metadata.def"
434
435    // Function Parsing.
436    struct ArgInfo {
437      LocTy Loc;
438      Type *Ty;
439      AttributeSet Attrs;
440      std::string Name;
441      ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
442        : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
443    };
444    bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
445    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
446    bool ParseFunctionBody(Function &Fn);
447    bool ParseBasicBlock(PerFunctionState &PFS);
448
449    enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail };
450
451    // Instruction Parsing.  Each instruction parsing routine can return with a
452    // normal result, an error result, or return having eaten an extra comma.
453    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
454    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
455                         PerFunctionState &PFS);
456    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
457
458    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
459    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
460    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
461    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
462    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
463    bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
464    bool ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS);
465    bool ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS);
466    bool ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
467    bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
468    bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
469
470    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
471                         unsigned OperandType);
472    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
473    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
474    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
475    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
476    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
477    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
478    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
479    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
480    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
481    bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
482    bool ParseCall(Instruction *&I, PerFunctionState &PFS,
483                   CallInst::TailCallKind IsTail);
484    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
485    int ParseLoad(Instruction *&I, PerFunctionState &PFS);
486    int ParseStore(Instruction *&I, PerFunctionState &PFS);
487    int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
488    int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
489    int ParseFence(Instruction *&I, PerFunctionState &PFS);
490    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
491    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
492    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
493
494    // Use-list order directives.
495    bool ParseUseListOrder(PerFunctionState *PFS = nullptr);
496    bool ParseUseListOrderBB();
497    bool ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
498    bool sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes, SMLoc Loc);
499  };
500} // End llvm namespace
501
502#endif
503