LLParser.h revision 226633
1264790Sbapt//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===// 2264790Sbapt// 3264790Sbapt// The LLVM Compiler Infrastructure 4264790Sbapt// 5264790Sbapt// This file is distributed under the University of Illinois Open Source 6264790Sbapt// License. See LICENSE.TXT for details. 7264790Sbapt// 8264790Sbapt//===----------------------------------------------------------------------===// 9264790Sbapt// 10264790Sbapt// This file defines the parser class for .ll files. 11264790Sbapt// 12264790Sbapt//===----------------------------------------------------------------------===// 13264790Sbapt 14264790Sbapt#ifndef LLVM_ASMPARSER_LLPARSER_H 15264790Sbapt#define LLVM_ASMPARSER_LLPARSER_H 16264790Sbapt 17264790Sbapt#include "LLLexer.h" 18264790Sbapt#include "llvm/Instructions.h" 19264790Sbapt#include "llvm/Module.h" 20264790Sbapt#include "llvm/Type.h" 21264790Sbapt#include "llvm/ADT/DenseMap.h" 22264790Sbapt#include "llvm/ADT/StringMap.h" 23264790Sbapt#include "llvm/Support/ValueHandle.h" 24264790Sbapt#include <map> 25264790Sbapt 26264790Sbaptnamespace llvm { 27264790Sbapt class Module; 28264790Sbapt class OpaqueType; 29264790Sbapt class Function; 30264790Sbapt class Value; 31264790Sbapt class BasicBlock; 32264790Sbapt class Instruction; 33264790Sbapt class Constant; 34264790Sbapt class GlobalValue; 35264790Sbapt class MDString; 36264790Sbapt class MDNode; 37264790Sbapt class StructType; 38264790Sbapt 39264790Sbapt /// ValID - Represents a reference of a definition of some sort with no type. 40264790Sbapt /// There are several cases where we have to parse the value but where the 41264790Sbapt /// type can depend on later context. This may either be a numeric reference 42264790Sbapt /// or a symbolic (%var) reference. This is just a discriminated union. 43264790Sbapt struct ValID { 44264790Sbapt enum { 45264790Sbapt t_LocalID, t_GlobalID, // ID in UIntVal. 46264790Sbapt t_LocalName, t_GlobalName, // Name in StrVal. 47264790Sbapt t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal. 48264790Sbapt t_Null, t_Undef, t_Zero, // No value. 49264790Sbapt t_EmptyArray, // No value: [] 50264790Sbapt t_Constant, // Value in ConstantVal. 51264790Sbapt t_InlineAsm, // Value in StrVal/StrVal2/UIntVal. 52264790Sbapt t_MDNode, // Value in MDNodeVal. 53264790Sbapt t_MDString, // Value in MDStringVal. 54264790Sbapt t_ConstantStruct, // Value in ConstantStructElts. 55264790Sbapt t_PackedConstantStruct // Value in ConstantStructElts. 56264790Sbapt } Kind; 57264790Sbapt 58264790Sbapt LLLexer::LocTy Loc; 59264790Sbapt unsigned UIntVal; 60264790Sbapt std::string StrVal, StrVal2; 61264790Sbapt APSInt APSIntVal; 62264790Sbapt APFloat APFloatVal; 63264790Sbapt Constant *ConstantVal; 64264790Sbapt MDNode *MDNodeVal; 65264790Sbapt MDString *MDStringVal; 66264790Sbapt Constant **ConstantStructElts; 67264790Sbapt 68264790Sbapt ValID() : Kind(t_LocalID), APFloatVal(0.0) {} 69264790Sbapt ~ValID() { 70264790Sbapt if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) 71264790Sbapt delete [] ConstantStructElts; 72264790Sbapt } 73264790Sbapt 74264790Sbapt bool operator<(const ValID &RHS) const { 75264790Sbapt if (Kind == t_LocalID || Kind == t_GlobalID) 76264790Sbapt return UIntVal < RHS.UIntVal; 77264790Sbapt assert((Kind == t_LocalName || Kind == t_GlobalName || 78264790Sbapt Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) && 79264790Sbapt "Ordering not defined for this ValID kind yet"); 80264790Sbapt return StrVal < RHS.StrVal; 81264790Sbapt } 82264790Sbapt }; 83264790Sbapt 84264790Sbapt class LLParser { 85264790Sbapt public: 86264790Sbapt typedef LLLexer::LocTy LocTy; 87264790Sbapt private: 88264790Sbapt LLVMContext &Context; 89264790Sbapt LLLexer Lex; 90264790Sbapt Module *M; 91264790Sbapt 92264790Sbapt // Instruction metadata resolution. Each instruction can have a list of 93264790Sbapt // MDRef info associated with them. 94264790Sbapt // 95264790Sbapt // The simpler approach of just creating temporary MDNodes and then calling 96264790Sbapt // RAUW on them when the definition is processed doesn't work because some 97264790Sbapt // instruction metadata kinds, such as dbg, get stored in the IR in an 98264790Sbapt // "optimized" format which doesn't participate in the normal value use 99264790Sbapt // lists. This means that RAUW doesn't work, even on temporary MDNodes 100264790Sbapt // which otherwise support RAUW. Instead, we defer resolving MDNode 101264790Sbapt // references until the definitions have been processed. 102264790Sbapt struct MDRef { 103264790Sbapt SMLoc Loc; 104264790Sbapt unsigned MDKind, MDSlot; 105264790Sbapt }; 106264790Sbapt DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata; 107264790Sbapt 108264790Sbapt // Type resolution handling data structures. The location is set when we 109264790Sbapt // have processed a use of the type but not a definition yet. 110264790Sbapt StringMap<std::pair<Type*, LocTy> > NamedTypes; 111264790Sbapt std::vector<std::pair<Type*, LocTy> > NumberedTypes; 112264790Sbapt 113264790Sbapt std::vector<TrackingVH<MDNode> > NumberedMetadata; 114264790Sbapt std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes; 115264790Sbapt 116264790Sbapt // Global Value reference information. 117264790Sbapt std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals; 118264790Sbapt std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs; 119264790Sbapt std::vector<GlobalValue*> NumberedVals; 120264790Sbapt 121264790Sbapt // References to blockaddress. The key is the function ValID, the value is 122264790Sbapt // a list of references to blocks in that function. 123264790Sbapt std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > > 124264790Sbapt ForwardRefBlockAddresses; 125264790Sbapt 126264790Sbapt public: 127264790Sbapt LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : 128264790Sbapt Context(m->getContext()), Lex(F, SM, Err, m->getContext()), 129264790Sbapt M(m) {} 130264790Sbapt bool Run(); 131264790Sbapt 132264790Sbapt LLVMContext &getContext() { return Context; } 133264790Sbapt 134264790Sbapt private: 135264790Sbapt 136264790Sbapt bool Error(LocTy L, const Twine &Msg) const { 137264790Sbapt return Lex.Error(L, Msg); 138264790Sbapt } 139264790Sbapt bool TokError(const Twine &Msg) const { 140264790Sbapt return Error(Lex.getLoc(), Msg); 141264790Sbapt } 142264790Sbapt 143264790Sbapt /// GetGlobalVal - Get a value with the specified name or ID, creating a 144264790Sbapt /// forward reference record if needed. This can return null if the value 145264790Sbapt /// exists but does not have the right type. 146264790Sbapt GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc); 147264790Sbapt GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc); 148264790Sbapt 149264790Sbapt // Helper Routines. 150264790Sbapt bool ParseToken(lltok::Kind T, const char *ErrMsg); 151264790Sbapt bool EatIfPresent(lltok::Kind T) { 152264790Sbapt if (Lex.getKind() != T) return false; 153264790Sbapt Lex.Lex(); 154264790Sbapt return true; 155264790Sbapt } 156264790Sbapt bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) { 157264790Sbapt if (Lex.getKind() != T) { 158264790Sbapt Present = false; 159264790Sbapt } else { 160264790Sbapt if (Loc) 161264790Sbapt *Loc = Lex.getLoc(); 162264790Sbapt Lex.Lex(); 163264790Sbapt Present = true; 164264790Sbapt } 165264790Sbapt return false; 166264790Sbapt } 167264790Sbapt bool ParseStringConstant(std::string &Result); 168264790Sbapt bool ParseUInt32(unsigned &Val); 169264790Sbapt bool ParseUInt32(unsigned &Val, LocTy &Loc) { 170264790Sbapt Loc = Lex.getLoc(); 171264790Sbapt return ParseUInt32(Val); 172264790Sbapt } 173264790Sbapt bool ParseOptionalAddrSpace(unsigned &AddrSpace); 174264790Sbapt bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind); 175264790Sbapt bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); 176264790Sbapt bool ParseOptionalLinkage(unsigned &Linkage) { 177264790Sbapt bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage); 178264790Sbapt } 179264790Sbapt bool ParseOptionalVisibility(unsigned &Visibility); 180264790Sbapt bool ParseOptionalCallingConv(CallingConv::ID &CC); 181264790Sbapt bool ParseOptionalAlignment(unsigned &Alignment); 182264790Sbapt bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, 183264790Sbapt AtomicOrdering &Ordering); 184264790Sbapt bool ParseOptionalStackAlignment(unsigned &Alignment); 185264790Sbapt bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); 186264790Sbapt bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma); 187264790Sbapt bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) { 188264790Sbapt bool AteExtraComma; 189264790Sbapt if (ParseIndexList(Indices, AteExtraComma)) return true; 190264790Sbapt if (AteExtraComma) 191264790Sbapt return TokError("expected index"); 192264790Sbapt return false; 193264790Sbapt } 194264790Sbapt 195264790Sbapt // Top-Level Entities 196264790Sbapt bool ParseTopLevelEntities(); 197264790Sbapt bool ValidateEndOfModule(); 198264790Sbapt bool ParseTargetDefinition(); 199264790Sbapt bool ParseDepLibs(); 200264790Sbapt bool ParseModuleAsm(); 201264790Sbapt bool ParseUnnamedType(); 202264790Sbapt bool ParseNamedType(); 203264790Sbapt bool ParseDeclare(); 204264790Sbapt bool ParseDefine(); 205264790Sbapt 206264790Sbapt bool ParseGlobalType(bool &IsConstant); 207264790Sbapt bool ParseUnnamedGlobal(); 208264790Sbapt bool ParseNamedGlobal(); 209264790Sbapt bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage, 210264790Sbapt bool HasLinkage, unsigned Visibility); 211264790Sbapt bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility); 212264790Sbapt bool ParseStandaloneMetadata(); 213264790Sbapt bool ParseNamedMetadata(); 214264790Sbapt bool ParseMDString(MDString *&Result); 215264790Sbapt bool ParseMDNodeID(MDNode *&Result); 216264790Sbapt bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); 217264790Sbapt 218264790Sbapt // Type Parsing. 219264790Sbapt bool ParseType(Type *&Result, bool AllowVoid = false); 220264790Sbapt bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) { 221264790Sbapt Loc = Lex.getLoc(); 222264790Sbapt return ParseType(Result, AllowVoid); 223264790Sbapt } 224264790Sbapt bool ParseAnonStructType(Type *&Result, bool Packed); 225264790Sbapt bool ParseStructBody(SmallVectorImpl<Type*> &Body); 226264790Sbapt bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name, 227264790Sbapt std::pair<Type*, LocTy> &Entry, 228264790Sbapt Type *&ResultTy); 229264790Sbapt 230264790Sbapt bool ParseArrayVectorType(Type *&Result, bool isVector); 231264790Sbapt bool ParseFunctionType(Type *&Result); 232264790Sbapt 233264790Sbapt // Function Semantic Analysis. 234264790Sbapt class PerFunctionState { 235264790Sbapt LLParser &P; 236264790Sbapt Function &F; 237264790Sbapt std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals; 238264790Sbapt std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs; 239264790Sbapt std::vector<Value*> NumberedVals; 240264790Sbapt 241264790Sbapt /// FunctionNumber - If this is an unnamed function, this is the slot 242264790Sbapt /// number of it, otherwise it is -1. 243264790Sbapt int FunctionNumber; 244264790Sbapt public: 245264790Sbapt PerFunctionState(LLParser &p, Function &f, int FunctionNumber); 246264790Sbapt ~PerFunctionState(); 247264790Sbapt 248264790Sbapt Function &getFunction() const { return F; } 249264790Sbapt 250264790Sbapt bool FinishFunction(); 251264790Sbapt 252264790Sbapt /// GetVal - Get a value with the specified name or ID, creating a 253264790Sbapt /// forward reference record if needed. This can return null if the value 254264790Sbapt /// exists but does not have the right type. 255264790Sbapt Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc); 256264790Sbapt Value *GetVal(unsigned ID, Type *Ty, LocTy Loc); 257264790Sbapt 258264790Sbapt /// SetInstName - After an instruction is parsed and inserted into its 259264790Sbapt /// basic block, this installs its name. 260264790Sbapt bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc, 261264790Sbapt Instruction *Inst); 262264790Sbapt 263264790Sbapt /// GetBB - Get a basic block with the specified name or ID, creating a 264264790Sbapt /// forward reference record if needed. This can return null if the value 265264790Sbapt /// is not a BasicBlock. 266264790Sbapt BasicBlock *GetBB(const std::string &Name, LocTy Loc); 267264790Sbapt BasicBlock *GetBB(unsigned ID, LocTy Loc); 268264790Sbapt 269264790Sbapt /// DefineBB - Define the specified basic block, which is either named or 270264790Sbapt /// unnamed. If there is an error, this returns null otherwise it returns 271264790Sbapt /// the block being defined. 272264790Sbapt BasicBlock *DefineBB(const std::string &Name, LocTy Loc); 273264790Sbapt }; 274264790Sbapt 275264790Sbapt bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, 276264790Sbapt PerFunctionState *PFS); 277264790Sbapt 278264790Sbapt bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS); 279264790Sbapt bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) { 280264790Sbapt return ParseValue(Ty, V, &PFS); 281264790Sbapt } 282264790Sbapt bool ParseValue(Type *Ty, Value *&V, LocTy &Loc, 283264790Sbapt PerFunctionState &PFS) { 284264790Sbapt Loc = Lex.getLoc(); 285264790Sbapt return ParseValue(Ty, V, &PFS); 286264790Sbapt } 287264790Sbapt 288264790Sbapt bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS); 289264790Sbapt bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) { 290264790Sbapt return ParseTypeAndValue(V, &PFS); 291264790Sbapt } 292264790Sbapt bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) { 293264790Sbapt Loc = Lex.getLoc(); 294264790Sbapt return ParseTypeAndValue(V, PFS); 295264790Sbapt } 296264790Sbapt bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, 297264790Sbapt PerFunctionState &PFS); 298264790Sbapt bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) { 299264790Sbapt LocTy Loc; 300264790Sbapt return ParseTypeAndBasicBlock(BB, Loc, PFS); 301264790Sbapt } 302264790Sbapt 303264790Sbapt 304264790Sbapt struct ParamInfo { 305264790Sbapt LocTy Loc; 306264790Sbapt Value *V; 307264790Sbapt unsigned Attrs; 308264790Sbapt ParamInfo(LocTy loc, Value *v, unsigned attrs) 309264790Sbapt : Loc(loc), V(v), Attrs(attrs) {} 310264790Sbapt }; 311264790Sbapt bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, 312264790Sbapt PerFunctionState &PFS); 313264790Sbapt 314264790Sbapt // Constant Parsing. 315264790Sbapt bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL); 316264790Sbapt bool ParseGlobalValue(Type *Ty, Constant *&V); 317264790Sbapt bool ParseGlobalTypeAndValue(Constant *&V); 318264790Sbapt bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts); 319264790Sbapt bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS); 320264790Sbapt bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS); 321264790Sbapt bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS); 322264790Sbapt bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS); 323264790Sbapt 324264790Sbapt // Function Parsing. 325264790Sbapt struct ArgInfo { 326264790Sbapt LocTy Loc; 327264790Sbapt Type *Ty; 328264790Sbapt unsigned Attrs; 329264790Sbapt std::string Name; 330264790Sbapt ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N) 331264790Sbapt : Loc(L), Ty(ty), Attrs(Attr), Name(N) {} 332264790Sbapt }; 333264790Sbapt bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg); 334264790Sbapt bool ParseFunctionHeader(Function *&Fn, bool isDefine); 335264790Sbapt bool ParseFunctionBody(Function &Fn); 336264790Sbapt bool ParseBasicBlock(PerFunctionState &PFS); 337264790Sbapt 338264790Sbapt // Instruction Parsing. Each instruction parsing routine can return with a 339264790Sbapt // normal result, an error result, or return having eaten an extra comma. 340264790Sbapt enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 }; 341264790Sbapt int ParseInstruction(Instruction *&Inst, BasicBlock *BB, 342264790Sbapt PerFunctionState &PFS); 343264790Sbapt bool ParseCmpPredicate(unsigned &Pred, unsigned Opc); 344264790Sbapt 345264790Sbapt bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS); 346264790Sbapt bool ParseBr(Instruction *&Inst, PerFunctionState &PFS); 347264790Sbapt bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS); 348264790Sbapt bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS); 349264790Sbapt bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS); 350264790Sbapt bool ParseResume(Instruction *&Inst, PerFunctionState &PFS); 351264790Sbapt 352264790Sbapt bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc, 353264790Sbapt unsigned OperandType); 354264790Sbapt bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc); 355264790Sbapt bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc); 356264790Sbapt bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc); 357264790Sbapt bool ParseSelect(Instruction *&I, PerFunctionState &PFS); 358264790Sbapt bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS); 359264790Sbapt bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS); 360264790Sbapt bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS); 361264790Sbapt bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); 362264790Sbapt int ParsePHI(Instruction *&I, PerFunctionState &PFS); 363264790Sbapt bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS); 364264790Sbapt bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); 365264790Sbapt int ParseAlloc(Instruction *&I, PerFunctionState &PFS); 366264790Sbapt int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); 367264790Sbapt int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); 368264790Sbapt int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS); 369264790Sbapt int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS); 370264790Sbapt int ParseFence(Instruction *&I, PerFunctionState &PFS); 371264790Sbapt int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); 372264790Sbapt int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); 373264790Sbapt int ParseInsertValue(Instruction *&I, PerFunctionState &PFS); 374264790Sbapt 375264790Sbapt bool ResolveForwardRefBlockAddresses(Function *TheFn, 376264790Sbapt std::vector<std::pair<ValID, GlobalValue*> > &Refs, 377264790Sbapt PerFunctionState *PFS); 378264790Sbapt }; 379264790Sbapt} // End llvm namespace 380264790Sbapt 381264790Sbapt#endif 382264790Sbapt