LiteralSupport.h revision 280031
1193326Sed//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// 2193326Sed// 3193326Sed// The LLVM Compiler Infrastructure 4193326Sed// 5193326Sed// This file is distributed under the University of Illinois Open Source 6193326Sed// License. See LICENSE.TXT for details. 7193326Sed// 8193326Sed//===----------------------------------------------------------------------===// 9193326Sed// 10193326Sed// This file defines the NumericLiteralParser, CharLiteralParser, and 11193326Sed// StringLiteralParser interfaces. 12193326Sed// 13193326Sed//===----------------------------------------------------------------------===// 14193326Sed 15280031Sdim#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H 16280031Sdim#define LLVM_CLANG_LEX_LITERALSUPPORT_H 17193326Sed 18249423Sdim#include "clang/Basic/CharInfo.h" 19226633Sdim#include "clang/Basic/LLVM.h" 20249423Sdim#include "clang/Basic/TokenKinds.h" 21201361Srdivacky#include "llvm/ADT/APFloat.h" 22193326Sed#include "llvm/ADT/SmallString.h" 23243830Sdim#include "llvm/ADT/StringRef.h" 24218893Sdim#include "llvm/Support/DataTypes.h" 25193326Sed 26193326Sednamespace clang { 27193326Sed 28226633Sdimclass DiagnosticsEngine; 29193326Sedclass Preprocessor; 30193326Sedclass Token; 31193326Sedclass SourceLocation; 32193326Sedclass TargetInfo; 33218893Sdimclass SourceManager; 34218893Sdimclass LangOptions; 35198092Srdivacky 36276479Sdim/// Copy characters from Input to Buf, expanding any UCNs. 37276479Sdimvoid expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); 38276479Sdim 39193326Sed/// NumericLiteralParser - This performs strict semantic analysis of the content 40193326Sed/// of a ppnumber, classifying it as either integer, floating, or erroneous, 41193326Sed/// determines the radix of the value and can convert it to a useful value. 42193326Sedclass NumericLiteralParser { 43193326Sed Preprocessor &PP; // needed for diagnostics 44198092Srdivacky 45193326Sed const char *const ThisTokBegin; 46193326Sed const char *const ThisTokEnd; 47193326Sed const char *DigitsBegin, *SuffixBegin; // markers 48193326Sed const char *s; // cursor 49198092Srdivacky 50193326Sed unsigned radix; 51198092Srdivacky 52234353Sdim bool saw_exponent, saw_period, saw_ud_suffix; 53198092Srdivacky 54276479Sdim SmallString<32> UDSuffixBuf; 55276479Sdim 56193326Sedpublic: 57243830Sdim NumericLiteralParser(StringRef TokSpelling, 58243830Sdim SourceLocation TokLoc, 59243830Sdim Preprocessor &PP); 60193326Sed bool hadError; 61193326Sed bool isUnsigned; 62193326Sed bool isLong; // This is *not* set for long long. 63193326Sed bool isLongLong; 64193326Sed bool isFloat; // 1.0f 65193326Sed bool isImaginary; // 1.0i 66276479Sdim uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 67198092Srdivacky 68198092Srdivacky bool isIntegerLiteral() const { 69193326Sed return !saw_period && !saw_exponent; 70193326Sed } 71193326Sed bool isFloatingLiteral() const { 72193326Sed return saw_period || saw_exponent; 73193326Sed } 74234353Sdim 75234353Sdim bool hasUDSuffix() const { 76234353Sdim return saw_ud_suffix; 77193326Sed } 78234353Sdim StringRef getUDSuffix() const { 79234353Sdim assert(saw_ud_suffix); 80276479Sdim return UDSuffixBuf; 81234353Sdim } 82234353Sdim unsigned getUDSuffixOffset() const { 83234353Sdim assert(saw_ud_suffix); 84234353Sdim return SuffixBegin - ThisTokBegin; 85234353Sdim } 86198092Srdivacky 87261991Sdim static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 88261991Sdim 89193326Sed unsigned getRadix() const { return radix; } 90198092Srdivacky 91193326Sed /// GetIntegerValue - Convert this numeric literal value to an APInt that 92193326Sed /// matches Val's input width. If there is an overflow (i.e., if the unsigned 93193326Sed /// value read is larger than the APInt's bits will hold), set Val to the low 94193326Sed /// bits of the result and return true. Otherwise, return false. 95193326Sed bool GetIntegerValue(llvm::APInt &Val); 96198092Srdivacky 97193326Sed /// GetFloatValue - Convert this numeric literal to a floating value, using 98193326Sed /// the specified APFloat fltSemantics (specifying float, double, etc). 99193326Sed /// The optional bool isExact (passed-by-reference) has its value 100193326Sed /// set to true if the returned APFloat can represent the number in the 101193326Sed /// literal exactly, and false otherwise. 102201361Srdivacky llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 103193326Sed 104198092Srdivackyprivate: 105198092Srdivacky 106193326Sed void ParseNumberStartingWithZero(SourceLocation TokLoc); 107198092Srdivacky 108261991Sdim static bool isDigitSeparator(char C) { return C == '\''; } 109261991Sdim 110261991Sdim enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; 111261991Sdim 112261991Sdim /// \brief Ensure that we don't have a digit separator here. 113261991Sdim void checkSeparator(SourceLocation TokLoc, const char *Pos, 114261991Sdim CheckSeparatorKind IsAfterDigits); 115261991Sdim 116193326Sed /// SkipHexDigits - Read and skip over any hex digits, up to End. 117193326Sed /// Return a pointer to the first non-hex digit or End. 118193326Sed const char *SkipHexDigits(const char *ptr) { 119261991Sdim while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) 120193326Sed ptr++; 121193326Sed return ptr; 122193326Sed } 123198092Srdivacky 124193326Sed /// SkipOctalDigits - Read and skip over any octal digits, up to End. 125193326Sed /// Return a pointer to the first non-hex digit or End. 126193326Sed const char *SkipOctalDigits(const char *ptr) { 127261991Sdim while (ptr != ThisTokEnd && 128261991Sdim ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) 129193326Sed ptr++; 130193326Sed return ptr; 131193326Sed } 132198092Srdivacky 133193326Sed /// SkipDigits - Read and skip over any digits, up to End. 134193326Sed /// Return a pointer to the first non-hex digit or End. 135193326Sed const char *SkipDigits(const char *ptr) { 136261991Sdim while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) 137193326Sed ptr++; 138193326Sed return ptr; 139193326Sed } 140198092Srdivacky 141193326Sed /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 142193326Sed /// Return a pointer to the first non-binary digit or End. 143193326Sed const char *SkipBinaryDigits(const char *ptr) { 144261991Sdim while (ptr != ThisTokEnd && 145261991Sdim (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) 146193326Sed ptr++; 147193326Sed return ptr; 148193326Sed } 149198092Srdivacky 150193326Sed}; 151193326Sed 152193326Sed/// CharLiteralParser - Perform interpretation and semantic analysis of a 153193326Sed/// character literal. 154193326Sedclass CharLiteralParser { 155193326Sed uint64_t Value; 156226633Sdim tok::TokenKind Kind; 157193326Sed bool IsMultiChar; 158193326Sed bool HadError; 159234353Sdim SmallString<32> UDSuffixBuf; 160234353Sdim unsigned UDSuffixOffset; 161193326Sedpublic: 162193326Sed CharLiteralParser(const char *begin, const char *end, 163226633Sdim SourceLocation Loc, Preprocessor &PP, 164226633Sdim tok::TokenKind kind); 165193326Sed 166193326Sed bool hadError() const { return HadError; } 167226633Sdim bool isAscii() const { return Kind == tok::char_constant; } 168226633Sdim bool isWide() const { return Kind == tok::wide_char_constant; } 169226633Sdim bool isUTF16() const { return Kind == tok::utf16_char_constant; } 170226633Sdim bool isUTF32() const { return Kind == tok::utf32_char_constant; } 171193326Sed bool isMultiChar() const { return IsMultiChar; } 172193326Sed uint64_t getValue() const { return Value; } 173234353Sdim StringRef getUDSuffix() const { return UDSuffixBuf; } 174234353Sdim unsigned getUDSuffixOffset() const { 175234353Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 176234353Sdim return UDSuffixOffset; 177234353Sdim } 178193326Sed}; 179193326Sed 180193326Sed/// StringLiteralParser - This decodes string escape characters and performs 181193326Sed/// wide string analysis and Translation Phase #6 (concatenation of string 182193326Sed/// literals) (C99 5.1.1.2p1). 183193326Sedclass StringLiteralParser { 184218893Sdim const SourceManager &SM; 185218893Sdim const LangOptions &Features; 186218893Sdim const TargetInfo &Target; 187226633Sdim DiagnosticsEngine *Diags; 188218893Sdim 189193326Sed unsigned MaxTokenLength; 190193326Sed unsigned SizeBound; 191226633Sdim unsigned CharByteWidth; 192226633Sdim tok::TokenKind Kind; 193234353Sdim SmallString<512> ResultBuf; 194193326Sed char *ResultPtr; // cursor 195234353Sdim SmallString<32> UDSuffixBuf; 196234353Sdim unsigned UDSuffixToken; 197234353Sdim unsigned UDSuffixOffset; 198193326Sedpublic: 199276479Sdim StringLiteralParser(ArrayRef<Token> StringToks, 200208600Srdivacky Preprocessor &PP, bool Complain = true); 201276479Sdim StringLiteralParser(ArrayRef<Token> StringToks, 202218893Sdim const SourceManager &sm, const LangOptions &features, 203276479Sdim const TargetInfo &target, 204276479Sdim DiagnosticsEngine *diags = nullptr) 205223017Sdim : SM(sm), Features(features), Target(target), Diags(diags), 206226633Sdim MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 207226633Sdim ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { 208276479Sdim init(StringToks); 209218893Sdim } 210218893Sdim 211218893Sdim 212193326Sed bool hadError; 213193326Sed bool Pascal; 214198092Srdivacky 215226633Sdim StringRef GetString() const { 216226633Sdim return StringRef(ResultBuf.data(), GetStringLength()); 217224145Sdim } 218223017Sdim unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 219193326Sed 220193326Sed unsigned GetNumStringChars() const { 221226633Sdim return GetStringLength() / CharByteWidth; 222198092Srdivacky } 223193326Sed /// getOffsetOfStringByte - This function returns the offset of the 224193326Sed /// specified byte of the string data represented by Token. This handles 225193326Sed /// advancing over escape sequences in the string. 226218893Sdim /// 227218893Sdim /// If the Diagnostics pointer is non-null, then this will do semantic 228218893Sdim /// checking of the string literal and emit errors and warnings. 229218893Sdim unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 230226633Sdim 231234353Sdim bool isAscii() const { return Kind == tok::string_literal; } 232234353Sdim bool isWide() const { return Kind == tok::wide_string_literal; } 233234353Sdim bool isUTF8() const { return Kind == tok::utf8_string_literal; } 234234353Sdim bool isUTF16() const { return Kind == tok::utf16_string_literal; } 235234353Sdim bool isUTF32() const { return Kind == tok::utf32_string_literal; } 236234353Sdim bool isPascal() const { return Pascal; } 237226633Sdim 238234353Sdim StringRef getUDSuffix() const { return UDSuffixBuf; } 239234353Sdim 240234353Sdim /// Get the index of a token containing a ud-suffix. 241234353Sdim unsigned getUDSuffixToken() const { 242234353Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 243234353Sdim return UDSuffixToken; 244234353Sdim } 245234353Sdim /// Get the spelling offset of the first byte of the ud-suffix. 246234353Sdim unsigned getUDSuffixOffset() const { 247234353Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 248234353Sdim return UDSuffixOffset; 249234353Sdim } 250234353Sdim 251218893Sdimprivate: 252276479Sdim void init(ArrayRef<Token> StringToks); 253243830Sdim bool CopyStringFragment(const Token &Tok, const char *TokBegin, 254243830Sdim StringRef Fragment); 255239462Sdim void DiagnoseLexingError(SourceLocation Loc); 256193326Sed}; 257198092Srdivacky 258193326Sed} // end namespace clang 259193326Sed 260193326Sed#endif 261