//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the NumericLiteralParser, CharLiteralParser, and
// StringLiteralParser interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef CLANG_LITERALSUPPORT_H
#define CLANG_LITERALSUPPORT_H

#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>

namespace clang {

class DiagnosticsEngine;
class Preprocessor;
class Token;
class SourceLocation;
class TargetInfo;
class SourceManager;
class LangOptions;

/// NumericLiteralParser - This performs strict semantic analysis of the content
/// of a ppnumber, classifying it as either integer, floating, or erroneous,
/// determines the radix of the value and can convert it to a useful value.
39193326Sedclass NumericLiteralParser { 40193326Sed Preprocessor &PP; // needed for diagnostics 41198092Srdivacky 42193326Sed const char *const ThisTokBegin; 43193326Sed const char *const ThisTokEnd; 44193326Sed const char *DigitsBegin, *SuffixBegin; // markers 45193326Sed const char *s; // cursor 46198092Srdivacky 47193326Sed unsigned radix; 48198092Srdivacky 49235633Sdim bool saw_exponent, saw_period, saw_ud_suffix; 50198092Srdivacky 51193326Sedpublic: 52245431Sdim NumericLiteralParser(StringRef TokSpelling, 53245431Sdim SourceLocation TokLoc, 54245431Sdim Preprocessor &PP); 55193326Sed bool hadError; 56193326Sed bool isUnsigned; 57193326Sed bool isLong; // This is *not* set for long long. 58193326Sed bool isLongLong; 59193326Sed bool isFloat; // 1.0f 60193326Sed bool isImaginary; // 1.0i 61198092Srdivacky bool isMicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 62198092Srdivacky 63198092Srdivacky bool isIntegerLiteral() const { 64193326Sed return !saw_period && !saw_exponent; 65193326Sed } 66193326Sed bool isFloatingLiteral() const { 67193326Sed return saw_period || saw_exponent; 68193326Sed } 69235633Sdim 70235633Sdim bool hasUDSuffix() const { 71235633Sdim return saw_ud_suffix; 72193326Sed } 73235633Sdim StringRef getUDSuffix() const { 74235633Sdim assert(saw_ud_suffix); 75235633Sdim return StringRef(SuffixBegin, ThisTokEnd - SuffixBegin); 76235633Sdim } 77235633Sdim unsigned getUDSuffixOffset() const { 78235633Sdim assert(saw_ud_suffix); 79235633Sdim return SuffixBegin - ThisTokBegin; 80235633Sdim } 81198092Srdivacky 82263509Sdim static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 83263509Sdim 84193326Sed unsigned getRadix() const { return radix; } 85198092Srdivacky 86193326Sed /// GetIntegerValue - Convert this numeric literal value to an APInt that 87193326Sed /// matches Val's input width. 
If there is an overflow (i.e., if the unsigned 88193326Sed /// value read is larger than the APInt's bits will hold), set Val to the low 89193326Sed /// bits of the result and return true. Otherwise, return false. 90193326Sed bool GetIntegerValue(llvm::APInt &Val); 91198092Srdivacky 92193326Sed /// GetFloatValue - Convert this numeric literal to a floating value, using 93193326Sed /// the specified APFloat fltSemantics (specifying float, double, etc). 94193326Sed /// The optional bool isExact (passed-by-reference) has its value 95193326Sed /// set to true if the returned APFloat can represent the number in the 96193326Sed /// literal exactly, and false otherwise. 97201361Srdivacky llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 98193326Sed 99198092Srdivackyprivate: 100198092Srdivacky 101193326Sed void ParseNumberStartingWithZero(SourceLocation TokLoc); 102198092Srdivacky 103263509Sdim static bool isDigitSeparator(char C) { return C == '\''; } 104263509Sdim 105263509Sdim enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; 106263509Sdim 107263509Sdim /// \brief Ensure that we don't have a digit separator here. 108263509Sdim void checkSeparator(SourceLocation TokLoc, const char *Pos, 109263509Sdim CheckSeparatorKind IsAfterDigits); 110263509Sdim 111193326Sed /// SkipHexDigits - Read and skip over any hex digits, up to End. 112193326Sed /// Return a pointer to the first non-hex digit or End. 113193326Sed const char *SkipHexDigits(const char *ptr) { 114263509Sdim while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) 115193326Sed ptr++; 116193326Sed return ptr; 117193326Sed } 118198092Srdivacky 119193326Sed /// SkipOctalDigits - Read and skip over any octal digits, up to End. 120193326Sed /// Return a pointer to the first non-hex digit or End. 
121193326Sed const char *SkipOctalDigits(const char *ptr) { 122263509Sdim while (ptr != ThisTokEnd && 123263509Sdim ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) 124193326Sed ptr++; 125193326Sed return ptr; 126193326Sed } 127198092Srdivacky 128193326Sed /// SkipDigits - Read and skip over any digits, up to End. 129193326Sed /// Return a pointer to the first non-hex digit or End. 130193326Sed const char *SkipDigits(const char *ptr) { 131263509Sdim while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) 132193326Sed ptr++; 133193326Sed return ptr; 134193326Sed } 135198092Srdivacky 136193326Sed /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 137193326Sed /// Return a pointer to the first non-binary digit or End. 138193326Sed const char *SkipBinaryDigits(const char *ptr) { 139263509Sdim while (ptr != ThisTokEnd && 140263509Sdim (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) 141193326Sed ptr++; 142193326Sed return ptr; 143193326Sed } 144198092Srdivacky 145193326Sed}; 146193326Sed 147193326Sed/// CharLiteralParser - Perform interpretation and semantic analysis of a 148193326Sed/// character literal. 
149193326Sedclass CharLiteralParser { 150193326Sed uint64_t Value; 151226890Sdim tok::TokenKind Kind; 152193326Sed bool IsMultiChar; 153193326Sed bool HadError; 154235633Sdim SmallString<32> UDSuffixBuf; 155235633Sdim unsigned UDSuffixOffset; 156193326Sedpublic: 157193326Sed CharLiteralParser(const char *begin, const char *end, 158226890Sdim SourceLocation Loc, Preprocessor &PP, 159226890Sdim tok::TokenKind kind); 160193326Sed 161193326Sed bool hadError() const { return HadError; } 162226890Sdim bool isAscii() const { return Kind == tok::char_constant; } 163226890Sdim bool isWide() const { return Kind == tok::wide_char_constant; } 164226890Sdim bool isUTF16() const { return Kind == tok::utf16_char_constant; } 165226890Sdim bool isUTF32() const { return Kind == tok::utf32_char_constant; } 166193326Sed bool isMultiChar() const { return IsMultiChar; } 167193326Sed uint64_t getValue() const { return Value; } 168235633Sdim StringRef getUDSuffix() const { return UDSuffixBuf; } 169235633Sdim unsigned getUDSuffixOffset() const { 170235633Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 171235633Sdim return UDSuffixOffset; 172235633Sdim } 173193326Sed}; 174193326Sed 175193326Sed/// StringLiteralParser - This decodes string escape characters and performs 176193326Sed/// wide string analysis and Translation Phase #6 (concatenation of string 177193326Sed/// literals) (C99 5.1.1.2p1). 
178193326Sedclass StringLiteralParser { 179218893Sdim const SourceManager &SM; 180218893Sdim const LangOptions &Features; 181218893Sdim const TargetInfo &Target; 182226890Sdim DiagnosticsEngine *Diags; 183218893Sdim 184193326Sed unsigned MaxTokenLength; 185193326Sed unsigned SizeBound; 186226890Sdim unsigned CharByteWidth; 187226890Sdim tok::TokenKind Kind; 188235633Sdim SmallString<512> ResultBuf; 189193326Sed char *ResultPtr; // cursor 190235633Sdim SmallString<32> UDSuffixBuf; 191235633Sdim unsigned UDSuffixToken; 192235633Sdim unsigned UDSuffixOffset; 193193326Sedpublic: 194193326Sed StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 195208600Srdivacky Preprocessor &PP, bool Complain = true); 196218893Sdim StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 197218893Sdim const SourceManager &sm, const LangOptions &features, 198226890Sdim const TargetInfo &target, DiagnosticsEngine *diags = 0) 199223017Sdim : SM(sm), Features(features), Target(target), Diags(diags), 200226890Sdim MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 201226890Sdim ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { 202218893Sdim init(StringToks, NumStringToks); 203218893Sdim } 204218893Sdim 205218893Sdim 206193326Sed bool hadError; 207193326Sed bool Pascal; 208198092Srdivacky 209226890Sdim StringRef GetString() const { 210226890Sdim return StringRef(ResultBuf.data(), GetStringLength()); 211224145Sdim } 212223017Sdim unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 213193326Sed 214193326Sed unsigned GetNumStringChars() const { 215226890Sdim return GetStringLength() / CharByteWidth; 216198092Srdivacky } 217193326Sed /// getOffsetOfStringByte - This function returns the offset of the 218193326Sed /// specified byte of the string data represented by Token. This handles 219193326Sed /// advancing over escape sequences in the string. 
220218893Sdim /// 221218893Sdim /// If the Diagnostics pointer is non-null, then this will do semantic 222218893Sdim /// checking of the string literal and emit errors and warnings. 223218893Sdim unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 224226890Sdim 225235633Sdim bool isAscii() const { return Kind == tok::string_literal; } 226235633Sdim bool isWide() const { return Kind == tok::wide_string_literal; } 227235633Sdim bool isUTF8() const { return Kind == tok::utf8_string_literal; } 228235633Sdim bool isUTF16() const { return Kind == tok::utf16_string_literal; } 229235633Sdim bool isUTF32() const { return Kind == tok::utf32_string_literal; } 230235633Sdim bool isPascal() const { return Pascal; } 231226890Sdim 232235633Sdim StringRef getUDSuffix() const { return UDSuffixBuf; } 233235633Sdim 234235633Sdim /// Get the index of a token containing a ud-suffix. 235235633Sdim unsigned getUDSuffixToken() const { 236235633Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 237235633Sdim return UDSuffixToken; 238235633Sdim } 239235633Sdim /// Get the spelling offset of the first byte of the ud-suffix. 240235633Sdim unsigned getUDSuffixOffset() const { 241235633Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 242235633Sdim return UDSuffixOffset; 243235633Sdim } 244235633Sdim 245218893Sdimprivate: 246218893Sdim void init(const Token *StringToks, unsigned NumStringToks); 247245431Sdim bool CopyStringFragment(const Token &Tok, const char *TokBegin, 248245431Sdim StringRef Fragment); 249245431Sdim void DiagnoseLexingError(SourceLocation Loc); 250193326Sed}; 251198092Srdivacky 252193326Sed} // end namespace clang 253193326Sed 254193326Sed#endif 255