1193326Sed//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// 2193326Sed// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193326Sed// 7193326Sed//===----------------------------------------------------------------------===// 8193326Sed// 9193326Sed// This file defines the NumericLiteralParser, CharLiteralParser, and 10193326Sed// StringLiteralParser interfaces. 11193326Sed// 12193326Sed//===----------------------------------------------------------------------===// 13193326Sed 14280031Sdim#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H 15280031Sdim#define LLVM_CLANG_LEX_LITERALSUPPORT_H 16193326Sed 17249423Sdim#include "clang/Basic/CharInfo.h" 18226633Sdim#include "clang/Basic/LLVM.h" 19249423Sdim#include "clang/Basic/TokenKinds.h" 20201361Srdivacky#include "llvm/ADT/APFloat.h" 21309124Sdim#include "llvm/ADT/ArrayRef.h" 22193326Sed#include "llvm/ADT/SmallString.h" 23243830Sdim#include "llvm/ADT/StringRef.h" 24218893Sdim#include "llvm/Support/DataTypes.h" 25193326Sed 26193326Sednamespace clang { 27193326Sed 28226633Sdimclass DiagnosticsEngine; 29193326Sedclass Preprocessor; 30193326Sedclass Token; 31193326Sedclass SourceLocation; 32193326Sedclass TargetInfo; 33218893Sdimclass SourceManager; 34218893Sdimclass LangOptions; 35198092Srdivacky 36276479Sdim/// Copy characters from Input to Buf, expanding any UCNs. 37276479Sdimvoid expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); 38276479Sdim 39193326Sed/// NumericLiteralParser - This performs strict semantic analysis of the content 40193326Sed/// of a ppnumber, classifying it as either integer, floating, or erroneous, 41193326Sed/// determines the radix of the value and can convert it to a useful value. 42193326Sedclass NumericLiteralParser { 43193326Sed Preprocessor &PP; // needed for diagnostics 44198092Srdivacky 45193326Sed const char *const ThisTokBegin; 46193326Sed const char *const ThisTokEnd; 47193326Sed const char *DigitsBegin, *SuffixBegin; // markers 48193326Sed const char *s; // cursor 49198092Srdivacky 50193326Sed unsigned radix; 51198092Srdivacky 52341825Sdim bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix; 53198092Srdivacky 54276479Sdim SmallString<32> UDSuffixBuf; 55276479Sdim 56193326Sedpublic: 57243830Sdim NumericLiteralParser(StringRef TokSpelling, 58243830Sdim SourceLocation TokLoc, 59243830Sdim Preprocessor &PP); 60288943Sdim bool hadError : 1; 61288943Sdim bool isUnsigned : 1; 62288943Sdim bool isLong : 1; // This is *not* set for long long. 63288943Sdim bool isLongLong : 1; 64309124Sdim bool isHalf : 1; // 1.0h 65288943Sdim bool isFloat : 1; // 1.0f 66288943Sdim bool isImaginary : 1; // 1.0i 67327952Sdim bool isFloat16 : 1; // 1.0f16 68309124Sdim bool isFloat128 : 1; // 1.0q 69288943Sdim uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 70198092Srdivacky 71341825Sdim bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr 72341825Sdim bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk 73341825Sdim 74341825Sdim bool isFixedPointLiteral() const { return saw_fixed_point_suffix; } 75341825Sdim 76198092Srdivacky bool isIntegerLiteral() const { 77341825Sdim return !saw_period && !saw_exponent && !isFixedPointLiteral(); 78193326Sed } 79193326Sed bool isFloatingLiteral() const { 80341825Sdim return (saw_period || saw_exponent) && !isFixedPointLiteral(); 81193326Sed } 82234353Sdim 83234353Sdim bool hasUDSuffix() const { 84234353Sdim return saw_ud_suffix; 85193326Sed } 86234353Sdim StringRef getUDSuffix() const { 87234353Sdim assert(saw_ud_suffix); 88276479Sdim return UDSuffixBuf; 89234353Sdim } 90234353Sdim unsigned getUDSuffixOffset() const { 91234353Sdim assert(saw_ud_suffix); 92234353Sdim return SuffixBegin - ThisTokBegin; 93234353Sdim } 94198092Srdivacky 95261991Sdim static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 96261991Sdim 97193326Sed unsigned getRadix() const { return radix; } 98198092Srdivacky 99193326Sed /// GetIntegerValue - Convert this numeric literal value to an APInt that 100193326Sed /// matches Val's input width. If there is an overflow (i.e., if the unsigned 101193326Sed /// value read is larger than the APInt's bits will hold), set Val to the low 102193326Sed /// bits of the result and return true. Otherwise, return false. 103193326Sed bool GetIntegerValue(llvm::APInt &Val); 104198092Srdivacky 105193326Sed /// GetFloatValue - Convert this numeric literal to a floating value, using 106193326Sed /// the specified APFloat fltSemantics (specifying float, double, etc). 107193326Sed /// The optional bool isExact (passed-by-reference) has its value 108193326Sed /// set to true if the returned APFloat can represent the number in the 109193326Sed /// literal exactly, and false otherwise. 110201361Srdivacky llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 111193326Sed 112341825Sdim /// GetFixedPointValue - Convert this numeric literal value into a 113341825Sdim /// scaled integer that represents this value. Returns true if an overflow 114341825Sdim /// occurred when calculating the integral part of the scaled integer or 115341825Sdim /// calculating the digit sequence of the exponent. 116341825Sdim bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale); 117341825Sdim 118198092Srdivackyprivate: 119198092Srdivacky 120193326Sed void ParseNumberStartingWithZero(SourceLocation TokLoc); 121309124Sdim void ParseDecimalOrOctalCommon(SourceLocation TokLoc); 122198092Srdivacky 123261991Sdim static bool isDigitSeparator(char C) { return C == '\''; } 124261991Sdim 125341825Sdim /// Determine whether the sequence of characters [Start, End) contains 126309124Sdim /// any real digits (not digit separators). 127309124Sdim bool containsDigits(const char *Start, const char *End) { 128309124Sdim return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0])); 129309124Sdim } 130309124Sdim 131261991Sdim enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; 132261991Sdim 133341825Sdim /// Ensure that we don't have a digit separator here. 134261991Sdim void checkSeparator(SourceLocation TokLoc, const char *Pos, 135261991Sdim CheckSeparatorKind IsAfterDigits); 136261991Sdim 137193326Sed /// SkipHexDigits - Read and skip over any hex digits, up to End. 138193326Sed /// Return a pointer to the first non-hex digit or End. 139193326Sed const char *SkipHexDigits(const char *ptr) { 140261991Sdim while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) 141193326Sed ptr++; 142193326Sed return ptr; 143193326Sed } 144198092Srdivacky 145193326Sed /// SkipOctalDigits - Read and skip over any octal digits, up to End. 146193326Sed /// Return a pointer to the first non-hex digit or End. 147193326Sed const char *SkipOctalDigits(const char *ptr) { 148261991Sdim while (ptr != ThisTokEnd && 149261991Sdim ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) 150193326Sed ptr++; 151193326Sed return ptr; 152193326Sed } 153198092Srdivacky 154193326Sed /// SkipDigits - Read and skip over any digits, up to End. 155193326Sed /// Return a pointer to the first non-hex digit or End. 156193326Sed const char *SkipDigits(const char *ptr) { 157261991Sdim while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) 158193326Sed ptr++; 159193326Sed return ptr; 160193326Sed } 161198092Srdivacky 162193326Sed /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 163193326Sed /// Return a pointer to the first non-binary digit or End. 164193326Sed const char *SkipBinaryDigits(const char *ptr) { 165261991Sdim while (ptr != ThisTokEnd && 166261991Sdim (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) 167193326Sed ptr++; 168193326Sed return ptr; 169193326Sed } 170198092Srdivacky 171193326Sed}; 172193326Sed 173193326Sed/// CharLiteralParser - Perform interpretation and semantic analysis of a 174193326Sed/// character literal. 175193326Sedclass CharLiteralParser { 176193326Sed uint64_t Value; 177226633Sdim tok::TokenKind Kind; 178193326Sed bool IsMultiChar; 179193326Sed bool HadError; 180234353Sdim SmallString<32> UDSuffixBuf; 181234353Sdim unsigned UDSuffixOffset; 182193326Sedpublic: 183193326Sed CharLiteralParser(const char *begin, const char *end, 184226633Sdim SourceLocation Loc, Preprocessor &PP, 185226633Sdim tok::TokenKind kind); 186193326Sed 187193326Sed bool hadError() const { return HadError; } 188226633Sdim bool isAscii() const { return Kind == tok::char_constant; } 189226633Sdim bool isWide() const { return Kind == tok::wide_char_constant; } 190296417Sdim bool isUTF8() const { return Kind == tok::utf8_char_constant; } 191226633Sdim bool isUTF16() const { return Kind == tok::utf16_char_constant; } 192226633Sdim bool isUTF32() const { return Kind == tok::utf32_char_constant; } 193193326Sed bool isMultiChar() const { return IsMultiChar; } 194193326Sed uint64_t getValue() const { return Value; } 195234353Sdim StringRef getUDSuffix() const { return UDSuffixBuf; } 196234353Sdim unsigned getUDSuffixOffset() const { 197234353Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 198234353Sdim return UDSuffixOffset; 199234353Sdim } 200193326Sed}; 201193326Sed 202193326Sed/// StringLiteralParser - This decodes string escape characters and performs 203193326Sed/// wide string analysis and Translation Phase #6 (concatenation of string 204193326Sed/// literals) (C99 5.1.1.2p1). 205193326Sedclass StringLiteralParser { 206218893Sdim const SourceManager &SM; 207218893Sdim const LangOptions &Features; 208218893Sdim const TargetInfo &Target; 209226633Sdim DiagnosticsEngine *Diags; 210341825Sdim 211193326Sed unsigned MaxTokenLength; 212193326Sed unsigned SizeBound; 213226633Sdim unsigned CharByteWidth; 214226633Sdim tok::TokenKind Kind; 215234353Sdim SmallString<512> ResultBuf; 216193326Sed char *ResultPtr; // cursor 217234353Sdim SmallString<32> UDSuffixBuf; 218234353Sdim unsigned UDSuffixToken; 219234353Sdim unsigned UDSuffixOffset; 220193326Sedpublic: 221276479Sdim StringLiteralParser(ArrayRef<Token> StringToks, 222208600Srdivacky Preprocessor &PP, bool Complain = true); 223276479Sdim StringLiteralParser(ArrayRef<Token> StringToks, 224218893Sdim const SourceManager &sm, const LangOptions &features, 225276479Sdim const TargetInfo &target, 226276479Sdim DiagnosticsEngine *diags = nullptr) 227223017Sdim : SM(sm), Features(features), Target(target), Diags(diags), 228226633Sdim MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 229226633Sdim ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { 230276479Sdim init(StringToks); 231218893Sdim } 232218893Sdim 233341825Sdim 234193326Sed bool hadError; 235193326Sed bool Pascal; 236198092Srdivacky 237226633Sdim StringRef GetString() const { 238226633Sdim return StringRef(ResultBuf.data(), GetStringLength()); 239224145Sdim } 240223017Sdim unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 241193326Sed 242193326Sed unsigned GetNumStringChars() const { 243226633Sdim return GetStringLength() / CharByteWidth; 244198092Srdivacky } 245193326Sed /// getOffsetOfStringByte - This function returns the offset of the 246193326Sed /// specified byte of the string data represented by Token. This handles 247193326Sed /// advancing over escape sequences in the string. 248218893Sdim /// 249218893Sdim /// If the Diagnostics pointer is non-null, then this will do semantic 250218893Sdim /// checking of the string literal and emit errors and warnings. 251218893Sdim unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 252226633Sdim 253234353Sdim bool isAscii() const { return Kind == tok::string_literal; } 254234353Sdim bool isWide() const { return Kind == tok::wide_string_literal; } 255234353Sdim bool isUTF8() const { return Kind == tok::utf8_string_literal; } 256234353Sdim bool isUTF16() const { return Kind == tok::utf16_string_literal; } 257234353Sdim bool isUTF32() const { return Kind == tok::utf32_string_literal; } 258234353Sdim bool isPascal() const { return Pascal; } 259226633Sdim 260234353Sdim StringRef getUDSuffix() const { return UDSuffixBuf; } 261234353Sdim 262234353Sdim /// Get the index of a token containing a ud-suffix. 263234353Sdim unsigned getUDSuffixToken() const { 264234353Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 265234353Sdim return UDSuffixToken; 266234353Sdim } 267234353Sdim /// Get the spelling offset of the first byte of the ud-suffix. 268234353Sdim unsigned getUDSuffixOffset() const { 269234353Sdim assert(!UDSuffixBuf.empty() && "no ud-suffix"); 270234353Sdim return UDSuffixOffset; 271234353Sdim } 272234353Sdim 273314564Sdim static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 274314564Sdim 275218893Sdimprivate: 276276479Sdim void init(ArrayRef<Token> StringToks); 277243830Sdim bool CopyStringFragment(const Token &Tok, const char *TokBegin, 278243830Sdim StringRef Fragment); 279239462Sdim void DiagnoseLexingError(SourceLocation Loc); 280193326Sed}; 281198092Srdivacky 282193326Sed} // end namespace clang 283193326Sed 284193326Sed#endif 285