1193326Sed//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
2193326Sed//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6193326Sed//
7193326Sed//===----------------------------------------------------------------------===//
8193326Sed//
9193326Sed// This file defines the NumericLiteralParser, CharLiteralParser, and
10193326Sed// StringLiteralParser interfaces.
11193326Sed//
12193326Sed//===----------------------------------------------------------------------===//
13193326Sed
14280031Sdim#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
15280031Sdim#define LLVM_CLANG_LEX_LITERALSUPPORT_H
16193326Sed
17249423Sdim#include "clang/Basic/CharInfo.h"
18226633Sdim#include "clang/Basic/LLVM.h"
19249423Sdim#include "clang/Basic/TokenKinds.h"
20201361Srdivacky#include "llvm/ADT/APFloat.h"
21309124Sdim#include "llvm/ADT/ArrayRef.h"
22193326Sed#include "llvm/ADT/SmallString.h"
23243830Sdim#include "llvm/ADT/StringRef.h"
24218893Sdim#include "llvm/Support/DataTypes.h"
25193326Sed
26193326Sednamespace clang {
27193326Sed
28226633Sdimclass DiagnosticsEngine;
29193326Sedclass Preprocessor;
30193326Sedclass Token;
31193326Sedclass SourceLocation;
32193326Sedclass TargetInfo;
33218893Sdimclass SourceManager;
34218893Sdimclass LangOptions;
35198092Srdivacky
36276479Sdim/// Copy characters from Input to Buf, expanding any UCNs.
37276479Sdimvoid expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
38276479Sdim
39193326Sed/// NumericLiteralParser - This performs strict semantic analysis of the content
40193326Sed/// of a ppnumber, classifying it as either integer, floating, or erroneous,
41193326Sed/// determines the radix of the value and can convert it to a useful value.
42193326Sedclass NumericLiteralParser {
43193326Sed  Preprocessor &PP; // needed for diagnostics
44198092Srdivacky
45193326Sed  const char *const ThisTokBegin;
46193326Sed  const char *const ThisTokEnd;
47193326Sed  const char *DigitsBegin, *SuffixBegin; // markers
48193326Sed  const char *s; // cursor
49198092Srdivacky
50193326Sed  unsigned radix;
51198092Srdivacky
52341825Sdim  bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
53198092Srdivacky
54276479Sdim  SmallString<32> UDSuffixBuf;
55276479Sdim
56193326Sedpublic:
57243830Sdim  NumericLiteralParser(StringRef TokSpelling,
58243830Sdim                       SourceLocation TokLoc,
59243830Sdim                       Preprocessor &PP);
60288943Sdim  bool hadError : 1;
61288943Sdim  bool isUnsigned : 1;
62288943Sdim  bool isLong : 1;          // This is *not* set for long long.
63288943Sdim  bool isLongLong : 1;
64309124Sdim  bool isHalf : 1;          // 1.0h
65288943Sdim  bool isFloat : 1;         // 1.0f
66288943Sdim  bool isImaginary : 1;     // 1.0i
67327952Sdim  bool isFloat16 : 1;       // 1.0f16
68309124Sdim  bool isFloat128 : 1;      // 1.0q
69288943Sdim  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
70198092Srdivacky
71341825Sdim  bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
72341825Sdim  bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
73341825Sdim
74341825Sdim  bool isFixedPointLiteral() const { return saw_fixed_point_suffix; }
75341825Sdim
76198092Srdivacky  bool isIntegerLiteral() const {
77341825Sdim    return !saw_period && !saw_exponent && !isFixedPointLiteral();
78193326Sed  }
79193326Sed  bool isFloatingLiteral() const {
80341825Sdim    return (saw_period || saw_exponent) && !isFixedPointLiteral();
81193326Sed  }
82234353Sdim
83234353Sdim  bool hasUDSuffix() const {
84234353Sdim    return saw_ud_suffix;
85193326Sed  }
86234353Sdim  StringRef getUDSuffix() const {
87234353Sdim    assert(saw_ud_suffix);
88276479Sdim    return UDSuffixBuf;
89234353Sdim  }
90234353Sdim  unsigned getUDSuffixOffset() const {
91234353Sdim    assert(saw_ud_suffix);
92234353Sdim    return SuffixBegin - ThisTokBegin;
93234353Sdim  }
94198092Srdivacky
95261991Sdim  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
96261991Sdim
97193326Sed  unsigned getRadix() const { return radix; }
98198092Srdivacky
99193326Sed  /// GetIntegerValue - Convert this numeric literal value to an APInt that
100193326Sed  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
101193326Sed  /// value read is larger than the APInt's bits will hold), set Val to the low
102193326Sed  /// bits of the result and return true.  Otherwise, return false.
103193326Sed  bool GetIntegerValue(llvm::APInt &Val);
104198092Srdivacky
105193326Sed  /// GetFloatValue - Convert this numeric literal to a floating value, using
106193326Sed  /// the specified APFloat fltSemantics (specifying float, double, etc).
107193326Sed  /// The optional bool isExact (passed-by-reference) has its value
108193326Sed  /// set to true if the returned APFloat can represent the number in the
109193326Sed  /// literal exactly, and false otherwise.
110201361Srdivacky  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
111193326Sed
112341825Sdim  /// GetFixedPointValue - Convert this numeric literal value into a
113341825Sdim  /// scaled integer that represents this value. Returns true if an overflow
114341825Sdim  /// occurred when calculating the integral part of the scaled integer or
115341825Sdim  /// calculating the digit sequence of the exponent.
116341825Sdim  bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
117341825Sdim
118198092Srdivackyprivate:
119198092Srdivacky
120193326Sed  void ParseNumberStartingWithZero(SourceLocation TokLoc);
121309124Sdim  void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
122198092Srdivacky
123261991Sdim  static bool isDigitSeparator(char C) { return C == '\''; }
124261991Sdim
125341825Sdim  /// Determine whether the sequence of characters [Start, End) contains
126309124Sdim  /// any real digits (not digit separators).
127309124Sdim  bool containsDigits(const char *Start, const char *End) {
128309124Sdim    return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0]));
129309124Sdim  }
130309124Sdim
131261991Sdim  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
132261991Sdim
133341825Sdim  /// Ensure that we don't have a digit separator here.
134261991Sdim  void checkSeparator(SourceLocation TokLoc, const char *Pos,
135261991Sdim                      CheckSeparatorKind IsAfterDigits);
136261991Sdim
137193326Sed  /// SkipHexDigits - Read and skip over any hex digits, up to End.
138193326Sed  /// Return a pointer to the first non-hex digit or End.
139193326Sed  const char *SkipHexDigits(const char *ptr) {
140261991Sdim    while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
141193326Sed      ptr++;
142193326Sed    return ptr;
143193326Sed  }
144198092Srdivacky
145193326Sed  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
146193326Sed  /// Return a pointer to the first non-hex digit or End.
147193326Sed  const char *SkipOctalDigits(const char *ptr) {
148261991Sdim    while (ptr != ThisTokEnd &&
149261991Sdim           ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
150193326Sed      ptr++;
151193326Sed    return ptr;
152193326Sed  }
153198092Srdivacky
154193326Sed  /// SkipDigits - Read and skip over any digits, up to End.
155193326Sed  /// Return a pointer to the first non-hex digit or End.
156193326Sed  const char *SkipDigits(const char *ptr) {
157261991Sdim    while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
158193326Sed      ptr++;
159193326Sed    return ptr;
160193326Sed  }
161198092Srdivacky
162193326Sed  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
163193326Sed  /// Return a pointer to the first non-binary digit or End.
164193326Sed  const char *SkipBinaryDigits(const char *ptr) {
165261991Sdim    while (ptr != ThisTokEnd &&
166261991Sdim           (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
167193326Sed      ptr++;
168193326Sed    return ptr;
169193326Sed  }
170198092Srdivacky
171193326Sed};
172193326Sed
173193326Sed/// CharLiteralParser - Perform interpretation and semantic analysis of a
174193326Sed/// character literal.
175193326Sedclass CharLiteralParser {
176193326Sed  uint64_t Value;
177226633Sdim  tok::TokenKind Kind;
178193326Sed  bool IsMultiChar;
179193326Sed  bool HadError;
180234353Sdim  SmallString<32> UDSuffixBuf;
181234353Sdim  unsigned UDSuffixOffset;
182193326Sedpublic:
183193326Sed  CharLiteralParser(const char *begin, const char *end,
184226633Sdim                    SourceLocation Loc, Preprocessor &PP,
185226633Sdim                    tok::TokenKind kind);
186193326Sed
187193326Sed  bool hadError() const { return HadError; }
188226633Sdim  bool isAscii() const { return Kind == tok::char_constant; }
189226633Sdim  bool isWide() const { return Kind == tok::wide_char_constant; }
190296417Sdim  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
191226633Sdim  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
192226633Sdim  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
193193326Sed  bool isMultiChar() const { return IsMultiChar; }
194193326Sed  uint64_t getValue() const { return Value; }
195234353Sdim  StringRef getUDSuffix() const { return UDSuffixBuf; }
196234353Sdim  unsigned getUDSuffixOffset() const {
197234353Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
198234353Sdim    return UDSuffixOffset;
199234353Sdim  }
200193326Sed};
201193326Sed
202193326Sed/// StringLiteralParser - This decodes string escape characters and performs
203193326Sed/// wide string analysis and Translation Phase #6 (concatenation of string
204193326Sed/// literals) (C99 5.1.1.2p1).
205193326Sedclass StringLiteralParser {
206218893Sdim  const SourceManager &SM;
207218893Sdim  const LangOptions &Features;
208218893Sdim  const TargetInfo &Target;
209226633Sdim  DiagnosticsEngine *Diags;
210341825Sdim
211193326Sed  unsigned MaxTokenLength;
212193326Sed  unsigned SizeBound;
213226633Sdim  unsigned CharByteWidth;
214226633Sdim  tok::TokenKind Kind;
215234353Sdim  SmallString<512> ResultBuf;
216193326Sed  char *ResultPtr; // cursor
217234353Sdim  SmallString<32> UDSuffixBuf;
218234353Sdim  unsigned UDSuffixToken;
219234353Sdim  unsigned UDSuffixOffset;
220193326Sedpublic:
221276479Sdim  StringLiteralParser(ArrayRef<Token> StringToks,
222208600Srdivacky                      Preprocessor &PP, bool Complain = true);
223276479Sdim  StringLiteralParser(ArrayRef<Token> StringToks,
224218893Sdim                      const SourceManager &sm, const LangOptions &features,
225276479Sdim                      const TargetInfo &target,
226276479Sdim                      DiagnosticsEngine *diags = nullptr)
227223017Sdim    : SM(sm), Features(features), Target(target), Diags(diags),
228226633Sdim      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
229226633Sdim      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
230276479Sdim    init(StringToks);
231218893Sdim  }
232218893Sdim
233341825Sdim
234193326Sed  bool hadError;
235193326Sed  bool Pascal;
236198092Srdivacky
237226633Sdim  StringRef GetString() const {
238226633Sdim    return StringRef(ResultBuf.data(), GetStringLength());
239224145Sdim  }
240223017Sdim  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
241193326Sed
242193326Sed  unsigned GetNumStringChars() const {
243226633Sdim    return GetStringLength() / CharByteWidth;
244198092Srdivacky  }
245193326Sed  /// getOffsetOfStringByte - This function returns the offset of the
246193326Sed  /// specified byte of the string data represented by Token.  This handles
247193326Sed  /// advancing over escape sequences in the string.
248218893Sdim  ///
249218893Sdim  /// If the Diagnostics pointer is non-null, then this will do semantic
250218893Sdim  /// checking of the string literal and emit errors and warnings.
251218893Sdim  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
252226633Sdim
253234353Sdim  bool isAscii() const { return Kind == tok::string_literal; }
254234353Sdim  bool isWide() const { return Kind == tok::wide_string_literal; }
255234353Sdim  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
256234353Sdim  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
257234353Sdim  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
258234353Sdim  bool isPascal() const { return Pascal; }
259226633Sdim
260234353Sdim  StringRef getUDSuffix() const { return UDSuffixBuf; }
261234353Sdim
262234353Sdim  /// Get the index of a token containing a ud-suffix.
263234353Sdim  unsigned getUDSuffixToken() const {
264234353Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
265234353Sdim    return UDSuffixToken;
266234353Sdim  }
267234353Sdim  /// Get the spelling offset of the first byte of the ud-suffix.
268234353Sdim  unsigned getUDSuffixOffset() const {
269234353Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
270234353Sdim    return UDSuffixOffset;
271234353Sdim  }
272234353Sdim
273314564Sdim  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
274314564Sdim
275218893Sdimprivate:
276276479Sdim  void init(ArrayRef<Token> StringToks);
277243830Sdim  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
278243830Sdim                          StringRef Fragment);
279239462Sdim  void DiagnoseLexingError(SourceLocation Loc);
280193326Sed};
281198092Srdivacky
282193326Sed}  // end namespace clang
283193326Sed
284193326Sed#endif
285