// LiteralSupport.h revision 280031
//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the NumericLiteralParser, CharLiteralParser, and
// StringLiteralParser interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
#define LLVM_CLANG_LEX_LITERALSUPPORT_H

#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>

26193326Sednamespace clang {
27193326Sed
28226633Sdimclass DiagnosticsEngine;
29193326Sedclass Preprocessor;
30193326Sedclass Token;
31193326Sedclass SourceLocation;
32193326Sedclass TargetInfo;
33218893Sdimclass SourceManager;
34218893Sdimclass LangOptions;
35198092Srdivacky
36276479Sdim/// Copy characters from Input to Buf, expanding any UCNs.
37276479Sdimvoid expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
38276479Sdim
39193326Sed/// NumericLiteralParser - This performs strict semantic analysis of the content
40193326Sed/// of a ppnumber, classifying it as either integer, floating, or erroneous,
41193326Sed/// determines the radix of the value and can convert it to a useful value.
42193326Sedclass NumericLiteralParser {
43193326Sed  Preprocessor &PP; // needed for diagnostics
44198092Srdivacky
45193326Sed  const char *const ThisTokBegin;
46193326Sed  const char *const ThisTokEnd;
47193326Sed  const char *DigitsBegin, *SuffixBegin; // markers
48193326Sed  const char *s; // cursor
49198092Srdivacky
50193326Sed  unsigned radix;
51198092Srdivacky
52234353Sdim  bool saw_exponent, saw_period, saw_ud_suffix;
53198092Srdivacky
54276479Sdim  SmallString<32> UDSuffixBuf;
55276479Sdim
56193326Sedpublic:
57243830Sdim  NumericLiteralParser(StringRef TokSpelling,
58243830Sdim                       SourceLocation TokLoc,
59243830Sdim                       Preprocessor &PP);
60193326Sed  bool hadError;
61193326Sed  bool isUnsigned;
62193326Sed  bool isLong;        // This is *not* set for long long.
63193326Sed  bool isLongLong;
64193326Sed  bool isFloat;       // 1.0f
65193326Sed  bool isImaginary;   // 1.0i
66276479Sdim  uint8_t MicrosoftInteger;  // Microsoft suffix extension i8, i16, i32, or i64.
67198092Srdivacky
68198092Srdivacky  bool isIntegerLiteral() const {
69193326Sed    return !saw_period && !saw_exponent;
70193326Sed  }
71193326Sed  bool isFloatingLiteral() const {
72193326Sed    return saw_period || saw_exponent;
73193326Sed  }
74234353Sdim
75234353Sdim  bool hasUDSuffix() const {
76234353Sdim    return saw_ud_suffix;
77193326Sed  }
78234353Sdim  StringRef getUDSuffix() const {
79234353Sdim    assert(saw_ud_suffix);
80276479Sdim    return UDSuffixBuf;
81234353Sdim  }
82234353Sdim  unsigned getUDSuffixOffset() const {
83234353Sdim    assert(saw_ud_suffix);
84234353Sdim    return SuffixBegin - ThisTokBegin;
85234353Sdim  }
86198092Srdivacky
87261991Sdim  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
88261991Sdim
89193326Sed  unsigned getRadix() const { return radix; }
90198092Srdivacky
91193326Sed  /// GetIntegerValue - Convert this numeric literal value to an APInt that
92193326Sed  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
93193326Sed  /// value read is larger than the APInt's bits will hold), set Val to the low
94193326Sed  /// bits of the result and return true.  Otherwise, return false.
95193326Sed  bool GetIntegerValue(llvm::APInt &Val);
96198092Srdivacky
97193326Sed  /// GetFloatValue - Convert this numeric literal to a floating value, using
98193326Sed  /// the specified APFloat fltSemantics (specifying float, double, etc).
99193326Sed  /// The optional bool isExact (passed-by-reference) has its value
100193326Sed  /// set to true if the returned APFloat can represent the number in the
101193326Sed  /// literal exactly, and false otherwise.
102201361Srdivacky  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
103193326Sed
104198092Srdivackyprivate:
105198092Srdivacky
106193326Sed  void ParseNumberStartingWithZero(SourceLocation TokLoc);
107198092Srdivacky
108261991Sdim  static bool isDigitSeparator(char C) { return C == '\''; }
109261991Sdim
110261991Sdim  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
111261991Sdim
112261991Sdim  /// \brief Ensure that we don't have a digit separator here.
113261991Sdim  void checkSeparator(SourceLocation TokLoc, const char *Pos,
114261991Sdim                      CheckSeparatorKind IsAfterDigits);
115261991Sdim
116193326Sed  /// SkipHexDigits - Read and skip over any hex digits, up to End.
117193326Sed  /// Return a pointer to the first non-hex digit or End.
118193326Sed  const char *SkipHexDigits(const char *ptr) {
119261991Sdim    while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
120193326Sed      ptr++;
121193326Sed    return ptr;
122193326Sed  }
123198092Srdivacky
124193326Sed  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
125193326Sed  /// Return a pointer to the first non-hex digit or End.
126193326Sed  const char *SkipOctalDigits(const char *ptr) {
127261991Sdim    while (ptr != ThisTokEnd &&
128261991Sdim           ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
129193326Sed      ptr++;
130193326Sed    return ptr;
131193326Sed  }
132198092Srdivacky
133193326Sed  /// SkipDigits - Read and skip over any digits, up to End.
134193326Sed  /// Return a pointer to the first non-hex digit or End.
135193326Sed  const char *SkipDigits(const char *ptr) {
136261991Sdim    while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
137193326Sed      ptr++;
138193326Sed    return ptr;
139193326Sed  }
140198092Srdivacky
141193326Sed  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
142193326Sed  /// Return a pointer to the first non-binary digit or End.
143193326Sed  const char *SkipBinaryDigits(const char *ptr) {
144261991Sdim    while (ptr != ThisTokEnd &&
145261991Sdim           (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
146193326Sed      ptr++;
147193326Sed    return ptr;
148193326Sed  }
149198092Srdivacky
150193326Sed};
151193326Sed
152193326Sed/// CharLiteralParser - Perform interpretation and semantic analysis of a
153193326Sed/// character literal.
154193326Sedclass CharLiteralParser {
155193326Sed  uint64_t Value;
156226633Sdim  tok::TokenKind Kind;
157193326Sed  bool IsMultiChar;
158193326Sed  bool HadError;
159234353Sdim  SmallString<32> UDSuffixBuf;
160234353Sdim  unsigned UDSuffixOffset;
161193326Sedpublic:
162193326Sed  CharLiteralParser(const char *begin, const char *end,
163226633Sdim                    SourceLocation Loc, Preprocessor &PP,
164226633Sdim                    tok::TokenKind kind);
165193326Sed
166193326Sed  bool hadError() const { return HadError; }
167226633Sdim  bool isAscii() const { return Kind == tok::char_constant; }
168226633Sdim  bool isWide() const { return Kind == tok::wide_char_constant; }
169226633Sdim  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
170226633Sdim  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
171193326Sed  bool isMultiChar() const { return IsMultiChar; }
172193326Sed  uint64_t getValue() const { return Value; }
173234353Sdim  StringRef getUDSuffix() const { return UDSuffixBuf; }
174234353Sdim  unsigned getUDSuffixOffset() const {
175234353Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
176234353Sdim    return UDSuffixOffset;
177234353Sdim  }
178193326Sed};
179193326Sed
180193326Sed/// StringLiteralParser - This decodes string escape characters and performs
181193326Sed/// wide string analysis and Translation Phase #6 (concatenation of string
182193326Sed/// literals) (C99 5.1.1.2p1).
183193326Sedclass StringLiteralParser {
184218893Sdim  const SourceManager &SM;
185218893Sdim  const LangOptions &Features;
186218893Sdim  const TargetInfo &Target;
187226633Sdim  DiagnosticsEngine *Diags;
188218893Sdim
189193326Sed  unsigned MaxTokenLength;
190193326Sed  unsigned SizeBound;
191226633Sdim  unsigned CharByteWidth;
192226633Sdim  tok::TokenKind Kind;
193234353Sdim  SmallString<512> ResultBuf;
194193326Sed  char *ResultPtr; // cursor
195234353Sdim  SmallString<32> UDSuffixBuf;
196234353Sdim  unsigned UDSuffixToken;
197234353Sdim  unsigned UDSuffixOffset;
198193326Sedpublic:
199276479Sdim  StringLiteralParser(ArrayRef<Token> StringToks,
200208600Srdivacky                      Preprocessor &PP, bool Complain = true);
201276479Sdim  StringLiteralParser(ArrayRef<Token> StringToks,
202218893Sdim                      const SourceManager &sm, const LangOptions &features,
203276479Sdim                      const TargetInfo &target,
204276479Sdim                      DiagnosticsEngine *diags = nullptr)
205223017Sdim    : SM(sm), Features(features), Target(target), Diags(diags),
206226633Sdim      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
207226633Sdim      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
208276479Sdim    init(StringToks);
209218893Sdim  }
210218893Sdim
211218893Sdim
212193326Sed  bool hadError;
213193326Sed  bool Pascal;
214198092Srdivacky
215226633Sdim  StringRef GetString() const {
216226633Sdim    return StringRef(ResultBuf.data(), GetStringLength());
217224145Sdim  }
218223017Sdim  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
219193326Sed
220193326Sed  unsigned GetNumStringChars() const {
221226633Sdim    return GetStringLength() / CharByteWidth;
222198092Srdivacky  }
223193326Sed  /// getOffsetOfStringByte - This function returns the offset of the
224193326Sed  /// specified byte of the string data represented by Token.  This handles
225193326Sed  /// advancing over escape sequences in the string.
226218893Sdim  ///
227218893Sdim  /// If the Diagnostics pointer is non-null, then this will do semantic
228218893Sdim  /// checking of the string literal and emit errors and warnings.
229218893Sdim  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
230226633Sdim
231234353Sdim  bool isAscii() const { return Kind == tok::string_literal; }
232234353Sdim  bool isWide() const { return Kind == tok::wide_string_literal; }
233234353Sdim  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
234234353Sdim  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
235234353Sdim  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
236234353Sdim  bool isPascal() const { return Pascal; }
237226633Sdim
238234353Sdim  StringRef getUDSuffix() const { return UDSuffixBuf; }
239234353Sdim
240234353Sdim  /// Get the index of a token containing a ud-suffix.
241234353Sdim  unsigned getUDSuffixToken() const {
242234353Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
243234353Sdim    return UDSuffixToken;
244234353Sdim  }
245234353Sdim  /// Get the spelling offset of the first byte of the ud-suffix.
246234353Sdim  unsigned getUDSuffixOffset() const {
247234353Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
248234353Sdim    return UDSuffixOffset;
249234353Sdim  }
250234353Sdim
251218893Sdimprivate:
252276479Sdim  void init(ArrayRef<Token> StringToks);
253243830Sdim  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
254243830Sdim                          StringRef Fragment);
255239462Sdim  void DiagnoseLexingError(SourceLocation Loc);
256193326Sed};
257198092Srdivacky
258193326Sed}  // end namespace clang
259193326Sed
260193326Sed#endif
261