1193326Sed//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
2193326Sed//
3193326Sed//                     The LLVM Compiler Infrastructure
4193326Sed//
5193326Sed// This file is distributed under the University of Illinois Open Source
6193326Sed// License. See LICENSE.TXT for details.
7193326Sed//
8193326Sed//===----------------------------------------------------------------------===//
9193326Sed//
10193326Sed// This file defines the NumericLiteralParser, CharLiteralParser, and
11193326Sed// StringLiteralParser interfaces.
12193326Sed//
13193326Sed//===----------------------------------------------------------------------===//
14193326Sed
15193326Sed#ifndef CLANG_LITERALSUPPORT_H
16193326Sed#define CLANG_LITERALSUPPORT_H
17193326Sed
18252723Sdim#include "clang/Basic/CharInfo.h"
19226890Sdim#include "clang/Basic/LLVM.h"
20252723Sdim#include "clang/Basic/TokenKinds.h"
21201361Srdivacky#include "llvm/ADT/APFloat.h"
22193326Sed#include "llvm/ADT/SmallString.h"
23245431Sdim#include "llvm/ADT/StringRef.h"
24218893Sdim#include "llvm/Support/DataTypes.h"
25193326Sed
26193326Sednamespace clang {
27193326Sed
// Forward declarations. In this header these types appear only as references,
// pointers, or parameters of function declarations, so the declarations below
// are sufficient for the code visible here.
class DiagnosticsEngine;
class LangOptions;
class Preprocessor;
class SourceLocation;
class SourceManager;
class TargetInfo;
class Token;
35198092Srdivacky
36193326Sed/// NumericLiteralParser - This performs strict semantic analysis of the content
37193326Sed/// of a ppnumber, classifying it as either integer, floating, or erroneous,
38193326Sed/// determines the radix of the value and can convert it to a useful value.
39193326Sedclass NumericLiteralParser {
40193326Sed  Preprocessor &PP; // needed for diagnostics
41198092Srdivacky
42193326Sed  const char *const ThisTokBegin;
43193326Sed  const char *const ThisTokEnd;
44193326Sed  const char *DigitsBegin, *SuffixBegin; // markers
45193326Sed  const char *s; // cursor
46198092Srdivacky
47193326Sed  unsigned radix;
48198092Srdivacky
49235633Sdim  bool saw_exponent, saw_period, saw_ud_suffix;
50198092Srdivacky
51193326Sedpublic:
52245431Sdim  NumericLiteralParser(StringRef TokSpelling,
53245431Sdim                       SourceLocation TokLoc,
54245431Sdim                       Preprocessor &PP);
55193326Sed  bool hadError;
56193326Sed  bool isUnsigned;
57193326Sed  bool isLong;        // This is *not* set for long long.
58193326Sed  bool isLongLong;
59193326Sed  bool isFloat;       // 1.0f
60193326Sed  bool isImaginary;   // 1.0i
61198092Srdivacky  bool isMicrosoftInteger;  // Microsoft suffix extension i8, i16, i32, or i64.
62198092Srdivacky
63198092Srdivacky  bool isIntegerLiteral() const {
64193326Sed    return !saw_period && !saw_exponent;
65193326Sed  }
66193326Sed  bool isFloatingLiteral() const {
67193326Sed    return saw_period || saw_exponent;
68193326Sed  }
69235633Sdim
70235633Sdim  bool hasUDSuffix() const {
71235633Sdim    return saw_ud_suffix;
72193326Sed  }
73235633Sdim  StringRef getUDSuffix() const {
74235633Sdim    assert(saw_ud_suffix);
75235633Sdim    return StringRef(SuffixBegin, ThisTokEnd - SuffixBegin);
76235633Sdim  }
77235633Sdim  unsigned getUDSuffixOffset() const {
78235633Sdim    assert(saw_ud_suffix);
79235633Sdim    return SuffixBegin - ThisTokBegin;
80235633Sdim  }
81198092Srdivacky
82263509Sdim  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
83263509Sdim
84193326Sed  unsigned getRadix() const { return radix; }
85198092Srdivacky
86193326Sed  /// GetIntegerValue - Convert this numeric literal value to an APInt that
87193326Sed  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
88193326Sed  /// value read is larger than the APInt's bits will hold), set Val to the low
89193326Sed  /// bits of the result and return true.  Otherwise, return false.
90193326Sed  bool GetIntegerValue(llvm::APInt &Val);
91198092Srdivacky
92193326Sed  /// GetFloatValue - Convert this numeric literal to a floating value, using
93193326Sed  /// the specified APFloat fltSemantics (specifying float, double, etc).
94193326Sed  /// The optional bool isExact (passed-by-reference) has its value
95193326Sed  /// set to true if the returned APFloat can represent the number in the
96193326Sed  /// literal exactly, and false otherwise.
97201361Srdivacky  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
98193326Sed
99198092Srdivackyprivate:
100198092Srdivacky
101193326Sed  void ParseNumberStartingWithZero(SourceLocation TokLoc);
102198092Srdivacky
103263509Sdim  static bool isDigitSeparator(char C) { return C == '\''; }
104263509Sdim
105263509Sdim  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
106263509Sdim
107263509Sdim  /// \brief Ensure that we don't have a digit separator here.
108263509Sdim  void checkSeparator(SourceLocation TokLoc, const char *Pos,
109263509Sdim                      CheckSeparatorKind IsAfterDigits);
110263509Sdim
111193326Sed  /// SkipHexDigits - Read and skip over any hex digits, up to End.
112193326Sed  /// Return a pointer to the first non-hex digit or End.
113193326Sed  const char *SkipHexDigits(const char *ptr) {
114263509Sdim    while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
115193326Sed      ptr++;
116193326Sed    return ptr;
117193326Sed  }
118198092Srdivacky
119193326Sed  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
120193326Sed  /// Return a pointer to the first non-hex digit or End.
121193326Sed  const char *SkipOctalDigits(const char *ptr) {
122263509Sdim    while (ptr != ThisTokEnd &&
123263509Sdim           ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
124193326Sed      ptr++;
125193326Sed    return ptr;
126193326Sed  }
127198092Srdivacky
128193326Sed  /// SkipDigits - Read and skip over any digits, up to End.
129193326Sed  /// Return a pointer to the first non-hex digit or End.
130193326Sed  const char *SkipDigits(const char *ptr) {
131263509Sdim    while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
132193326Sed      ptr++;
133193326Sed    return ptr;
134193326Sed  }
135198092Srdivacky
136193326Sed  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
137193326Sed  /// Return a pointer to the first non-binary digit or End.
138193326Sed  const char *SkipBinaryDigits(const char *ptr) {
139263509Sdim    while (ptr != ThisTokEnd &&
140263509Sdim           (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
141193326Sed      ptr++;
142193326Sed    return ptr;
143193326Sed  }
144198092Srdivacky
145193326Sed};
146193326Sed
147193326Sed/// CharLiteralParser - Perform interpretation and semantic analysis of a
148193326Sed/// character literal.
149193326Sedclass CharLiteralParser {
150193326Sed  uint64_t Value;
151226890Sdim  tok::TokenKind Kind;
152193326Sed  bool IsMultiChar;
153193326Sed  bool HadError;
154235633Sdim  SmallString<32> UDSuffixBuf;
155235633Sdim  unsigned UDSuffixOffset;
156193326Sedpublic:
157193326Sed  CharLiteralParser(const char *begin, const char *end,
158226890Sdim                    SourceLocation Loc, Preprocessor &PP,
159226890Sdim                    tok::TokenKind kind);
160193326Sed
161193326Sed  bool hadError() const { return HadError; }
162226890Sdim  bool isAscii() const { return Kind == tok::char_constant; }
163226890Sdim  bool isWide() const { return Kind == tok::wide_char_constant; }
164226890Sdim  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
165226890Sdim  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
166193326Sed  bool isMultiChar() const { return IsMultiChar; }
167193326Sed  uint64_t getValue() const { return Value; }
168235633Sdim  StringRef getUDSuffix() const { return UDSuffixBuf; }
169235633Sdim  unsigned getUDSuffixOffset() const {
170235633Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
171235633Sdim    return UDSuffixOffset;
172235633Sdim  }
173193326Sed};
174193326Sed
175193326Sed/// StringLiteralParser - This decodes string escape characters and performs
176193326Sed/// wide string analysis and Translation Phase #6 (concatenation of string
177193326Sed/// literals) (C99 5.1.1.2p1).
178193326Sedclass StringLiteralParser {
179218893Sdim  const SourceManager &SM;
180218893Sdim  const LangOptions &Features;
181218893Sdim  const TargetInfo &Target;
182226890Sdim  DiagnosticsEngine *Diags;
183218893Sdim
184193326Sed  unsigned MaxTokenLength;
185193326Sed  unsigned SizeBound;
186226890Sdim  unsigned CharByteWidth;
187226890Sdim  tok::TokenKind Kind;
188235633Sdim  SmallString<512> ResultBuf;
189193326Sed  char *ResultPtr; // cursor
190235633Sdim  SmallString<32> UDSuffixBuf;
191235633Sdim  unsigned UDSuffixToken;
192235633Sdim  unsigned UDSuffixOffset;
193193326Sedpublic:
194193326Sed  StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
195208600Srdivacky                      Preprocessor &PP, bool Complain = true);
196218893Sdim  StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
197218893Sdim                      const SourceManager &sm, const LangOptions &features,
198226890Sdim                      const TargetInfo &target, DiagnosticsEngine *diags = 0)
199223017Sdim    : SM(sm), Features(features), Target(target), Diags(diags),
200226890Sdim      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
201226890Sdim      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
202218893Sdim    init(StringToks, NumStringToks);
203218893Sdim  }
204218893Sdim
205218893Sdim
206193326Sed  bool hadError;
207193326Sed  bool Pascal;
208198092Srdivacky
209226890Sdim  StringRef GetString() const {
210226890Sdim    return StringRef(ResultBuf.data(), GetStringLength());
211224145Sdim  }
212223017Sdim  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
213193326Sed
214193326Sed  unsigned GetNumStringChars() const {
215226890Sdim    return GetStringLength() / CharByteWidth;
216198092Srdivacky  }
217193326Sed  /// getOffsetOfStringByte - This function returns the offset of the
218193326Sed  /// specified byte of the string data represented by Token.  This handles
219193326Sed  /// advancing over escape sequences in the string.
220218893Sdim  ///
221218893Sdim  /// If the Diagnostics pointer is non-null, then this will do semantic
222218893Sdim  /// checking of the string literal and emit errors and warnings.
223218893Sdim  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
224226890Sdim
225235633Sdim  bool isAscii() const { return Kind == tok::string_literal; }
226235633Sdim  bool isWide() const { return Kind == tok::wide_string_literal; }
227235633Sdim  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
228235633Sdim  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
229235633Sdim  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
230235633Sdim  bool isPascal() const { return Pascal; }
231226890Sdim
232235633Sdim  StringRef getUDSuffix() const { return UDSuffixBuf; }
233235633Sdim
234235633Sdim  /// Get the index of a token containing a ud-suffix.
235235633Sdim  unsigned getUDSuffixToken() const {
236235633Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
237235633Sdim    return UDSuffixToken;
238235633Sdim  }
239235633Sdim  /// Get the spelling offset of the first byte of the ud-suffix.
240235633Sdim  unsigned getUDSuffixOffset() const {
241235633Sdim    assert(!UDSuffixBuf.empty() && "no ud-suffix");
242235633Sdim    return UDSuffixOffset;
243235633Sdim  }
244235633Sdim
245218893Sdimprivate:
246218893Sdim  void init(const Token *StringToks, unsigned NumStringToks);
247245431Sdim  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
248245431Sdim                          StringRef Fragment);
249245431Sdim  void DiagnoseLexingError(SourceLocation Loc);
250193326Sed};
251198092Srdivacky
252193326Sed}  // end namespace clang
253193326Sed
254193326Sed#endif
255