1//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
11/// literal separators.
12///
13//===----------------------------------------------------------------------===//
14
15#include "IntegerLiteralSeparatorFixer.h"
16
17namespace clang {
18namespace format {
19
/// Numeric base of an integer literal, as determined from its first two
/// characters.
enum class Base {
  /// Prefixed with "0b" or "0B".
  Binary,
  /// First character is a non-zero digit.
  Decimal,
  /// Prefixed with "0x" or "0X".
  Hex,
  /// Starts with '0' but has neither a binary nor a hex prefix.
  Other
};
21
22static Base getBase(const StringRef IntegerLiteral) {
23  assert(IntegerLiteral.size() > 1);
24
25  if (IntegerLiteral[0] > '0') {
26    assert(IntegerLiteral[0] <= '9');
27    return Base::Decimal;
28  }
29
30  assert(IntegerLiteral[0] == '0');
31
32  switch (IntegerLiteral[1]) {
33  case 'b':
34  case 'B':
35    return Base::Binary;
36  case 'x':
37  case 'X':
38    return Base::Hex;
39  default:
40    return Base::Other;
41  }
42}
43
44std::pair<tooling::Replacements, unsigned>
45IntegerLiteralSeparatorFixer::process(const Environment &Env,
46                                      const FormatStyle &Style) {
47  switch (Style.Language) {
48  case FormatStyle::LK_Cpp:
49  case FormatStyle::LK_ObjC:
50    Separator = '\'';
51    break;
52  case FormatStyle::LK_CSharp:
53  case FormatStyle::LK_Java:
54  case FormatStyle::LK_JavaScript:
55    Separator = '_';
56    break;
57  default:
58    return {};
59  }
60
61  const auto &Option = Style.IntegerLiteralSeparator;
62  const auto Binary = Option.Binary;
63  const auto Decimal = Option.Decimal;
64  const auto Hex = Option.Hex;
65  const bool SkipBinary = Binary == 0;
66  const bool SkipDecimal = Decimal == 0;
67  const bool SkipHex = Hex == 0;
68
69  if (SkipBinary && SkipDecimal && SkipHex)
70    return {};
71
72  const auto BinaryMinDigits =
73      std::max((int)Option.BinaryMinDigits, Binary + 1);
74  const auto DecimalMinDigits =
75      std::max((int)Option.DecimalMinDigits, Decimal + 1);
76  const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
77
78  const auto &SourceMgr = Env.getSourceManager();
79  AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
80
81  const auto ID = Env.getFileID();
82  const auto LangOpts = getFormattingLangOpts(Style);
83  Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
84  Lex.SetCommentRetentionState(true);
85
86  Token Tok;
87  tooling::Replacements Result;
88
89  for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
90    auto Length = Tok.getLength();
91    if (Length < 2)
92      continue;
93    auto Location = Tok.getLocation();
94    auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
95    if (Tok.is(tok::comment)) {
96      if (isClangFormatOff(Text))
97        Skip = true;
98      else if (isClangFormatOn(Text))
99        Skip = false;
100      continue;
101    }
102    if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
103        !AffectedRangeMgr.affectsCharSourceRange(
104            CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
105      continue;
106    }
107    const auto B = getBase(Text);
108    const bool IsBase2 = B == Base::Binary;
109    const bool IsBase10 = B == Base::Decimal;
110    const bool IsBase16 = B == Base::Hex;
111    if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
112        (IsBase16 && SkipHex) || B == Base::Other) {
113      continue;
114    }
115    if (Style.isCpp()) {
116      // Hex alpha digits a-f/A-F must be at the end of the string literal.
117      StringRef Suffixes = "_himnsuyd";
118      if (const auto Pos =
119              Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
120          Pos != StringRef::npos) {
121        Text = Text.substr(0, Pos);
122        Length = Pos;
123      }
124    }
125    if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
126        (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
127      continue;
128    }
129    const auto Start = Text[0] == '0' ? 2 : 0;
130    auto End = Text.find_first_of("uUlLzZn", Start);
131    if (End == StringRef::npos)
132      End = Length;
133    if (Start > 0 || End < Length) {
134      Length = End - Start;
135      Text = Text.substr(Start, Length);
136    }
137    auto DigitsPerGroup = Decimal;
138    auto MinDigits = DecimalMinDigits;
139    if (IsBase2) {
140      DigitsPerGroup = Binary;
141      MinDigits = BinaryMinDigits;
142    } else if (IsBase16) {
143      DigitsPerGroup = Hex;
144      MinDigits = HexMinDigits;
145    }
146    const auto SeparatorCount = Text.count(Separator);
147    const int DigitCount = Length - SeparatorCount;
148    const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
149    if (RemoveSeparator && SeparatorCount == 0)
150      continue;
151    if (!RemoveSeparator && SeparatorCount > 0 &&
152        checkSeparator(Text, DigitsPerGroup)) {
153      continue;
154    }
155    const auto &Formatted =
156        format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
157    assert(Formatted != Text);
158    if (Start > 0)
159      Location = Location.getLocWithOffset(Start);
160    cantFail(Result.add(
161        tooling::Replacement(SourceMgr, Location, Length, Formatted)));
162  }
163
164  return {Result, 0};
165}
166
167bool IntegerLiteralSeparatorFixer::checkSeparator(
168    const StringRef IntegerLiteral, int DigitsPerGroup) const {
169  assert(DigitsPerGroup > 0);
170
171  int I = 0;
172  for (auto C : llvm::reverse(IntegerLiteral)) {
173    if (C == Separator) {
174      if (I < DigitsPerGroup)
175        return false;
176      I = 0;
177    } else {
178      if (I == DigitsPerGroup)
179        return false;
180      ++I;
181    }
182  }
183
184  return true;
185}
186
187std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
188                                                 int DigitsPerGroup,
189                                                 int DigitCount,
190                                                 bool RemoveSeparator) const {
191  assert(DigitsPerGroup != 0);
192
193  std::string Formatted;
194
195  if (RemoveSeparator) {
196    for (auto C : IntegerLiteral)
197      if (C != Separator)
198        Formatted.push_back(C);
199    return Formatted;
200  }
201
202  int Remainder = DigitCount % DigitsPerGroup;
203
204  int I = 0;
205  for (auto C : IntegerLiteral) {
206    if (C == Separator)
207      continue;
208    if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
209      Formatted.push_back(Separator);
210      I = 0;
211      Remainder = 0;
212    }
213    Formatted.push_back(C);
214    ++I;
215  }
216
217  return Formatted;
218}
219
220} // namespace format
221} // namespace clang
222