Format.cpp revision 296417
1//===--- Format.cpp - Format C++ code -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements functions declared in Format.h. This will be
12/// split into separate files as we go.
13///
14//===----------------------------------------------------------------------===//
15
16#include "clang/Format/Format.h"
17#include "ContinuationIndenter.h"
18#include "TokenAnnotator.h"
19#include "UnwrappedLineFormatter.h"
20#include "UnwrappedLineParser.h"
21#include "WhitespaceManager.h"
22#include "clang/Basic/Diagnostic.h"
23#include "clang/Basic/DiagnosticOptions.h"
24#include "clang/Basic/SourceManager.h"
25#include "clang/Lex/Lexer.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/Support/Allocator.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/Path.h"
30#include "llvm/Support/Regex.h"
31#include "llvm/Support/YAMLTraits.h"
32#include <queue>
33#include <string>
34
35#define DEBUG_TYPE "format-formatter"
36
37using clang::format::FormatStyle;
38
39LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
40LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
41
42namespace llvm {
43namespace yaml {
44template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
45  static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
46    IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
47    IO.enumCase(Value, "Java", FormatStyle::LK_Java);
48    IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
49    IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
50    IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
51  }
52};
53
54template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
55  static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
56    IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
57    IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
58    IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
59    IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
60    IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
61  }
62};
63
64template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
65  static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
66    IO.enumCase(Value, "Never", FormatStyle::UT_Never);
67    IO.enumCase(Value, "false", FormatStyle::UT_Never);
68    IO.enumCase(Value, "Always", FormatStyle::UT_Always);
69    IO.enumCase(Value, "true", FormatStyle::UT_Always);
70    IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
71  }
72};
73
74template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
75  static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
76    IO.enumCase(Value, "None", FormatStyle::SFS_None);
77    IO.enumCase(Value, "false", FormatStyle::SFS_None);
78    IO.enumCase(Value, "All", FormatStyle::SFS_All);
79    IO.enumCase(Value, "true", FormatStyle::SFS_All);
80    IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
81    IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
82  }
83};
84
85template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
86  static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
87    IO.enumCase(Value, "All", FormatStyle::BOS_All);
88    IO.enumCase(Value, "true", FormatStyle::BOS_All);
89    IO.enumCase(Value, "None", FormatStyle::BOS_None);
90    IO.enumCase(Value, "false", FormatStyle::BOS_None);
91    IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
92  }
93};
94
95template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
96  static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
97    IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
98    IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
99    IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
100    IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
101    IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
102    IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
103    IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit);
104    IO.enumCase(Value, "Custom", FormatStyle::BS_Custom);
105  }
106};
107
108template <>
109struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
110  static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
111    IO.enumCase(Value, "None", FormatStyle::RTBS_None);
112    IO.enumCase(Value, "All", FormatStyle::RTBS_All);
113    IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel);
114    IO.enumCase(Value, "TopLevelDefinitions",
115                FormatStyle::RTBS_TopLevelDefinitions);
116    IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions);
117  }
118};
119
120template <>
121struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
122  static void
123  enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
124    IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
125    IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
126    IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
127
128    // For backward compatibility.
129    IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
130    IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
131  }
132};
133
134template <>
135struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
136  static void enumeration(IO &IO,
137                          FormatStyle::NamespaceIndentationKind &Value) {
138    IO.enumCase(Value, "None", FormatStyle::NI_None);
139    IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
140    IO.enumCase(Value, "All", FormatStyle::NI_All);
141  }
142};
143
144template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
145  static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
146    IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
147    IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
148    IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
149
150    // For backward compatibility.
151    IO.enumCase(Value, "true", FormatStyle::BAS_Align);
152    IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
153  }
154};
155
156template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
157  static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
158    IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
159    IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
160    IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
161
162    // For backward compatibility.
163    IO.enumCase(Value, "true", FormatStyle::PAS_Left);
164    IO.enumCase(Value, "false", FormatStyle::PAS_Right);
165  }
166};
167
168template <>
169struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
170  static void enumeration(IO &IO,
171                          FormatStyle::SpaceBeforeParensOptions &Value) {
172    IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
173    IO.enumCase(Value, "ControlStatements",
174                FormatStyle::SBPO_ControlStatements);
175    IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
176
177    // For backward compatibility.
178    IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
179    IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
180  }
181};
182
183template <> struct MappingTraits<FormatStyle> {
184  static void mapping(IO &IO, FormatStyle &Style) {
185    // When reading, read the language first, we need it for getPredefinedStyle.
186    IO.mapOptional("Language", Style.Language);
187
188    if (IO.outputting()) {
189      StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
190                                 "Mozilla", "WebKit", "GNU"};
191      ArrayRef<StringRef> Styles(StylesArray);
192      for (size_t i = 0, e = Styles.size(); i < e; ++i) {
193        StringRef StyleName(Styles[i]);
194        FormatStyle PredefinedStyle;
195        if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
196            Style == PredefinedStyle) {
197          IO.mapOptional("# BasedOnStyle", StyleName);
198          break;
199        }
200      }
201    } else {
202      StringRef BasedOnStyle;
203      IO.mapOptional("BasedOnStyle", BasedOnStyle);
204      if (!BasedOnStyle.empty()) {
205        FormatStyle::LanguageKind OldLanguage = Style.Language;
206        FormatStyle::LanguageKind Language =
207            ((FormatStyle *)IO.getContext())->Language;
208        if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
209          IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
210          return;
211        }
212        Style.Language = OldLanguage;
213      }
214    }
215
216    // For backward compatibility.
217    if (!IO.outputting()) {
218      IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
219      IO.mapOptional("IndentFunctionDeclarationAfterType",
220                     Style.IndentWrappedFunctionNames);
221      IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
222      IO.mapOptional("SpaceAfterControlStatementKeyword",
223                     Style.SpaceBeforeParens);
224    }
225
226    IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
227    IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
228    IO.mapOptional("AlignConsecutiveAssignments",
229                   Style.AlignConsecutiveAssignments);
230    IO.mapOptional("AlignConsecutiveDeclarations",
231                   Style.AlignConsecutiveDeclarations);
232    IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
233    IO.mapOptional("AlignOperands", Style.AlignOperands);
234    IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
235    IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
236                   Style.AllowAllParametersOfDeclarationOnNextLine);
237    IO.mapOptional("AllowShortBlocksOnASingleLine",
238                   Style.AllowShortBlocksOnASingleLine);
239    IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
240                   Style.AllowShortCaseLabelsOnASingleLine);
241    IO.mapOptional("AllowShortFunctionsOnASingleLine",
242                   Style.AllowShortFunctionsOnASingleLine);
243    IO.mapOptional("AllowShortIfStatementsOnASingleLine",
244                   Style.AllowShortIfStatementsOnASingleLine);
245    IO.mapOptional("AllowShortLoopsOnASingleLine",
246                   Style.AllowShortLoopsOnASingleLine);
247    IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
248                   Style.AlwaysBreakAfterDefinitionReturnType);
249    IO.mapOptional("AlwaysBreakAfterReturnType",
250                   Style.AlwaysBreakAfterReturnType);
251    // If AlwaysBreakAfterDefinitionReturnType was specified but
252    // AlwaysBreakAfterReturnType was not, initialize the latter from the
253    // former for backwards compatibility.
254    if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None &&
255        Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) {
256      if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All)
257        Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
258      else if (Style.AlwaysBreakAfterDefinitionReturnType ==
259               FormatStyle::DRTBS_TopLevel)
260        Style.AlwaysBreakAfterReturnType =
261            FormatStyle::RTBS_TopLevelDefinitions;
262    }
263
264    IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
265                   Style.AlwaysBreakBeforeMultilineStrings);
266    IO.mapOptional("AlwaysBreakTemplateDeclarations",
267                   Style.AlwaysBreakTemplateDeclarations);
268    IO.mapOptional("BinPackArguments", Style.BinPackArguments);
269    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
270    IO.mapOptional("BraceWrapping", Style.BraceWrapping);
271    IO.mapOptional("BreakBeforeBinaryOperators",
272                   Style.BreakBeforeBinaryOperators);
273    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
274    IO.mapOptional("BreakBeforeTernaryOperators",
275                   Style.BreakBeforeTernaryOperators);
276    IO.mapOptional("BreakConstructorInitializersBeforeComma",
277                   Style.BreakConstructorInitializersBeforeComma);
278    IO.mapOptional("ColumnLimit", Style.ColumnLimit);
279    IO.mapOptional("CommentPragmas", Style.CommentPragmas);
280    IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
281                   Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
282    IO.mapOptional("ConstructorInitializerIndentWidth",
283                   Style.ConstructorInitializerIndentWidth);
284    IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
285    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
286    IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
287    IO.mapOptional("DisableFormat", Style.DisableFormat);
288    IO.mapOptional("ExperimentalAutoDetectBinPacking",
289                   Style.ExperimentalAutoDetectBinPacking);
290    IO.mapOptional("ForEachMacros", Style.ForEachMacros);
291    IO.mapOptional("IncludeCategories", Style.IncludeCategories);
292    IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
293    IO.mapOptional("IndentWidth", Style.IndentWidth);
294    IO.mapOptional("IndentWrappedFunctionNames",
295                   Style.IndentWrappedFunctionNames);
296    IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
297                   Style.KeepEmptyLinesAtTheStartOfBlocks);
298    IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
299    IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
300    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
301    IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
302    IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
303    IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
304    IO.mapOptional("ObjCSpaceBeforeProtocolList",
305                   Style.ObjCSpaceBeforeProtocolList);
306    IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
307                   Style.PenaltyBreakBeforeFirstCallParameter);
308    IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
309    IO.mapOptional("PenaltyBreakFirstLessLess",
310                   Style.PenaltyBreakFirstLessLess);
311    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
312    IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
313    IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
314                   Style.PenaltyReturnTypeOnItsOwnLine);
315    IO.mapOptional("PointerAlignment", Style.PointerAlignment);
316    IO.mapOptional("ReflowComments", Style.ReflowComments);
317    IO.mapOptional("SortIncludes", Style.SortIncludes);
318    IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
319    IO.mapOptional("SpaceBeforeAssignmentOperators",
320                   Style.SpaceBeforeAssignmentOperators);
321    IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
322    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
323    IO.mapOptional("SpacesBeforeTrailingComments",
324                   Style.SpacesBeforeTrailingComments);
325    IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
326    IO.mapOptional("SpacesInContainerLiterals",
327                   Style.SpacesInContainerLiterals);
328    IO.mapOptional("SpacesInCStyleCastParentheses",
329                   Style.SpacesInCStyleCastParentheses);
330    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
331    IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
332    IO.mapOptional("Standard", Style.Standard);
333    IO.mapOptional("TabWidth", Style.TabWidth);
334    IO.mapOptional("UseTab", Style.UseTab);
335  }
336};
337
338template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
339  static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) {
340    IO.mapOptional("AfterClass", Wrapping.AfterClass);
341    IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement);
342    IO.mapOptional("AfterEnum", Wrapping.AfterEnum);
343    IO.mapOptional("AfterFunction", Wrapping.AfterFunction);
344    IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace);
345    IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);
346    IO.mapOptional("AfterStruct", Wrapping.AfterStruct);
347    IO.mapOptional("AfterUnion", Wrapping.AfterUnion);
348    IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
349    IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
350    IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
351  }
352};
353
354template <> struct MappingTraits<FormatStyle::IncludeCategory> {
355  static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) {
356    IO.mapOptional("Regex", Category.Regex);
357    IO.mapOptional("Priority", Category.Priority);
358  }
359};
360
361// Allows to read vector<FormatStyle> while keeping default values.
362// IO.getContext() should contain a pointer to the FormatStyle structure, that
363// will be used to get default values for missing keys.
364// If the first element has no Language specified, it will be treated as the
365// default one for the following elements.
366template <> struct DocumentListTraits<std::vector<FormatStyle>> {
367  static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
368    return Seq.size();
369  }
370  static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
371                              size_t Index) {
372    if (Index >= Seq.size()) {
373      assert(Index == Seq.size());
374      FormatStyle Template;
375      if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
376        Template = Seq[0];
377      } else {
378        Template = *((const FormatStyle *)IO.getContext());
379        Template.Language = FormatStyle::LK_None;
380      }
381      Seq.resize(Index + 1, Template);
382    }
383    return Seq[Index];
384  }
385};
386} // namespace yaml
387} // namespace llvm
388
389namespace clang {
390namespace format {
391
392const std::error_category &getParseCategory() {
393  static ParseErrorCategory C;
394  return C;
395}
396std::error_code make_error_code(ParseError e) {
397  return std::error_code(static_cast<int>(e), getParseCategory());
398}
399
400const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
401  return "clang-format.parse_error";
402}
403
404std::string ParseErrorCategory::message(int EV) const {
405  switch (static_cast<ParseError>(EV)) {
406  case ParseError::Success:
407    return "Success";
408  case ParseError::Error:
409    return "Invalid argument";
410  case ParseError::Unsuitable:
411    return "Unsuitable";
412  }
413  llvm_unreachable("unexpected parse error");
414}
415
416static FormatStyle expandPresets(const FormatStyle &Style) {
417  if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
418    return Style;
419  FormatStyle Expanded = Style;
420  Expanded.BraceWrapping = {false, false, false, false, false, false,
421                            false, false, false, false, false};
422  switch (Style.BreakBeforeBraces) {
423  case FormatStyle::BS_Linux:
424    Expanded.BraceWrapping.AfterClass = true;
425    Expanded.BraceWrapping.AfterFunction = true;
426    Expanded.BraceWrapping.AfterNamespace = true;
427    break;
428  case FormatStyle::BS_Mozilla:
429    Expanded.BraceWrapping.AfterClass = true;
430    Expanded.BraceWrapping.AfterEnum = true;
431    Expanded.BraceWrapping.AfterFunction = true;
432    Expanded.BraceWrapping.AfterStruct = true;
433    Expanded.BraceWrapping.AfterUnion = true;
434    break;
435  case FormatStyle::BS_Stroustrup:
436    Expanded.BraceWrapping.AfterFunction = true;
437    Expanded.BraceWrapping.BeforeCatch = true;
438    Expanded.BraceWrapping.BeforeElse = true;
439    break;
440  case FormatStyle::BS_Allman:
441    Expanded.BraceWrapping.AfterClass = true;
442    Expanded.BraceWrapping.AfterControlStatement = true;
443    Expanded.BraceWrapping.AfterEnum = true;
444    Expanded.BraceWrapping.AfterFunction = true;
445    Expanded.BraceWrapping.AfterNamespace = true;
446    Expanded.BraceWrapping.AfterObjCDeclaration = true;
447    Expanded.BraceWrapping.AfterStruct = true;
448    Expanded.BraceWrapping.BeforeCatch = true;
449    Expanded.BraceWrapping.BeforeElse = true;
450    break;
451  case FormatStyle::BS_GNU:
452    Expanded.BraceWrapping = {true, true, true, true, true, true,
453                              true, true, true, true, true};
454    break;
455  case FormatStyle::BS_WebKit:
456    Expanded.BraceWrapping.AfterFunction = true;
457    break;
458  default:
459    break;
460  }
461  return Expanded;
462}
463
464FormatStyle getLLVMStyle() {
465  FormatStyle LLVMStyle;
466  LLVMStyle.Language = FormatStyle::LK_Cpp;
467  LLVMStyle.AccessModifierOffset = -2;
468  LLVMStyle.AlignEscapedNewlinesLeft = false;
469  LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
470  LLVMStyle.AlignOperands = true;
471  LLVMStyle.AlignTrailingComments = true;
472  LLVMStyle.AlignConsecutiveAssignments = false;
473  LLVMStyle.AlignConsecutiveDeclarations = false;
474  LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
475  LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
476  LLVMStyle.AllowShortBlocksOnASingleLine = false;
477  LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
478  LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
479  LLVMStyle.AllowShortLoopsOnASingleLine = false;
480  LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None;
481  LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
482  LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
483  LLVMStyle.AlwaysBreakTemplateDeclarations = false;
484  LLVMStyle.BinPackParameters = true;
485  LLVMStyle.BinPackArguments = true;
486  LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
487  LLVMStyle.BreakBeforeTernaryOperators = true;
488  LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
489  LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
490                             false, false, false, false, false};
491  LLVMStyle.BreakConstructorInitializersBeforeComma = false;
492  LLVMStyle.BreakAfterJavaFieldAnnotations = false;
493  LLVMStyle.ColumnLimit = 80;
494  LLVMStyle.CommentPragmas = "^ IWYU pragma:";
495  LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
496  LLVMStyle.ConstructorInitializerIndentWidth = 4;
497  LLVMStyle.ContinuationIndentWidth = 4;
498  LLVMStyle.Cpp11BracedListStyle = true;
499  LLVMStyle.DerivePointerAlignment = false;
500  LLVMStyle.ExperimentalAutoDetectBinPacking = false;
501  LLVMStyle.ForEachMacros.push_back("foreach");
502  LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
503  LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
504  LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
505                                 {"^(<|\"(gtest|isl|json)/)", 3},
506                                 {".*", 1}};
507  LLVMStyle.IndentCaseLabels = false;
508  LLVMStyle.IndentWrappedFunctionNames = false;
509  LLVMStyle.IndentWidth = 2;
510  LLVMStyle.TabWidth = 8;
511  LLVMStyle.MaxEmptyLinesToKeep = 1;
512  LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
513  LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
514  LLVMStyle.ObjCBlockIndentWidth = 2;
515  LLVMStyle.ObjCSpaceAfterProperty = false;
516  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
517  LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
518  LLVMStyle.SpacesBeforeTrailingComments = 1;
519  LLVMStyle.Standard = FormatStyle::LS_Cpp11;
520  LLVMStyle.UseTab = FormatStyle::UT_Never;
521  LLVMStyle.ReflowComments = true;
522  LLVMStyle.SpacesInParentheses = false;
523  LLVMStyle.SpacesInSquareBrackets = false;
524  LLVMStyle.SpaceInEmptyParentheses = false;
525  LLVMStyle.SpacesInContainerLiterals = true;
526  LLVMStyle.SpacesInCStyleCastParentheses = false;
527  LLVMStyle.SpaceAfterCStyleCast = false;
528  LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
529  LLVMStyle.SpaceBeforeAssignmentOperators = true;
530  LLVMStyle.SpacesInAngles = false;
531
532  LLVMStyle.PenaltyBreakComment = 300;
533  LLVMStyle.PenaltyBreakFirstLessLess = 120;
534  LLVMStyle.PenaltyBreakString = 1000;
535  LLVMStyle.PenaltyExcessCharacter = 1000000;
536  LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
537  LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
538
539  LLVMStyle.DisableFormat = false;
540  LLVMStyle.SortIncludes = true;
541
542  return LLVMStyle;
543}
544
545FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
546  FormatStyle GoogleStyle = getLLVMStyle();
547  GoogleStyle.Language = Language;
548
549  GoogleStyle.AccessModifierOffset = -1;
550  GoogleStyle.AlignEscapedNewlinesLeft = true;
551  GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
552  GoogleStyle.AllowShortLoopsOnASingleLine = true;
553  GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
554  GoogleStyle.AlwaysBreakTemplateDeclarations = true;
555  GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
556  GoogleStyle.DerivePointerAlignment = true;
557  GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
558  GoogleStyle.IndentCaseLabels = true;
559  GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
560  GoogleStyle.ObjCSpaceAfterProperty = false;
561  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
562  GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
563  GoogleStyle.SpacesBeforeTrailingComments = 2;
564  GoogleStyle.Standard = FormatStyle::LS_Auto;
565
566  GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
567  GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
568
569  if (Language == FormatStyle::LK_Java) {
570    GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
571    GoogleStyle.AlignOperands = false;
572    GoogleStyle.AlignTrailingComments = false;
573    GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
574    GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
575    GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
576    GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
577    GoogleStyle.ColumnLimit = 100;
578    GoogleStyle.SpaceAfterCStyleCast = true;
579    GoogleStyle.SpacesBeforeTrailingComments = 1;
580  } else if (Language == FormatStyle::LK_JavaScript) {
581    GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
582    GoogleStyle.AlignOperands = false;
583    GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
584    GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
585    GoogleStyle.BreakBeforeTernaryOperators = false;
586    GoogleStyle.CommentPragmas = "@(export|visibility) {";
587    GoogleStyle.MaxEmptyLinesToKeep = 3;
588    GoogleStyle.SpacesInContainerLiterals = false;
589  } else if (Language == FormatStyle::LK_Proto) {
590    GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
591    GoogleStyle.SpacesInContainerLiterals = false;
592  }
593
594  return GoogleStyle;
595}
596
597FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
598  FormatStyle ChromiumStyle = getGoogleStyle(Language);
599  if (Language == FormatStyle::LK_Java) {
600    ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
601    ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
602    ChromiumStyle.ContinuationIndentWidth = 8;
603    ChromiumStyle.IndentWidth = 4;
604  } else {
605    ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
606    ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
607    ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
608    ChromiumStyle.AllowShortLoopsOnASingleLine = false;
609    ChromiumStyle.BinPackParameters = false;
610    ChromiumStyle.DerivePointerAlignment = false;
611  }
612  ChromiumStyle.SortIncludes = false;
613  return ChromiumStyle;
614}
615
616FormatStyle getMozillaStyle() {
617  FormatStyle MozillaStyle = getLLVMStyle();
618  MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
619  MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
620  MozillaStyle.AlwaysBreakAfterReturnType =
621      FormatStyle::RTBS_TopLevelDefinitions;
622  MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
623      FormatStyle::DRTBS_TopLevel;
624  MozillaStyle.AlwaysBreakTemplateDeclarations = true;
625  MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
626  MozillaStyle.BreakConstructorInitializersBeforeComma = true;
627  MozillaStyle.ConstructorInitializerIndentWidth = 2;
628  MozillaStyle.ContinuationIndentWidth = 2;
629  MozillaStyle.Cpp11BracedListStyle = false;
630  MozillaStyle.IndentCaseLabels = true;
631  MozillaStyle.ObjCSpaceAfterProperty = true;
632  MozillaStyle.ObjCSpaceBeforeProtocolList = false;
633  MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
634  MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
635  return MozillaStyle;
636}
637
638FormatStyle getWebKitStyle() {
639  FormatStyle Style = getLLVMStyle();
640  Style.AccessModifierOffset = -4;
641  Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
642  Style.AlignOperands = false;
643  Style.AlignTrailingComments = false;
644  Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
645  Style.BreakBeforeBraces = FormatStyle::BS_WebKit;
646  Style.BreakConstructorInitializersBeforeComma = true;
647  Style.Cpp11BracedListStyle = false;
648  Style.ColumnLimit = 0;
649  Style.IndentWidth = 4;
650  Style.NamespaceIndentation = FormatStyle::NI_Inner;
651  Style.ObjCBlockIndentWidth = 4;
652  Style.ObjCSpaceAfterProperty = true;
653  Style.PointerAlignment = FormatStyle::PAS_Left;
654  Style.Standard = FormatStyle::LS_Cpp03;
655  return Style;
656}
657
658FormatStyle getGNUStyle() {
659  FormatStyle Style = getLLVMStyle();
660  Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
661  Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
662  Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
663  Style.BreakBeforeBraces = FormatStyle::BS_GNU;
664  Style.BreakBeforeTernaryOperators = true;
665  Style.Cpp11BracedListStyle = false;
666  Style.ColumnLimit = 79;
667  Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
668  Style.Standard = FormatStyle::LS_Cpp03;
669  return Style;
670}
671
672FormatStyle getNoStyle() {
673  FormatStyle NoStyle = getLLVMStyle();
674  NoStyle.DisableFormat = true;
675  NoStyle.SortIncludes = false;
676  return NoStyle;
677}
678
679bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
680                        FormatStyle *Style) {
681  if (Name.equals_lower("llvm")) {
682    *Style = getLLVMStyle();
683  } else if (Name.equals_lower("chromium")) {
684    *Style = getChromiumStyle(Language);
685  } else if (Name.equals_lower("mozilla")) {
686    *Style = getMozillaStyle();
687  } else if (Name.equals_lower("google")) {
688    *Style = getGoogleStyle(Language);
689  } else if (Name.equals_lower("webkit")) {
690    *Style = getWebKitStyle();
691  } else if (Name.equals_lower("gnu")) {
692    *Style = getGNUStyle();
693  } else if (Name.equals_lower("none")) {
694    *Style = getNoStyle();
695  } else {
696    return false;
697  }
698
699  Style->Language = Language;
700  return true;
701}
702
703std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
704  assert(Style);
705  FormatStyle::LanguageKind Language = Style->Language;
706  assert(Language != FormatStyle::LK_None);
707  if (Text.trim().empty())
708    return make_error_code(ParseError::Error);
709
710  std::vector<FormatStyle> Styles;
711  llvm::yaml::Input Input(Text);
712  // DocumentListTraits<vector<FormatStyle>> uses the context to get default
713  // values for the fields, keys for which are missing from the configuration.
714  // Mapping also uses the context to get the language to find the correct
715  // base style.
716  Input.setContext(Style);
717  Input >> Styles;
718  if (Input.error())
719    return Input.error();
720
721  for (unsigned i = 0; i < Styles.size(); ++i) {
722    // Ensures that only the first configuration can skip the Language option.
723    if (Styles[i].Language == FormatStyle::LK_None && i != 0)
724      return make_error_code(ParseError::Error);
725    // Ensure that each language is configured at most once.
726    for (unsigned j = 0; j < i; ++j) {
727      if (Styles[i].Language == Styles[j].Language) {
728        DEBUG(llvm::dbgs()
729              << "Duplicate languages in the config file on positions " << j
730              << " and " << i << "\n");
731        return make_error_code(ParseError::Error);
732      }
733    }
734  }
735  // Look for a suitable configuration starting from the end, so we can
736  // find the configuration for the specific language first, and the default
737  // configuration (which can only be at slot 0) after it.
738  for (int i = Styles.size() - 1; i >= 0; --i) {
739    if (Styles[i].Language == Language ||
740        Styles[i].Language == FormatStyle::LK_None) {
741      *Style = Styles[i];
742      Style->Language = Language;
743      return make_error_code(ParseError::Success);
744    }
745  }
746  return make_error_code(ParseError::Unsuitable);
747}
748
749std::string configurationAsText(const FormatStyle &Style) {
750  std::string Text;
751  llvm::raw_string_ostream Stream(Text);
752  llvm::yaml::Output Output(Stream);
753  // We use the same mapping method for input and output, so we need a non-const
754  // reference here.
755  FormatStyle NonConstStyle = expandPresets(Style);
756  Output << NonConstStyle;
757  return Stream.str();
758}
759
760namespace {
761
762class FormatTokenLexer {
763public:
764  FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
765                   encoding::Encoding Encoding)
766      : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
767        LessStashed(false), Column(0), TrailingWhitespace(0),
768        SourceMgr(SourceMgr), ID(ID), Style(Style),
769        IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
770        Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
771        MacroBlockBeginRegex(Style.MacroBlockBegin),
772        MacroBlockEndRegex(Style.MacroBlockEnd) {
773    Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
774                        getFormattingLangOpts(Style)));
775    Lex->SetKeepWhitespaceMode(true);
776
777    for (const std::string &ForEachMacro : Style.ForEachMacros)
778      ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
779    std::sort(ForEachMacros.begin(), ForEachMacros.end());
780  }
781
782  ArrayRef<FormatToken *> lex() {
783    assert(Tokens.empty());
784    assert(FirstInLineIndex == 0);
785    do {
786      Tokens.push_back(getNextToken());
787      if (Style.Language == FormatStyle::LK_JavaScript)
788        tryParseJSRegexLiteral();
789      tryMergePreviousTokens();
790      if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
791        FirstInLineIndex = Tokens.size() - 1;
792    } while (Tokens.back()->Tok.isNot(tok::eof));
793    return Tokens;
794  }
795
796  const AdditionalKeywords &getKeywords() { return Keywords; }
797
798private:
799  void tryMergePreviousTokens() {
800    if (tryMerge_TMacro())
801      return;
802    if (tryMergeConflictMarkers())
803      return;
804    if (tryMergeLessLess())
805      return;
806
807    if (Style.Language == FormatStyle::LK_JavaScript) {
808      if (tryMergeTemplateString())
809        return;
810
811      static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
812      static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
813                                                     tok::equal};
814      static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
815                                                    tok::greaterequal};
816      static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
817      // FIXME: Investigate what token type gives the correct operator priority.
818      if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
819        return;
820      if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
821        return;
822      if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
823        return;
824      if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
825        return;
826    }
827  }
828
829  bool tryMergeLessLess() {
830    // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
831    if (Tokens.size() < 3)
832      return false;
833
834    bool FourthTokenIsLess = false;
835    if (Tokens.size() > 3)
836      FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
837
838    auto First = Tokens.end() - 3;
839    if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
840        First[0]->isNot(tok::less) || FourthTokenIsLess)
841      return false;
842
843    // Only merge if there currently is no whitespace between the two "<".
844    if (First[1]->WhitespaceRange.getBegin() !=
845        First[1]->WhitespaceRange.getEnd())
846      return false;
847
848    First[0]->Tok.setKind(tok::lessless);
849    First[0]->TokenText = "<<";
850    First[0]->ColumnWidth += 1;
851    Tokens.erase(Tokens.end() - 2);
852    return true;
853  }
854
855  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
856    if (Tokens.size() < Kinds.size())
857      return false;
858
859    SmallVectorImpl<FormatToken *>::const_iterator First =
860        Tokens.end() - Kinds.size();
861    if (!First[0]->is(Kinds[0]))
862      return false;
863    unsigned AddLength = 0;
864    for (unsigned i = 1; i < Kinds.size(); ++i) {
865      if (!First[i]->is(Kinds[i]) ||
866          First[i]->WhitespaceRange.getBegin() !=
867              First[i]->WhitespaceRange.getEnd())
868        return false;
869      AddLength += First[i]->TokenText.size();
870    }
871    Tokens.resize(Tokens.size() - Kinds.size() + 1);
872    First[0]->TokenText = StringRef(First[0]->TokenText.data(),
873                                    First[0]->TokenText.size() + AddLength);
874    First[0]->ColumnWidth += AddLength;
875    First[0]->Type = NewType;
876    return true;
877  }
878
879  // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
880  bool precedesOperand(FormatToken *Tok) {
881    // NB: This is not entirely correct, as an r_paren can introduce an operand
882    // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
883    // corner case to not matter in practice, though.
884    return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
885                        tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
886                        tok::colon, tok::question, tok::tilde) ||
887           Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
888                        tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
889                        tok::kw_typeof, Keywords.kw_instanceof,
890                        Keywords.kw_in) ||
891           Tok->isBinaryOperator();
892  }
893
894  bool canPrecedeRegexLiteral(FormatToken *Prev) {
895    if (!Prev)
896      return true;
897
898    // Regex literals can only follow after prefix unary operators, not after
899    // postfix unary operators. If the '++' is followed by a non-operand
900    // introducing token, the slash here is the operand and not the start of a
901    // regex.
902    if (Prev->isOneOf(tok::plusplus, tok::minusminus))
903      return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
904
905    // The previous token must introduce an operand location where regex
906    // literals can occur.
907    if (!precedesOperand(Prev))
908      return false;
909
910    return true;
911  }
912
913  // Tries to parse a JavaScript Regex literal starting at the current token,
914  // if that begins with a slash and is in a location where JavaScript allows
915  // regex literals. Changes the current token to a regex literal and updates
916  // its text if successful.
917  void tryParseJSRegexLiteral() {
918    FormatToken *RegexToken = Tokens.back();
919    if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
920      return;
921
922    FormatToken *Prev = nullptr;
923    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
924      // NB: Because previous pointers are not initialized yet, this cannot use
925      // Token.getPreviousNonComment.
926      if ((*I)->isNot(tok::comment)) {
927        Prev = *I;
928        break;
929      }
930    }
931
932    if (!canPrecedeRegexLiteral(Prev))
933      return;
934
935    // 'Manually' lex ahead in the current file buffer.
936    const char *Offset = Lex->getBufferLocation();
937    const char *RegexBegin = Offset - RegexToken->TokenText.size();
938    StringRef Buffer = Lex->getBuffer();
939    bool InCharacterClass = false;
940    bool HaveClosingSlash = false;
941    for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
942      // Regular expressions are terminated with a '/', which can only be
943      // escaped using '\' or a character class between '[' and ']'.
944      // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
945      switch (*Offset) {
946      case '\\':
947        // Skip the escaped character.
948        ++Offset;
949        break;
950      case '[':
951        InCharacterClass = true;
952        break;
953      case ']':
954        InCharacterClass = false;
955        break;
956      case '/':
957        if (!InCharacterClass)
958          HaveClosingSlash = true;
959        break;
960      }
961    }
962
963    RegexToken->Type = TT_RegexLiteral;
964    // Treat regex literals like other string_literals.
965    RegexToken->Tok.setKind(tok::string_literal);
966    RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
967    RegexToken->ColumnWidth = RegexToken->TokenText.size();
968
969    resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
970  }
971
972  bool tryMergeTemplateString() {
973    if (Tokens.size() < 2)
974      return false;
975
976    FormatToken *EndBacktick = Tokens.back();
977    // Backticks get lexed as tok::unknown tokens. If a template string contains
978    // a comment start, it gets lexed as a tok::comment, or tok::unknown if
979    // unterminated.
980    if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
981                              tok::char_constant, tok::unknown))
982      return false;
983    size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
984    // Unknown token that's not actually a backtick, or a comment that doesn't
985    // contain a backtick.
986    if (CommentBacktickPos == StringRef::npos)
987      return false;
988
989    unsigned TokenCount = 0;
990    bool IsMultiline = false;
991    unsigned EndColumnInFirstLine =
992        EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
993    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
994      ++TokenCount;
995      if (I[0]->IsMultiline)
996        IsMultiline = true;
997
998      // If there was a preceding template string, this must be the start of a
999      // template string, not the end.
1000      if (I[0]->is(TT_TemplateString))
1001        return false;
1002
1003      if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
1004        // Keep track of the rhs offset of the last token to wrap across lines -
1005        // its the rhs offset of the first line of the template string, used to
1006        // determine its width.
1007        if (I[0]->IsMultiline)
1008          EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
1009        // If the token has newlines, the token before it (if it exists) is the
1010        // rhs end of the previous line.
1011        if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
1012          EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
1013          IsMultiline = true;
1014        }
1015        continue;
1016      }
1017
1018      Tokens.resize(Tokens.size() - TokenCount);
1019      Tokens.back()->Type = TT_TemplateString;
1020      const char *EndOffset =
1021          EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
1022      if (CommentBacktickPos != 0) {
1023        // If the backtick was not the first character (e.g. in a comment),
1024        // re-lex after the backtick position.
1025        SourceLocation Loc = EndBacktick->Tok.getLocation();
1026        resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
1027      }
1028      Tokens.back()->TokenText =
1029          StringRef(Tokens.back()->TokenText.data(),
1030                    EndOffset - Tokens.back()->TokenText.data());
1031
1032      unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
1033      if (EndOriginalColumn == 0) {
1034        SourceLocation Loc = EndBacktick->Tok.getLocation();
1035        EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
1036      }
1037      // If the ` is further down within the token (e.g. in a comment).
1038      EndOriginalColumn += CommentBacktickPos;
1039
1040      if (IsMultiline) {
1041        // ColumnWidth is from backtick to last token in line.
1042        // LastLineColumnWidth is 0 to backtick.
1043        // x = `some content
1044        //     until here`;
1045        Tokens.back()->ColumnWidth =
1046            EndColumnInFirstLine - Tokens.back()->OriginalColumn;
1047        // +1 for the ` itself.
1048        Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
1049        Tokens.back()->IsMultiline = true;
1050      } else {
1051        // Token simply spans from start to end, +1 for the ` itself.
1052        Tokens.back()->ColumnWidth =
1053            EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
1054      }
1055      return true;
1056    }
1057    return false;
1058  }
1059
1060  bool tryMerge_TMacro() {
1061    if (Tokens.size() < 4)
1062      return false;
1063    FormatToken *Last = Tokens.back();
1064    if (!Last->is(tok::r_paren))
1065      return false;
1066
1067    FormatToken *String = Tokens[Tokens.size() - 2];
1068    if (!String->is(tok::string_literal) || String->IsMultiline)
1069      return false;
1070
1071    if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1072      return false;
1073
1074    FormatToken *Macro = Tokens[Tokens.size() - 4];
1075    if (Macro->TokenText != "_T")
1076      return false;
1077
1078    const char *Start = Macro->TokenText.data();
1079    const char *End = Last->TokenText.data() + Last->TokenText.size();
1080    String->TokenText = StringRef(Start, End - Start);
1081    String->IsFirst = Macro->IsFirst;
1082    String->LastNewlineOffset = Macro->LastNewlineOffset;
1083    String->WhitespaceRange = Macro->WhitespaceRange;
1084    String->OriginalColumn = Macro->OriginalColumn;
1085    String->ColumnWidth = encoding::columnWidthWithTabs(
1086        String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1087    String->NewlinesBefore = Macro->NewlinesBefore;
1088    String->HasUnescapedNewline = Macro->HasUnescapedNewline;
1089
1090    Tokens.pop_back();
1091    Tokens.pop_back();
1092    Tokens.pop_back();
1093    Tokens.back() = String;
1094    return true;
1095  }
1096
1097  bool tryMergeConflictMarkers() {
1098    if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
1099      return false;
1100
1101    // Conflict lines look like:
1102    // <marker> <text from the vcs>
1103    // For example:
1104    // >>>>>>> /file/in/file/system at revision 1234
1105    //
1106    // We merge all tokens in a line that starts with a conflict marker
1107    // into a single token with a special token type that the unwrapped line
1108    // parser will use to correctly rebuild the underlying code.
1109
1110    FileID ID;
1111    // Get the position of the first token in the line.
1112    unsigned FirstInLineOffset;
1113    std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
1114        Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1115    StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
1116    // Calculate the offset of the start of the current line.
1117    auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
1118    if (LineOffset == StringRef::npos) {
1119      LineOffset = 0;
1120    } else {
1121      ++LineOffset;
1122    }
1123
1124    auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
1125    StringRef LineStart;
1126    if (FirstSpace == StringRef::npos) {
1127      LineStart = Buffer.substr(LineOffset);
1128    } else {
1129      LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
1130    }
1131
1132    TokenType Type = TT_Unknown;
1133    if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
1134      Type = TT_ConflictStart;
1135    } else if (LineStart == "|||||||" || LineStart == "=======" ||
1136               LineStart == "====") {
1137      Type = TT_ConflictAlternative;
1138    } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
1139      Type = TT_ConflictEnd;
1140    }
1141
1142    if (Type != TT_Unknown) {
1143      FormatToken *Next = Tokens.back();
1144
1145      Tokens.resize(FirstInLineIndex + 1);
1146      // We do not need to build a complete token here, as we will skip it
1147      // during parsing anyway (as we must not touch whitespace around conflict
1148      // markers).
1149      Tokens.back()->Type = Type;
1150      Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1151
1152      Tokens.push_back(Next);
1153      return true;
1154    }
1155
1156    return false;
1157  }
1158
1159  FormatToken *getStashedToken() {
1160    // Create a synthesized second '>' or '<' token.
1161    Token Tok = FormatTok->Tok;
1162    StringRef TokenText = FormatTok->TokenText;
1163
1164    unsigned OriginalColumn = FormatTok->OriginalColumn;
1165    FormatTok = new (Allocator.Allocate()) FormatToken;
1166    FormatTok->Tok = Tok;
1167    SourceLocation TokLocation =
1168        FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
1169    FormatTok->Tok.setLocation(TokLocation);
1170    FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
1171    FormatTok->TokenText = TokenText;
1172    FormatTok->ColumnWidth = 1;
1173    FormatTok->OriginalColumn = OriginalColumn + 1;
1174
1175    return FormatTok;
1176  }
1177
1178  FormatToken *getNextToken() {
1179    if (GreaterStashed) {
1180      GreaterStashed = false;
1181      return getStashedToken();
1182    }
1183    if (LessStashed) {
1184      LessStashed = false;
1185      return getStashedToken();
1186    }
1187
1188    FormatTok = new (Allocator.Allocate()) FormatToken;
1189    readRawToken(*FormatTok);
1190    SourceLocation WhitespaceStart =
1191        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1192    FormatTok->IsFirst = IsFirstToken;
1193    IsFirstToken = false;
1194
1195    // Consume and record whitespace until we find a significant token.
1196    unsigned WhitespaceLength = TrailingWhitespace;
1197    while (FormatTok->Tok.is(tok::unknown)) {
1198      StringRef Text = FormatTok->TokenText;
1199      auto EscapesNewline = [&](int pos) {
1200        // A '\r' here is just part of '\r\n'. Skip it.
1201        if (pos >= 0 && Text[pos] == '\r')
1202          --pos;
1203        // See whether there is an odd number of '\' before this.
1204        unsigned count = 0;
1205        for (; pos >= 0; --pos, ++count)
1206          if (Text[pos] != '\\')
1207            break;
1208        return count & 1;
1209      };
1210      // FIXME: This miscounts tok:unknown tokens that are not just
1211      // whitespace, e.g. a '`' character.
1212      for (int i = 0, e = Text.size(); i != e; ++i) {
1213        switch (Text[i]) {
1214        case '\n':
1215          ++FormatTok->NewlinesBefore;
1216          FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
1217          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1218          Column = 0;
1219          break;
1220        case '\r':
1221          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1222          Column = 0;
1223          break;
1224        case '\f':
1225        case '\v':
1226          Column = 0;
1227          break;
1228        case ' ':
1229          ++Column;
1230          break;
1231        case '\t':
1232          Column += Style.TabWidth - Column % Style.TabWidth;
1233          break;
1234        case '\\':
1235          if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
1236            FormatTok->Type = TT_ImplicitStringLiteral;
1237          break;
1238        default:
1239          FormatTok->Type = TT_ImplicitStringLiteral;
1240          break;
1241        }
1242        if (FormatTok->Type == TT_ImplicitStringLiteral)
1243          break;
1244      }
1245
1246      if (FormatTok->is(TT_ImplicitStringLiteral))
1247        break;
1248      WhitespaceLength += FormatTok->Tok.getLength();
1249
1250      readRawToken(*FormatTok);
1251    }
1252
1253    // In case the token starts with escaped newlines, we want to
1254    // take them into account as whitespace - this pattern is quite frequent
1255    // in macro definitions.
1256    // FIXME: Add a more explicit test.
1257    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1258           FormatTok->TokenText[1] == '\n') {
1259      ++FormatTok->NewlinesBefore;
1260      WhitespaceLength += 2;
1261      FormatTok->LastNewlineOffset = 2;
1262      Column = 0;
1263      FormatTok->TokenText = FormatTok->TokenText.substr(2);
1264    }
1265
1266    FormatTok->WhitespaceRange = SourceRange(
1267        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1268
1269    FormatTok->OriginalColumn = Column;
1270
1271    TrailingWhitespace = 0;
1272    if (FormatTok->Tok.is(tok::comment)) {
1273      // FIXME: Add the trimmed whitespace to Column.
1274      StringRef UntrimmedText = FormatTok->TokenText;
1275      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1276      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1277    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1278      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1279      FormatTok->Tok.setIdentifierInfo(&Info);
1280      FormatTok->Tok.setKind(Info.getTokenID());
1281      if (Style.Language == FormatStyle::LK_Java &&
1282          FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
1283                             tok::kw_operator)) {
1284        FormatTok->Tok.setKind(tok::identifier);
1285        FormatTok->Tok.setIdentifierInfo(nullptr);
1286      } else if (Style.Language == FormatStyle::LK_JavaScript &&
1287                 FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
1288                                    tok::kw_operator)) {
1289        FormatTok->Tok.setKind(tok::identifier);
1290        FormatTok->Tok.setIdentifierInfo(nullptr);
1291      }
1292    } else if (FormatTok->Tok.is(tok::greatergreater)) {
1293      FormatTok->Tok.setKind(tok::greater);
1294      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1295      GreaterStashed = true;
1296    } else if (FormatTok->Tok.is(tok::lessless)) {
1297      FormatTok->Tok.setKind(tok::less);
1298      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1299      LessStashed = true;
1300    }
1301
1302    // Now FormatTok is the next non-whitespace token.
1303
1304    StringRef Text = FormatTok->TokenText;
1305    size_t FirstNewlinePos = Text.find('\n');
1306    if (FirstNewlinePos == StringRef::npos) {
1307      // FIXME: ColumnWidth actually depends on the start column, we need to
1308      // take this into account when the token is moved.
1309      FormatTok->ColumnWidth =
1310          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1311      Column += FormatTok->ColumnWidth;
1312    } else {
1313      FormatTok->IsMultiline = true;
1314      // FIXME: ColumnWidth actually depends on the start column, we need to
1315      // take this into account when the token is moved.
1316      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1317          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1318
1319      // The last line of the token always starts in column 0.
1320      // Thus, the length can be precomputed even in the presence of tabs.
1321      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1322          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1323          Encoding);
1324      Column = FormatTok->LastLineColumnWidth;
1325    }
1326
1327    if (Style.Language == FormatStyle::LK_Cpp) {
1328      if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1329            Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1330                tok::pp_define) &&
1331          std::find(ForEachMacros.begin(), ForEachMacros.end(),
1332                    FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
1333        FormatTok->Type = TT_ForEachMacro;
1334      } else if (FormatTok->is(tok::identifier)) {
1335        if (MacroBlockBeginRegex.match(Text)) {
1336          FormatTok->Type = TT_MacroBlockBegin;
1337        } else if (MacroBlockEndRegex.match(Text)) {
1338          FormatTok->Type = TT_MacroBlockEnd;
1339        }
1340      }
1341    }
1342
1343    return FormatTok;
1344  }
1345
1346  FormatToken *FormatTok;
1347  bool IsFirstToken;
1348  bool GreaterStashed, LessStashed;
1349  unsigned Column;
1350  unsigned TrailingWhitespace;
1351  std::unique_ptr<Lexer> Lex;
1352  SourceManager &SourceMgr;
1353  FileID ID;
1354  FormatStyle &Style;
1355  IdentifierTable IdentTable;
1356  AdditionalKeywords Keywords;
1357  encoding::Encoding Encoding;
1358  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1359  // Index (in 'Tokens') of the last token that starts a new line.
1360  unsigned FirstInLineIndex;
1361  SmallVector<FormatToken *, 16> Tokens;
1362  SmallVector<IdentifierInfo *, 8> ForEachMacros;
1363
1364  bool FormattingDisabled;
1365
1366  llvm::Regex MacroBlockBeginRegex;
1367  llvm::Regex MacroBlockEndRegex;
1368
1369  void readRawToken(FormatToken &Tok) {
1370    Lex->LexFromRawLexer(Tok.Tok);
1371    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1372                              Tok.Tok.getLength());
1373    // For formatting, treat unterminated string literals like normal string
1374    // literals.
1375    if (Tok.is(tok::unknown)) {
1376      if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1377        Tok.Tok.setKind(tok::string_literal);
1378        Tok.IsUnterminatedLiteral = true;
1379      } else if (Style.Language == FormatStyle::LK_JavaScript &&
1380                 Tok.TokenText == "''") {
1381        Tok.Tok.setKind(tok::char_constant);
1382      }
1383    }
1384
1385    if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1386                                 Tok.TokenText == "/* clang-format on */")) {
1387      FormattingDisabled = false;
1388    }
1389
1390    Tok.Finalized = FormattingDisabled;
1391
1392    if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1393                                 Tok.TokenText == "/* clang-format off */")) {
1394      FormattingDisabled = true;
1395    }
1396  }
1397
1398  void resetLexer(unsigned Offset) {
1399    StringRef Buffer = SourceMgr.getBufferData(ID);
1400    Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1401                        getFormattingLangOpts(Style), Buffer.begin(),
1402                        Buffer.begin() + Offset, Buffer.end()));
1403    Lex->SetKeepWhitespaceMode(true);
1404    TrailingWhitespace = 0;
1405  }
1406};
1407
1408static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1409  switch (Language) {
1410  case FormatStyle::LK_Cpp:
1411    return "C++";
1412  case FormatStyle::LK_Java:
1413    return "Java";
1414  case FormatStyle::LK_JavaScript:
1415    return "JavaScript";
1416  case FormatStyle::LK_Proto:
1417    return "Proto";
1418  default:
1419    return "Unknown";
1420  }
1421}
1422
1423class Formatter : public UnwrappedLineConsumer {
1424public:
1425  Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
1426            ArrayRef<CharSourceRange> Ranges)
1427      : Style(Style), ID(ID), SourceMgr(SourceMgr),
1428        Whitespaces(SourceMgr, Style,
1429                    inputUsesCRLF(SourceMgr.getBufferData(ID))),
1430        Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1431        Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
1432    DEBUG(llvm::dbgs() << "File encoding: "
1433                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1434                                                               : "unknown")
1435                       << "\n");
1436    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1437                       << "\n");
1438  }
1439
1440  tooling::Replacements format(bool *IncompleteFormat) {
1441    tooling::Replacements Result;
1442    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
1443
1444    UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
1445                               *this);
1446    Parser.parse();
1447    assert(UnwrappedLines.rbegin()->empty());
1448    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1449         ++Run) {
1450      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1451      SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1452      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1453        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1454      }
1455      tooling::Replacements RunResult =
1456          format(AnnotatedLines, Tokens, IncompleteFormat);
1457      DEBUG({
1458        llvm::dbgs() << "Replacements for run " << Run << ":\n";
1459        for (tooling::Replacements::iterator I = RunResult.begin(),
1460                                             E = RunResult.end();
1461             I != E; ++I) {
1462          llvm::dbgs() << I->toString() << "\n";
1463        }
1464      });
1465      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1466        delete AnnotatedLines[i];
1467      }
1468      Result.insert(RunResult.begin(), RunResult.end());
1469      Whitespaces.reset();
1470    }
1471    return Result;
1472  }
1473
1474  tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1475                               FormatTokenLexer &Tokens,
1476                               bool *IncompleteFormat) {
1477    TokenAnnotator Annotator(Style, Tokens.getKeywords());
1478    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1479      Annotator.annotate(*AnnotatedLines[i]);
1480    }
1481    deriveLocalStyle(AnnotatedLines);
1482    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1483      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1484    }
1485    computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1486
1487    Annotator.setCommentLineLevels(AnnotatedLines);
1488    ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
1489                                  Whitespaces, Encoding,
1490                                  BinPackInconclusiveFunctions);
1491    UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
1492                           IncompleteFormat)
1493        .format(AnnotatedLines);
1494    return Whitespaces.generateReplacements();
1495  }
1496
1497private:
1498  // Determines which lines are affected by the SourceRanges given as input.
1499  // Returns \c true if at least one line between I and E or one of their
1500  // children is affected.
1501  bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1502                            SmallVectorImpl<AnnotatedLine *>::iterator E) {
1503    bool SomeLineAffected = false;
1504    const AnnotatedLine *PreviousLine = nullptr;
1505    while (I != E) {
1506      AnnotatedLine *Line = *I;
1507      Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1508
1509      // If a line is part of a preprocessor directive, it needs to be formatted
1510      // if any token within the directive is affected.
1511      if (Line->InPPDirective) {
1512        FormatToken *Last = Line->Last;
1513        SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1514        while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1515          Last = (*PPEnd)->Last;
1516          ++PPEnd;
1517        }
1518
1519        if (affectsTokenRange(*Line->First, *Last,
1520                              /*IncludeLeadingNewlines=*/false)) {
1521          SomeLineAffected = true;
1522          markAllAsAffected(I, PPEnd);
1523        }
1524        I = PPEnd;
1525        continue;
1526      }
1527
1528      if (nonPPLineAffected(Line, PreviousLine))
1529        SomeLineAffected = true;
1530
1531      PreviousLine = Line;
1532      ++I;
1533    }
1534    return SomeLineAffected;
1535  }
1536
1537  // Determines whether 'Line' is affected by the SourceRanges given as input.
1538  // Returns \c true if line or one if its children is affected.
1539  bool nonPPLineAffected(AnnotatedLine *Line,
1540                         const AnnotatedLine *PreviousLine) {
1541    bool SomeLineAffected = false;
1542    Line->ChildrenAffected =
1543        computeAffectedLines(Line->Children.begin(), Line->Children.end());
1544    if (Line->ChildrenAffected)
1545      SomeLineAffected = true;
1546
1547    // Stores whether one of the line's tokens is directly affected.
1548    bool SomeTokenAffected = false;
1549    // Stores whether we need to look at the leading newlines of the next token
1550    // in order to determine whether it was affected.
1551    bool IncludeLeadingNewlines = false;
1552
1553    // Stores whether the first child line of any of this line's tokens is
1554    // affected.
1555    bool SomeFirstChildAffected = false;
1556
1557    for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1558      // Determine whether 'Tok' was affected.
1559      if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1560        SomeTokenAffected = true;
1561
1562      // Determine whether the first child of 'Tok' was affected.
1563      if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1564        SomeFirstChildAffected = true;
1565
1566      IncludeLeadingNewlines = Tok->Children.empty();
1567    }
1568
1569    // Was this line moved, i.e. has it previously been on the same line as an
1570    // affected line?
1571    bool LineMoved = PreviousLine && PreviousLine->Affected &&
1572                     Line->First->NewlinesBefore == 0;
1573
1574    bool IsContinuedComment =
1575        Line->First->is(tok::comment) && Line->First->Next == nullptr &&
1576        Line->First->NewlinesBefore < 2 && PreviousLine &&
1577        PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
1578
1579    if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1580        IsContinuedComment) {
1581      Line->Affected = true;
1582      SomeLineAffected = true;
1583    }
1584    return SomeLineAffected;
1585  }
1586
1587  // Marks all lines between I and E as well as all their children as affected.
1588  void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1589                         SmallVectorImpl<AnnotatedLine *>::iterator E) {
1590    while (I != E) {
1591      (*I)->Affected = true;
1592      markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1593      ++I;
1594    }
1595  }
1596
1597  // Returns true if the range from 'First' to 'Last' intersects with one of the
1598  // input ranges.
1599  bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1600                         bool IncludeLeadingNewlines) {
1601    SourceLocation Start = First.WhitespaceRange.getBegin();
1602    if (!IncludeLeadingNewlines)
1603      Start = Start.getLocWithOffset(First.LastNewlineOffset);
1604    SourceLocation End = Last.getStartOfNonWhitespace();
1605    End = End.getLocWithOffset(Last.TokenText.size());
1606    CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1607    return affectsCharSourceRange(Range);
1608  }
1609
1610  // Returns true if one of the input ranges intersect the leading empty lines
1611  // before 'Tok'.
1612  bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1613    CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1614        Tok.WhitespaceRange.getBegin(),
1615        Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1616    return affectsCharSourceRange(EmptyLineRange);
1617  }
1618
1619  // Returns true if 'Range' intersects with one of the input ranges.
1620  bool affectsCharSourceRange(const CharSourceRange &Range) {
1621    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1622                                                          E = Ranges.end();
1623         I != E; ++I) {
1624      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1625          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1626        return true;
1627    }
1628    return false;
1629  }
1630
1631  static bool inputUsesCRLF(StringRef Text) {
1632    return Text.count('\r') * 2 > Text.count('\n');
1633  }
1634
1635  bool
1636  hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1637    for (const AnnotatedLine* Line : Lines) {
1638      if (hasCpp03IncompatibleFormat(Line->Children))
1639        return true;
1640      for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
1641        if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1642          if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1643            return true;
1644          if (Tok->is(TT_TemplateCloser) &&
1645              Tok->Previous->is(TT_TemplateCloser))
1646            return true;
1647        }
1648      }
1649    }
1650    return false;
1651  }
1652
1653  int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1654    int AlignmentDiff = 0;
1655    for (const AnnotatedLine* Line : Lines) {
1656      AlignmentDiff += countVariableAlignments(Line->Children);
1657      for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
1658        if (!Tok->is(TT_PointerOrReference))
1659          continue;
1660        bool SpaceBefore =
1661            Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1662        bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
1663                          Tok->Next->WhitespaceRange.getEnd();
1664        if (SpaceBefore && !SpaceAfter)
1665          ++AlignmentDiff;
1666        if (!SpaceBefore && SpaceAfter)
1667          --AlignmentDiff;
1668      }
1669    }
1670    return AlignmentDiff;
1671  }
1672
1673  void
1674  deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1675    bool HasBinPackedFunction = false;
1676    bool HasOnePerLineFunction = false;
1677    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1678      if (!AnnotatedLines[i]->First->Next)
1679        continue;
1680      FormatToken *Tok = AnnotatedLines[i]->First->Next;
1681      while (Tok->Next) {
1682        if (Tok->PackingKind == PPK_BinPacked)
1683          HasBinPackedFunction = true;
1684        if (Tok->PackingKind == PPK_OnePerLine)
1685          HasOnePerLineFunction = true;
1686
1687        Tok = Tok->Next;
1688      }
1689    }
1690    if (Style.DerivePointerAlignment)
1691      Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0
1692                                   ? FormatStyle::PAS_Left
1693                                   : FormatStyle::PAS_Right;
1694    if (Style.Standard == FormatStyle::LS_Auto)
1695      Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines)
1696                           ? FormatStyle::LS_Cpp11
1697                           : FormatStyle::LS_Cpp03;
1698    BinPackInconclusiveFunctions =
1699        HasBinPackedFunction || !HasOnePerLineFunction;
1700  }
1701
1702  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1703    assert(!UnwrappedLines.empty());
1704    UnwrappedLines.back().push_back(TheLine);
1705  }
1706
1707  void finishRun() override {
1708    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1709  }
1710
1711  FormatStyle Style;
1712  FileID ID;
1713  SourceManager &SourceMgr;
1714  WhitespaceManager Whitespaces;
1715  SmallVector<CharSourceRange, 8> Ranges;
1716  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1717
1718  encoding::Encoding Encoding;
1719  bool BinPackInconclusiveFunctions;
1720};
1721
1722struct IncludeDirective {
1723  StringRef Filename;
1724  StringRef Text;
1725  unsigned Offset;
1726  int Category;
1727};
1728
1729} // end anonymous namespace
1730
1731// Determines whether 'Ranges' intersects with ('Start', 'End').
1732static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
1733                         unsigned End) {
1734  for (auto Range : Ranges) {
1735    if (Range.getOffset() < End &&
1736        Range.getOffset() + Range.getLength() > Start)
1737      return true;
1738  }
1739  return false;
1740}
1741
1742// Sorts a block of includes given by 'Includes' alphabetically adding the
1743// necessary replacement to 'Replaces'. 'Includes' must be in strict source
1744// order.
1745static void sortIncludes(const FormatStyle &Style,
1746                         const SmallVectorImpl<IncludeDirective> &Includes,
1747                         ArrayRef<tooling::Range> Ranges, StringRef FileName,
1748                         tooling::Replacements &Replaces, unsigned *Cursor) {
1749  if (!affectsRange(Ranges, Includes.front().Offset,
1750                    Includes.back().Offset + Includes.back().Text.size()))
1751    return;
1752  SmallVector<unsigned, 16> Indices;
1753  for (unsigned i = 0, e = Includes.size(); i != e; ++i)
1754    Indices.push_back(i);
1755  std::sort(Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
1756    return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
1757           std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
1758  });
1759
1760  // If the #includes are out of order, we generate a single replacement fixing
1761  // the entire block. Otherwise, no replacement is generated.
1762  bool OutOfOrder = false;
1763  for (unsigned i = 1, e = Indices.size(); i != e; ++i) {
1764    if (Indices[i] != i) {
1765      OutOfOrder = true;
1766      break;
1767    }
1768  }
1769  if (!OutOfOrder)
1770    return;
1771
1772  std::string result;
1773  bool CursorMoved = false;
1774  for (unsigned Index : Indices) {
1775    if (!result.empty())
1776      result += "\n";
1777    result += Includes[Index].Text;
1778
1779    if (Cursor && !CursorMoved) {
1780      unsigned Start = Includes[Index].Offset;
1781      unsigned End = Start + Includes[Index].Text.size();
1782      if (*Cursor >= Start && *Cursor < End) {
1783        *Cursor = Includes.front().Offset + result.size() + *Cursor - End;
1784        CursorMoved = true;
1785      }
1786    }
1787  }
1788
1789  // Sorting #includes shouldn't change their total number of characters.
1790  // This would otherwise mess up 'Ranges'.
1791  assert(result.size() ==
1792         Includes.back().Offset + Includes.back().Text.size() -
1793             Includes.front().Offset);
1794
1795  Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset,
1796                                       result.size(), result));
1797}
1798
1799tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
1800                                   ArrayRef<tooling::Range> Ranges,
1801                                   StringRef FileName, unsigned *Cursor) {
1802  tooling::Replacements Replaces;
1803  if (!Style.SortIncludes)
1804    return Replaces;
1805
1806  unsigned Prev = 0;
1807  unsigned SearchFrom = 0;
1808  llvm::Regex IncludeRegex(
1809      R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))");
1810  SmallVector<StringRef, 4> Matches;
1811  SmallVector<IncludeDirective, 16> IncludesInBlock;
1812
1813  // In compiled files, consider the first #include to be the main #include of
1814  // the file if it is not a system #include. This ensures that the header
1815  // doesn't have hidden dependencies
1816  // (http://llvm.org/docs/CodingStandards.html#include-style).
1817  //
1818  // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
1819  // cases where the first #include is unlikely to be the main header.
1820  bool IsSource = FileName.endswith(".c") || FileName.endswith(".cc") ||
1821                  FileName.endswith(".cpp") || FileName.endswith(".c++") ||
1822                  FileName.endswith(".cxx") || FileName.endswith(".m") ||
1823                  FileName.endswith(".mm");
1824  StringRef FileStem = llvm::sys::path::stem(FileName);
1825  bool FirstIncludeBlock = true;
1826  bool MainIncludeFound = false;
1827
1828  // Create pre-compiled regular expressions for the #include categories.
1829  SmallVector<llvm::Regex, 4> CategoryRegexs;
1830  for (const auto &Category : Style.IncludeCategories)
1831    CategoryRegexs.emplace_back(Category.Regex);
1832
1833  bool FormattingOff = false;
1834
1835  for (;;) {
1836    auto Pos = Code.find('\n', SearchFrom);
1837    StringRef Line =
1838        Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
1839
1840    StringRef Trimmed = Line.trim();
1841    if (Trimmed == "// clang-format off")
1842      FormattingOff = true;
1843    else if (Trimmed == "// clang-format on")
1844      FormattingOff = false;
1845
1846    if (!FormattingOff && !Line.endswith("\\")) {
1847      if (IncludeRegex.match(Line, &Matches)) {
1848        StringRef IncludeName = Matches[2];
1849        int Category = INT_MAX;
1850        for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) {
1851          if (CategoryRegexs[i].match(IncludeName)) {
1852            Category = Style.IncludeCategories[i].Priority;
1853            break;
1854          }
1855        }
1856        if (IsSource && !MainIncludeFound && Category > 0 &&
1857            FirstIncludeBlock && IncludeName.startswith("\"")) {
1858          StringRef HeaderStem =
1859              llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
1860          if (FileStem.startswith(HeaderStem)) {
1861            Category = 0;
1862            MainIncludeFound = true;
1863          }
1864        }
1865        IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
1866      } else if (!IncludesInBlock.empty()) {
1867        sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
1868                     Cursor);
1869        IncludesInBlock.clear();
1870        FirstIncludeBlock = false;
1871      }
1872      Prev = Pos + 1;
1873    }
1874    if (Pos == StringRef::npos || Pos + 1 == Code.size())
1875      break;
1876    SearchFrom = Pos + 1;
1877  }
1878  if (!IncludesInBlock.empty())
1879    sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
1880  return Replaces;
1881}
1882
1883tooling::Replacements reformat(const FormatStyle &Style,
1884                               SourceManager &SourceMgr, FileID ID,
1885                               ArrayRef<CharSourceRange> Ranges,
1886                               bool *IncompleteFormat) {
1887  FormatStyle Expanded = expandPresets(Style);
1888  if (Expanded.DisableFormat)
1889    return tooling::Replacements();
1890  Formatter formatter(Expanded, SourceMgr, ID, Ranges);
1891  return formatter.format(IncompleteFormat);
1892}
1893
1894tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1895                               ArrayRef<tooling::Range> Ranges,
1896                               StringRef FileName, bool *IncompleteFormat) {
1897  if (Style.DisableFormat)
1898    return tooling::Replacements();
1899
1900  IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
1901      new vfs::InMemoryFileSystem);
1902  FileManager Files(FileSystemOptions(), InMemoryFileSystem);
1903  DiagnosticsEngine Diagnostics(
1904      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1905      new DiagnosticOptions);
1906  SourceManager SourceMgr(Diagnostics, Files);
1907  InMemoryFileSystem->addFile(
1908      FileName, 0, llvm::MemoryBuffer::getMemBuffer(
1909                       Code, FileName, /*RequiresNullTerminator=*/false));
1910  FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(),
1911                                     clang::SrcMgr::C_User);
1912  SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1913  std::vector<CharSourceRange> CharRanges;
1914  for (const tooling::Range &Range : Ranges) {
1915    SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
1916    SourceLocation End = Start.getLocWithOffset(Range.getLength());
1917    CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1918  }
1919  return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
1920}
1921
1922LangOptions getFormattingLangOpts(const FormatStyle &Style) {
1923  LangOptions LangOpts;
1924  LangOpts.CPlusPlus = 1;
1925  LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1926  LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1927  LangOpts.LineComment = 1;
1928  bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
1929  LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
1930  LangOpts.Bool = 1;
1931  LangOpts.ObjC1 = 1;
1932  LangOpts.ObjC2 = 1;
1933  LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
1934  LangOpts.DeclSpecKeyword = 1; // To get __declspec.
1935  return LangOpts;
1936}
1937
1938const char *StyleOptionHelpDescription =
1939    "Coding style, currently supports:\n"
1940    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
1941    "Use -style=file to load style configuration from\n"
1942    ".clang-format file located in one of the parent\n"
1943    "directories of the source file (or current\n"
1944    "directory for stdin).\n"
1945    "Use -style=\"{key: value, ...}\" to set specific\n"
1946    "parameters, e.g.:\n"
1947    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1948
1949static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1950  if (FileName.endswith(".java"))
1951    return FormatStyle::LK_Java;
1952  if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts"))
1953    return FormatStyle::LK_JavaScript; // JavaScript or TypeScript.
1954  if (FileName.endswith_lower(".proto") ||
1955      FileName.endswith_lower(".protodevel"))
1956    return FormatStyle::LK_Proto;
1957  if (FileName.endswith_lower(".td"))
1958    return FormatStyle::LK_TableGen;
1959  return FormatStyle::LK_Cpp;
1960}
1961
1962FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1963                     StringRef FallbackStyle) {
1964  FormatStyle Style = getLLVMStyle();
1965  Style.Language = getLanguageByFileName(FileName);
1966  if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1967    llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1968                 << "\" using LLVM style\n";
1969    return Style;
1970  }
1971
1972  if (StyleName.startswith("{")) {
1973    // Parse YAML/JSON style from the command line.
1974    if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
1975      llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1976                   << FallbackStyle << " style\n";
1977    }
1978    return Style;
1979  }
1980
1981  if (!StyleName.equals_lower("file")) {
1982    if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1983      llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1984                   << " style\n";
1985    return Style;
1986  }
1987
1988  // Look for .clang-format/_clang-format file in the file's parent directories.
1989  SmallString<128> UnsuitableConfigFiles;
1990  SmallString<128> Path(FileName);
1991  llvm::sys::fs::make_absolute(Path);
1992  for (StringRef Directory = Path; !Directory.empty();
1993       Directory = llvm::sys::path::parent_path(Directory)) {
1994    if (!llvm::sys::fs::is_directory(Directory))
1995      continue;
1996    SmallString<128> ConfigFile(Directory);
1997
1998    llvm::sys::path::append(ConfigFile, ".clang-format");
1999    DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2000    bool IsFile = false;
2001    // Ignore errors from is_regular_file: we only need to know if we can read
2002    // the file or not.
2003    llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
2004
2005    if (!IsFile) {
2006      // Try _clang-format too, since dotfiles are not commonly used on Windows.
2007      ConfigFile = Directory;
2008      llvm::sys::path::append(ConfigFile, "_clang-format");
2009      DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2010      llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
2011    }
2012
2013    if (IsFile) {
2014      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
2015          llvm::MemoryBuffer::getFile(ConfigFile.c_str());
2016      if (std::error_code EC = Text.getError()) {
2017        llvm::errs() << EC.message() << "\n";
2018        break;
2019      }
2020      if (std::error_code ec =
2021              parseConfiguration(Text.get()->getBuffer(), &Style)) {
2022        if (ec == ParseError::Unsuitable) {
2023          if (!UnsuitableConfigFiles.empty())
2024            UnsuitableConfigFiles.append(", ");
2025          UnsuitableConfigFiles.append(ConfigFile);
2026          continue;
2027        }
2028        llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
2029                     << "\n";
2030        break;
2031      }
2032      DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
2033      return Style;
2034    }
2035  }
2036  if (!UnsuitableConfigFiles.empty()) {
2037    llvm::errs() << "Configuration file(s) do(es) not support "
2038                 << getLanguageName(Style.Language) << ": "
2039                 << UnsuitableConfigFiles << "\n";
2040  }
2041  return Style;
2042}
2043
2044} // namespace format
2045} // namespace clang
2046