1303233Sdim//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
2303233Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6303233Sdim//
7303233Sdim//===----------------------------------------------------------------------===//
8303233Sdim///
9303233Sdim/// \file
10341825Sdim/// This file implements an abstract TokenAnalyzer and associated helper
11303233Sdim/// classes. TokenAnalyzer can be extended to generate replacements based on
12303233Sdim/// an annotated and pre-processed token stream.
13303233Sdim///
14303233Sdim//===----------------------------------------------------------------------===//
15303233Sdim
16303233Sdim#include "TokenAnalyzer.h"
17303233Sdim#include "AffectedRangeManager.h"
18303233Sdim#include "Encoding.h"
19303233Sdim#include "FormatToken.h"
20303233Sdim#include "FormatTokenLexer.h"
21303233Sdim#include "TokenAnnotator.h"
22303233Sdim#include "UnwrappedLineParser.h"
23303233Sdim#include "clang/Basic/Diagnostic.h"
24303233Sdim#include "clang/Basic/DiagnosticOptions.h"
25303233Sdim#include "clang/Basic/FileManager.h"
26303233Sdim#include "clang/Basic/SourceManager.h"
27303233Sdim#include "clang/Format/Format.h"
28303233Sdim#include "llvm/ADT/STLExtras.h"
29303233Sdim#include "llvm/Support/Debug.h"
30303233Sdim
31303233Sdim#define DEBUG_TYPE "format-formatter"
32303233Sdim
33303233Sdimnamespace clang {
34303233Sdimnamespace format {
35303233Sdim
36341825SdimEnvironment::Environment(StringRef Code, StringRef FileName,
37341825Sdim                         ArrayRef<tooling::Range> Ranges,
38341825Sdim                         unsigned FirstStartColumn, unsigned NextStartColumn,
39341825Sdim                         unsigned LastStartColumn)
40341825Sdim    : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()),
41341825Sdim      ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn),
42341825Sdim      NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {
43341825Sdim  SourceLocation StartOfFile = SM.getLocForStartOfFile(ID);
44303233Sdim  for (const tooling::Range &Range : Ranges) {
45303233Sdim    SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
46303233Sdim    SourceLocation End = Start.getLocWithOffset(Range.getLength());
47303233Sdim    CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
48303233Sdim  }
49303233Sdim}
50303233Sdim
51303233SdimTokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
52303233Sdim    : Style(Style), Env(Env),
53303233Sdim      AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
54303233Sdim      UnwrappedLines(1),
55303233Sdim      Encoding(encoding::detectEncoding(
56303233Sdim          Env.getSourceManager().getBufferData(Env.getFileID()))) {
57341825Sdim  LLVM_DEBUG(
58303233Sdim      llvm::dbgs() << "File encoding: "
59303233Sdim                   << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
60303233Sdim                   << "\n");
61341825Sdim  LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
62341825Sdim                          << "\n");
63303233Sdim}
64303233Sdim
65327952Sdimstd::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() {
66303233Sdim  tooling::Replacements Result;
67327952Sdim  FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(),
68327952Sdim                          Env.getFirstStartColumn(), Style, Encoding);
69303233Sdim
70327952Sdim  UnwrappedLineParser Parser(Style, Tokens.getKeywords(),
71327952Sdim                             Env.getFirstStartColumn(), Tokens.lex(), *this);
72303233Sdim  Parser.parse();
73303233Sdim  assert(UnwrappedLines.rbegin()->empty());
74327952Sdim  unsigned Penalty = 0;
75303233Sdim  for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
76341825Sdim    LLVM_DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
77303233Sdim    SmallVector<AnnotatedLine *, 16> AnnotatedLines;
78303233Sdim
79303233Sdim    TokenAnnotator Annotator(Style, Tokens.getKeywords());
80303233Sdim    for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
81303233Sdim      AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
82303233Sdim      Annotator.annotate(*AnnotatedLines.back());
83303233Sdim    }
84303233Sdim
85327952Sdim    std::pair<tooling::Replacements, unsigned> RunResult =
86314564Sdim        analyze(Annotator, AnnotatedLines, Tokens);
87303233Sdim
88341825Sdim    LLVM_DEBUG({
89303233Sdim      llvm::dbgs() << "Replacements for run " << Run << ":\n";
90327952Sdim      for (tooling::Replacements::const_iterator I = RunResult.first.begin(),
91327952Sdim                                                 E = RunResult.first.end();
92303233Sdim           I != E; ++I) {
93303233Sdim        llvm::dbgs() << I->toString() << "\n";
94303233Sdim      }
95303233Sdim    });
96303233Sdim    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
97303233Sdim      delete AnnotatedLines[i];
98303233Sdim    }
99327952Sdim
100327952Sdim    Penalty += RunResult.second;
101327952Sdim    for (const auto &R : RunResult.first) {
102314564Sdim      auto Err = Result.add(R);
103314564Sdim      // FIXME: better error handling here. For now, simply return an empty
104314564Sdim      // Replacements to indicate failure.
105314564Sdim      if (Err) {
106314564Sdim        llvm::errs() << llvm::toString(std::move(Err)) << "\n";
107327952Sdim        return {tooling::Replacements(), 0};
108314564Sdim      }
109314564Sdim    }
110303233Sdim  }
111327952Sdim  return {Result, Penalty};
112303233Sdim}
113303233Sdim
114303233Sdimvoid TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
115303233Sdim  assert(!UnwrappedLines.empty());
116303233Sdim  UnwrappedLines.back().push_back(TheLine);
117303233Sdim}
118303233Sdim
119303233Sdimvoid TokenAnalyzer::finishRun() {
120303233Sdim  UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
121303233Sdim}
122303233Sdim
123303233Sdim} // end namespace format
124303233Sdim} // end namespace clang
125