Tokens.cpp revision 363496
1//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "clang/Tooling/Syntax/Tokens.h"
9
10#include "clang/Basic/Diagnostic.h"
11#include "clang/Basic/IdentifierTable.h"
12#include "clang/Basic/LLVM.h"
13#include "clang/Basic/LangOptions.h"
14#include "clang/Basic/SourceLocation.h"
15#include "clang/Basic/SourceManager.h"
16#include "clang/Basic/TokenKinds.h"
17#include "clang/Lex/PPCallbacks.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Lex/Token.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/None.h"
22#include "llvm/ADT/Optional.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/ErrorHandling.h"
26#include "llvm/Support/FormatVariadic.h"
27#include "llvm/Support/raw_ostream.h"
28#include <algorithm>
29#include <cassert>
30#include <iterator>
31#include <string>
32#include <utility>
33#include <vector>
34
35using namespace clang;
36using namespace clang::syntax;
37
38syntax::Token::Token(SourceLocation Location, unsigned Length,
39                     tok::TokenKind Kind)
40    : Location(Location), Length(Length), Kind(Kind) {
41  assert(Location.isValid());
42}
43
44syntax::Token::Token(const clang::Token &T)
45    : Token(T.getLocation(), T.getLength(), T.getKind()) {
46  assert(!T.isAnnotation());
47}
48
49llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
50  bool Invalid = false;
51  const char *Start = SM.getCharacterData(location(), &Invalid);
52  assert(!Invalid);
53  return llvm::StringRef(Start, length());
54}
55
56FileRange syntax::Token::range(const SourceManager &SM) const {
57  assert(location().isFileID() && "must be a spelled token");
58  FileID File;
59  unsigned StartOffset;
60  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
61  return FileRange(File, StartOffset, StartOffset + length());
62}
63
64FileRange syntax::Token::range(const SourceManager &SM,
65                               const syntax::Token &First,
66                               const syntax::Token &Last) {
67  auto F = First.range(SM);
68  auto L = Last.range(SM);
69  assert(F.file() == L.file() && "tokens from different files");
70  assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens");
71  return FileRange(F.file(), F.beginOffset(), L.endOffset());
72}
73
74llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
75  return OS << T.str();
76}
77
78FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
79    : File(File), Begin(BeginOffset), End(EndOffset) {
80  assert(File.isValid());
81  assert(BeginOffset <= EndOffset);
82}
83
84FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
85                     unsigned Length) {
86  assert(BeginLoc.isValid());
87  assert(BeginLoc.isFileID());
88
89  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
90  End = Begin + Length;
91}
92FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
93                     SourceLocation EndLoc) {
94  assert(BeginLoc.isValid());
95  assert(BeginLoc.isFileID());
96  assert(EndLoc.isValid());
97  assert(EndLoc.isFileID());
98  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
99  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
100
101  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
102  End = SM.getFileOffset(EndLoc);
103}
104
105llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
106                                      const FileRange &R) {
107  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
108                             R.file().getHashValue(), R.beginOffset(),
109                             R.endOffset());
110}
111
112llvm::StringRef FileRange::text(const SourceManager &SM) const {
113  bool Invalid = false;
114  StringRef Text = SM.getBufferData(File, &Invalid);
115  if (Invalid)
116    return "";
117  assert(Begin <= Text.size());
118  assert(End <= Text.size());
119  return Text.substr(Begin, length());
120}
121
122llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
123  if (R.isInvalid())
124    return {};
125  const Token *Begin =
126      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
127        return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin());
128      });
129  const Token *End =
130      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
131        return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location());
132      });
133  if (Begin > End)
134    return {};
135  return {Begin, End};
136}
137
138CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
139  return CharSourceRange(
140      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
141      /*IsTokenRange=*/false);
142}
143
144std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
145TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
146  assert(Expanded);
147  assert(ExpandedTokens.data() <= Expanded &&
148         Expanded < ExpandedTokens.data() + ExpandedTokens.size());
149
150  auto FileIt = Files.find(
151      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
152  assert(FileIt != Files.end() && "no file for an expanded token");
153
154  const MarkedFile &File = FileIt->second;
155
156  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
157  // Find the first mapping that produced tokens after \p Expanded.
158  auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
159    return M.BeginExpanded <= ExpandedIndex;
160  });
161  // Our token could only be produced by the previous mapping.
162  if (It == File.Mappings.begin()) {
163    // No previous mapping, no need to modify offsets.
164    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr};
165  }
166  --It; // 'It' now points to last mapping that started before our token.
167
168  // Check if the token is part of the mapping.
169  if (ExpandedIndex < It->EndExpanded)
170    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It};
171
172  // Not part of the mapping, use the index from previous mapping to compute the
173  // corresponding spelled token.
174  return {
175      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
176      /*Mapping*/ nullptr};
177}
178
179llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
180  auto It = Files.find(FID);
181  assert(It != Files.end());
182  return It->second.SpelledTokens;
183}
184
185std::string TokenBuffer::Mapping::str() const {
186  return llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
187                       BeginSpelled, EndSpelled, BeginExpanded, EndExpanded);
188}
189
190llvm::Optional<llvm::ArrayRef<syntax::Token>>
191TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
192  // Mapping an empty range is ambiguous in case of empty mappings at either end
193  // of the range, bail out in that case.
194  if (Expanded.empty())
195    return llvm::None;
196
197  // FIXME: also allow changes uniquely mapping to macro arguments.
198
199  const syntax::Token *BeginSpelled;
200  const Mapping *BeginMapping;
201  std::tie(BeginSpelled, BeginMapping) =
202      spelledForExpandedToken(&Expanded.front());
203
204  const syntax::Token *LastSpelled;
205  const Mapping *LastMapping;
206  std::tie(LastSpelled, LastMapping) =
207      spelledForExpandedToken(&Expanded.back());
208
209  FileID FID = SourceMgr->getFileID(BeginSpelled->location());
210  // FIXME: Handle multi-file changes by trying to map onto a common root.
211  if (FID != SourceMgr->getFileID(LastSpelled->location()))
212    return llvm::None;
213
214  const MarkedFile &File = Files.find(FID)->second;
215
216  // Do not allow changes that cross macro expansion boundaries.
217  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
218  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
219  if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded)
220    return llvm::None;
221  if (LastMapping && EndExpanded < LastMapping->EndExpanded)
222    return llvm::None;
223  // All is good, return the result.
224  return llvm::makeArrayRef(
225      BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
226                   : BeginSpelled,
227      LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
228                  : LastSpelled + 1);
229}
230
231llvm::Optional<TokenBuffer::Expansion>
232TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
233  assert(Spelled);
234  assert(Spelled->location().isFileID() && "not a spelled token");
235  auto FileIt = Files.find(SourceMgr->getFileID(Spelled->location()));
236  assert(FileIt != Files.end() && "file not tracked by token buffer");
237
238  auto &File = FileIt->second;
239  assert(File.SpelledTokens.data() <= Spelled &&
240         Spelled < (File.SpelledTokens.data() + File.SpelledTokens.size()));
241
242  unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
243  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
244    return M.BeginSpelled < SpelledIndex;
245  });
246  if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
247    return llvm::None;
248
249  Expansion E;
250  E.Spelled = llvm::makeArrayRef(File.SpelledTokens.data() + M->BeginSpelled,
251                                 File.SpelledTokens.data() + M->EndSpelled);
252  E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M->BeginExpanded,
253                                  ExpandedTokens.data() + M->EndExpanded);
254  return E;
255}
256
257llvm::ArrayRef<syntax::Token>
258syntax::spelledTokensTouching(SourceLocation Loc,
259                              const syntax::TokenBuffer &Tokens) {
260  assert(Loc.isFileID());
261  llvm::ArrayRef<syntax::Token> All =
262      Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc));
263  auto *Right = llvm::partition_point(
264      All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
265  bool AcceptRight = Right != All.end() && Right->location() <= Loc;
266  bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc;
267  return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
268                            Right + (AcceptRight ? 1 : 0));
269}
270
271const syntax::Token *
272syntax::spelledIdentifierTouching(SourceLocation Loc,
273                                  const syntax::TokenBuffer &Tokens) {
274  for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
275    if (Tok.kind() == tok::identifier)
276      return &Tok;
277  }
278  return nullptr;
279}
280
281std::vector<const syntax::Token *>
282TokenBuffer::macroExpansions(FileID FID) const {
283  auto FileIt = Files.find(FID);
284  assert(FileIt != Files.end() && "file not tracked by token buffer");
285  auto &File = FileIt->second;
286  std::vector<const syntax::Token *> Expansions;
287  auto &Spelled = File.SpelledTokens;
288  for (auto Mapping : File.Mappings) {
289    const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
290    if (Token->kind() == tok::TokenKind::identifier)
291      Expansions.push_back(Token);
292  }
293  return Expansions;
294}
295
296std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
297                                            const LangOptions &LO) {
298  std::vector<syntax::Token> Tokens;
299  IdentifierTable Identifiers(LO);
300  auto AddToken = [&](clang::Token T) {
301    // Fill the proper token kind for keywords, etc.
302    if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
303        !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
304      clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
305      T.setIdentifierInfo(&II);
306      T.setKind(II.getTokenID());
307    }
308    Tokens.push_back(syntax::Token(T));
309  };
310
311  Lexer L(FID, SM.getBuffer(FID), SM, LO);
312
313  clang::Token T;
314  while (!L.LexFromRawLexer(T))
315    AddToken(T);
316  // 'eof' is only the last token if the input is null-terminated. Never store
317  // it, for consistency.
318  if (T.getKind() != tok::eof)
319    AddToken(T);
320  return Tokens;
321}
322
323/// Records information reqired to construct mappings for the token buffer that
324/// we are collecting.
325class TokenCollector::CollectPPExpansions : public PPCallbacks {
326public:
327  CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
328
329  /// Disabled instance will stop reporting anything to TokenCollector.
330  /// This ensures that uses of the preprocessor after TokenCollector::consume()
331  /// is called do not access the (possibly invalid) collector instance.
332  void disable() { Collector = nullptr; }
333
334  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
335                    SourceRange Range, const MacroArgs *Args) override {
336    if (!Collector)
337      return;
338    const auto &SM = Collector->PP.getSourceManager();
339    // Only record top-level expansions that directly produce expanded tokens.
340    // This excludes those where:
341    //   - the macro use is inside a macro body,
342    //   - the macro appears in an argument to another macro.
343    // However macro expansion isn't really a tree, it's token rewrite rules,
344    // so there are other cases, e.g.
345    //   #define B(X) X
346    //   #define A 1 + B
347    //   A(2)
348    // Both A and B produce expanded tokens, though the macro name 'B' comes
349    // from an expansion. The best we can do is merge the mappings for both.
350
351    // The *last* token of any top-level macro expansion must be in a file.
352    // (In the example above, see the closing paren of the expansion of B).
353    if (!Range.getEnd().isFileID())
354      return;
355    // If there's a current expansion that encloses this one, this one can't be
356    // top-level.
357    if (LastExpansionEnd.isValid() &&
358        !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
359      return;
360
361    // If the macro invocation (B) starts in a macro (A) but ends in a file,
362    // we'll create a merged mapping for A + B by overwriting the endpoint for
363    // A's startpoint.
364    if (!Range.getBegin().isFileID()) {
365      Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
366      assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) &&
367             "Overlapping macros should have same expansion location");
368    }
369
370    Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd();
371    LastExpansionEnd = Range.getEnd();
372  }
373  // FIXME: handle directives like #pragma, #include, etc.
374private:
375  TokenCollector *Collector;
376  /// Used to detect recursive macro expansions.
377  SourceLocation LastExpansionEnd;
378};
379
380/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
381/// implementation tracks the tokens, macro expansions and directives coming
382/// from the preprocessor and:
383/// - for each token, figures out if it is a part of an expanded token stream,
384///   spelled token stream or both. Stores the tokens appropriately.
385/// - records mappings from the spelled to expanded token ranges, e.g. for macro
386///   expansions.
387/// FIXME: also properly record:
388///          - #include directives,
389///          - #pragma, #line and other PP directives,
390///          - skipped pp regions,
391///          - ...
392
393TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
394  // Collect the expanded token stream during preprocessing.
395  PP.setTokenWatcher([this](const clang::Token &T) {
396    if (T.isAnnotation())
397      return;
398    DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
399                                          << "Token: "
400                                          << syntax::Token(T).dumpForTests(
401                                                 this->PP.getSourceManager())
402                                          << "\n"
403
404    );
405    Expanded.push_back(syntax::Token(T));
406  });
407  // And locations of macro calls, to properly recover boundaries of those in
408  // case of empty expansions.
409  auto CB = std::make_unique<CollectPPExpansions>(*this);
410  this->Collector = CB.get();
411  PP.addPPCallbacks(std::move(CB));
412}
413
414/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
415/// token stream.
416class TokenCollector::Builder {
417public:
418  Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
419          const SourceManager &SM, const LangOptions &LangOpts)
420      : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
421        LangOpts(LangOpts) {
422    Result.ExpandedTokens = std::move(Expanded);
423  }
424
425  TokenBuffer build() && {
426    assert(!Result.ExpandedTokens.empty());
427    assert(Result.ExpandedTokens.back().kind() == tok::eof);
428
429    // Tokenize every file that contributed tokens to the expanded stream.
430    buildSpelledTokens();
431
432    // The expanded token stream consists of runs of tokens that came from
433    // the same source (a macro expansion, part of a file etc).
434    // Between these runs are the logical positions of spelled tokens that
435    // didn't expand to anything.
436    while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
437      // Create empty mappings for spelled tokens that expanded to nothing here.
438      // May advance NextSpelled, but NextExpanded is unchanged.
439      discard();
440      // Create mapping for a contiguous run of expanded tokens.
441      // Advances NextExpanded past the run, and NextSpelled accordingly.
442      unsigned OldPosition = NextExpanded;
443      advance();
444      if (NextExpanded == OldPosition)
445        diagnoseAdvanceFailure();
446    }
447    // If any tokens remain in any of the files, they didn't expand to anything.
448    // Create empty mappings up until the end of the file.
449    for (const auto &File : Result.Files)
450      discard(File.first);
451
452    return std::move(Result);
453  }
454
455private:
456  // Consume a sequence of spelled tokens that didn't expand to anything.
457  // In the simplest case, skips spelled tokens until finding one that produced
458  // the NextExpanded token, and creates an empty mapping for them.
459  // If Drain is provided, skips remaining tokens from that file instead.
460  void discard(llvm::Optional<FileID> Drain = llvm::None) {
461    SourceLocation Target =
462        Drain ? SM.getLocForEndOfFile(*Drain)
463              : SM.getExpansionLoc(
464                    Result.ExpandedTokens[NextExpanded].location());
465    FileID File = SM.getFileID(Target);
466    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
467    auto &NextSpelled = this->NextSpelled[File];
468
469    TokenBuffer::Mapping Mapping;
470    Mapping.BeginSpelled = NextSpelled;
471    // When dropping trailing tokens from a file, the empty mapping should
472    // be positioned within the file's expanded-token range (at the end).
473    Mapping.BeginExpanded = Mapping.EndExpanded =
474        Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
475    // We may want to split into several adjacent empty mappings.
476    // FlushMapping() emits the current mapping and starts a new one.
477    auto FlushMapping = [&, this] {
478      Mapping.EndSpelled = NextSpelled;
479      if (Mapping.BeginSpelled != Mapping.EndSpelled)
480        Result.Files[File].Mappings.push_back(Mapping);
481      Mapping.BeginSpelled = NextSpelled;
482    };
483
484    while (NextSpelled < SpelledTokens.size() &&
485           SpelledTokens[NextSpelled].location() < Target) {
486      // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
487      // then we want to partition our (empty) mapping.
488      //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
489      SourceLocation KnownEnd = CollectedExpansions.lookup(
490          SpelledTokens[NextSpelled].location().getRawEncoding());
491      if (KnownEnd.isValid()) {
492        FlushMapping(); // Emits [Start, NextSpelled)
493        while (NextSpelled < SpelledTokens.size() &&
494               SpelledTokens[NextSpelled].location() <= KnownEnd)
495          ++NextSpelled;
496        FlushMapping(); // Emits [NextSpelled, KnownEnd]
497        // Now the loop contitues and will emit (KnownEnd, Target).
498      } else {
499        ++NextSpelled;
500      }
501    }
502    FlushMapping();
503  }
504
505  // Consumes the NextExpanded token and others that are part of the same run.
506  // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
507  // (unless this is a run of file tokens, which we represent with no mapping).
508  void advance() {
509    const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
510    SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
511    FileID File = SM.getFileID(Expansion);
512    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
513    auto &NextSpelled = this->NextSpelled[File];
514
515    if (Tok.location().isFileID()) {
516      // A run of file tokens continues while the expanded/spelled tokens match.
517      while (NextSpelled < SpelledTokens.size() &&
518             NextExpanded < Result.ExpandedTokens.size() &&
519             SpelledTokens[NextSpelled].location() ==
520                 Result.ExpandedTokens[NextExpanded].location()) {
521        ++NextSpelled;
522        ++NextExpanded;
523      }
524      // We need no mapping for file tokens copied to the expanded stream.
525    } else {
526      // We found a new macro expansion. We should have its spelling bounds.
527      auto End = CollectedExpansions.lookup(Expansion.getRawEncoding());
528      assert(End.isValid() && "Macro expansion wasn't captured?");
529
530      // Mapping starts here...
531      TokenBuffer::Mapping Mapping;
532      Mapping.BeginExpanded = NextExpanded;
533      Mapping.BeginSpelled = NextSpelled;
534      // ... consumes spelled tokens within bounds we captured ...
535      while (NextSpelled < SpelledTokens.size() &&
536             SpelledTokens[NextSpelled].location() <= End)
537        ++NextSpelled;
538      // ... consumes expanded tokens rooted at the same expansion ...
539      while (NextExpanded < Result.ExpandedTokens.size() &&
540             SM.getExpansionLoc(
541                 Result.ExpandedTokens[NextExpanded].location()) == Expansion)
542        ++NextExpanded;
543      // ... and ends here.
544      Mapping.EndExpanded = NextExpanded;
545      Mapping.EndSpelled = NextSpelled;
546      Result.Files[File].Mappings.push_back(Mapping);
547    }
548  }
549
550  // advance() is supposed to consume at least one token - if not, we crash.
551  void diagnoseAdvanceFailure() {
552#ifndef NDEBUG
553    // Show the failed-to-map token in context.
554    for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
555         I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
556      const char *L =
557          (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
558      llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
559    }
560#endif
561    llvm_unreachable("Couldn't map expanded token to spelled tokens!");
562  }
563
564  /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
565  /// ranges for each of the files.
566  void buildSpelledTokens() {
567    for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
568      const auto &Tok = Result.ExpandedTokens[I];
569      auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
570      auto It = Result.Files.try_emplace(FID);
571      TokenBuffer::MarkedFile &File = It.first->second;
572
573      // The eof token should not be considered part of the main-file's range.
574      File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
575
576      if (!It.second)
577        continue; // we have seen this file before.
578      // This is the first time we see this file.
579      File.BeginExpanded = I;
580      File.SpelledTokens = tokenize(FID, SM, LangOpts);
581    }
582  }
583
584  TokenBuffer Result;
585  unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
586  llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
587  PPExpansions CollectedExpansions;
588  const SourceManager &SM;
589  const LangOptions &LangOpts;
590};
591
592TokenBuffer TokenCollector::consume() && {
593  PP.setTokenWatcher(nullptr);
594  Collector->disable();
595  return Builder(std::move(Expanded), std::move(Expansions),
596                 PP.getSourceManager(), PP.getLangOpts())
597      .build();
598}
599
600std::string syntax::Token::str() const {
601  return llvm::formatv("Token({0}, length = {1})", tok::getTokenName(kind()),
602                       length());
603}
604
605std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
606  return llvm::formatv("{0}   {1}", tok::getTokenName(kind()), text(SM));
607}
608
609std::string TokenBuffer::dumpForTests() const {
610  auto PrintToken = [this](const syntax::Token &T) -> std::string {
611    if (T.kind() == tok::eof)
612      return "<eof>";
613    return T.text(*SourceMgr);
614  };
615
616  auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
617                                        llvm::ArrayRef<syntax::Token> Tokens) {
618    if (Tokens.empty()) {
619      OS << "<empty>";
620      return;
621    }
622    OS << Tokens[0].text(*SourceMgr);
623    for (unsigned I = 1; I < Tokens.size(); ++I) {
624      if (Tokens[I].kind() == tok::eof)
625        continue;
626      OS << " " << PrintToken(Tokens[I]);
627    }
628  };
629
630  std::string Dump;
631  llvm::raw_string_ostream OS(Dump);
632
633  OS << "expanded tokens:\n"
634     << "  ";
635  // (!) we do not show '<eof>'.
636  DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
637  OS << "\n";
638
639  std::vector<FileID> Keys;
640  for (auto F : Files)
641    Keys.push_back(F.first);
642  llvm::sort(Keys);
643
644  for (FileID ID : Keys) {
645    const MarkedFile &File = Files.find(ID)->second;
646    auto *Entry = SourceMgr->getFileEntryForID(ID);
647    if (!Entry)
648      continue; // Skip builtin files.
649    OS << llvm::formatv("file '{0}'\n", Entry->getName())
650       << "  spelled tokens:\n"
651       << "    ";
652    DumpTokens(OS, File.SpelledTokens);
653    OS << "\n";
654
655    if (File.Mappings.empty()) {
656      OS << "  no mappings.\n";
657      continue;
658    }
659    OS << "  mappings:\n";
660    for (auto &M : File.Mappings) {
661      OS << llvm::formatv(
662          "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
663          PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
664          M.EndSpelled == File.SpelledTokens.size()
665              ? "<eof>"
666              : PrintToken(File.SpelledTokens[M.EndSpelled]),
667          M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
668          M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
669          M.EndExpanded);
670    }
671  }
672  return OS.str();
673}
674