1137817Srwatson//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2137817Srwatson//
3172930Srwatson// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4182063Srwatson// See https://llvm.org/LICENSE.txt for license information.
5189503Srwatson// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6137817Srwatson//
7137817Srwatson//===----------------------------------------------------------------------===//
8137817Srwatson///
9137817Srwatson/// \file
10137817Srwatson/// This file defines the \c FormatTokenSource interface, which provides a token
11137817Srwatson/// stream as well as the ability to manipulate the token stream.
12137817Srwatson///
13172930Srwatson//===----------------------------------------------------------------------===//
14172930Srwatson
15172930Srwatson#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16189503Srwatson#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17189503Srwatson
18189503Srwatson#include "FormatToken.h"
19137817Srwatson#include "UnwrappedLineParser.h"
20137817Srwatson#include "llvm/ADT/DenseMap.h"
21137817Srwatson#include <cstddef>
22137817Srwatson
23137817Srwatson#define DEBUG_TYPE "format-token-source"
24137817Srwatson
25137817Srwatsonnamespace clang {
26137817Srwatsonnamespace format {
27137817Srwatson
28137817Srwatson// Navigate a token stream.
29137817Srwatson//
30137817Srwatson// Enables traversal of a token stream, resetting the position in a token
31137817Srwatson// stream, as well as inserting new tokens.
32137817Srwatsonclass FormatTokenSource {
33137817Srwatsonpublic:
34137817Srwatson  virtual ~FormatTokenSource() {}
35137817Srwatson
36137817Srwatson  // Returns the next token in the token stream.
37137817Srwatson  virtual FormatToken *getNextToken() = 0;
38137817Srwatson
39137817Srwatson  // Returns the token preceding the token returned by the last call to
40137817Srwatson  // getNextToken() in the token stream, or nullptr if no such token exists.
41137817Srwatson  //
42137817Srwatson  // Must not be called directly at the position directly after insertTokens()
43137817Srwatson  // is called.
44189503Srwatson  virtual FormatToken *getPreviousToken() = 0;
45137817Srwatson
46137817Srwatson  // Returns the token that would be returned by the next call to
47137817Srwatson  // getNextToken().
48137817Srwatson  virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
49137817Srwatson
50137817Srwatson  // Returns whether we are at the end of the file.
51137817Srwatson  // This can be different from whether getNextToken() returned an eof token
52137817Srwatson  // when the FormatTokenSource is a view on a part of the token stream.
53189503Srwatson  virtual bool isEOF() = 0;
54137817Srwatson
55137817Srwatson  // Gets the current position in the token stream, to be used by setPosition().
56137817Srwatson  //
57137817Srwatson  // Note that the value of the position is not meaningful, and specifically
58137817Srwatson  // should not be used to get relative token positions.
59137817Srwatson  virtual unsigned getPosition() = 0;
60137817Srwatson
61137817Srwatson  // Resets the token stream to the state it was in when getPosition() returned
62163606Srwatson  // Position, and return the token at that position in the stream.
63137817Srwatson  virtual FormatToken *setPosition(unsigned Position) = 0;
64165469Srwatson
65137817Srwatson  // Insert the given tokens before the current position.
66137817Srwatson  // Returns the first token in \c Tokens.
67137817Srwatson  // The next returned token will be the second token in \c Tokens.
68137817Srwatson  // Requires the last token in Tokens to be EOF; once the EOF token is reached,
69137817Srwatson  // the next token will be the last token returned by getNextToken();
70137817Srwatson  //
71137817Srwatson  // For example, given the token sequence 'a1 a2':
72191731Srwatson  // getNextToken() -> a1
73137817Srwatson  // insertTokens('b1 b2') -> b1
74137817Srwatson  // getNextToken() -> b2
75137817Srwatson  // getNextToken() -> a1
76137817Srwatson  // getNextToken() -> a2
77172930Srwatson  virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
78137817Srwatson};
79137817Srwatson
80182063Srwatsonclass IndexedTokenSource : public FormatTokenSource {
81182063Srwatsonpublic:
82182063Srwatson  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
83182063Srwatson      : Tokens(Tokens), Position(-1) {}
84137817Srwatson
85137817Srwatson  FormatToken *getNextToken() override {
86137817Srwatson    if (Position >= 0 && isEOF()) {
87137817Srwatson      LLVM_DEBUG({
88137817Srwatson        llvm::dbgs() << "Next ";
89137817Srwatson        dbgToken(Position);
90137817Srwatson      });
91137817Srwatson      return Tokens[Position];
92191731Srwatson    }
93137817Srwatson    Position = successor(Position);
94137817Srwatson    LLVM_DEBUG({
95137817Srwatson      llvm::dbgs() << "Next ";
96137817Srwatson      dbgToken(Position);
97172930Srwatson    });
98137817Srwatson    return Tokens[Position];
99137817Srwatson  }
100182063Srwatson
101182063Srwatson  FormatToken *getPreviousToken() override {
102182063Srwatson    assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
103182063Srwatson    return Position > 0 ? Tokens[Position - 1] : nullptr;
104137817Srwatson  }
105137817Srwatson
106137817Srwatson  FormatToken *peekNextToken(bool SkipComment = false) override {
107137817Srwatson    if (isEOF())
108137817Srwatson      return Tokens[Position];
109137817Srwatson    int Next = successor(Position);
110191731Srwatson    if (SkipComment)
111137817Srwatson      while (Tokens[Next]->is(tok::comment))
112137817Srwatson        Next = successor(Next);
113137817Srwatson    LLVM_DEBUG({
114137817Srwatson      llvm::dbgs() << "Peeking ";
115172930Srwatson      dbgToken(Next);
116137817Srwatson    });
117137817Srwatson    return Tokens[Next];
118182063Srwatson  }
119182063Srwatson
120182063Srwatson  bool isEOF() override {
121182063Srwatson    return Position == -1 ? false : Tokens[Position]->is(tok::eof);
122137817Srwatson  }
123137817Srwatson
124137817Srwatson  unsigned getPosition() override {
125137817Srwatson    LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
126137817Srwatson    assert(Position >= 0);
127137817Srwatson    return Position;
128191731Srwatson  }
129137817Srwatson
130137817Srwatson  FormatToken *setPosition(unsigned P) override {
131137817Srwatson    LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
132137817Srwatson    Position = P;
133172930Srwatson    return Tokens[Position];
134137817Srwatson  }
135137817Srwatson
136182063Srwatson  FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
137182063Srwatson    assert(Position != -1);
138182063Srwatson    assert((*New.rbegin())->Tok.is(tok::eof));
139182063Srwatson    int Next = Tokens.size();
140137817Srwatson    Tokens.append(New.begin(), New.end());
141137817Srwatson    LLVM_DEBUG({
142137817Srwatson      llvm::dbgs() << "Inserting:\n";
143172930Srwatson      for (int I = Next, E = Tokens.size(); I != E; ++I)
144137817Srwatson        dbgToken(I, "  ");
145137817Srwatson      llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
146165427Srwatson                   << Position << "\n";
147191731Srwatson    });
148191731Srwatson    Jumps[Tokens.size() - 1] = Position;
149137817Srwatson    Position = Next;
150137817Srwatson    LLVM_DEBUG({
151137817Srwatson      llvm::dbgs() << "At inserted token ";
152172930Srwatson      dbgToken(Position);
153137817Srwatson    });
154165427Srwatson    return Tokens[Position];
155191731Srwatson  }
156191731Srwatson
157137817Srwatson  void reset() { Position = -1; }
158137817Srwatson
159137817Srwatsonprivate:
160172930Srwatson  int successor(int Current) const {
161137817Srwatson    int Next = Current + 1;
162137817Srwatson    auto it = Jumps.find(Next);
163191731Srwatson    if (it != Jumps.end()) {
164137817Srwatson      Next = it->second;
165137817Srwatson      assert(!Jumps.contains(Next));
166137817Srwatson    }
167172930Srwatson    return Next;
168137817Srwatson  }
169165427Srwatson
170191731Srwatson  void dbgToken(int Position, llvm::StringRef Indent = "") {
171137817Srwatson    FormatToken *Tok = Tokens[Position];
172137817Srwatson    llvm::dbgs() << Indent << "[" << Position
173189503Srwatson                 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
174189503Srwatson                 << ", Macro: " << !!Tok->MacroCtx << "\n";
175189503Srwatson  }
176137817Srwatson
177172930Srwatson  SmallVector<FormatToken *> Tokens;
178137817Srwatson  int Position;
179137817Srwatson
180137817Srwatson  // Maps from position a to position b, so that when we reach a, the token
181137817Srwatson  // stream continues at position b instead.
182191731Srwatson  llvm::DenseMap<int, int> Jumps;
183191731Srwatson};
184189503Srwatson
185137817Srwatsonclass ScopedMacroState : public FormatTokenSource {
186165434Srwatsonpublic:
187137817Srwatson  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
188137817Srwatson                   FormatToken *&ResetToken)
189189503Srwatson      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
190189503Srwatson        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
191189503Srwatson        Token(nullptr), PreviousToken(nullptr) {
192137817Srwatson    FakeEOF.Tok.startToken();
193172930Srwatson    FakeEOF.Tok.setKind(tok::eof);
194137817Srwatson    TokenSource = this;
195137817Srwatson    Line.Level = 0;
196137817Srwatson    Line.InPPDirective = true;
197191731Srwatson    // InMacroBody gets set after the `#define x` part.
198191731Srwatson  }
199189503Srwatson
200137817Srwatson  ~ScopedMacroState() override {
201165434Srwatson    TokenSource = PreviousTokenSource;
202137817Srwatson    ResetToken = Token;
203137817Srwatson    Line.InPPDirective = false;
204189503Srwatson    Line.InMacroBody = false;
205189503Srwatson    Line.Level = PreviousLineLevel;
206189503Srwatson  }
207137817Srwatson
208172930Srwatson  FormatToken *getNextToken() override {
209137817Srwatson    // The \c UnwrappedLineParser guards against this by never calling
210137817Srwatson    // \c getNextToken() after it has encountered the first eof token.
211137817Srwatson    assert(!eof());
212191731Srwatson    PreviousToken = Token;
213189797Srwatson    Token = PreviousTokenSource->getNextToken();
214189503Srwatson    if (eof())
215137817Srwatson      return &FakeEOF;
216165434Srwatson    return Token;
217137817Srwatson  }
218137817Srwatson
219189503Srwatson  FormatToken *getPreviousToken() override {
220189503Srwatson    return PreviousTokenSource->getPreviousToken();
221189503Srwatson  }
222137817Srwatson
223172930Srwatson  FormatToken *peekNextToken(bool SkipComment) override {
224137817Srwatson    if (eof())
225137817Srwatson      return &FakeEOF;
226137817Srwatson    return PreviousTokenSource->peekNextToken(SkipComment);
227191731Srwatson  }
228189797Srwatson
229189503Srwatson  bool isEOF() override { return PreviousTokenSource->isEOF(); }
230137817Srwatson
231165434Srwatson  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
232137817Srwatson
233137817Srwatson  FormatToken *setPosition(unsigned Position) override {
234189503Srwatson    PreviousToken = nullptr;
235189503Srwatson    Token = PreviousTokenSource->setPosition(Position);
236189503Srwatson    return Token;
237137817Srwatson  }
238172930Srwatson
239137817Srwatson  FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
240137817Srwatson    llvm_unreachable("Cannot insert tokens while parsing a macro.");
241137817Srwatson    return nullptr;
242191731Srwatson  }
243189797Srwatson
244189503Srwatsonprivate:
245137817Srwatson  bool eof() {
246165434Srwatson    return Token && Token->HasUnescapedNewline &&
247137817Srwatson           !continuesLineComment(*Token, PreviousToken,
248137817Srwatson                                 /*MinColumnToken=*/PreviousToken);
249189503Srwatson  }
250189503Srwatson
251189503Srwatson  FormatToken FakeEOF;
252137817Srwatson  UnwrappedLine &Line;
253172930Srwatson  FormatTokenSource *&TokenSource;
254137817Srwatson  FormatToken *&ResetToken;
255137817Srwatson  unsigned PreviousLineLevel;
256137817Srwatson  FormatTokenSource *PreviousTokenSource;
257191731Srwatson
258189797Srwatson  FormatToken *Token;
259189503Srwatson  FormatToken *PreviousToken;
260137817Srwatson};
261165434Srwatson
262137817Srwatson} // namespace format
263137817Srwatson} // namespace clang
264189503Srwatson
265189503Srwatson#undef DEBUG_TYPE
266189503Srwatson
267137817Srwatson#endif
268172930Srwatson