1137817Srwatson//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===// 2137817Srwatson// 3172930Srwatson// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4182063Srwatson// See https://llvm.org/LICENSE.txt for license information. 5189503Srwatson// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6137817Srwatson// 7137817Srwatson//===----------------------------------------------------------------------===// 8137817Srwatson/// 9137817Srwatson/// \file 10137817Srwatson/// This file defines the \c FormatTokenSource interface, which provides a token 11137817Srwatson/// stream as well as the ability to manipulate the token stream. 12137817Srwatson/// 13172930Srwatson//===----------------------------------------------------------------------===// 14172930Srwatson 15172930Srwatson#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 16189503Srwatson#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 17189503Srwatson 18189503Srwatson#include "FormatToken.h" 19137817Srwatson#include "UnwrappedLineParser.h" 20137817Srwatson#include "llvm/ADT/DenseMap.h" 21137817Srwatson#include <cstddef> 22137817Srwatson 23137817Srwatson#define DEBUG_TYPE "format-token-source" 24137817Srwatson 25137817Srwatsonnamespace clang { 26137817Srwatsonnamespace format { 27137817Srwatson 28137817Srwatson// Navigate a token stream. 29137817Srwatson// 30137817Srwatson// Enables traversal of a token stream, resetting the position in a token 31137817Srwatson// stream, as well as inserting new tokens. 32137817Srwatsonclass FormatTokenSource { 33137817Srwatsonpublic: 34137817Srwatson virtual ~FormatTokenSource() {} 35137817Srwatson 36137817Srwatson // Returns the next token in the token stream. 37137817Srwatson virtual FormatToken *getNextToken() = 0; 38137817Srwatson 39137817Srwatson // Returns the token preceding the token returned by the last call to 40137817Srwatson // getNextToken() in the token stream, or nullptr if no such token exists. 41137817Srwatson // 42137817Srwatson // Must not be called directly at the position directly after insertTokens() 43137817Srwatson // is called. 44189503Srwatson virtual FormatToken *getPreviousToken() = 0; 45137817Srwatson 46137817Srwatson // Returns the token that would be returned by the next call to 47137817Srwatson // getNextToken(). 48137817Srwatson virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; 49137817Srwatson 50137817Srwatson // Returns whether we are at the end of the file. 51137817Srwatson // This can be different from whether getNextToken() returned an eof token 52137817Srwatson // when the FormatTokenSource is a view on a part of the token stream. 53189503Srwatson virtual bool isEOF() = 0; 54137817Srwatson 55137817Srwatson // Gets the current position in the token stream, to be used by setPosition(). 56137817Srwatson // 57137817Srwatson // Note that the value of the position is not meaningful, and specifically 58137817Srwatson // should not be used to get relative token positions. 59137817Srwatson virtual unsigned getPosition() = 0; 60137817Srwatson 61137817Srwatson // Resets the token stream to the state it was in when getPosition() returned 62163606Srwatson // Position, and return the token at that position in the stream. 63137817Srwatson virtual FormatToken *setPosition(unsigned Position) = 0; 64165469Srwatson 65137817Srwatson // Insert the given tokens before the current position. 66137817Srwatson // Returns the first token in \c Tokens. 67137817Srwatson // The next returned token will be the second token in \c Tokens. 68137817Srwatson // Requires the last token in Tokens to be EOF; once the EOF token is reached, 69137817Srwatson // the next token will be the last token returned by getNextToken(); 70137817Srwatson // 71137817Srwatson // For example, given the token sequence 'a1 a2': 72191731Srwatson // getNextToken() -> a1 73137817Srwatson // insertTokens('b1 b2') -> b1 74137817Srwatson // getNextToken() -> b2 75137817Srwatson // getNextToken() -> a1 76137817Srwatson // getNextToken() -> a2 77172930Srwatson virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0; 78137817Srwatson}; 79137817Srwatson 80182063Srwatsonclass IndexedTokenSource : public FormatTokenSource { 81182063Srwatsonpublic: 82182063Srwatson IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 83182063Srwatson : Tokens(Tokens), Position(-1) {} 84137817Srwatson 85137817Srwatson FormatToken *getNextToken() override { 86137817Srwatson if (Position >= 0 && isEOF()) { 87137817Srwatson LLVM_DEBUG({ 88137817Srwatson llvm::dbgs() << "Next "; 89137817Srwatson dbgToken(Position); 90137817Srwatson }); 91137817Srwatson return Tokens[Position]; 92191731Srwatson } 93137817Srwatson Position = successor(Position); 94137817Srwatson LLVM_DEBUG({ 95137817Srwatson llvm::dbgs() << "Next "; 96137817Srwatson dbgToken(Position); 97172930Srwatson }); 98137817Srwatson return Tokens[Position]; 99137817Srwatson } 100182063Srwatson 101182063Srwatson FormatToken *getPreviousToken() override { 102182063Srwatson assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof)); 103182063Srwatson return Position > 0 ? Tokens[Position - 1] : nullptr; 104137817Srwatson } 105137817Srwatson 106137817Srwatson FormatToken *peekNextToken(bool SkipComment = false) override { 107137817Srwatson if (isEOF()) 108137817Srwatson return Tokens[Position]; 109137817Srwatson int Next = successor(Position); 110191731Srwatson if (SkipComment) 111137817Srwatson while (Tokens[Next]->is(tok::comment)) 112137817Srwatson Next = successor(Next); 113137817Srwatson LLVM_DEBUG({ 114137817Srwatson llvm::dbgs() << "Peeking "; 115172930Srwatson dbgToken(Next); 116137817Srwatson }); 117137817Srwatson return Tokens[Next]; 118182063Srwatson } 119182063Srwatson 120182063Srwatson bool isEOF() override { 121182063Srwatson return Position == -1 ? false : Tokens[Position]->is(tok::eof); 122137817Srwatson } 123137817Srwatson 124137817Srwatson unsigned getPosition() override { 125137817Srwatson LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 126137817Srwatson assert(Position >= 0); 127137817Srwatson return Position; 128191731Srwatson } 129137817Srwatson 130137817Srwatson FormatToken *setPosition(unsigned P) override { 131137817Srwatson LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 132137817Srwatson Position = P; 133172930Srwatson return Tokens[Position]; 134137817Srwatson } 135137817Srwatson 136182063Srwatson FormatToken *insertTokens(ArrayRef<FormatToken *> New) override { 137182063Srwatson assert(Position != -1); 138182063Srwatson assert((*New.rbegin())->Tok.is(tok::eof)); 139182063Srwatson int Next = Tokens.size(); 140137817Srwatson Tokens.append(New.begin(), New.end()); 141137817Srwatson LLVM_DEBUG({ 142137817Srwatson llvm::dbgs() << "Inserting:\n"; 143172930Srwatson for (int I = Next, E = Tokens.size(); I != E; ++I) 144137817Srwatson dbgToken(I, " "); 145137817Srwatson llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> " 146165427Srwatson << Position << "\n"; 147191731Srwatson }); 148191731Srwatson Jumps[Tokens.size() - 1] = Position; 149137817Srwatson Position = Next; 150137817Srwatson LLVM_DEBUG({ 151137817Srwatson llvm::dbgs() << "At inserted token "; 152172930Srwatson dbgToken(Position); 153137817Srwatson }); 154165427Srwatson return Tokens[Position]; 155191731Srwatson } 156191731Srwatson 157137817Srwatson void reset() { Position = -1; } 158137817Srwatson 159137817Srwatsonprivate: 160172930Srwatson int successor(int Current) const { 161137817Srwatson int Next = Current + 1; 162137817Srwatson auto it = Jumps.find(Next); 163191731Srwatson if (it != Jumps.end()) { 164137817Srwatson Next = it->second; 165137817Srwatson assert(!Jumps.contains(Next)); 166137817Srwatson } 167172930Srwatson return Next; 168137817Srwatson } 169165427Srwatson 170191731Srwatson void dbgToken(int Position, llvm::StringRef Indent = "") { 171137817Srwatson FormatToken *Tok = Tokens[Position]; 172137817Srwatson llvm::dbgs() << Indent << "[" << Position 173189503Srwatson << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 174189503Srwatson << ", Macro: " << !!Tok->MacroCtx << "\n"; 175189503Srwatson } 176137817Srwatson 177172930Srwatson SmallVector<FormatToken *> Tokens; 178137817Srwatson int Position; 179137817Srwatson 180137817Srwatson // Maps from position a to position b, so that when we reach a, the token 181137817Srwatson // stream continues at position b instead. 182191731Srwatson llvm::DenseMap<int, int> Jumps; 183191731Srwatson}; 184189503Srwatson 185137817Srwatsonclass ScopedMacroState : public FormatTokenSource { 186165434Srwatsonpublic: 187137817Srwatson ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 188137817Srwatson FormatToken *&ResetToken) 189189503Srwatson : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 190189503Srwatson PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 191189503Srwatson Token(nullptr), PreviousToken(nullptr) { 192137817Srwatson FakeEOF.Tok.startToken(); 193172930Srwatson FakeEOF.Tok.setKind(tok::eof); 194137817Srwatson TokenSource = this; 195137817Srwatson Line.Level = 0; 196137817Srwatson Line.InPPDirective = true; 197191731Srwatson // InMacroBody gets set after the `#define x` part. 198191731Srwatson } 199189503Srwatson 200137817Srwatson ~ScopedMacroState() override { 201165434Srwatson TokenSource = PreviousTokenSource; 202137817Srwatson ResetToken = Token; 203137817Srwatson Line.InPPDirective = false; 204189503Srwatson Line.InMacroBody = false; 205189503Srwatson Line.Level = PreviousLineLevel; 206189503Srwatson } 207137817Srwatson 208172930Srwatson FormatToken *getNextToken() override { 209137817Srwatson // The \c UnwrappedLineParser guards against this by never calling 210137817Srwatson // \c getNextToken() after it has encountered the first eof token. 211137817Srwatson assert(!eof()); 212191731Srwatson PreviousToken = Token; 213189797Srwatson Token = PreviousTokenSource->getNextToken(); 214189503Srwatson if (eof()) 215137817Srwatson return &FakeEOF; 216165434Srwatson return Token; 217137817Srwatson } 218137817Srwatson 219189503Srwatson FormatToken *getPreviousToken() override { 220189503Srwatson return PreviousTokenSource->getPreviousToken(); 221189503Srwatson } 222137817Srwatson 223172930Srwatson FormatToken *peekNextToken(bool SkipComment) override { 224137817Srwatson if (eof()) 225137817Srwatson return &FakeEOF; 226137817Srwatson return PreviousTokenSource->peekNextToken(SkipComment); 227191731Srwatson } 228189797Srwatson 229189503Srwatson bool isEOF() override { return PreviousTokenSource->isEOF(); } 230137817Srwatson 231165434Srwatson unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 232137817Srwatson 233137817Srwatson FormatToken *setPosition(unsigned Position) override { 234189503Srwatson PreviousToken = nullptr; 235189503Srwatson Token = PreviousTokenSource->setPosition(Position); 236189503Srwatson return Token; 237137817Srwatson } 238172930Srwatson 239137817Srwatson FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override { 240137817Srwatson llvm_unreachable("Cannot insert tokens while parsing a macro."); 241137817Srwatson return nullptr; 242191731Srwatson } 243189797Srwatson 244189503Srwatsonprivate: 245137817Srwatson bool eof() { 246165434Srwatson return Token && Token->HasUnescapedNewline && 247137817Srwatson !continuesLineComment(*Token, PreviousToken, 248137817Srwatson /*MinColumnToken=*/PreviousToken); 249189503Srwatson } 250189503Srwatson 251189503Srwatson FormatToken FakeEOF; 252137817Srwatson UnwrappedLine &Line; 253172930Srwatson FormatTokenSource *&TokenSource; 254137817Srwatson FormatToken *&ResetToken; 255137817Srwatson unsigned PreviousLineLevel; 256137817Srwatson FormatTokenSource *PreviousTokenSource; 257191731Srwatson 258189797Srwatson FormatToken *Token; 259189503Srwatson FormatToken *PreviousToken; 260137817Srwatson}; 261165434Srwatson 262137817Srwatson} // namespace format 263137817Srwatson} // namespace clang 264189503Srwatson 265189503Srwatson#undef DEBUG_TYPE 266189503Srwatson 267137817Srwatson#endif 268172930Srwatson