1//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This code simply runs the preprocessor on the input file and prints out the
10// result.  This is the traditional behavior of the -E option.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Frontend/Utils.h"
15#include "clang/Basic/CharInfo.h"
16#include "clang/Basic/Diagnostic.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Frontend/PreprocessorOutputOptions.h"
19#include "clang/Lex/MacroInfo.h"
20#include "clang/Lex/PPCallbacks.h"
21#include "clang/Lex/Pragma.h"
22#include "clang/Lex/Preprocessor.h"
23#include "clang/Lex/TokenConcatenation.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallString.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/Support/ErrorHandling.h"
28#include "llvm/Support/raw_ostream.h"
29#include <cstdio>
30using namespace clang;
31
32/// PrintMacroDefinition - Print a macro definition in a form that will be
33/// properly accepted back as a definition.
34static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35                                 Preprocessor &PP, raw_ostream *OS) {
36  *OS << "#define " << II.getName();
37
38  if (MI.isFunctionLike()) {
39    *OS << '(';
40    if (!MI.param_empty()) {
41      MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
42      for (; AI+1 != E; ++AI) {
43        *OS << (*AI)->getName();
44        *OS << ',';
45      }
46
47      // Last argument.
48      if ((*AI)->getName() == "__VA_ARGS__")
49        *OS << "...";
50      else
51        *OS << (*AI)->getName();
52    }
53
54    if (MI.isGNUVarargs())
55      *OS << "...";  // #define foo(x...)
56
57    *OS << ')';
58  }
59
60  // GCC always emits a space, even if the macro body is empty.  However, do not
61  // want to emit two spaces if the first token has a leading space.
62  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
63    *OS << ' ';
64
65  SmallString<128> SpellingBuffer;
66  for (const auto &T : MI.tokens()) {
67    if (T.hasLeadingSpace())
68      *OS << ' ';
69
70    *OS << PP.getSpelling(T, SpellingBuffer);
71  }
72}
73
74//===----------------------------------------------------------------------===//
75// Preprocessed token printer
76//===----------------------------------------------------------------------===//
77
78namespace {
79class PrintPPOutputPPCallbacks : public PPCallbacks {
80  Preprocessor &PP;
81  SourceManager &SM;
82  TokenConcatenation ConcatInfo;
83public:
84  raw_ostream *OS;
85private:
86  unsigned CurLine;
87
88  bool EmittedTokensOnThisLine;
89  bool EmittedDirectiveOnThisLine;
90  SrcMgr::CharacteristicKind FileType;
91  SmallString<512> CurFilename;
92  bool Initialized;
93  bool DisableLineMarkers;
94  bool DumpDefines;
95  bool DumpIncludeDirectives;
96  bool UseLineDirectives;
97  bool IsFirstFileEntered;
98  bool MinimizeWhitespace;
99  bool DirectivesOnly;
100  bool KeepSystemIncludes;
101  raw_ostream *OrigOS;
102  std::unique_ptr<llvm::raw_null_ostream> NullOS;
103
104  Token PrevTok;
105  Token PrevPrevTok;
106
107public:
108  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
109                           bool defines, bool DumpIncludeDirectives,
110                           bool UseLineDirectives, bool MinimizeWhitespace,
111                           bool DirectivesOnly, bool KeepSystemIncludes)
112      : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
113        DisableLineMarkers(lineMarkers), DumpDefines(defines),
114        DumpIncludeDirectives(DumpIncludeDirectives),
115        UseLineDirectives(UseLineDirectives),
116        MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
117        KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
118    CurLine = 0;
119    CurFilename += "<uninit>";
120    EmittedTokensOnThisLine = false;
121    EmittedDirectiveOnThisLine = false;
122    FileType = SrcMgr::C_User;
123    Initialized = false;
124    IsFirstFileEntered = false;
125    if (KeepSystemIncludes)
126      NullOS = std::make_unique<llvm::raw_null_ostream>();
127
128    PrevTok.startToken();
129    PrevPrevTok.startToken();
130  }
131
132  bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
133
134  void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
135  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
136
137  void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
138  bool hasEmittedDirectiveOnThisLine() const {
139    return EmittedDirectiveOnThisLine;
140  }
141
142  /// Ensure that the output stream position is at the beginning of a new line
143  /// and inserts one if it does not. It is intended to ensure that directives
144  /// inserted by the directives not from the input source (such as #line) are
145  /// in the first column. To insert newlines that represent the input, use
146  /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true).
147  void startNewLineIfNeeded();
148
149  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
150                   SrcMgr::CharacteristicKind FileType,
151                   FileID PrevFID) override;
152  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
153                          StringRef FileName, bool IsAngled,
154                          CharSourceRange FilenameRange,
155                          OptionalFileEntryRef File, StringRef SearchPath,
156                          StringRef RelativePath, const Module *Imported,
157                          SrcMgr::CharacteristicKind FileType) override;
158  void Ident(SourceLocation Loc, StringRef str) override;
159  void PragmaMessage(SourceLocation Loc, StringRef Namespace,
160                     PragmaMessageKind Kind, StringRef Str) override;
161  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
162  void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
163  void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
164  void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
165                        diag::Severity Map, StringRef Str) override;
166  void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec,
167                     ArrayRef<int> Ids) override;
168  void PragmaWarningPush(SourceLocation Loc, int Level) override;
169  void PragmaWarningPop(SourceLocation Loc) override;
170  void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override;
171  void PragmaExecCharsetPop(SourceLocation Loc) override;
172  void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
173  void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
174
175  /// Insert whitespace before emitting the next token.
176  ///
177  /// @param Tok             Next token to be emitted.
178  /// @param RequireSpace    Ensure at least one whitespace is emitted. Useful
179  ///                        if non-tokens have been emitted to the stream.
180  /// @param RequireSameLine Never emit newlines. Useful when semantics depend
181  ///                        on being on the same line, such as directives.
182  void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace,
183                                 bool RequireSameLine);
184
185  /// Move to the line of the provided source location. This will
186  /// return true if a newline was inserted or if
187  /// the requested location is the first token on the first line.
188  /// In these cases the next output will be the first column on the line and
189  /// make it possible to insert indention. The newline was inserted
190  /// implicitly when at the beginning of the file.
191  ///
192  /// @param Tok                 Token where to move to.
193  /// @param RequireStartOfLine  Whether the next line depends on being in the
194  ///                            first column, such as a directive.
195  ///
196  /// @return Whether column adjustments are necessary.
197  bool MoveToLine(const Token &Tok, bool RequireStartOfLine) {
198    PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation());
199    unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
200    bool IsFirstInFile =
201        Tok.isAtStartOfLine() && PLoc.isValid() && PLoc.getLine() == 1;
202    return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile;
203  }
204
205  /// Move to the line of the provided source location. Returns true if a new
206  /// line was inserted.
207  bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) {
208    PresumedLoc PLoc = SM.getPresumedLoc(Loc);
209    unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
210    return MoveToLine(TargetLine, RequireStartOfLine);
211  }
212  bool MoveToLine(unsigned LineNo, bool RequireStartOfLine);
213
214  bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
215                   const Token &Tok) {
216    return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
217  }
218  void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
219                     unsigned ExtraLen=0);
220  bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
221  void HandleNewlinesInToken(const char *TokStr, unsigned Len);
222
223  /// MacroDefined - This hook is called whenever a macro definition is seen.
224  void MacroDefined(const Token &MacroNameTok,
225                    const MacroDirective *MD) override;
226
227  /// MacroUndefined - This hook is called whenever a macro #undef is seen.
228  void MacroUndefined(const Token &MacroNameTok,
229                      const MacroDefinition &MD,
230                      const MacroDirective *Undef) override;
231
232  void BeginModule(const Module *M);
233  void EndModule(const Module *M);
234};
235}  // end anonymous namespace
236
237void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
238                                             const char *Extra,
239                                             unsigned ExtraLen) {
240  startNewLineIfNeeded();
241
242  // Emit #line directives or GNU line markers depending on what mode we're in.
243  if (UseLineDirectives) {
244    *OS << "#line" << ' ' << LineNo << ' ' << '"';
245    OS->write_escaped(CurFilename);
246    *OS << '"';
247  } else {
248    *OS << '#' << ' ' << LineNo << ' ' << '"';
249    OS->write_escaped(CurFilename);
250    *OS << '"';
251
252    if (ExtraLen)
253      OS->write(Extra, ExtraLen);
254
255    if (FileType == SrcMgr::C_System)
256      OS->write(" 3", 2);
257    else if (FileType == SrcMgr::C_ExternCSystem)
258      OS->write(" 3 4", 4);
259  }
260  *OS << '\n';
261}
262
263/// MoveToLine - Move the output to the source line specified by the location
264/// object.  We can do this by emitting some number of \n's, or be emitting a
265/// #line directive.  This returns false if already at the specified line, true
266/// if some newlines were emitted.
267bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
268                                          bool RequireStartOfLine) {
269  // If it is required to start a new line or finish the current, insert
270  // vertical whitespace now and take it into account when moving to the
271  // expected line.
272  bool StartedNewLine = false;
273  if ((RequireStartOfLine && EmittedTokensOnThisLine) ||
274      EmittedDirectiveOnThisLine) {
275    *OS << '\n';
276    StartedNewLine = true;
277    CurLine += 1;
278    EmittedTokensOnThisLine = false;
279    EmittedDirectiveOnThisLine = false;
280  }
281
282  // If this line is "close enough" to the original line, just print newlines,
283  // otherwise print a #line directive.
284  if (CurLine == LineNo) {
285    // Nothing to do if we are already on the correct line.
286  } else if (MinimizeWhitespace && DisableLineMarkers) {
287    // With -E -P -fminimize-whitespace, don't emit anything if not necessary.
288  } else if (!StartedNewLine && LineNo - CurLine == 1) {
289    // Printing a single line has priority over printing a #line directive, even
290    // when minimizing whitespace which otherwise would print #line directives
291    // for every single line.
292    *OS << '\n';
293    StartedNewLine = true;
294  } else if (!DisableLineMarkers) {
295    if (LineNo - CurLine <= 8) {
296      const char *NewLines = "\n\n\n\n\n\n\n\n";
297      OS->write(NewLines, LineNo - CurLine);
298    } else {
299      // Emit a #line or line marker.
300      WriteLineInfo(LineNo, nullptr, 0);
301    }
302    StartedNewLine = true;
303  } else if (EmittedTokensOnThisLine) {
304    // If we are not on the correct line and don't need to be line-correct,
305    // at least ensure we start on a new line.
306    *OS << '\n';
307    StartedNewLine = true;
308  }
309
310  if (StartedNewLine) {
311    EmittedTokensOnThisLine = false;
312    EmittedDirectiveOnThisLine = false;
313  }
314
315  CurLine = LineNo;
316  return StartedNewLine;
317}
318
319void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
320  if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
321    *OS << '\n';
322    EmittedTokensOnThisLine = false;
323    EmittedDirectiveOnThisLine = false;
324  }
325}
326
327/// FileChanged - Whenever the preprocessor enters or exits a #include file
328/// it invokes this handler.  Update our conception of the current source
329/// position.
330void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
331                                           FileChangeReason Reason,
332                                       SrcMgr::CharacteristicKind NewFileType,
333                                       FileID PrevFID) {
334  // Unless we are exiting a #include, make sure to skip ahead to the line the
335  // #include directive was at.
336  SourceManager &SourceMgr = SM;
337
338  PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
339  if (UserLoc.isInvalid())
340    return;
341
342  unsigned NewLine = UserLoc.getLine();
343
344  if (Reason == PPCallbacks::EnterFile) {
345    SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
346    if (IncludeLoc.isValid())
347      MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false);
348  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
349    // GCC emits the # directive for this directive on the line AFTER the
350    // directive and emits a bunch of spaces that aren't needed. This is because
351    // otherwise we will emit a line marker for THIS line, which requires an
352    // extra blank line after the directive to avoid making all following lines
353    // off by one. We can do better by simply incrementing NewLine here.
354    NewLine += 1;
355  }
356
357  CurLine = NewLine;
358
359  // In KeepSystemIncludes mode, redirect OS as needed.
360  if (KeepSystemIncludes && (isSystem(FileType) != isSystem(NewFileType)))
361    OS = isSystem(FileType) ? OrigOS : NullOS.get();
362
363  CurFilename.clear();
364  CurFilename += UserLoc.getFilename();
365  FileType = NewFileType;
366
367  if (DisableLineMarkers) {
368    if (!MinimizeWhitespace)
369      startNewLineIfNeeded();
370    return;
371  }
372
373  if (!Initialized) {
374    WriteLineInfo(CurLine);
375    Initialized = true;
376  }
377
378  // Do not emit an enter marker for the main file (which we expect is the first
379  // entered file). This matches gcc, and improves compatibility with some tools
380  // which track the # line markers as a way to determine when the preprocessed
381  // output is in the context of the main file.
382  if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
383    IsFirstFileEntered = true;
384    return;
385  }
386
387  switch (Reason) {
388  case PPCallbacks::EnterFile:
389    WriteLineInfo(CurLine, " 1", 2);
390    break;
391  case PPCallbacks::ExitFile:
392    WriteLineInfo(CurLine, " 2", 2);
393    break;
394  case PPCallbacks::SystemHeaderPragma:
395  case PPCallbacks::RenameFile:
396    WriteLineInfo(CurLine);
397    break;
398  }
399}
400
401void PrintPPOutputPPCallbacks::InclusionDirective(
402    SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
403    bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
404    StringRef SearchPath, StringRef RelativePath, const Module *Imported,
405    SrcMgr::CharacteristicKind FileType) {
406  // In -dI mode, dump #include directives prior to dumping their content or
407  // interpretation. Similar for -fkeep-system-includes.
408  if (DumpIncludeDirectives || (KeepSystemIncludes && isSystem(FileType))) {
409    MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
410    const std::string TokenText = PP.getSpelling(IncludeTok);
411    assert(!TokenText.empty());
412    *OS << "#" << TokenText << " "
413        << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
414        << " /* clang -E "
415        << (DumpIncludeDirectives ? "-dI" : "-fkeep-system-includes")
416        << " */";
417    setEmittedDirectiveOnThisLine();
418  }
419
420  // When preprocessing, turn implicit imports into module import pragmas.
421  if (Imported) {
422    switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
423    case tok::pp_include:
424    case tok::pp_import:
425    case tok::pp_include_next:
426      MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
427      *OS << "#pragma clang module import "
428          << Imported->getFullModuleName(true)
429          << " /* clang -E: implicit import for "
430          << "#" << PP.getSpelling(IncludeTok) << " "
431          << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
432          << " */";
433      setEmittedDirectiveOnThisLine();
434      break;
435
436    case tok::pp___include_macros:
437      // #__include_macros has no effect on a user of a preprocessed source
438      // file; the only effect is on preprocessing.
439      //
440      // FIXME: That's not *quite* true: it causes the module in question to
441      // be loaded, which can affect downstream diagnostics.
442      break;
443
444    default:
445      llvm_unreachable("unknown include directive kind");
446      break;
447    }
448  }
449}
450
451/// Handle entering the scope of a module during a module compilation.
452void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
453  startNewLineIfNeeded();
454  *OS << "#pragma clang module begin " << M->getFullModuleName(true);
455  setEmittedDirectiveOnThisLine();
456}
457
458/// Handle leaving the scope of a module during a module compilation.
459void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
460  startNewLineIfNeeded();
461  *OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
462  setEmittedDirectiveOnThisLine();
463}
464
465/// Ident - Handle #ident directives when read by the preprocessor.
466///
467void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
468  MoveToLine(Loc, /*RequireStartOfLine=*/true);
469
470  OS->write("#ident ", strlen("#ident "));
471  OS->write(S.begin(), S.size());
472  setEmittedTokensOnThisLine();
473}
474
475/// MacroDefined - This hook is called whenever a macro definition is seen.
476void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
477                                            const MacroDirective *MD) {
478  const MacroInfo *MI = MD->getMacroInfo();
479  // Print out macro definitions in -dD mode and when we have -fdirectives-only
480  // for C++20 header units.
481  if ((!DumpDefines && !DirectivesOnly) ||
482      // Ignore __FILE__ etc.
483      MI->isBuiltinMacro())
484    return;
485
486  SourceLocation DefLoc = MI->getDefinitionLoc();
487  if (DirectivesOnly && !MI->isUsed()) {
488    SourceManager &SM = PP.getSourceManager();
489    if (SM.isWrittenInBuiltinFile(DefLoc) ||
490        SM.isWrittenInCommandLineFile(DefLoc))
491      return;
492  }
493  MoveToLine(DefLoc, /*RequireStartOfLine=*/true);
494  PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
495  setEmittedDirectiveOnThisLine();
496}
497
498void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
499                                              const MacroDefinition &MD,
500                                              const MacroDirective *Undef) {
501  // Print out macro definitions in -dD mode and when we have -fdirectives-only
502  // for C++20 header units.
503  if (!DumpDefines && !DirectivesOnly)
504    return;
505
506  MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true);
507  *OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
508  setEmittedDirectiveOnThisLine();
509}
510
511static void outputPrintable(raw_ostream *OS, StringRef Str) {
512  for (unsigned char Char : Str) {
513    if (isPrintable(Char) && Char != '\\' && Char != '"')
514      *OS << (char)Char;
515    else // Output anything hard as an octal escape.
516      *OS << '\\'
517          << (char)('0' + ((Char >> 6) & 7))
518          << (char)('0' + ((Char >> 3) & 7))
519          << (char)('0' + ((Char >> 0) & 7));
520  }
521}
522
523void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
524                                             StringRef Namespace,
525                                             PragmaMessageKind Kind,
526                                             StringRef Str) {
527  MoveToLine(Loc, /*RequireStartOfLine=*/true);
528  *OS << "#pragma ";
529  if (!Namespace.empty())
530    *OS << Namespace << ' ';
531  switch (Kind) {
532    case PMK_Message:
533      *OS << "message(\"";
534      break;
535    case PMK_Warning:
536      *OS << "warning \"";
537      break;
538    case PMK_Error:
539      *OS << "error \"";
540      break;
541  }
542
543  outputPrintable(OS, Str);
544  *OS << '"';
545  if (Kind == PMK_Message)
546    *OS << ')';
547  setEmittedDirectiveOnThisLine();
548}
549
550void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
551                                           StringRef DebugType) {
552  MoveToLine(Loc, /*RequireStartOfLine=*/true);
553
554  *OS << "#pragma clang __debug ";
555  *OS << DebugType;
556
557  setEmittedDirectiveOnThisLine();
558}
559
560void PrintPPOutputPPCallbacks::
561PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
562  MoveToLine(Loc, /*RequireStartOfLine=*/true);
563  *OS << "#pragma " << Namespace << " diagnostic push";
564  setEmittedDirectiveOnThisLine();
565}
566
567void PrintPPOutputPPCallbacks::
568PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
569  MoveToLine(Loc, /*RequireStartOfLine=*/true);
570  *OS << "#pragma " << Namespace << " diagnostic pop";
571  setEmittedDirectiveOnThisLine();
572}
573
574void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
575                                                StringRef Namespace,
576                                                diag::Severity Map,
577                                                StringRef Str) {
578  MoveToLine(Loc, /*RequireStartOfLine=*/true);
579  *OS << "#pragma " << Namespace << " diagnostic ";
580  switch (Map) {
581  case diag::Severity::Remark:
582    *OS << "remark";
583    break;
584  case diag::Severity::Warning:
585    *OS << "warning";
586    break;
587  case diag::Severity::Error:
588    *OS << "error";
589    break;
590  case diag::Severity::Ignored:
591    *OS << "ignored";
592    break;
593  case diag::Severity::Fatal:
594    *OS << "fatal";
595    break;
596  }
597  *OS << " \"" << Str << '"';
598  setEmittedDirectiveOnThisLine();
599}
600
601void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
602                                             PragmaWarningSpecifier WarningSpec,
603                                             ArrayRef<int> Ids) {
604  MoveToLine(Loc, /*RequireStartOfLine=*/true);
605
606  *OS << "#pragma warning(";
607  switch(WarningSpec) {
608    case PWS_Default:  *OS << "default"; break;
609    case PWS_Disable:  *OS << "disable"; break;
610    case PWS_Error:    *OS << "error"; break;
611    case PWS_Once:     *OS << "once"; break;
612    case PWS_Suppress: *OS << "suppress"; break;
613    case PWS_Level1:   *OS << '1'; break;
614    case PWS_Level2:   *OS << '2'; break;
615    case PWS_Level3:   *OS << '3'; break;
616    case PWS_Level4:   *OS << '4'; break;
617  }
618  *OS << ':';
619
620  for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
621    *OS << ' ' << *I;
622  *OS << ')';
623  setEmittedDirectiveOnThisLine();
624}
625
626void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
627                                                 int Level) {
628  MoveToLine(Loc, /*RequireStartOfLine=*/true);
629  *OS << "#pragma warning(push";
630  if (Level >= 0)
631    *OS << ", " << Level;
632  *OS << ')';
633  setEmittedDirectiveOnThisLine();
634}
635
636void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
637  MoveToLine(Loc, /*RequireStartOfLine=*/true);
638  *OS << "#pragma warning(pop)";
639  setEmittedDirectiveOnThisLine();
640}
641
642void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
643                                                     StringRef Str) {
644  MoveToLine(Loc, /*RequireStartOfLine=*/true);
645  *OS << "#pragma character_execution_set(push";
646  if (!Str.empty())
647    *OS << ", " << Str;
648  *OS << ')';
649  setEmittedDirectiveOnThisLine();
650}
651
652void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
653  MoveToLine(Loc, /*RequireStartOfLine=*/true);
654  *OS << "#pragma character_execution_set(pop)";
655  setEmittedDirectiveOnThisLine();
656}
657
658void PrintPPOutputPPCallbacks::
659PragmaAssumeNonNullBegin(SourceLocation Loc) {
660  MoveToLine(Loc, /*RequireStartOfLine=*/true);
661  *OS << "#pragma clang assume_nonnull begin";
662  setEmittedDirectiveOnThisLine();
663}
664
665void PrintPPOutputPPCallbacks::
666PragmaAssumeNonNullEnd(SourceLocation Loc) {
667  MoveToLine(Loc, /*RequireStartOfLine=*/true);
668  *OS << "#pragma clang assume_nonnull end";
669  setEmittedDirectiveOnThisLine();
670}
671
672void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
673                                                         bool RequireSpace,
674                                                         bool RequireSameLine) {
675  // These tokens are not expanded to anything and don't need whitespace before
676  // them.
677  if (Tok.is(tok::eof) ||
678      (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
679       !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
680       !Tok.is(tok::annot_repl_input_end)))
681    return;
682
683  // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
684  if ((!RequireSameLine || EmittedDirectiveOnThisLine) &&
685      MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) {
686    if (MinimizeWhitespace) {
687      // Avoid interpreting hash as a directive under -fpreprocessed.
688      if (Tok.is(tok::hash))
689        *OS << ' ';
690    } else {
691      // Print out space characters so that the first token on a line is
692      // indented for easy reading.
693      unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
694
695      // The first token on a line can have a column number of 1, yet still
696      // expect leading white space, if a macro expansion in column 1 starts
697      // with an empty macro argument, or an empty nested macro expansion. In
698      // this case, move the token to column 2.
699      if (ColNo == 1 && Tok.hasLeadingSpace())
700        ColNo = 2;
701
702      // This hack prevents stuff like:
703      // #define HASH #
704      // HASH define foo bar
705      // From having the # character end up at column 1, which makes it so it
706      // is not handled as a #define next time through the preprocessor if in
707      // -fpreprocessed mode.
708      if (ColNo <= 1 && Tok.is(tok::hash))
709        *OS << ' ';
710
711      // Otherwise, indent the appropriate number of spaces.
712      for (; ColNo > 1; --ColNo)
713        *OS << ' ';
714    }
715  } else {
716    // Insert whitespace between the previous and next token if either
717    // - The caller requires it
718    // - The input had whitespace between them and we are not in
719    //   whitespace-minimization mode
720    // - The whitespace is necessary to keep the tokens apart and there is not
721    //   already a newline between them
722    if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) ||
723        ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) &&
724         AvoidConcat(PrevPrevTok, PrevTok, Tok)))
725      *OS << ' ';
726  }
727
728  PrevPrevTok = PrevTok;
729  PrevTok = Tok;
730}
731
732void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
733                                                     unsigned Len) {
734  unsigned NumNewlines = 0;
735  for (; Len; --Len, ++TokStr) {
736    if (*TokStr != '\n' &&
737        *TokStr != '\r')
738      continue;
739
740    ++NumNewlines;
741
742    // If we have \n\r or \r\n, skip both and count as one line.
743    if (Len != 1 &&
744        (TokStr[1] == '\n' || TokStr[1] == '\r') &&
745        TokStr[0] != TokStr[1]) {
746      ++TokStr;
747      --Len;
748    }
749  }
750
751  if (NumNewlines == 0) return;
752
753  CurLine += NumNewlines;
754}
755
756
757namespace {
758struct UnknownPragmaHandler : public PragmaHandler {
759  const char *Prefix;
760  PrintPPOutputPPCallbacks *Callbacks;
761
762  // Set to true if tokens should be expanded
763  bool ShouldExpandTokens;
764
765  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
766                       bool RequireTokenExpansion)
767      : Prefix(prefix), Callbacks(callbacks),
768        ShouldExpandTokens(RequireTokenExpansion) {}
769  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
770                    Token &PragmaTok) override {
771    // Figure out what line we went to and insert the appropriate number of
772    // newline characters.
773    Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true);
774    Callbacks->OS->write(Prefix, strlen(Prefix));
775    Callbacks->setEmittedTokensOnThisLine();
776
777    if (ShouldExpandTokens) {
778      // The first token does not have expanded macros. Expand them, if
779      // required.
780      auto Toks = std::make_unique<Token[]>(1);
781      Toks[0] = PragmaTok;
782      PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
783                          /*DisableMacroExpansion=*/false,
784                          /*IsReinject=*/false);
785      PP.Lex(PragmaTok);
786    }
787
788    // Read and print all of the pragma tokens.
789    bool IsFirst = true;
790    while (PragmaTok.isNot(tok::eod)) {
791      Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst,
792                                           /*RequireSameLine=*/true);
793      IsFirst = false;
794      std::string TokSpell = PP.getSpelling(PragmaTok);
795      Callbacks->OS->write(&TokSpell[0], TokSpell.size());
796      Callbacks->setEmittedTokensOnThisLine();
797
798      if (ShouldExpandTokens)
799        PP.Lex(PragmaTok);
800      else
801        PP.LexUnexpandedToken(PragmaTok);
802    }
803    Callbacks->setEmittedDirectiveOnThisLine();
804  }
805};
806} // end anonymous namespace
807
808
809static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
810                                    PrintPPOutputPPCallbacks *Callbacks) {
811  bool DropComments = PP.getLangOpts().TraditionalCPP &&
812                      !PP.getCommentRetentionState();
813
814  bool IsStartOfLine = false;
815  char Buffer[256];
816  while (true) {
817    // Two lines joined with line continuation ('\' as last character on the
818    // line) must be emitted as one line even though Tok.getLine() returns two
819    // different values. In this situation Tok.isAtStartOfLine() is false even
820    // though it may be the first token on the lexical line. When
821    // dropping/skipping a token that is at the start of a line, propagate the
822    // start-of-line-ness to the next token to not append it to the previous
823    // line.
824    IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine();
825
826    Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false,
827                                         /*RequireSameLine=*/!IsStartOfLine);
828
829    if (DropComments && Tok.is(tok::comment)) {
830      // Skip comments. Normally the preprocessor does not generate
831      // tok::comment nodes at all when not keeping comments, but under
832      // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
833      PP.Lex(Tok);
834      continue;
835    } else if (Tok.is(tok::annot_repl_input_end)) {
836      PP.Lex(Tok);
837      continue;
838    } else if (Tok.is(tok::eod)) {
839      // Don't print end of directive tokens, since they are typically newlines
840      // that mess up our line tracking. These come from unknown pre-processor
841      // directives or hash-prefixed comments in standalone assembly files.
842      PP.Lex(Tok);
843      // FIXME: The token on the next line after #include should have
844      // Tok.isAtStartOfLine() set.
845      IsStartOfLine = true;
846      continue;
847    } else if (Tok.is(tok::annot_module_include)) {
848      // PrintPPOutputPPCallbacks::InclusionDirective handles producing
849      // appropriate output here. Ignore this token entirely.
850      PP.Lex(Tok);
851      IsStartOfLine = true;
852      continue;
853    } else if (Tok.is(tok::annot_module_begin)) {
854      // FIXME: We retrieve this token after the FileChanged callback, and
855      // retrieve the module_end token before the FileChanged callback, so
856      // we render this within the file and render the module end outside the
857      // file, but this is backwards from the token locations: the module_begin
858      // token is at the include location (outside the file) and the module_end
859      // token is at the EOF location (within the file).
860      Callbacks->BeginModule(
861          reinterpret_cast<Module *>(Tok.getAnnotationValue()));
862      PP.Lex(Tok);
863      IsStartOfLine = true;
864      continue;
865    } else if (Tok.is(tok::annot_module_end)) {
866      Callbacks->EndModule(
867          reinterpret_cast<Module *>(Tok.getAnnotationValue()));
868      PP.Lex(Tok);
869      IsStartOfLine = true;
870      continue;
871    } else if (Tok.is(tok::annot_header_unit)) {
872      // This is a header-name that has been (effectively) converted into a
873      // module-name.
874      // FIXME: The module name could contain non-identifier module name
875      // components. We don't have a good way to round-trip those.
876      Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
877      std::string Name = M->getFullModuleName();
878      Callbacks->OS->write(Name.data(), Name.size());
879      Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
880    } else if (Tok.isAnnotation()) {
881      // Ignore annotation tokens created by pragmas - the pragmas themselves
882      // will be reproduced in the preprocessed output.
883      PP.Lex(Tok);
884      continue;
885    } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
886      *Callbacks->OS << II->getName();
887    } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
888               Tok.getLiteralData()) {
889      Callbacks->OS->write(Tok.getLiteralData(), Tok.getLength());
890    } else if (Tok.getLength() < std::size(Buffer)) {
891      const char *TokPtr = Buffer;
892      unsigned Len = PP.getSpelling(Tok, TokPtr);
893      Callbacks->OS->write(TokPtr, Len);
894
895      // Tokens that can contain embedded newlines need to adjust our current
896      // line number.
897      // FIXME: The token may end with a newline in which case
898      // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is
899      // wrong.
900      if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
901        Callbacks->HandleNewlinesInToken(TokPtr, Len);
902      if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' &&
903          TokPtr[1] == '/') {
904        // It's a line comment;
905        // Ensure that we don't concatenate anything behind it.
906        Callbacks->setEmittedDirectiveOnThisLine();
907      }
908    } else {
909      std::string S = PP.getSpelling(Tok);
910      Callbacks->OS->write(S.data(), S.size());
911
912      // Tokens that can contain embedded newlines need to adjust our current
913      // line number.
914      if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
915        Callbacks->HandleNewlinesInToken(S.data(), S.size());
916      if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') {
917        // It's a line comment;
918        // Ensure that we don't concatenate anything behind it.
919        Callbacks->setEmittedDirectiveOnThisLine();
920      }
921    }
922    Callbacks->setEmittedTokensOnThisLine();
923    IsStartOfLine = false;
924
925    if (Tok.is(tok::eof)) break;
926
927    PP.Lex(Tok);
928  }
929}
930
931typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
932static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
933  return LHS->first->getName().compare(RHS->first->getName());
934}
935
936static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
937  // Ignore unknown pragmas.
938  PP.IgnorePragmas();
939
940  // -dM mode just scans and ignores all tokens in the files, then dumps out
941  // the macro table at the end.
942  PP.EnterMainSourceFile();
943
944  Token Tok;
945  do PP.Lex(Tok);
946  while (Tok.isNot(tok::eof));
947
948  SmallVector<id_macro_pair, 128> MacrosByID;
949  for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
950       I != E; ++I) {
951    auto *MD = I->second.getLatest();
952    if (MD && MD->isDefined())
953      MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
954  }
955  llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
956
957  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
958    MacroInfo &MI = *MacrosByID[i].second;
959    // Ignore computed macros like __LINE__ and friends.
960    if (MI.isBuiltinMacro()) continue;
961
962    PrintMacroDefinition(*MacrosByID[i].first, MI, PP, OS);
963    *OS << '\n';
964  }
965}
966
967/// DoPrintPreprocessedInput - This implements -E mode.
968///
969void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
970                                     const PreprocessorOutputOptions &Opts) {
971  // Show macros with no output is handled specially.
972  if (!Opts.ShowCPP) {
973    assert(Opts.ShowMacros && "Not yet implemented!");
974    DoPrintMacros(PP, OS);
975    return;
976  }
977
978  // Inform the preprocessor whether we want it to retain comments or not, due
979  // to -C or -CC.
980  PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
981
982  PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
983      PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
984      Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
985      Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
986
987  // Expand macros in pragmas with -fms-extensions.  The assumption is that
988  // the majority of pragmas in such a file will be Microsoft pragmas.
989  // Remember the handlers we will add so that we can remove them later.
990  std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
991      new UnknownPragmaHandler(
992          "#pragma", Callbacks,
993          /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
994
995  std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
996      "#pragma GCC", Callbacks,
997      /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
998
999  std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
1000      "#pragma clang", Callbacks,
1001      /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
1002
1003  PP.AddPragmaHandler(MicrosoftExtHandler.get());
1004  PP.AddPragmaHandler("GCC", GCCHandler.get());
1005  PP.AddPragmaHandler("clang", ClangHandler.get());
1006
1007  // The tokens after pragma omp need to be expanded.
1008  //
1009  //  OpenMP [2.1, Directive format]
1010  //  Preprocessing tokens following the #pragma omp are subject to macro
1011  //  replacement.
1012  std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
1013      new UnknownPragmaHandler("#pragma omp", Callbacks,
1014                               /*RequireTokenExpansion=*/true));
1015  PP.AddPragmaHandler("omp", OpenMPHandler.get());
1016
1017  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
1018
1019  // After we have configured the preprocessor, enter the main file.
1020  PP.EnterMainSourceFile();
1021  if (Opts.DirectivesOnly)
1022    PP.SetMacroExpansionOnlyInDirectives();
1023
1024  // Consume all of the tokens that come from the predefines buffer.  Those
1025  // should not be emitted into the output and are guaranteed to be at the
1026  // start.
1027  const SourceManager &SourceMgr = PP.getSourceManager();
1028  Token Tok;
1029  do {
1030    PP.Lex(Tok);
1031    if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
1032      break;
1033
1034    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
1035    if (PLoc.isInvalid())
1036      break;
1037
1038    if (strcmp(PLoc.getFilename(), "<built-in>"))
1039      break;
1040  } while (true);
1041
1042  // Read all the preprocessed tokens, printing them out to the stream.
1043  PrintPreprocessedTokens(PP, Tok, Callbacks);
1044  *OS << '\n';
1045
1046  // Remove the handlers we just added to leave the preprocessor in a sane state
1047  // so that it can be reused (for example by a clang::Parser instance).
1048  PP.RemovePragmaHandler(MicrosoftExtHandler.get());
1049  PP.RemovePragmaHandler("GCC", GCCHandler.get());
1050  PP.RemovePragmaHandler("clang", ClangHandler.get());
1051  PP.RemovePragmaHandler("omp", OpenMPHandler.get());
1052}
1053