PrintPreprocessedOutput.cpp revision 327952
1//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code simply runs the preprocessor on the input file and prints out the
11// result.  This is the traditional behavior of the -E option.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Frontend/Utils.h"
16#include "clang/Basic/CharInfo.h"
17#include "clang/Basic/Diagnostic.h"
18#include "clang/Basic/SourceManager.h"
19#include "clang/Frontend/PreprocessorOutputOptions.h"
20#include "clang/Lex/MacroInfo.h"
21#include "clang/Lex/PPCallbacks.h"
22#include "clang/Lex/Pragma.h"
23#include "clang/Lex/Preprocessor.h"
24#include "clang/Lex/TokenConcatenation.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallString.h"
27#include "llvm/ADT/StringRef.h"
28#include "llvm/Support/ErrorHandling.h"
29#include "llvm/Support/raw_ostream.h"
30#include <cstdio>
31using namespace clang;
32
33/// PrintMacroDefinition - Print a macro definition in a form that will be
34/// properly accepted back as a definition.
35static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
36                                 Preprocessor &PP, raw_ostream &OS) {
37  OS << "#define " << II.getName();
38
39  if (MI.isFunctionLike()) {
40    OS << '(';
41    if (!MI.param_empty()) {
42      MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
43      for (; AI+1 != E; ++AI) {
44        OS << (*AI)->getName();
45        OS << ',';
46      }
47
48      // Last argument.
49      if ((*AI)->getName() == "__VA_ARGS__")
50        OS << "...";
51      else
52        OS << (*AI)->getName();
53    }
54
55    if (MI.isGNUVarargs())
56      OS << "...";  // #define foo(x...)
57
58    OS << ')';
59  }
60
61  // GCC always emits a space, even if the macro body is empty.  However, do not
62  // want to emit two spaces if the first token has a leading space.
63  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
64    OS << ' ';
65
66  SmallString<128> SpellingBuffer;
67  for (const auto &T : MI.tokens()) {
68    if (T.hasLeadingSpace())
69      OS << ' ';
70
71    OS << PP.getSpelling(T, SpellingBuffer);
72  }
73}
74
75//===----------------------------------------------------------------------===//
76// Preprocessed token printer
77//===----------------------------------------------------------------------===//
78
79namespace {
80class PrintPPOutputPPCallbacks : public PPCallbacks {
81  Preprocessor &PP;
82  SourceManager &SM;
83  TokenConcatenation ConcatInfo;
84public:
85  raw_ostream &OS;
86private:
87  unsigned CurLine;
88
89  bool EmittedTokensOnThisLine;
90  bool EmittedDirectiveOnThisLine;
91  SrcMgr::CharacteristicKind FileType;
92  SmallString<512> CurFilename;
93  bool Initialized;
94  bool DisableLineMarkers;
95  bool DumpDefines;
96  bool DumpIncludeDirectives;
97  bool UseLineDirectives;
98  bool IsFirstFileEntered;
99public:
100  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
101                           bool defines, bool DumpIncludeDirectives,
102                           bool UseLineDirectives)
103      : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
104        DisableLineMarkers(lineMarkers), DumpDefines(defines),
105        DumpIncludeDirectives(DumpIncludeDirectives),
106        UseLineDirectives(UseLineDirectives) {
107    CurLine = 0;
108    CurFilename += "<uninit>";
109    EmittedTokensOnThisLine = false;
110    EmittedDirectiveOnThisLine = false;
111    FileType = SrcMgr::C_User;
112    Initialized = false;
113    IsFirstFileEntered = false;
114  }
115
116  void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
117  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
118
119  void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
120  bool hasEmittedDirectiveOnThisLine() const {
121    return EmittedDirectiveOnThisLine;
122  }
123
124  bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
125
126  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
127                   SrcMgr::CharacteristicKind FileType,
128                   FileID PrevFID) override;
129  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
130                          StringRef FileName, bool IsAngled,
131                          CharSourceRange FilenameRange, const FileEntry *File,
132                          StringRef SearchPath, StringRef RelativePath,
133                          const Module *Imported) override;
134  void Ident(SourceLocation Loc, StringRef str) override;
135  void PragmaMessage(SourceLocation Loc, StringRef Namespace,
136                     PragmaMessageKind Kind, StringRef Str) override;
137  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
138  void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
139  void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
140  void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
141                        diag::Severity Map, StringRef Str) override;
142  void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
143                     ArrayRef<int> Ids) override;
144  void PragmaWarningPush(SourceLocation Loc, int Level) override;
145  void PragmaWarningPop(SourceLocation Loc) override;
146  void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
147  void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
148
149  bool HandleFirstTokOnLine(Token &Tok);
150
151  /// Move to the line of the provided source location. This will
152  /// return true if the output stream required adjustment or if
153  /// the requested location is on the first line.
154  bool MoveToLine(SourceLocation Loc) {
155    PresumedLoc PLoc = SM.getPresumedLoc(Loc);
156    if (PLoc.isInvalid())
157      return false;
158    return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
159  }
160  bool MoveToLine(unsigned LineNo);
161
162  bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
163                   const Token &Tok) {
164    return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
165  }
166  void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
167                     unsigned ExtraLen=0);
168  bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
169  void HandleNewlinesInToken(const char *TokStr, unsigned Len);
170
171  /// MacroDefined - This hook is called whenever a macro definition is seen.
172  void MacroDefined(const Token &MacroNameTok,
173                    const MacroDirective *MD) override;
174
175  /// MacroUndefined - This hook is called whenever a macro #undef is seen.
176  void MacroUndefined(const Token &MacroNameTok,
177                      const MacroDefinition &MD,
178                      const MacroDirective *Undef) override;
179
180  void BeginModule(const Module *M);
181  void EndModule(const Module *M);
182};
183}  // end anonymous namespace
184
185void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
186                                             const char *Extra,
187                                             unsigned ExtraLen) {
188  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
189
190  // Emit #line directives or GNU line markers depending on what mode we're in.
191  if (UseLineDirectives) {
192    OS << "#line" << ' ' << LineNo << ' ' << '"';
193    OS.write_escaped(CurFilename);
194    OS << '"';
195  } else {
196    OS << '#' << ' ' << LineNo << ' ' << '"';
197    OS.write_escaped(CurFilename);
198    OS << '"';
199
200    if (ExtraLen)
201      OS.write(Extra, ExtraLen);
202
203    if (FileType == SrcMgr::C_System)
204      OS.write(" 3", 2);
205    else if (FileType == SrcMgr::C_ExternCSystem)
206      OS.write(" 3 4", 4);
207  }
208  OS << '\n';
209}
210
211/// MoveToLine - Move the output to the source line specified by the location
212/// object.  We can do this by emitting some number of \n's, or be emitting a
213/// #line directive.  This returns false if already at the specified line, true
214/// if some newlines were emitted.
215bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
216  // If this line is "close enough" to the original line, just print newlines,
217  // otherwise print a #line directive.
218  if (LineNo-CurLine <= 8) {
219    if (LineNo-CurLine == 1)
220      OS << '\n';
221    else if (LineNo == CurLine)
222      return false;    // Spelling line moved, but expansion line didn't.
223    else {
224      const char *NewLines = "\n\n\n\n\n\n\n\n";
225      OS.write(NewLines, LineNo-CurLine);
226    }
227  } else if (!DisableLineMarkers) {
228    // Emit a #line or line marker.
229    WriteLineInfo(LineNo, nullptr, 0);
230  } else {
231    // Okay, we're in -P mode, which turns off line markers.  However, we still
232    // need to emit a newline between tokens on different lines.
233    startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
234  }
235
236  CurLine = LineNo;
237  return true;
238}
239
240bool
241PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
242  if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
243    OS << '\n';
244    EmittedTokensOnThisLine = false;
245    EmittedDirectiveOnThisLine = false;
246    if (ShouldUpdateCurrentLine)
247      ++CurLine;
248    return true;
249  }
250
251  return false;
252}
253
254/// FileChanged - Whenever the preprocessor enters or exits a #include file
255/// it invokes this handler.  Update our conception of the current source
256/// position.
257void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
258                                           FileChangeReason Reason,
259                                       SrcMgr::CharacteristicKind NewFileType,
260                                       FileID PrevFID) {
261  // Unless we are exiting a #include, make sure to skip ahead to the line the
262  // #include directive was at.
263  SourceManager &SourceMgr = SM;
264
265  PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
266  if (UserLoc.isInvalid())
267    return;
268
269  unsigned NewLine = UserLoc.getLine();
270
271  if (Reason == PPCallbacks::EnterFile) {
272    SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
273    if (IncludeLoc.isValid())
274      MoveToLine(IncludeLoc);
275  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
276    // GCC emits the # directive for this directive on the line AFTER the
277    // directive and emits a bunch of spaces that aren't needed. This is because
278    // otherwise we will emit a line marker for THIS line, which requires an
279    // extra blank line after the directive to avoid making all following lines
280    // off by one. We can do better by simply incrementing NewLine here.
281    NewLine += 1;
282  }
283
284  CurLine = NewLine;
285
286  CurFilename.clear();
287  CurFilename += UserLoc.getFilename();
288  FileType = NewFileType;
289
290  if (DisableLineMarkers) {
291    startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
292    return;
293  }
294
295  if (!Initialized) {
296    WriteLineInfo(CurLine);
297    Initialized = true;
298  }
299
300  // Do not emit an enter marker for the main file (which we expect is the first
301  // entered file). This matches gcc, and improves compatibility with some tools
302  // which track the # line markers as a way to determine when the preprocessed
303  // output is in the context of the main file.
304  if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
305    IsFirstFileEntered = true;
306    return;
307  }
308
309  switch (Reason) {
310  case PPCallbacks::EnterFile:
311    WriteLineInfo(CurLine, " 1", 2);
312    break;
313  case PPCallbacks::ExitFile:
314    WriteLineInfo(CurLine, " 2", 2);
315    break;
316  case PPCallbacks::SystemHeaderPragma:
317  case PPCallbacks::RenameFile:
318    WriteLineInfo(CurLine);
319    break;
320  }
321}
322
323void PrintPPOutputPPCallbacks::InclusionDirective(SourceLocation HashLoc,
324                                                  const Token &IncludeTok,
325                                                  StringRef FileName,
326                                                  bool IsAngled,
327                                                  CharSourceRange FilenameRange,
328                                                  const FileEntry *File,
329                                                  StringRef SearchPath,
330                                                  StringRef RelativePath,
331                                                  const Module *Imported) {
332  // In -dI mode, dump #include directives prior to dumping their content or
333  // interpretation.
334  if (DumpIncludeDirectives) {
335    startNewLineIfNeeded();
336    MoveToLine(HashLoc);
337    const std::string TokenText = PP.getSpelling(IncludeTok);
338    assert(!TokenText.empty());
339    OS << "#" << TokenText << " "
340       << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
341       << " /* clang -E -dI */";
342    setEmittedDirectiveOnThisLine();
343    startNewLineIfNeeded();
344  }
345
346  // When preprocessing, turn implicit imports into module import pragmas.
347  if (Imported) {
348    switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
349    case tok::pp_include:
350    case tok::pp_import:
351    case tok::pp_include_next:
352      startNewLineIfNeeded();
353      MoveToLine(HashLoc);
354      OS << "#pragma clang module import " << Imported->getFullModuleName(true)
355         << " /* clang -E: implicit import for "
356         << "#" << PP.getSpelling(IncludeTok) << " "
357         << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
358         << " */";
359      // Since we want a newline after the pragma, but not a #<line>, start a
360      // new line immediately.
361      EmittedTokensOnThisLine = true;
362      startNewLineIfNeeded();
363      break;
364
365    case tok::pp___include_macros:
366      // #__include_macros has no effect on a user of a preprocessed source
367      // file; the only effect is on preprocessing.
368      //
369      // FIXME: That's not *quite* true: it causes the module in question to
370      // be loaded, which can affect downstream diagnostics.
371      break;
372
373    default:
374      llvm_unreachable("unknown include directive kind");
375      break;
376    }
377  }
378}
379
380/// Handle entering the scope of a module during a module compilation.
381void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
382  startNewLineIfNeeded();
383  OS << "#pragma clang module begin " << M->getFullModuleName(true);
384  setEmittedDirectiveOnThisLine();
385}
386
387/// Handle leaving the scope of a module during a module compilation.
388void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
389  startNewLineIfNeeded();
390  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
391  setEmittedDirectiveOnThisLine();
392}
393
394/// Ident - Handle #ident directives when read by the preprocessor.
395///
396void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
397  MoveToLine(Loc);
398
399  OS.write("#ident ", strlen("#ident "));
400  OS.write(S.begin(), S.size());
401  EmittedTokensOnThisLine = true;
402}
403
404/// MacroDefined - This hook is called whenever a macro definition is seen.
405void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
406                                            const MacroDirective *MD) {
407  const MacroInfo *MI = MD->getMacroInfo();
408  // Only print out macro definitions in -dD mode.
409  if (!DumpDefines ||
410      // Ignore __FILE__ etc.
411      MI->isBuiltinMacro()) return;
412
413  MoveToLine(MI->getDefinitionLoc());
414  PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
415  setEmittedDirectiveOnThisLine();
416}
417
418void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
419                                              const MacroDefinition &MD,
420                                              const MacroDirective *Undef) {
421  // Only print out macro definitions in -dD mode.
422  if (!DumpDefines) return;
423
424  MoveToLine(MacroNameTok.getLocation());
425  OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
426  setEmittedDirectiveOnThisLine();
427}
428
429static void outputPrintable(raw_ostream &OS, StringRef Str) {
430  for (unsigned char Char : Str) {
431    if (isPrintable(Char) && Char != '\\' && Char != '"')
432      OS << (char)Char;
433    else // Output anything hard as an octal escape.
434      OS << '\\'
435         << (char)('0' + ((Char >> 6) & 7))
436         << (char)('0' + ((Char >> 3) & 7))
437         << (char)('0' + ((Char >> 0) & 7));
438  }
439}
440
441void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
442                                             StringRef Namespace,
443                                             PragmaMessageKind Kind,
444                                             StringRef Str) {
445  startNewLineIfNeeded();
446  MoveToLine(Loc);
447  OS << "#pragma ";
448  if (!Namespace.empty())
449    OS << Namespace << ' ';
450  switch (Kind) {
451    case PMK_Message:
452      OS << "message(\"";
453      break;
454    case PMK_Warning:
455      OS << "warning \"";
456      break;
457    case PMK_Error:
458      OS << "error \"";
459      break;
460  }
461
462  outputPrintable(OS, Str);
463  OS << '"';
464  if (Kind == PMK_Message)
465    OS << ')';
466  setEmittedDirectiveOnThisLine();
467}
468
469void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
470                                           StringRef DebugType) {
471  startNewLineIfNeeded();
472  MoveToLine(Loc);
473
474  OS << "#pragma clang __debug ";
475  OS << DebugType;
476
477  setEmittedDirectiveOnThisLine();
478}
479
480void PrintPPOutputPPCallbacks::
481PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
482  startNewLineIfNeeded();
483  MoveToLine(Loc);
484  OS << "#pragma " << Namespace << " diagnostic push";
485  setEmittedDirectiveOnThisLine();
486}
487
488void PrintPPOutputPPCallbacks::
489PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
490  startNewLineIfNeeded();
491  MoveToLine(Loc);
492  OS << "#pragma " << Namespace << " diagnostic pop";
493  setEmittedDirectiveOnThisLine();
494}
495
496void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
497                                                StringRef Namespace,
498                                                diag::Severity Map,
499                                                StringRef Str) {
500  startNewLineIfNeeded();
501  MoveToLine(Loc);
502  OS << "#pragma " << Namespace << " diagnostic ";
503  switch (Map) {
504  case diag::Severity::Remark:
505    OS << "remark";
506    break;
507  case diag::Severity::Warning:
508    OS << "warning";
509    break;
510  case diag::Severity::Error:
511    OS << "error";
512    break;
513  case diag::Severity::Ignored:
514    OS << "ignored";
515    break;
516  case diag::Severity::Fatal:
517    OS << "fatal";
518    break;
519  }
520  OS << " \"" << Str << '"';
521  setEmittedDirectiveOnThisLine();
522}
523
524void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
525                                             StringRef WarningSpec,
526                                             ArrayRef<int> Ids) {
527  startNewLineIfNeeded();
528  MoveToLine(Loc);
529  OS << "#pragma warning(" << WarningSpec << ':';
530  for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
531    OS << ' ' << *I;
532  OS << ')';
533  setEmittedDirectiveOnThisLine();
534}
535
536void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
537                                                 int Level) {
538  startNewLineIfNeeded();
539  MoveToLine(Loc);
540  OS << "#pragma warning(push";
541  if (Level >= 0)
542    OS << ", " << Level;
543  OS << ')';
544  setEmittedDirectiveOnThisLine();
545}
546
547void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
548  startNewLineIfNeeded();
549  MoveToLine(Loc);
550  OS << "#pragma warning(pop)";
551  setEmittedDirectiveOnThisLine();
552}
553
554void PrintPPOutputPPCallbacks::
555PragmaAssumeNonNullBegin(SourceLocation Loc) {
556  startNewLineIfNeeded();
557  MoveToLine(Loc);
558  OS << "#pragma clang assume_nonnull begin";
559  setEmittedDirectiveOnThisLine();
560}
561
562void PrintPPOutputPPCallbacks::
563PragmaAssumeNonNullEnd(SourceLocation Loc) {
564  startNewLineIfNeeded();
565  MoveToLine(Loc);
566  OS << "#pragma clang assume_nonnull end";
567  setEmittedDirectiveOnThisLine();
568}
569
570/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
571/// is called for the first token on each new line.  If this really is the start
572/// of a new logical line, handle it and return true, otherwise return false.
573/// This may not be the start of a logical line because the "start of line"
574/// marker is set for spelling lines, not expansion ones.
575bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
576  // Figure out what line we went to and insert the appropriate number of
577  // newline characters.
578  if (!MoveToLine(Tok.getLocation()))
579    return false;
580
581  // Print out space characters so that the first token on a line is
582  // indented for easy reading.
583  unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
584
585  // The first token on a line can have a column number of 1, yet still expect
586  // leading white space, if a macro expansion in column 1 starts with an empty
587  // macro argument, or an empty nested macro expansion. In this case, move the
588  // token to column 2.
589  if (ColNo == 1 && Tok.hasLeadingSpace())
590    ColNo = 2;
591
592  // This hack prevents stuff like:
593  // #define HASH #
594  // HASH define foo bar
595  // From having the # character end up at column 1, which makes it so it
596  // is not handled as a #define next time through the preprocessor if in
597  // -fpreprocessed mode.
598  if (ColNo <= 1 && Tok.is(tok::hash))
599    OS << ' ';
600
601  // Otherwise, indent the appropriate number of spaces.
602  for (; ColNo > 1; --ColNo)
603    OS << ' ';
604
605  return true;
606}
607
608void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
609                                                     unsigned Len) {
610  unsigned NumNewlines = 0;
611  for (; Len; --Len, ++TokStr) {
612    if (*TokStr != '\n' &&
613        *TokStr != '\r')
614      continue;
615
616    ++NumNewlines;
617
618    // If we have \n\r or \r\n, skip both and count as one line.
619    if (Len != 1 &&
620        (TokStr[1] == '\n' || TokStr[1] == '\r') &&
621        TokStr[0] != TokStr[1]) {
622      ++TokStr;
623      --Len;
624    }
625  }
626
627  if (NumNewlines == 0) return;
628
629  CurLine += NumNewlines;
630}
631
632
633namespace {
634struct UnknownPragmaHandler : public PragmaHandler {
635  const char *Prefix;
636  PrintPPOutputPPCallbacks *Callbacks;
637
638  // Set to true if tokens should be expanded
639  bool ShouldExpandTokens;
640
641  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
642                       bool RequireTokenExpansion)
643      : Prefix(prefix), Callbacks(callbacks),
644        ShouldExpandTokens(RequireTokenExpansion) {}
645  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
646                    Token &PragmaTok) override {
647    // Figure out what line we went to and insert the appropriate number of
648    // newline characters.
649    Callbacks->startNewLineIfNeeded();
650    Callbacks->MoveToLine(PragmaTok.getLocation());
651    Callbacks->OS.write(Prefix, strlen(Prefix));
652
653    if (ShouldExpandTokens) {
654      // The first token does not have expanded macros. Expand them, if
655      // required.
656      auto Toks = llvm::make_unique<Token[]>(1);
657      Toks[0] = PragmaTok;
658      PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
659                          /*DisableMacroExpansion=*/false);
660      PP.Lex(PragmaTok);
661    }
662    Token PrevToken;
663    Token PrevPrevToken;
664    PrevToken.startToken();
665    PrevPrevToken.startToken();
666
667    // Read and print all of the pragma tokens.
668    while (PragmaTok.isNot(tok::eod)) {
669      if (PragmaTok.hasLeadingSpace() ||
670          Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
671        Callbacks->OS << ' ';
672      std::string TokSpell = PP.getSpelling(PragmaTok);
673      Callbacks->OS.write(&TokSpell[0], TokSpell.size());
674
675      PrevPrevToken = PrevToken;
676      PrevToken = PragmaTok;
677
678      if (ShouldExpandTokens)
679        PP.Lex(PragmaTok);
680      else
681        PP.LexUnexpandedToken(PragmaTok);
682    }
683    Callbacks->setEmittedDirectiveOnThisLine();
684  }
685};
686} // end anonymous namespace
687
688
689static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
690                                    PrintPPOutputPPCallbacks *Callbacks,
691                                    raw_ostream &OS) {
692  bool DropComments = PP.getLangOpts().TraditionalCPP &&
693                      !PP.getCommentRetentionState();
694
695  char Buffer[256];
696  Token PrevPrevTok, PrevTok;
697  PrevPrevTok.startToken();
698  PrevTok.startToken();
699  while (1) {
700    if (Callbacks->hasEmittedDirectiveOnThisLine()) {
701      Callbacks->startNewLineIfNeeded();
702      Callbacks->MoveToLine(Tok.getLocation());
703    }
704
705    // If this token is at the start of a line, emit newlines if needed.
706    if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
707      // done.
708    } else if (Tok.hasLeadingSpace() ||
709               // If we haven't emitted a token on this line yet, PrevTok isn't
710               // useful to look at and no concatenation could happen anyway.
711               (Callbacks->hasEmittedTokensOnThisLine() &&
712                // Don't print "-" next to "-", it would form "--".
713                Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
714      OS << ' ';
715    }
716
717    if (DropComments && Tok.is(tok::comment)) {
718      // Skip comments. Normally the preprocessor does not generate
719      // tok::comment nodes at all when not keeping comments, but under
720      // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
721      SourceLocation StartLoc = Tok.getLocation();
722      Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
723    } else if (Tok.is(tok::eod)) {
724      // Don't print end of directive tokens, since they are typically newlines
725      // that mess up our line tracking. These come from unknown pre-processor
726      // directives or hash-prefixed comments in standalone assembly files.
727      PP.Lex(Tok);
728      continue;
729    } else if (Tok.is(tok::annot_module_include)) {
730      // PrintPPOutputPPCallbacks::InclusionDirective handles producing
731      // appropriate output here. Ignore this token entirely.
732      PP.Lex(Tok);
733      continue;
734    } else if (Tok.is(tok::annot_module_begin)) {
735      // FIXME: We retrieve this token after the FileChanged callback, and
736      // retrieve the module_end token before the FileChanged callback, so
737      // we render this within the file and render the module end outside the
738      // file, but this is backwards from the token locations: the module_begin
739      // token is at the include location (outside the file) and the module_end
740      // token is at the EOF location (within the file).
741      Callbacks->BeginModule(
742          reinterpret_cast<Module *>(Tok.getAnnotationValue()));
743      PP.Lex(Tok);
744      continue;
745    } else if (Tok.is(tok::annot_module_end)) {
746      Callbacks->EndModule(
747          reinterpret_cast<Module *>(Tok.getAnnotationValue()));
748      PP.Lex(Tok);
749      continue;
750    } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
751      OS << II->getName();
752    } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
753               Tok.getLiteralData()) {
754      OS.write(Tok.getLiteralData(), Tok.getLength());
755    } else if (Tok.getLength() < 256) {
756      const char *TokPtr = Buffer;
757      unsigned Len = PP.getSpelling(Tok, TokPtr);
758      OS.write(TokPtr, Len);
759
760      // Tokens that can contain embedded newlines need to adjust our current
761      // line number.
762      if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
763        Callbacks->HandleNewlinesInToken(TokPtr, Len);
764    } else {
765      std::string S = PP.getSpelling(Tok);
766      OS.write(&S[0], S.size());
767
768      // Tokens that can contain embedded newlines need to adjust our current
769      // line number.
770      if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
771        Callbacks->HandleNewlinesInToken(&S[0], S.size());
772    }
773    Callbacks->setEmittedTokensOnThisLine();
774
775    if (Tok.is(tok::eof)) break;
776
777    PrevPrevTok = PrevTok;
778    PrevTok = Tok;
779    PP.Lex(Tok);
780  }
781}
782
783typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
784static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
785  return LHS->first->getName().compare(RHS->first->getName());
786}
787
788static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
789  // Ignore unknown pragmas.
790  PP.IgnorePragmas();
791
792  // -dM mode just scans and ignores all tokens in the files, then dumps out
793  // the macro table at the end.
794  PP.EnterMainSourceFile();
795
796  Token Tok;
797  do PP.Lex(Tok);
798  while (Tok.isNot(tok::eof));
799
800  SmallVector<id_macro_pair, 128> MacrosByID;
801  for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
802       I != E; ++I) {
803    auto *MD = I->second.getLatest();
804    if (MD && MD->isDefined())
805      MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
806  }
807  llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
808
809  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
810    MacroInfo &MI = *MacrosByID[i].second;
811    // Ignore computed macros like __LINE__ and friends.
812    if (MI.isBuiltinMacro()) continue;
813
814    PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
815    *OS << '\n';
816  }
817}
818
819/// DoPrintPreprocessedInput - This implements -E mode.
820///
821void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
822                                     const PreprocessorOutputOptions &Opts) {
823  // Show macros with no output is handled specially.
824  if (!Opts.ShowCPP) {
825    assert(Opts.ShowMacros && "Not yet implemented!");
826    DoPrintMacros(PP, OS);
827    return;
828  }
829
830  // Inform the preprocessor whether we want it to retain comments or not, due
831  // to -C or -CC.
832  PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
833
834  PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
835      PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
836      Opts.ShowIncludeDirectives, Opts.UseLineDirectives);
837
838  // Expand macros in pragmas with -fms-extensions.  The assumption is that
839  // the majority of pragmas in such a file will be Microsoft pragmas.
840  // Remember the handlers we will add so that we can remove them later.
841  std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
842      new UnknownPragmaHandler(
843          "#pragma", Callbacks,
844          /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
845
846  std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
847      "#pragma GCC", Callbacks,
848      /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
849
850  std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
851      "#pragma clang", Callbacks,
852      /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
853
854  PP.AddPragmaHandler(MicrosoftExtHandler.get());
855  PP.AddPragmaHandler("GCC", GCCHandler.get());
856  PP.AddPragmaHandler("clang", ClangHandler.get());
857
858  // The tokens after pragma omp need to be expanded.
859  //
860  //  OpenMP [2.1, Directive format]
861  //  Preprocessing tokens following the #pragma omp are subject to macro
862  //  replacement.
863  std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
864      new UnknownPragmaHandler("#pragma omp", Callbacks,
865                               /*RequireTokenExpansion=*/true));
866  PP.AddPragmaHandler("omp", OpenMPHandler.get());
867
868  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
869
870  // After we have configured the preprocessor, enter the main file.
871  PP.EnterMainSourceFile();
872
873  // Consume all of the tokens that come from the predefines buffer.  Those
874  // should not be emitted into the output and are guaranteed to be at the
875  // start.
876  const SourceManager &SourceMgr = PP.getSourceManager();
877  Token Tok;
878  do {
879    PP.Lex(Tok);
880    if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
881      break;
882
883    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
884    if (PLoc.isInvalid())
885      break;
886
887    if (strcmp(PLoc.getFilename(), "<built-in>"))
888      break;
889  } while (true);
890
891  // Read all the preprocessed tokens, printing them out to the stream.
892  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
893  *OS << '\n';
894
895  // Remove the handlers we just added to leave the preprocessor in a sane state
896  // so that it can be reused (for example by a clang::Parser instance).
897  PP.RemovePragmaHandler(MicrosoftExtHandler.get());
898  PP.RemovePragmaHandler("GCC", GCCHandler.get());
899  PP.RemovePragmaHandler("clang", ClangHandler.get());
900  PP.RemovePragmaHandler("omp", OpenMPHandler.get());
901}
902