1//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SourceMgr class.  This class is used as a simple
10// substrate for diagnostics, #include handling, and other low level things for
11// simple parsers.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Support/SourceMgr.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
21#include "llvm/Support/ErrorOr.h"
22#include "llvm/Support/Locale.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/Support/Path.h"
25#include "llvm/Support/SMLoc.h"
26#include "llvm/Support/WithColor.h"
27#include "llvm/Support/raw_ostream.h"
28#include <algorithm>
29#include <cassert>
30#include <cstddef>
31#include <limits>
32#include <memory>
33#include <string>
34#include <utility>
35
36using namespace llvm;
37
/// Column interval to which tab characters are padded when source lines and
/// their caret / fix-it annotation lines are printed.
static constexpr size_t TabStop = 8;
39
40unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
41                                   SMLoc IncludeLoc,
42                                   std::string &IncludedFile) {
43  IncludedFile = Filename;
44  ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
45      MemoryBuffer::getFile(IncludedFile);
46
47  // If the file didn't exist directly, see if it's in an include path.
48  for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
49       ++i) {
50    IncludedFile =
51        IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
52    NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
53  }
54
55  if (!NewBufOrErr)
56    return 0;
57
58  return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
59}
60
61unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
62  for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
63    if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
64        // Use <= here so that a pointer to the null at the end of the buffer
65        // is included as part of the buffer.
66        Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
67      return i + 1;
68  return 0;
69}
70
71template <typename T>
72static std::vector<T> &GetOrCreateOffsetCache(void *&OffsetCache,
73                                              MemoryBuffer *Buffer) {
74  if (OffsetCache)
75    return *static_cast<std::vector<T> *>(OffsetCache);
76
77  // Lazily fill in the offset cache.
78  auto *Offsets = new std::vector<T>();
79  size_t Sz = Buffer->getBufferSize();
80  assert(Sz <= std::numeric_limits<T>::max());
81  StringRef S = Buffer->getBuffer();
82  for (size_t N = 0; N < Sz; ++N) {
83    if (S[N] == '\n')
84      Offsets->push_back(static_cast<T>(N));
85  }
86
87  OffsetCache = Offsets;
88  return *Offsets;
89}
90
91template <typename T>
92unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const {
93  std::vector<T> &Offsets =
94      GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
95
96  const char *BufStart = Buffer->getBufferStart();
97  assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
98  ptrdiff_t PtrDiff = Ptr - BufStart;
99  assert(PtrDiff >= 0 &&
100         static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
101  T PtrOffset = static_cast<T>(PtrDiff);
102
103  // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
104  // the line number.
105  return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1;
106}
107
108/// Look up a given \p Ptr in in the buffer, determining which line it came
109/// from.
110unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
111  size_t Sz = Buffer->getBufferSize();
112  if (Sz <= std::numeric_limits<uint8_t>::max())
113    return getLineNumberSpecialized<uint8_t>(Ptr);
114  else if (Sz <= std::numeric_limits<uint16_t>::max())
115    return getLineNumberSpecialized<uint16_t>(Ptr);
116  else if (Sz <= std::numeric_limits<uint32_t>::max())
117    return getLineNumberSpecialized<uint32_t>(Ptr);
118  else
119    return getLineNumberSpecialized<uint64_t>(Ptr);
120}
121
122template <typename T>
123const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
124    unsigned LineNo) const {
125  std::vector<T> &Offsets =
126      GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
127
128  // We start counting line and column numbers from 1.
129  if (LineNo != 0)
130    --LineNo;
131
132  const char *BufStart = Buffer->getBufferStart();
133
134  // The offset cache contains the location of the \n for the specified line,
135  // we want the start of the line.  As such, we look for the previous entry.
136  if (LineNo == 0)
137    return BufStart;
138  if (LineNo > Offsets.size())
139    return nullptr;
140  return BufStart + Offsets[LineNo - 1] + 1;
141}
142
143/// Return a pointer to the first character of the specified line number or
144/// null if the line number is invalid.
145const char *
146SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const {
147  size_t Sz = Buffer->getBufferSize();
148  if (Sz <= std::numeric_limits<uint8_t>::max())
149    return getPointerForLineNumberSpecialized<uint8_t>(LineNo);
150  else if (Sz <= std::numeric_limits<uint16_t>::max())
151    return getPointerForLineNumberSpecialized<uint16_t>(LineNo);
152  else if (Sz <= std::numeric_limits<uint32_t>::max())
153    return getPointerForLineNumberSpecialized<uint32_t>(LineNo);
154  else
155    return getPointerForLineNumberSpecialized<uint64_t>(LineNo);
156}
157
158SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
159    : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache),
160      IncludeLoc(Other.IncludeLoc) {
161  Other.OffsetCache = nullptr;
162}
163
164SourceMgr::SrcBuffer::~SrcBuffer() {
165  if (OffsetCache) {
166    size_t Sz = Buffer->getBufferSize();
167    if (Sz <= std::numeric_limits<uint8_t>::max())
168      delete static_cast<std::vector<uint8_t> *>(OffsetCache);
169    else if (Sz <= std::numeric_limits<uint16_t>::max())
170      delete static_cast<std::vector<uint16_t> *>(OffsetCache);
171    else if (Sz <= std::numeric_limits<uint32_t>::max())
172      delete static_cast<std::vector<uint32_t> *>(OffsetCache);
173    else
174      delete static_cast<std::vector<uint64_t> *>(OffsetCache);
175    OffsetCache = nullptr;
176  }
177}
178
179std::pair<unsigned, unsigned>
180SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
181  if (!BufferID)
182    BufferID = FindBufferContainingLoc(Loc);
183  assert(BufferID && "Invalid Location!");
184
185  auto &SB = getBufferInfo(BufferID);
186  const char *Ptr = Loc.getPointer();
187
188  unsigned LineNo = SB.getLineNumber(Ptr);
189  const char *BufStart = SB.Buffer->getBufferStart();
190  size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r");
191  if (NewlineOffs == StringRef::npos)
192    NewlineOffs = ~(size_t)0;
193  return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs);
194}
195
196/// Given a line and column number in a mapped buffer, turn it into an SMLoc.
197/// This will return a null SMLoc if the line/column location is invalid.
198SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo,
199                                         unsigned ColNo) {
200  auto &SB = getBufferInfo(BufferID);
201  const char *Ptr = SB.getPointerForLineNumber(LineNo);
202  if (!Ptr)
203    return SMLoc();
204
205  // We start counting line and column numbers from 1.
206  if (ColNo != 0)
207    --ColNo;
208
209  // If we have a column number, validate it.
210  if (ColNo) {
211    // Make sure the location is within the current line.
212    if (Ptr + ColNo > SB.Buffer->getBufferEnd())
213      return SMLoc();
214
215    // Make sure there is no newline in the way.
216    if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos)
217      return SMLoc();
218
219    Ptr += ColNo;
220  }
221
222  return SMLoc::getFromPointer(Ptr);
223}
224
225void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
226  if (IncludeLoc == SMLoc())
227    return; // Top of stack.
228
229  unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
230  assert(CurBuf && "Invalid or unspecified location!");
231
232  PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
233
234  OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
235     << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
236}
237
238SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
239                                   const Twine &Msg, ArrayRef<SMRange> Ranges,
240                                   ArrayRef<SMFixIt> FixIts) const {
241  // First thing to do: find the current buffer containing the specified
242  // location to pull out the source line.
243  SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
244  std::pair<unsigned, unsigned> LineAndCol;
245  StringRef BufferID = "<unknown>";
246  std::string LineStr;
247
248  if (Loc.isValid()) {
249    unsigned CurBuf = FindBufferContainingLoc(Loc);
250    assert(CurBuf && "Invalid or unspecified location!");
251
252    const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
253    BufferID = CurMB->getBufferIdentifier();
254
255    // Scan backward to find the start of the line.
256    const char *LineStart = Loc.getPointer();
257    const char *BufStart = CurMB->getBufferStart();
258    while (LineStart != BufStart && LineStart[-1] != '\n' &&
259           LineStart[-1] != '\r')
260      --LineStart;
261
262    // Get the end of the line.
263    const char *LineEnd = Loc.getPointer();
264    const char *BufEnd = CurMB->getBufferEnd();
265    while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
266      ++LineEnd;
267    LineStr = std::string(LineStart, LineEnd);
268
269    // Convert any ranges to column ranges that only intersect the line of the
270    // location.
271    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
272      SMRange R = Ranges[i];
273      if (!R.isValid())
274        continue;
275
276      // If the line doesn't contain any part of the range, then ignore it.
277      if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
278        continue;
279
280      // Ignore pieces of the range that go onto other lines.
281      if (R.Start.getPointer() < LineStart)
282        R.Start = SMLoc::getFromPointer(LineStart);
283      if (R.End.getPointer() > LineEnd)
284        R.End = SMLoc::getFromPointer(LineEnd);
285
286      // Translate from SMLoc ranges to column ranges.
287      // FIXME: Handle multibyte characters.
288      ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart,
289                                         R.End.getPointer() - LineStart));
290    }
291
292    LineAndCol = getLineAndColumn(Loc, CurBuf);
293  }
294
295  return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
296                      LineAndCol.second - 1, Kind, Msg.str(), LineStr,
297                      ColRanges, FixIts);
298}
299
300void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
301                             bool ShowColors) const {
302  // Report the message with the diagnostic handler if present.
303  if (DiagHandler) {
304    DiagHandler(Diagnostic, DiagContext);
305    return;
306  }
307
308  if (Diagnostic.getLoc().isValid()) {
309    unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
310    assert(CurBuf && "Invalid or unspecified location!");
311    PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
312  }
313
314  Diagnostic.print(nullptr, OS, ShowColors);
315}
316
317void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
318                             SourceMgr::DiagKind Kind, const Twine &Msg,
319                             ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts,
320                             bool ShowColors) const {
321  PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
322}
323
324void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
325                             const Twine &Msg, ArrayRef<SMRange> Ranges,
326                             ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
327  PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
328}
329
330//===----------------------------------------------------------------------===//
331// SMFixIt Implementation
332//===----------------------------------------------------------------------===//
333
334SMFixIt::SMFixIt(SMRange R, const Twine &Replacement)
335    : Range(R), Text(Replacement.str()) {
336  assert(R.isValid());
337}
338
339//===----------------------------------------------------------------------===//
340// SMDiagnostic Implementation
341//===----------------------------------------------------------------------===//
342
343SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line,
344                           int Col, SourceMgr::DiagKind Kind, StringRef Msg,
345                           StringRef LineStr,
346                           ArrayRef<std::pair<unsigned, unsigned>> Ranges,
347                           ArrayRef<SMFixIt> Hints)
348    : SM(&sm), Loc(L), Filename(std::string(FN)), LineNo(Line), ColumnNo(Col),
349      Kind(Kind), Message(std::string(Msg)), LineContents(std::string(LineStr)),
350      Ranges(Ranges.vec()), FixIts(Hints.begin(), Hints.end()) {
351  llvm::sort(FixIts);
352}
353
354static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
355                           ArrayRef<SMFixIt> FixIts,
356                           ArrayRef<char> SourceLine) {
357  if (FixIts.empty())
358    return;
359
360  const char *LineStart = SourceLine.begin();
361  const char *LineEnd = SourceLine.end();
362
363  size_t PrevHintEndCol = 0;
364
365  for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); I != E;
366       ++I) {
367    // If the fixit contains a newline or tab, ignore it.
368    if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
369      continue;
370
371    SMRange R = I->getRange();
372
373    // If the line doesn't contain any part of the range, then ignore it.
374    if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
375      continue;
376
377    // Translate from SMLoc to column.
378    // Ignore pieces of the range that go onto other lines.
379    // FIXME: Handle multibyte characters in the source line.
380    unsigned FirstCol;
381    if (R.Start.getPointer() < LineStart)
382      FirstCol = 0;
383    else
384      FirstCol = R.Start.getPointer() - LineStart;
385
386    // If we inserted a long previous hint, push this one forwards, and add
387    // an extra space to show that this is not part of the previous
388    // completion. This is sort of the best we can do when two hints appear
389    // to overlap.
390    //
391    // Note that if this hint is located immediately after the previous
392    // hint, no space will be added, since the location is more important.
393    unsigned HintCol = FirstCol;
394    if (HintCol < PrevHintEndCol)
395      HintCol = PrevHintEndCol + 1;
396
397    // FIXME: This assertion is intended to catch unintended use of multibyte
398    // characters in fixits. If we decide to do this, we'll have to track
399    // separate byte widths for the source and fixit lines.
400    assert((size_t)sys::locale::columnWidth(I->getText()) ==
401           I->getText().size());
402
403    // This relies on one byte per column in our fixit hints.
404    unsigned LastColumnModified = HintCol + I->getText().size();
405    if (LastColumnModified > FixItLine.size())
406      FixItLine.resize(LastColumnModified, ' ');
407
408    std::copy(I->getText().begin(), I->getText().end(),
409              FixItLine.begin() + HintCol);
410
411    PrevHintEndCol = LastColumnModified;
412
413    // For replacements, mark the removal range with '~'.
414    // FIXME: Handle multibyte characters in the source line.
415    unsigned LastCol;
416    if (R.End.getPointer() >= LineEnd)
417      LastCol = LineEnd - LineStart;
418    else
419      LastCol = R.End.getPointer() - LineStart;
420
421    std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
422  }
423}
424
425static void printSourceLine(raw_ostream &S, StringRef LineContents) {
426  // Print out the source line one character at a time, so we can expand tabs.
427  for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
428    size_t NextTab = LineContents.find('\t', i);
429    // If there were no tabs left, print the rest, we are done.
430    if (NextTab == StringRef::npos) {
431      S << LineContents.drop_front(i);
432      break;
433    }
434
435    // Otherwise, print from i to NextTab.
436    S << LineContents.slice(i, NextTab);
437    OutCol += NextTab - i;
438    i = NextTab;
439
440    // If we have a tab, emit at least one space, then round up to 8 columns.
441    do {
442      S << ' ';
443      ++OutCol;
444    } while ((OutCol % TabStop) != 0);
445  }
446  S << '\n';
447}
448
/// Return true if the high bit of \p c is set, i.e. the byte is outside the
/// 7-bit ASCII range.
static bool isNonASCII(char c) {
  const unsigned char Byte = static_cast<unsigned char>(c);
  return (Byte & 0x80u) != 0;
}
450
451void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors,
452                         bool ShowKindLabel) const {
453  ColorMode Mode = ShowColors ? ColorMode::Auto : ColorMode::Disable;
454
455  {
456    WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, Mode);
457
458    if (ProgName && ProgName[0])
459      S << ProgName << ": ";
460
461    if (!Filename.empty()) {
462      if (Filename == "-")
463        S << "<stdin>";
464      else
465        S << Filename;
466
467      if (LineNo != -1) {
468        S << ':' << LineNo;
469        if (ColumnNo != -1)
470          S << ':' << (ColumnNo + 1);
471      }
472      S << ": ";
473    }
474  }
475
476  if (ShowKindLabel) {
477    switch (Kind) {
478    case SourceMgr::DK_Error:
479      WithColor::error(OS, "", !ShowColors);
480      break;
481    case SourceMgr::DK_Warning:
482      WithColor::warning(OS, "", !ShowColors);
483      break;
484    case SourceMgr::DK_Note:
485      WithColor::note(OS, "", !ShowColors);
486      break;
487    case SourceMgr::DK_Remark:
488      WithColor::remark(OS, "", !ShowColors);
489      break;
490    }
491  }
492
493  WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, Mode) << Message << '\n';
494
495  if (LineNo == -1 || ColumnNo == -1)
496    return;
497
498  // FIXME: If there are multibyte or multi-column characters in the source, all
499  // our ranges will be wrong. To do this properly, we'll need a byte-to-column
500  // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
501  // expanding them later, and bail out rather than show incorrect ranges and
502  // misaligned fixits for any other odd characters.
503  if (find_if(LineContents, isNonASCII) != LineContents.end()) {
504    printSourceLine(OS, LineContents);
505    return;
506  }
507  size_t NumColumns = LineContents.size();
508
509  // Build the line with the caret and ranges.
510  std::string CaretLine(NumColumns + 1, ' ');
511
512  // Expand any ranges.
513  for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
514    std::pair<unsigned, unsigned> R = Ranges[r];
515    std::fill(&CaretLine[R.first],
516              &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~');
517  }
518
519  // Add any fix-its.
520  // FIXME: Find the beginning of the line properly for multibyte characters.
521  std::string FixItInsertionLine;
522  buildFixItLine(
523      CaretLine, FixItInsertionLine, FixIts,
524      makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size()));
525
526  // Finally, plop on the caret.
527  if (unsigned(ColumnNo) <= NumColumns)
528    CaretLine[ColumnNo] = '^';
529  else
530    CaretLine[NumColumns] = '^';
531
532  // ... and remove trailing whitespace so the output doesn't wrap for it.  We
533  // know that the line isn't completely empty because it has the caret in it at
534  // least.
535  CaretLine.erase(CaretLine.find_last_not_of(' ') + 1);
536
537  printSourceLine(OS, LineContents);
538
539  {
540    ColorMode Mode = ShowColors ? ColorMode::Auto : ColorMode::Disable;
541    WithColor S(OS, raw_ostream::GREEN, true, false, Mode);
542
543    // Print out the caret line, matching tabs in the source line.
544    for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
545      if (i >= LineContents.size() || LineContents[i] != '\t') {
546        S << CaretLine[i];
547        ++OutCol;
548        continue;
549      }
550
551      // Okay, we have a tab.  Insert the appropriate number of characters.
552      do {
553        S << CaretLine[i];
554        ++OutCol;
555      } while ((OutCol % TabStop) != 0);
556    }
557    S << '\n';
558  }
559
560  // Print out the replacement line, matching tabs in the source line.
561  if (FixItInsertionLine.empty())
562    return;
563
564  for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
565    if (i >= LineContents.size() || LineContents[i] != '\t') {
566      OS << FixItInsertionLine[i];
567      ++OutCol;
568      continue;
569    }
570
571    // Okay, we have a tab.  Insert the appropriate number of characters.
572    do {
573      OS << FixItInsertionLine[i];
574      // FIXME: This is trying not to break up replacements, but then to re-sync
575      // with the tabs between replacements. This will fail, though, if two
576      // fix-it replacements are exactly adjacent, or if a fix-it contains a
577      // space. Really we should be precomputing column widths, which we'll
578      // need anyway for multibyte chars.
579      if (FixItInsertionLine[i] != ' ')
580        ++i;
581      ++OutCol;
582    } while (((OutCol % TabStop) != 0) && i != e);
583  }
584  OS << '\n';
585}
586