1//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SourceMgr class.  This class is used as a simple
10// substrate for diagnostics, #include handling, and other low level things for
11// simple parsers.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Support/SourceMgr.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
21#include "llvm/Support/ErrorOr.h"
22#include "llvm/Support/Locale.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/Support/Path.h"
25#include "llvm/Support/SMLoc.h"
26#include "llvm/Support/WithColor.h"
27#include "llvm/Support/raw_ostream.h"
28#include <algorithm>
29#include <cassert>
30#include <cstddef>
31#include <limits>
32#include <memory>
33#include <string>
34#include <utility>
35
36using namespace llvm;
37
// Number of columns between tab stops; tabs in printed source lines are
// expanded with spaces up to the next multiple of this value.
static constexpr size_t TabStop = 8;
39
40unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
41                                   SMLoc IncludeLoc,
42                                   std::string &IncludedFile) {
43  IncludedFile = Filename;
44  ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
45    MemoryBuffer::getFile(IncludedFile);
46
47  // If the file didn't exist directly, see if it's in an include path.
48  for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
49       ++i) {
50    IncludedFile =
51        IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
52    NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
53  }
54
55  if (!NewBufOrErr)
56    return 0;
57
58  return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
59}
60
61unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
62  for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
63    if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
64        // Use <= here so that a pointer to the null at the end of the buffer
65        // is included as part of the buffer.
66        Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
67      return i + 1;
68  return 0;
69}
70
71template <typename T>
72unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
73
74  // Ensure OffsetCache is allocated and populated with offsets of all the
75  // '\n' bytes.
76  std::vector<T> *Offsets = nullptr;
77  if (OffsetCache.isNull()) {
78    Offsets = new std::vector<T>();
79    OffsetCache = Offsets;
80    size_t Sz = Buffer->getBufferSize();
81    assert(Sz <= std::numeric_limits<T>::max());
82    StringRef S = Buffer->getBuffer();
83    for (size_t N = 0; N < Sz; ++N) {
84      if (S[N] == '\n') {
85        Offsets->push_back(static_cast<T>(N));
86      }
87    }
88  } else {
89    Offsets = OffsetCache.get<std::vector<T> *>();
90  }
91
92  const char *BufStart = Buffer->getBufferStart();
93  assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
94  ptrdiff_t PtrDiff = Ptr - BufStart;
95  assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
96  T PtrOffset = static_cast<T>(PtrDiff);
97
98  // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
99  // the line number.
100  return llvm::lower_bound(*Offsets, PtrOffset) - Offsets->begin() + 1;
101}
102
103SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
104  : Buffer(std::move(Other.Buffer)),
105    OffsetCache(Other.OffsetCache),
106    IncludeLoc(Other.IncludeLoc) {
107  Other.OffsetCache = nullptr;
108}
109
110SourceMgr::SrcBuffer::~SrcBuffer() {
111  if (!OffsetCache.isNull()) {
112    if (OffsetCache.is<std::vector<uint8_t>*>())
113      delete OffsetCache.get<std::vector<uint8_t>*>();
114    else if (OffsetCache.is<std::vector<uint16_t>*>())
115      delete OffsetCache.get<std::vector<uint16_t>*>();
116    else if (OffsetCache.is<std::vector<uint32_t>*>())
117      delete OffsetCache.get<std::vector<uint32_t>*>();
118    else
119      delete OffsetCache.get<std::vector<uint64_t>*>();
120    OffsetCache = nullptr;
121  }
122}
123
124std::pair<unsigned, unsigned>
125SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
126  if (!BufferID)
127    BufferID = FindBufferContainingLoc(Loc);
128  assert(BufferID && "Invalid Location!");
129
130  auto &SB = getBufferInfo(BufferID);
131  const char *Ptr = Loc.getPointer();
132
133  size_t Sz = SB.Buffer->getBufferSize();
134  unsigned LineNo;
135  if (Sz <= std::numeric_limits<uint8_t>::max())
136    LineNo = SB.getLineNumber<uint8_t>(Ptr);
137  else if (Sz <= std::numeric_limits<uint16_t>::max())
138    LineNo = SB.getLineNumber<uint16_t>(Ptr);
139  else if (Sz <= std::numeric_limits<uint32_t>::max())
140    LineNo = SB.getLineNumber<uint32_t>(Ptr);
141  else
142    LineNo = SB.getLineNumber<uint64_t>(Ptr);
143
144  const char *BufStart = SB.Buffer->getBufferStart();
145  size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
146  if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
147  return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);
148}
149
150void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
151  if (IncludeLoc == SMLoc()) return;  // Top of stack.
152
153  unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
154  assert(CurBuf && "Invalid or unspecified location!");
155
156  PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
157
158  OS << "Included from "
159     << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
160     << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
161}
162
163SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
164                                   const Twine &Msg,
165                                   ArrayRef<SMRange> Ranges,
166                                   ArrayRef<SMFixIt> FixIts) const {
167  // First thing to do: find the current buffer containing the specified
168  // location to pull out the source line.
169  SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
170  std::pair<unsigned, unsigned> LineAndCol;
171  StringRef BufferID = "<unknown>";
172  std::string LineStr;
173
174  if (Loc.isValid()) {
175    unsigned CurBuf = FindBufferContainingLoc(Loc);
176    assert(CurBuf && "Invalid or unspecified location!");
177
178    const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
179    BufferID = CurMB->getBufferIdentifier();
180
181    // Scan backward to find the start of the line.
182    const char *LineStart = Loc.getPointer();
183    const char *BufStart = CurMB->getBufferStart();
184    while (LineStart != BufStart && LineStart[-1] != '\n' &&
185           LineStart[-1] != '\r')
186      --LineStart;
187
188    // Get the end of the line.
189    const char *LineEnd = Loc.getPointer();
190    const char *BufEnd = CurMB->getBufferEnd();
191    while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
192      ++LineEnd;
193    LineStr = std::string(LineStart, LineEnd);
194
195    // Convert any ranges to column ranges that only intersect the line of the
196    // location.
197    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
198      SMRange R = Ranges[i];
199      if (!R.isValid()) continue;
200
201      // If the line doesn't contain any part of the range, then ignore it.
202      if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
203        continue;
204
205      // Ignore pieces of the range that go onto other lines.
206      if (R.Start.getPointer() < LineStart)
207        R.Start = SMLoc::getFromPointer(LineStart);
208      if (R.End.getPointer() > LineEnd)
209        R.End = SMLoc::getFromPointer(LineEnd);
210
211      // Translate from SMLoc ranges to column ranges.
212      // FIXME: Handle multibyte characters.
213      ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
214                                         R.End.getPointer()-LineStart));
215    }
216
217    LineAndCol = getLineAndColumn(Loc, CurBuf);
218  }
219
220  return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
221                      LineAndCol.second-1, Kind, Msg.str(),
222                      LineStr, ColRanges, FixIts);
223}
224
225void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
226                             bool ShowColors) const {
227  // Report the message with the diagnostic handler if present.
228  if (DiagHandler) {
229    DiagHandler(Diagnostic, DiagContext);
230    return;
231  }
232
233  if (Diagnostic.getLoc().isValid()) {
234    unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
235    assert(CurBuf && "Invalid or unspecified location!");
236    PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
237  }
238
239  Diagnostic.print(nullptr, OS, ShowColors);
240}
241
242void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
243                             SourceMgr::DiagKind Kind,
244                             const Twine &Msg, ArrayRef<SMRange> Ranges,
245                             ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
246  PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
247}
248
249void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
250                             const Twine &Msg, ArrayRef<SMRange> Ranges,
251                             ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
252  PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
253}
254
255//===----------------------------------------------------------------------===//
256// SMDiagnostic Implementation
257//===----------------------------------------------------------------------===//
258
259SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
260                           int Line, int Col, SourceMgr::DiagKind Kind,
261                           StringRef Msg, StringRef LineStr,
262                           ArrayRef<std::pair<unsigned,unsigned>> Ranges,
263                           ArrayRef<SMFixIt> Hints)
264  : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
265    Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
266    FixIts(Hints.begin(), Hints.end()) {
267  llvm::sort(FixIts);
268}
269
270static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
271                           ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){
272  if (FixIts.empty())
273    return;
274
275  const char *LineStart = SourceLine.begin();
276  const char *LineEnd = SourceLine.end();
277
278  size_t PrevHintEndCol = 0;
279
280  for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end();
281       I != E; ++I) {
282    // If the fixit contains a newline or tab, ignore it.
283    if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
284      continue;
285
286    SMRange R = I->getRange();
287
288    // If the line doesn't contain any part of the range, then ignore it.
289    if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
290      continue;
291
292    // Translate from SMLoc to column.
293    // Ignore pieces of the range that go onto other lines.
294    // FIXME: Handle multibyte characters in the source line.
295    unsigned FirstCol;
296    if (R.Start.getPointer() < LineStart)
297      FirstCol = 0;
298    else
299      FirstCol = R.Start.getPointer() - LineStart;
300
301    // If we inserted a long previous hint, push this one forwards, and add
302    // an extra space to show that this is not part of the previous
303    // completion. This is sort of the best we can do when two hints appear
304    // to overlap.
305    //
306    // Note that if this hint is located immediately after the previous
307    // hint, no space will be added, since the location is more important.
308    unsigned HintCol = FirstCol;
309    if (HintCol < PrevHintEndCol)
310      HintCol = PrevHintEndCol + 1;
311
312    // FIXME: This assertion is intended to catch unintended use of multibyte
313    // characters in fixits. If we decide to do this, we'll have to track
314    // separate byte widths for the source and fixit lines.
315    assert((size_t)sys::locale::columnWidth(I->getText()) ==
316           I->getText().size());
317
318    // This relies on one byte per column in our fixit hints.
319    unsigned LastColumnModified = HintCol + I->getText().size();
320    if (LastColumnModified > FixItLine.size())
321      FixItLine.resize(LastColumnModified, ' ');
322
323    std::copy(I->getText().begin(), I->getText().end(),
324              FixItLine.begin() + HintCol);
325
326    PrevHintEndCol = LastColumnModified;
327
328    // For replacements, mark the removal range with '~'.
329    // FIXME: Handle multibyte characters in the source line.
330    unsigned LastCol;
331    if (R.End.getPointer() >= LineEnd)
332      LastCol = LineEnd - LineStart;
333    else
334      LastCol = R.End.getPointer() - LineStart;
335
336    std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
337  }
338}
339
340static void printSourceLine(raw_ostream &S, StringRef LineContents) {
341  // Print out the source line one character at a time, so we can expand tabs.
342  for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
343    size_t NextTab = LineContents.find('\t', i);
344    // If there were no tabs left, print the rest, we are done.
345    if (NextTab == StringRef::npos) {
346      S << LineContents.drop_front(i);
347      break;
348    }
349
350    // Otherwise, print from i to NextTab.
351    S << LineContents.slice(i, NextTab);
352    OutCol += NextTab - i;
353    i = NextTab;
354
355    // If we have a tab, emit at least one space, then round up to 8 columns.
356    do {
357      S << ' ';
358      ++OutCol;
359    } while ((OutCol % TabStop) != 0);
360  }
361  S << '\n';
362}
363
// Return true if the high bit of c is set, i.e. the byte is outside the 7-bit
// ASCII range.
static bool isNonASCII(char c) {
  const unsigned char Byte = static_cast<unsigned char>(c);
  return (Byte & 0x80u) != 0;
}
367
368void SMDiagnostic::print(const char *ProgName, raw_ostream &OS,
369                         bool ShowColors, bool ShowKindLabel) const {
370  {
371    WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors);
372
373    if (ProgName && ProgName[0])
374      S << ProgName << ": ";
375
376    if (!Filename.empty()) {
377      if (Filename == "-")
378        S << "<stdin>";
379      else
380        S << Filename;
381
382      if (LineNo != -1) {
383        S << ':' << LineNo;
384        if (ColumnNo != -1)
385          S << ':' << (ColumnNo + 1);
386      }
387      S << ": ";
388    }
389  }
390
391  if (ShowKindLabel) {
392    switch (Kind) {
393    case SourceMgr::DK_Error:
394      WithColor::error(OS, "", !ShowColors);
395      break;
396    case SourceMgr::DK_Warning:
397      WithColor::warning(OS, "", !ShowColors);
398      break;
399    case SourceMgr::DK_Note:
400      WithColor::note(OS, "", !ShowColors);
401      break;
402    case SourceMgr::DK_Remark:
403      WithColor::remark(OS, "", !ShowColors);
404      break;
405    }
406  }
407
408  WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors)
409      << Message << '\n';
410
411  if (LineNo == -1 || ColumnNo == -1)
412    return;
413
414  // FIXME: If there are multibyte or multi-column characters in the source, all
415  // our ranges will be wrong. To do this properly, we'll need a byte-to-column
416  // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
417  // expanding them later, and bail out rather than show incorrect ranges and
418  // misaligned fixits for any other odd characters.
419  if (find_if(LineContents, isNonASCII) != LineContents.end()) {
420    printSourceLine(OS, LineContents);
421    return;
422  }
423  size_t NumColumns = LineContents.size();
424
425  // Build the line with the caret and ranges.
426  std::string CaretLine(NumColumns+1, ' ');
427
428  // Expand any ranges.
429  for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
430    std::pair<unsigned, unsigned> R = Ranges[r];
431    std::fill(&CaretLine[R.first],
432              &CaretLine[std::min((size_t)R.second, CaretLine.size())],
433              '~');
434  }
435
436  // Add any fix-its.
437  // FIXME: Find the beginning of the line properly for multibyte characters.
438  std::string FixItInsertionLine;
439  buildFixItLine(CaretLine, FixItInsertionLine, FixIts,
440                 makeArrayRef(Loc.getPointer() - ColumnNo,
441                              LineContents.size()));
442
443  // Finally, plop on the caret.
444  if (unsigned(ColumnNo) <= NumColumns)
445    CaretLine[ColumnNo] = '^';
446  else
447    CaretLine[NumColumns] = '^';
448
449  // ... and remove trailing whitespace so the output doesn't wrap for it.  We
450  // know that the line isn't completely empty because it has the caret in it at
451  // least.
452  CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
453
454  printSourceLine(OS, LineContents);
455
456  {
457    WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors);
458
459    // Print out the caret line, matching tabs in the source line.
460    for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
461      if (i >= LineContents.size() || LineContents[i] != '\t') {
462        S << CaretLine[i];
463        ++OutCol;
464        continue;
465      }
466
467      // Okay, we have a tab.  Insert the appropriate number of characters.
468      do {
469        S << CaretLine[i];
470        ++OutCol;
471      } while ((OutCol % TabStop) != 0);
472    }
473    S << '\n';
474  }
475
476  // Print out the replacement line, matching tabs in the source line.
477  if (FixItInsertionLine.empty())
478    return;
479
480  for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
481    if (i >= LineContents.size() || LineContents[i] != '\t') {
482      OS << FixItInsertionLine[i];
483      ++OutCol;
484      continue;
485    }
486
487    // Okay, we have a tab.  Insert the appropriate number of characters.
488    do {
489      OS << FixItInsertionLine[i];
490      // FIXME: This is trying not to break up replacements, but then to re-sync
491      // with the tabs between replacements. This will fail, though, if two
492      // fix-it replacements are exactly adjacent, or if a fix-it contains a
493      // space. Really we should be precomputing column widths, which we'll
494      // need anyway for multibyte chars.
495      if (FixItInsertionLine[i] != ' ')
496        ++i;
497      ++OutCol;
498    } while (((OutCol % TabStop) != 0) && i != e);
499  }
500  OS << '\n';
501}
502