1//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of formatted_raw_ostream.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/FormattedStream.h"
14#include "llvm/Support/ConvertUTF.h"
15#include "llvm/Support/Debug.h"
16#include "llvm/Support/Unicode.h"
17#include "llvm/Support/raw_ostream.h"
18#include <algorithm>
19
20using namespace llvm;
21
22/// UpdatePosition - Examine the given char sequence and figure out which
23/// column we end up in after output, and how many line breaks are contained.
24/// This assumes that the input string is well-formed UTF-8, and takes into
25/// account Unicode characters which render as multiple columns wide.
26void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
27  unsigned &Column = Position.first;
28  unsigned &Line = Position.second;
29
30  auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
31    int Width = sys::unicode::columnWidthUTF8(CP);
32    if (Width != sys::unicode::ErrorNonPrintableCharacter)
33      Column += Width;
34
35    // The only special whitespace characters we care about are single-byte.
36    if (CP.size() > 1)
37      return;
38
39    switch (CP[0]) {
40    case '\n':
41      Line += 1;
42      LLVM_FALLTHROUGH;
43    case '\r':
44      Column = 0;
45      break;
46    case '\t':
47      // Assumes tab stop = 8 characters.
48      Column += (8 - (Column & 0x7)) & 0x7;
49      break;
50    }
51  };
52
53  // If we have a partial UTF-8 sequence from the previous buffer, check that
54  // first.
55  if (PartialUTF8Char.size()) {
56    size_t BytesFromBuffer =
57        getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
58    if (Size < BytesFromBuffer) {
59      // If we still don't have enough bytes for a complete code point, just
60      // append what we have.
61      PartialUTF8Char.append(StringRef(Ptr, Size));
62      return;
63    } else {
64      // The first few bytes from the buffer will complete the code point.
65      // Concatenate them and process their effect on the line and column
66      // numbers.
67      PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
68      ProcessUTF8CodePoint(PartialUTF8Char);
69      PartialUTF8Char.clear();
70      Ptr += BytesFromBuffer;
71      Size -= BytesFromBuffer;
72    }
73  }
74
75  // Now scan the rest of the buffer.
76  unsigned NumBytes;
77  for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
78    NumBytes = getNumBytesForUTF8(*Ptr);
79
80    // The buffer might end part way through a UTF-8 code unit sequence for a
81    // Unicode scalar value if it got flushed. If this happens, we can't know
82    // the display width until we see the rest of the code point. Stash the
83    // bytes we do have, so that we can reconstruct the whole code point later,
84    // even if the buffer is being flushed.
85    if ((unsigned)(End - Ptr) < NumBytes) {
86      PartialUTF8Char = StringRef(Ptr, End - Ptr);
87      return;
88    }
89
90    ProcessUTF8CodePoint(StringRef(Ptr, NumBytes));
91  }
92}
93
94/// ComputePosition - Examine the current output and update line and column
95/// counts.
96void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
97  // If our previous scan pointer is inside the buffer, assume we already
98  // scanned those bytes. This depends on raw_ostream to not change our buffer
99  // in unexpected ways.
100  if (Ptr <= Scanned && Scanned <= Ptr + Size)
101    // Scan all characters added since our last scan to determine the new
102    // column.
103    UpdatePosition(Scanned, Size - (Scanned - Ptr));
104  else
105    UpdatePosition(Ptr, Size);
106
107  // Update the scanning pointer.
108  Scanned = Ptr + Size;
109}
110
111/// PadToColumn - Align the output to some column number.
112///
113/// \param NewCol - The column to move to.
114///
115formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
116  // Figure out what's in the buffer and add it to the column count.
117  ComputePosition(getBufferStart(), GetNumBytesInBuffer());
118
119  // Output spaces until we reach the desired column.
120  indent(std::max(int(NewCol - getColumn()), 1));
121  return *this;
122}
123
124void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
125  // Figure out what's in the buffer and add it to the column count.
126  ComputePosition(Ptr, Size);
127
128  // Write the data to the underlying stream (which is unbuffered, so
129  // the data will be immediately written out).
130  TheStream->write(Ptr, Size);
131
132  // Reset the scanning pointer.
133  Scanned = nullptr;
134}
135
136/// fouts() - This returns a reference to a formatted_raw_ostream for
137/// standard output.  Use it like: fouts() << "foo" << "bar";
138formatted_raw_ostream &llvm::fouts() {
139  static formatted_raw_ostream S(outs());
140  return S;
141}
142
143/// ferrs() - This returns a reference to a formatted_raw_ostream for
144/// standard error.  Use it like: ferrs() << "foo" << "bar";
145formatted_raw_ostream &llvm::ferrs() {
146  static formatted_raw_ostream S(errs());
147  return S;
148}
149
150/// fdbgs() - This returns a reference to a formatted_raw_ostream for
151/// the debug stream.  Use it like: fdbgs() << "foo" << "bar";
152formatted_raw_ostream &llvm::fdbgs() {
153  static formatted_raw_ostream S(dbgs());
154  return S;
155}
156