1//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file implements the MemoryBuffer interface.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/MemoryBuffer.h"
14#include "llvm/ADT/SmallString.h"
15#include "llvm/Config/config.h"
16#include "llvm/Support/Alignment.h"
17#include "llvm/Support/Errc.h"
18#include "llvm/Support/Error.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/FileSystem.h"
21#include "llvm/Support/MathExtras.h"
22#include "llvm/Support/Process.h"
23#include "llvm/Support/Program.h"
24#include "llvm/Support/SmallVectorMemoryBuffer.h"
25#include <cassert>
26#include <cstring>
27#include <new>
28#include <sys/types.h>
29#include <system_error>
30#if !defined(_MSC_VER) && !defined(__MINGW32__)
31#include <unistd.h>
32#else
33#include <io.h>
34#endif
35
36#ifdef __MVS__
37#include "llvm/Support/AutoConvert.h"
38#endif
39using namespace llvm;
40
41//===----------------------------------------------------------------------===//
42// MemoryBuffer implementation itself.
43//===----------------------------------------------------------------------===//
44
// Out-of-line definition of the defaulted MemoryBuffer destructor.
MemoryBuffer::~MemoryBuffer() = default;
46
47/// init - Initialize this MemoryBuffer as a reference to externally allocated
48/// memory, memory that we know is already null terminated.
49void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
50                        bool RequiresNullTerminator) {
51  assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
52         "Buffer is not null terminated!");
53  BufferStart = BufStart;
54  BufferEnd = BufEnd;
55}
56
57//===----------------------------------------------------------------------===//
58// MemoryBufferMem implementation.
59//===----------------------------------------------------------------------===//
60
61/// CopyStringRef - Copies contents of a StringRef into a block of memory and
62/// null-terminates it.
63static void CopyStringRef(char *Memory, StringRef Data) {
64  if (!Data.empty())
65    memcpy(Memory, Data.data(), Data.size());
66  Memory[Data.size()] = 0; // Null terminate string.
67}
68
69namespace {
70struct NamedBufferAlloc {
71  const Twine &Name;
72  NamedBufferAlloc(const Twine &Name) : Name(Name) {}
73};
74} // namespace
75
76void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
77  SmallString<256> NameBuf;
78  StringRef NameRef = Alloc.Name.toStringRef(NameBuf);
79
80  char *Mem = static_cast<char *>(operator new(N + sizeof(size_t) +
81                                               NameRef.size() + 1));
82  *reinterpret_cast<size_t *>(Mem + N) = NameRef.size();
83  CopyStringRef(Mem + N + sizeof(size_t), NameRef);
84  return Mem;
85}
86
87namespace {
88/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
89template<typename MB>
90class MemoryBufferMem : public MB {
91public:
92  MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
93    MemoryBuffer::init(InputData.begin(), InputData.end(),
94                       RequiresNullTerminator);
95  }
96
97  /// Disable sized deallocation for MemoryBufferMem, because it has
98  /// tail-allocated data.
99  void operator delete(void *p) { ::operator delete(p); }
100
101  StringRef getBufferIdentifier() const override {
102    // The name is stored after the class itself.
103    return StringRef(reinterpret_cast<const char *>(this + 1) + sizeof(size_t),
104                     *reinterpret_cast<const size_t *>(this + 1));
105  }
106
107  MemoryBuffer::BufferKind getBufferKind() const override {
108    return MemoryBuffer::MemoryBuffer_Malloc;
109  }
110};
111} // namespace
112
// Forward declaration; the definition appears further down in this file.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
           bool IsText, bool RequiresNullTerminator, bool IsVolatile,
           std::optional<Align> Alignment);
118
119std::unique_ptr<MemoryBuffer>
120MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName,
121                           bool RequiresNullTerminator) {
122  auto *Ret = new (NamedBufferAlloc(BufferName))
123      MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator);
124  return std::unique_ptr<MemoryBuffer>(Ret);
125}
126
127std::unique_ptr<MemoryBuffer>
128MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) {
129  return std::unique_ptr<MemoryBuffer>(getMemBuffer(
130      Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator));
131}
132
133static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
134getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) {
135  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName);
136  if (!Buf)
137    return make_error_code(errc::not_enough_memory);
138  memcpy(Buf->getBufferStart(), InputData.data(), InputData.size());
139  return std::move(Buf);
140}
141
142std::unique_ptr<MemoryBuffer>
143MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) {
144  auto Buf = getMemBufferCopyImpl(InputData, BufferName);
145  if (Buf)
146    return std::move(*Buf);
147  return nullptr;
148}
149
150ErrorOr<std::unique_ptr<MemoryBuffer>>
151MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText,
152                             bool RequiresNullTerminator,
153                             std::optional<Align> Alignment) {
154  SmallString<256> NameBuf;
155  StringRef NameRef = Filename.toStringRef(NameBuf);
156
157  if (NameRef == "-")
158    return getSTDIN();
159  return getFile(Filename, IsText, RequiresNullTerminator,
160                 /*IsVolatile=*/false, Alignment);
161}
162
163ErrorOr<std::unique_ptr<MemoryBuffer>>
164MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
165                           uint64_t Offset, bool IsVolatile,
166                           std::optional<Align> Alignment) {
167  return getFileAux<MemoryBuffer>(FilePath, MapSize, Offset, /*IsText=*/false,
168                                  /*RequiresNullTerminator=*/false, IsVolatile,
169                                  Alignment);
170}
171
172//===----------------------------------------------------------------------===//
173// MemoryBuffer::getFile implementation.
174//===----------------------------------------------------------------------===//
175
176namespace {
177
/// Selects the mapped_file_region map mode used for a buffer type \p MB.
/// The primary template yields readonly.
template <typename MB>
constexpr sys::fs::mapped_file_region::mapmode Mapmode =
    sys::fs::mapped_file_region::readonly;
/// MemoryBuffer is mapped with mode readonly.
template <>
constexpr sys::fs::mapped_file_region::mapmode Mapmode<MemoryBuffer> =
    sys::fs::mapped_file_region::readonly;
/// WritableMemoryBuffer is mapped with mode priv.
template <>
constexpr sys::fs::mapped_file_region::mapmode Mapmode<WritableMemoryBuffer> =
    sys::fs::mapped_file_region::priv;
/// WriteThroughMemoryBuffer is mapped with mode readwrite.
template <>
constexpr sys::fs::mapped_file_region::mapmode
    Mapmode<WriteThroughMemoryBuffer> = sys::fs::mapped_file_region::readwrite;
190
191/// Memory maps a file descriptor using sys::fs::mapped_file_region.
192///
193/// This handles converting the offset into a legal offset on the platform.
194template<typename MB>
195class MemoryBufferMMapFile : public MB {
196  sys::fs::mapped_file_region MFR;
197
198  static uint64_t getLegalMapOffset(uint64_t Offset) {
199    return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
200  }
201
202  static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
203    return Len + (Offset - getLegalMapOffset(Offset));
204  }
205
206  const char *getStart(uint64_t Len, uint64_t Offset) {
207    return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
208  }
209
210public:
211  MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len,
212                       uint64_t Offset, std::error_code &EC)
213      : MFR(FD, Mapmode<MB>, getLegalMapSize(Len, Offset),
214            getLegalMapOffset(Offset), EC) {
215    if (!EC) {
216      const char *Start = getStart(Len, Offset);
217      MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator);
218    }
219  }
220
221  /// Disable sized deallocation for MemoryBufferMMapFile, because it has
222  /// tail-allocated data.
223  void operator delete(void *p) { ::operator delete(p); }
224
225  StringRef getBufferIdentifier() const override {
226    // The name is stored after the class itself.
227    return StringRef(reinterpret_cast<const char *>(this + 1) + sizeof(size_t),
228                     *reinterpret_cast<const size_t *>(this + 1));
229  }
230
231  MemoryBuffer::BufferKind getBufferKind() const override {
232    return MemoryBuffer::MemoryBuffer_MMap;
233  }
234
235  void dontNeedIfMmap() override { MFR.dontNeed(); }
236};
237} // namespace
238
239static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
240getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
241  SmallString<sys::fs::DefaultReadChunkSize> Buffer;
242  if (Error E = sys::fs::readNativeFileToEOF(FD, Buffer))
243    return errorToErrorCode(std::move(E));
244  return getMemBufferCopyImpl(Buffer, BufferName);
245}
246
247ErrorOr<std::unique_ptr<MemoryBuffer>>
248MemoryBuffer::getFile(const Twine &Filename, bool IsText,
249                      bool RequiresNullTerminator, bool IsVolatile,
250                      std::optional<Align> Alignment) {
251  return getFileAux<MemoryBuffer>(Filename, /*MapSize=*/-1, /*Offset=*/0,
252                                  IsText, RequiresNullTerminator, IsVolatile,
253                                  Alignment);
254}
255
// Forward declaration; the definition appears further down in this file.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
                bool IsVolatile, std::optional<Align> Alignment);
261
262template <typename MB>
263static ErrorOr<std::unique_ptr<MB>>
264getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
265           bool IsText, bool RequiresNullTerminator, bool IsVolatile,
266           std::optional<Align> Alignment) {
267  Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
268      Filename, IsText ? sys::fs::OF_TextWithCRLF : sys::fs::OF_None);
269  if (!FDOrErr)
270    return errorToErrorCode(FDOrErr.takeError());
271  sys::fs::file_t FD = *FDOrErr;
272  auto Ret = getOpenFileImpl<MB>(FD, Filename, /*FileSize=*/-1, MapSize, Offset,
273                                 RequiresNullTerminator, IsVolatile, Alignment);
274  sys::fs::closeFile(FD);
275  return Ret;
276}
277
278ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
279WritableMemoryBuffer::getFile(const Twine &Filename, bool IsVolatile,
280                              std::optional<Align> Alignment) {
281  return getFileAux<WritableMemoryBuffer>(
282      Filename, /*MapSize=*/-1, /*Offset=*/0, /*IsText=*/false,
283      /*RequiresNullTerminator=*/false, IsVolatile, Alignment);
284}
285
286ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
287WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
288                                   uint64_t Offset, bool IsVolatile,
289                                   std::optional<Align> Alignment) {
290  return getFileAux<WritableMemoryBuffer>(
291      Filename, MapSize, Offset, /*IsText=*/false,
292      /*RequiresNullTerminator=*/false, IsVolatile, Alignment);
293}
294
295std::unique_ptr<WritableMemoryBuffer>
296WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size,
297                                            const Twine &BufferName,
298                                            std::optional<Align> Alignment) {
299  using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>;
300
301  // Use 16-byte alignment if no alignment is specified.
302  Align BufAlign = Alignment.value_or(Align(16));
303
304  // Allocate space for the MemoryBuffer, the data and the name. It is important
305  // that MemoryBuffer and data are aligned so PointerIntPair works with them.
306  SmallString<256> NameBuf;
307  StringRef NameRef = BufferName.toStringRef(NameBuf);
308
309  size_t StringLen = sizeof(MemBuffer) + sizeof(size_t) + NameRef.size() + 1;
310  size_t RealLen = StringLen + Size + 1 + BufAlign.value();
311  if (RealLen <= Size) // Check for rollover.
312    return nullptr;
313  char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
314  if (!Mem)
315    return nullptr;
316
317  // The name is stored after the class itself.
318  *reinterpret_cast<size_t *>(Mem + sizeof(MemBuffer)) = NameRef.size();
319  CopyStringRef(Mem + sizeof(MemBuffer) + sizeof(size_t), NameRef);
320
321  // The buffer begins after the name and must be aligned.
322  char *Buf = (char *)alignAddr(Mem + StringLen, BufAlign);
323  Buf[Size] = 0; // Null terminate buffer.
324
325  auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true);
326  return std::unique_ptr<WritableMemoryBuffer>(Ret);
327}
328
329std::unique_ptr<WritableMemoryBuffer>
330WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) {
331  auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName);
332  if (!SB)
333    return nullptr;
334  memset(SB->getBufferStart(), 0, Size);
335  return SB;
336}
337
338static bool shouldUseMmap(sys::fs::file_t FD,
339                          size_t FileSize,
340                          size_t MapSize,
341                          off_t Offset,
342                          bool RequiresNullTerminator,
343                          int PageSize,
344                          bool IsVolatile) {
345  // mmap may leave the buffer without null terminator if the file size changed
346  // by the time the last page is mapped in, so avoid it if the file size is
347  // likely to change.
348  if (IsVolatile && RequiresNullTerminator)
349    return false;
350
351  // We don't use mmap for small files because this can severely fragment our
352  // address space.
353  if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
354    return false;
355
356  if (!RequiresNullTerminator)
357    return true;
358
359  // If we don't know the file size, use fstat to find out.  fstat on an open
360  // file descriptor is cheaper than stat on a random path.
361  // FIXME: this chunk of code is duplicated, but it avoids a fstat when
362  // RequiresNullTerminator = false and MapSize != -1.
363  if (FileSize == size_t(-1)) {
364    sys::fs::file_status Status;
365    if (sys::fs::status(FD, Status))
366      return false;
367    FileSize = Status.getSize();
368  }
369
370  // If we need a null terminator and the end of the map is inside the file,
371  // we cannot use mmap.
372  size_t End = Offset + MapSize;
373  assert(End <= FileSize);
374  if (End != FileSize)
375    return false;
376
377  // Don't try to map files that are exactly a multiple of the system page size
378  // if we need a null terminator.
379  if ((FileSize & (PageSize -1)) == 0)
380    return false;
381
382#if defined(__CYGWIN__)
383  // Don't try to map files that are exactly a multiple of the physical page size
384  // if we need a null terminator.
385  // FIXME: We should reorganize again getPageSize() on Win32.
386  if ((FileSize & (4096 - 1)) == 0)
387    return false;
388#endif
389
390  return true;
391}
392
393static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
394getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
395                 uint64_t Offset) {
396  Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite(
397      Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None);
398  if (!FDOrErr)
399    return errorToErrorCode(FDOrErr.takeError());
400  sys::fs::file_t FD = *FDOrErr;
401
402  // Default is to map the full file.
403  if (MapSize == uint64_t(-1)) {
404    // If we don't know the file size, use fstat to find out.  fstat on an open
405    // file descriptor is cheaper than stat on a random path.
406    if (FileSize == uint64_t(-1)) {
407      sys::fs::file_status Status;
408      std::error_code EC = sys::fs::status(FD, Status);
409      if (EC)
410        return EC;
411
412      // If this not a file or a block device (e.g. it's a named pipe
413      // or character device), we can't mmap it, so error out.
414      sys::fs::file_type Type = Status.type();
415      if (Type != sys::fs::file_type::regular_file &&
416          Type != sys::fs::file_type::block_file)
417        return make_error_code(errc::invalid_argument);
418
419      FileSize = Status.getSize();
420    }
421    MapSize = FileSize;
422  }
423
424  std::error_code EC;
425  std::unique_ptr<WriteThroughMemoryBuffer> Result(
426      new (NamedBufferAlloc(Filename))
427          MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize,
428                                                         Offset, EC));
429  if (EC)
430    return EC;
431  return std::move(Result);
432}
433
434ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
435WriteThroughMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize) {
436  return getReadWriteFile(Filename, FileSize, FileSize, 0);
437}
438
439/// Map a subrange of the specified file as a WritableMemoryBuffer.
440ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
441WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
442                                       uint64_t Offset) {
443  return getReadWriteFile(Filename, -1, MapSize, Offset);
444}
445
446template <typename MB>
447static ErrorOr<std::unique_ptr<MB>>
448getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
449                uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
450                bool IsVolatile, std::optional<Align> Alignment) {
451  static int PageSize = sys::Process::getPageSizeEstimate();
452
453  // Default is to map the full file.
454  if (MapSize == uint64_t(-1)) {
455    // If we don't know the file size, use fstat to find out.  fstat on an open
456    // file descriptor is cheaper than stat on a random path.
457    if (FileSize == uint64_t(-1)) {
458      sys::fs::file_status Status;
459      std::error_code EC = sys::fs::status(FD, Status);
460      if (EC)
461        return EC;
462
463      // If this not a file or a block device (e.g. it's a named pipe
464      // or character device), we can't trust the size. Create the memory
465      // buffer by copying off the stream.
466      sys::fs::file_type Type = Status.type();
467      if (Type != sys::fs::file_type::regular_file &&
468          Type != sys::fs::file_type::block_file)
469        return getMemoryBufferForStream(FD, Filename);
470
471      FileSize = Status.getSize();
472    }
473    MapSize = FileSize;
474  }
475
476  if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
477                    PageSize, IsVolatile)) {
478    std::error_code EC;
479    std::unique_ptr<MB> Result(
480        new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>(
481            RequiresNullTerminator, FD, MapSize, Offset, EC));
482    if (!EC)
483      return std::move(Result);
484  }
485
486#ifdef __MVS__
487  // Set codepage auto-conversion for z/OS.
488  if (auto EC = llvm::enableAutoConversion(FD))
489    return EC;
490#endif
491
492  auto Buf =
493      WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename, Alignment);
494  if (!Buf) {
495    // Failed to create a buffer. The only way it can fail is if
496    // new(std::nothrow) returns 0.
497    return make_error_code(errc::not_enough_memory);
498  }
499
500  // Read until EOF, zero-initialize the rest.
501  MutableArrayRef<char> ToRead = Buf->getBuffer();
502  while (!ToRead.empty()) {
503    Expected<size_t> ReadBytes =
504        sys::fs::readNativeFileSlice(FD, ToRead, Offset);
505    if (!ReadBytes)
506      return errorToErrorCode(ReadBytes.takeError());
507    if (*ReadBytes == 0) {
508      std::memset(ToRead.data(), 0, ToRead.size());
509      break;
510    }
511    ToRead = ToRead.drop_front(*ReadBytes);
512    Offset += *ReadBytes;
513  }
514
515  return std::move(Buf);
516}
517
518ErrorOr<std::unique_ptr<MemoryBuffer>>
519MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename,
520                          uint64_t FileSize, bool RequiresNullTerminator,
521                          bool IsVolatile, std::optional<Align> Alignment) {
522  return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
523                                       RequiresNullTerminator, IsVolatile,
524                                       Alignment);
525}
526
527ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getOpenFileSlice(
528    sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize, int64_t Offset,
529    bool IsVolatile, std::optional<Align> Alignment) {
530  assert(MapSize != uint64_t(-1));
531  return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
532                                       IsVolatile, Alignment);
533}
534
535ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
536  // Read in all of the data from stdin, we cannot mmap stdin.
537  //
538  // FIXME: That isn't necessarily true, we should try to mmap stdin and
539  // fallback if it fails.
540  sys::ChangeStdinMode(sys::fs::OF_Text);
541
542  return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
543}
544
545ErrorOr<std::unique_ptr<MemoryBuffer>>
546MemoryBuffer::getFileAsStream(const Twine &Filename) {
547  Expected<sys::fs::file_t> FDOrErr =
548      sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
549  if (!FDOrErr)
550    return errorToErrorCode(FDOrErr.takeError());
551  sys::fs::file_t FD = *FDOrErr;
552  ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
553      getMemoryBufferForStream(FD, Filename);
554  sys::fs::closeFile(FD);
555  return Ret;
556}
557
558MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
559  StringRef Data = getBuffer();
560  StringRef Identifier = getBufferIdentifier();
561  return MemoryBufferRef(Data, Identifier);
562}
563
// Out-of-line definition of the defaulted SmallVectorMemoryBuffer destructor.
SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() = default;
565