MemoryBuffer.cpp revision 360784
1//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file implements the MemoryBuffer interface.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/MemoryBuffer.h"
14#include "llvm/ADT/SmallString.h"
15#include "llvm/Config/config.h"
16#include "llvm/Support/Errc.h"
17#include "llvm/Support/Errno.h"
18#include "llvm/Support/FileSystem.h"
19#include "llvm/Support/MathExtras.h"
20#include "llvm/Support/Path.h"
21#include "llvm/Support/Process.h"
22#include "llvm/Support/Program.h"
23#include "llvm/Support/SmallVectorMemoryBuffer.h"
24#include <cassert>
25#include <cerrno>
26#include <cstring>
27#include <new>
28#include <sys/types.h>
29#include <system_error>
30#if !defined(_MSC_VER) && !defined(__MINGW32__)
31#include <unistd.h>
32#else
33#include <io.h>
34#endif
35using namespace llvm;
36
37//===----------------------------------------------------------------------===//
38// MemoryBuffer implementation itself.
39//===----------------------------------------------------------------------===//
40
41MemoryBuffer::~MemoryBuffer() { }
42
43/// init - Initialize this MemoryBuffer as a reference to externally allocated
44/// memory, memory that we know is already null terminated.
45void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
46                        bool RequiresNullTerminator) {
47  assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
48         "Buffer is not null terminated!");
49  BufferStart = BufStart;
50  BufferEnd = BufEnd;
51}
52
53//===----------------------------------------------------------------------===//
54// MemoryBufferMem implementation.
55//===----------------------------------------------------------------------===//
56
57/// CopyStringRef - Copies contents of a StringRef into a block of memory and
58/// null-terminates it.
59static void CopyStringRef(char *Memory, StringRef Data) {
60  if (!Data.empty())
61    memcpy(Memory, Data.data(), Data.size());
62  Memory[Data.size()] = 0; // Null terminate string.
63}
64
65namespace {
66struct NamedBufferAlloc {
67  const Twine &Name;
68  NamedBufferAlloc(const Twine &Name) : Name(Name) {}
69};
70}
71
72void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
73  SmallString<256> NameBuf;
74  StringRef NameRef = Alloc.Name.toStringRef(NameBuf);
75
76  char *Mem = static_cast<char *>(operator new(N + NameRef.size() + 1));
77  CopyStringRef(Mem + N, NameRef);
78  return Mem;
79}
80
81namespace {
82/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
83template<typename MB>
84class MemoryBufferMem : public MB {
85public:
86  MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
87    MemoryBuffer::init(InputData.begin(), InputData.end(),
88                       RequiresNullTerminator);
89  }
90
91  /// Disable sized deallocation for MemoryBufferMem, because it has
92  /// tail-allocated data.
93  void operator delete(void *p) { ::operator delete(p); }
94
95  StringRef getBufferIdentifier() const override {
96    // The name is stored after the class itself.
97    return StringRef(reinterpret_cast<const char *>(this + 1));
98  }
99
100  MemoryBuffer::BufferKind getBufferKind() const override {
101    return MemoryBuffer::MemoryBuffer_Malloc;
102  }
103};
104}
105
106template <typename MB>
107static ErrorOr<std::unique_ptr<MB>>
108getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
109           uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile);
110
111std::unique_ptr<MemoryBuffer>
112MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName,
113                           bool RequiresNullTerminator) {
114  auto *Ret = new (NamedBufferAlloc(BufferName))
115      MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator);
116  return std::unique_ptr<MemoryBuffer>(Ret);
117}
118
119std::unique_ptr<MemoryBuffer>
120MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) {
121  return std::unique_ptr<MemoryBuffer>(getMemBuffer(
122      Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator));
123}
124
125static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
126getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) {
127  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName);
128  if (!Buf)
129    return make_error_code(errc::not_enough_memory);
130  memcpy(Buf->getBufferStart(), InputData.data(), InputData.size());
131  return std::move(Buf);
132}
133
134std::unique_ptr<MemoryBuffer>
135MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) {
136  auto Buf = getMemBufferCopyImpl(InputData, BufferName);
137  if (Buf)
138    return std::move(*Buf);
139  return nullptr;
140}
141
142ErrorOr<std::unique_ptr<MemoryBuffer>>
143MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize,
144                             bool RequiresNullTerminator) {
145  SmallString<256> NameBuf;
146  StringRef NameRef = Filename.toStringRef(NameBuf);
147
148  if (NameRef == "-")
149    return getSTDIN();
150  return getFile(Filename, FileSize, RequiresNullTerminator);
151}
152
153ErrorOr<std::unique_ptr<MemoryBuffer>>
154MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
155                           uint64_t Offset, bool IsVolatile) {
156  return getFileAux<MemoryBuffer>(FilePath, -1, MapSize, Offset, false,
157                                  IsVolatile);
158}
159
160//===----------------------------------------------------------------------===//
161// MemoryBuffer::getFile implementation.
162//===----------------------------------------------------------------------===//
163
164namespace {
165/// Memory maps a file descriptor using sys::fs::mapped_file_region.
166///
167/// This handles converting the offset into a legal offset on the platform.
168template<typename MB>
169class MemoryBufferMMapFile : public MB {
170  sys::fs::mapped_file_region MFR;
171
172  static uint64_t getLegalMapOffset(uint64_t Offset) {
173    return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
174  }
175
176  static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
177    return Len + (Offset - getLegalMapOffset(Offset));
178  }
179
180  const char *getStart(uint64_t Len, uint64_t Offset) {
181    return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
182  }
183
184public:
185  MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len,
186                       uint64_t Offset, std::error_code &EC)
187      : MFR(FD, MB::Mapmode, getLegalMapSize(Len, Offset),
188            getLegalMapOffset(Offset), EC) {
189    if (!EC) {
190      const char *Start = getStart(Len, Offset);
191      MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator);
192    }
193  }
194
195  /// Disable sized deallocation for MemoryBufferMMapFile, because it has
196  /// tail-allocated data.
197  void operator delete(void *p) { ::operator delete(p); }
198
199  StringRef getBufferIdentifier() const override {
200    // The name is stored after the class itself.
201    return StringRef(reinterpret_cast<const char *>(this + 1));
202  }
203
204  MemoryBuffer::BufferKind getBufferKind() const override {
205    return MemoryBuffer::MemoryBuffer_MMap;
206  }
207};
208}
209
210static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
211getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
212  const ssize_t ChunkSize = 4096*4;
213  SmallString<ChunkSize> Buffer;
214  // Read into Buffer until we hit EOF.
215  for (;;) {
216    Buffer.reserve(Buffer.size() + ChunkSize);
217    Expected<size_t> ReadBytes = sys::fs::readNativeFile(
218        FD, makeMutableArrayRef(Buffer.end(), ChunkSize));
219    if (!ReadBytes)
220      return errorToErrorCode(ReadBytes.takeError());
221    if (*ReadBytes == 0)
222      break;
223    Buffer.set_size(Buffer.size() + *ReadBytes);
224  }
225
226  return getMemBufferCopyImpl(Buffer, BufferName);
227}
228
229
230ErrorOr<std::unique_ptr<MemoryBuffer>>
231MemoryBuffer::getFile(const Twine &Filename, int64_t FileSize,
232                      bool RequiresNullTerminator, bool IsVolatile) {
233  return getFileAux<MemoryBuffer>(Filename, FileSize, FileSize, 0,
234                                  RequiresNullTerminator, IsVolatile);
235}
236
237template <typename MB>
238static ErrorOr<std::unique_ptr<MB>>
239getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
240                uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
241                bool IsVolatile);
242
243template <typename MB>
244static ErrorOr<std::unique_ptr<MB>>
245getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
246           uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile) {
247  Expected<sys::fs::file_t> FDOrErr =
248      sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
249  if (!FDOrErr)
250    return errorToErrorCode(FDOrErr.takeError());
251  sys::fs::file_t FD = *FDOrErr;
252  auto Ret = getOpenFileImpl<MB>(FD, Filename, FileSize, MapSize, Offset,
253                                 RequiresNullTerminator, IsVolatile);
254  sys::fs::closeFile(FD);
255  return Ret;
256}
257
258ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
259WritableMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize,
260                              bool IsVolatile) {
261  return getFileAux<WritableMemoryBuffer>(Filename, FileSize, FileSize, 0,
262                                          /*RequiresNullTerminator*/ false,
263                                          IsVolatile);
264}
265
266ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
267WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
268                                   uint64_t Offset, bool IsVolatile) {
269  return getFileAux<WritableMemoryBuffer>(Filename, -1, MapSize, Offset, false,
270                                          IsVolatile);
271}
272
273std::unique_ptr<WritableMemoryBuffer>
274WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) {
275  using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>;
276  // Allocate space for the MemoryBuffer, the data and the name. It is important
277  // that MemoryBuffer and data are aligned so PointerIntPair works with them.
278  // TODO: Is 16-byte alignment enough?  We copy small object files with large
279  // alignment expectations into this buffer.
280  SmallString<256> NameBuf;
281  StringRef NameRef = BufferName.toStringRef(NameBuf);
282  size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16);
283  size_t RealLen = AlignedStringLen + Size + 1;
284  char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
285  if (!Mem)
286    return nullptr;
287
288  // The name is stored after the class itself.
289  CopyStringRef(Mem + sizeof(MemBuffer), NameRef);
290
291  // The buffer begins after the name and must be aligned.
292  char *Buf = Mem + AlignedStringLen;
293  Buf[Size] = 0; // Null terminate buffer.
294
295  auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true);
296  return std::unique_ptr<WritableMemoryBuffer>(Ret);
297}
298
299std::unique_ptr<WritableMemoryBuffer>
300WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) {
301  auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName);
302  if (!SB)
303    return nullptr;
304  memset(SB->getBufferStart(), 0, Size);
305  return SB;
306}
307
308static bool shouldUseMmap(sys::fs::file_t FD,
309                          size_t FileSize,
310                          size_t MapSize,
311                          off_t Offset,
312                          bool RequiresNullTerminator,
313                          int PageSize,
314                          bool IsVolatile) {
315  // mmap may leave the buffer without null terminator if the file size changed
316  // by the time the last page is mapped in, so avoid it if the file size is
317  // likely to change.
318  if (IsVolatile)
319    return false;
320
321  // We don't use mmap for small files because this can severely fragment our
322  // address space.
323  if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
324    return false;
325
326  if (!RequiresNullTerminator)
327    return true;
328
329  // If we don't know the file size, use fstat to find out.  fstat on an open
330  // file descriptor is cheaper than stat on a random path.
331  // FIXME: this chunk of code is duplicated, but it avoids a fstat when
332  // RequiresNullTerminator = false and MapSize != -1.
333  if (FileSize == size_t(-1)) {
334    sys::fs::file_status Status;
335    if (sys::fs::status(FD, Status))
336      return false;
337    FileSize = Status.getSize();
338  }
339
340  // If we need a null terminator and the end of the map is inside the file,
341  // we cannot use mmap.
342  size_t End = Offset + MapSize;
343  assert(End <= FileSize);
344  if (End != FileSize)
345    return false;
346
347  // Don't try to map files that are exactly a multiple of the system page size
348  // if we need a null terminator.
349  if ((FileSize & (PageSize -1)) == 0)
350    return false;
351
352#if defined(__CYGWIN__)
353  // Don't try to map files that are exactly a multiple of the physical page size
354  // if we need a null terminator.
355  // FIXME: We should reorganize again getPageSize() on Win32.
356  if ((FileSize & (4096 - 1)) == 0)
357    return false;
358#endif
359
360  return true;
361}
362
363static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
364getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
365                 uint64_t Offset) {
366  Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite(
367      Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None);
368  if (!FDOrErr)
369    return errorToErrorCode(FDOrErr.takeError());
370  sys::fs::file_t FD = *FDOrErr;
371
372  // Default is to map the full file.
373  if (MapSize == uint64_t(-1)) {
374    // If we don't know the file size, use fstat to find out.  fstat on an open
375    // file descriptor is cheaper than stat on a random path.
376    if (FileSize == uint64_t(-1)) {
377      sys::fs::file_status Status;
378      std::error_code EC = sys::fs::status(FD, Status);
379      if (EC)
380        return EC;
381
382      // If this not a file or a block device (e.g. it's a named pipe
383      // or character device), we can't mmap it, so error out.
384      sys::fs::file_type Type = Status.type();
385      if (Type != sys::fs::file_type::regular_file &&
386          Type != sys::fs::file_type::block_file)
387        return make_error_code(errc::invalid_argument);
388
389      FileSize = Status.getSize();
390    }
391    MapSize = FileSize;
392  }
393
394  std::error_code EC;
395  std::unique_ptr<WriteThroughMemoryBuffer> Result(
396      new (NamedBufferAlloc(Filename))
397          MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize,
398                                                         Offset, EC));
399  if (EC)
400    return EC;
401  return std::move(Result);
402}
403
404ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
405WriteThroughMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize) {
406  return getReadWriteFile(Filename, FileSize, FileSize, 0);
407}
408
409/// Map a subrange of the specified file as a WritableMemoryBuffer.
410ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
411WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
412                                       uint64_t Offset) {
413  return getReadWriteFile(Filename, -1, MapSize, Offset);
414}
415
416template <typename MB>
417static ErrorOr<std::unique_ptr<MB>>
418getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
419                uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
420                bool IsVolatile) {
421  static int PageSize = sys::Process::getPageSizeEstimate();
422
423  // Default is to map the full file.
424  if (MapSize == uint64_t(-1)) {
425    // If we don't know the file size, use fstat to find out.  fstat on an open
426    // file descriptor is cheaper than stat on a random path.
427    if (FileSize == uint64_t(-1)) {
428      sys::fs::file_status Status;
429      std::error_code EC = sys::fs::status(FD, Status);
430      if (EC)
431        return EC;
432
433      // If this not a file or a block device (e.g. it's a named pipe
434      // or character device), we can't trust the size. Create the memory
435      // buffer by copying off the stream.
436      sys::fs::file_type Type = Status.type();
437      if (Type != sys::fs::file_type::regular_file &&
438          Type != sys::fs::file_type::block_file)
439        return getMemoryBufferForStream(FD, Filename);
440
441      FileSize = Status.getSize();
442    }
443    MapSize = FileSize;
444  }
445
446  if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
447                    PageSize, IsVolatile)) {
448    std::error_code EC;
449    std::unique_ptr<MB> Result(
450        new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>(
451            RequiresNullTerminator, FD, MapSize, Offset, EC));
452    if (!EC)
453      return std::move(Result);
454  }
455
456  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
457  if (!Buf) {
458    // Failed to create a buffer. The only way it can fail is if
459    // new(std::nothrow) returns 0.
460    return make_error_code(errc::not_enough_memory);
461  }
462
463  // Read until EOF, zero-initialize the rest.
464  MutableArrayRef<char> ToRead = Buf->getBuffer();
465  while (!ToRead.empty()) {
466    Expected<size_t> ReadBytes =
467        sys::fs::readNativeFileSlice(FD, ToRead, Offset);
468    if (!ReadBytes)
469      return errorToErrorCode(ReadBytes.takeError());
470    if (*ReadBytes == 0) {
471      std::memset(ToRead.data(), 0, ToRead.size());
472      break;
473    }
474    ToRead = ToRead.drop_front(*ReadBytes);
475    Offset += *ReadBytes;
476  }
477
478  return std::move(Buf);
479}
480
481ErrorOr<std::unique_ptr<MemoryBuffer>>
482MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
483                          bool RequiresNullTerminator, bool IsVolatile) {
484  return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
485                         RequiresNullTerminator, IsVolatile);
486}
487
488ErrorOr<std::unique_ptr<MemoryBuffer>>
489MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
490                               int64_t Offset, bool IsVolatile) {
491  assert(MapSize != uint64_t(-1));
492  return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
493                                       IsVolatile);
494}
495
496ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
497  // Read in all of the data from stdin, we cannot mmap stdin.
498  //
499  // FIXME: That isn't necessarily true, we should try to mmap stdin and
500  // fallback if it fails.
501  sys::ChangeStdinToBinary();
502
503  return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
504}
505
506ErrorOr<std::unique_ptr<MemoryBuffer>>
507MemoryBuffer::getFileAsStream(const Twine &Filename) {
508  Expected<sys::fs::file_t> FDOrErr =
509      sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
510  if (!FDOrErr)
511    return errorToErrorCode(FDOrErr.takeError());
512  sys::fs::file_t FD = *FDOrErr;
513  ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
514      getMemoryBufferForStream(FD, Filename);
515  sys::fs::closeFile(FD);
516  return Ret;
517}
518
519MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
520  StringRef Data = getBuffer();
521  StringRef Identifier = getBufferIdentifier();
522  return MemoryBufferRef(Data, Identifier);
523}
524
525SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {}
526