1193323Sed//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed//  This file implements the MemoryBuffer interface.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#include "llvm/Support/MemoryBuffer.h"
15193323Sed#include "llvm/ADT/OwningPtr.h"
16193323Sed#include "llvm/ADT/SmallString.h"
17235633Sdim#include "llvm/Config/config.h"
18218893Sdim#include "llvm/Support/Errno.h"
19245431Sdim#include "llvm/Support/FileSystem.h"
20252723Sdim#include "llvm/Support/MathExtras.h"
21218893Sdim#include "llvm/Support/Path.h"
22218893Sdim#include "llvm/Support/Process.h"
23218893Sdim#include "llvm/Support/Program.h"
24218893Sdim#include "llvm/Support/system_error.h"
25193323Sed#include <cassert>
26252723Sdim#include <cerrno>
27193323Sed#include <cstdio>
28193323Sed#include <cstring>
29218893Sdim#include <new>
30252723Sdim#include <sys/stat.h>
31193323Sed#include <sys/types.h>
32193323Sed#if !defined(_MSC_VER) && !defined(__MINGW32__)
33193323Sed#include <unistd.h>
34193323Sed#else
35193323Sed#include <io.h>
// Simplistic definitions of these macros for use in getOpenFile.
37252723Sdim#ifndef S_ISREG
38252723Sdim#define S_ISREG(x) (1)
39193323Sed#endif
40252723Sdim#ifndef S_ISBLK
41252723Sdim#define S_ISBLK(x) (0)
42245431Sdim#endif
43252723Sdim#endif
44193323Sedusing namespace llvm;
45193323Sed
46193323Sed//===----------------------------------------------------------------------===//
47193323Sed// MemoryBuffer implementation itself.
48193323Sed//===----------------------------------------------------------------------===//
49193323Sed
50210299SedMemoryBuffer::~MemoryBuffer() { }
51193323Sed
52193323Sed/// init - Initialize this MemoryBuffer as a reference to externally allocated
53193323Sed/// memory, memory that we know is already null terminated.
54221345Sdimvoid MemoryBuffer::init(const char *BufStart, const char *BufEnd,
55221345Sdim                        bool RequiresNullTerminator) {
56221345Sdim  assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
57221345Sdim         "Buffer is not null terminated!");
58193323Sed  BufferStart = BufStart;
59193323Sed  BufferEnd = BufEnd;
60193323Sed}
61193323Sed
62193323Sed//===----------------------------------------------------------------------===//
63193323Sed// MemoryBufferMem implementation.
64193323Sed//===----------------------------------------------------------------------===//
65193323Sed
66210299Sed/// CopyStringRef - Copies contents of a StringRef into a block of memory and
67210299Sed/// null-terminates it.
68210299Sedstatic void CopyStringRef(char *Memory, StringRef Data) {
69210299Sed  memcpy(Memory, Data.data(), Data.size());
70210299Sed  Memory[Data.size()] = 0; // Null terminate string.
71210299Sed}
72210299Sed
73252723Sdimnamespace {
74252723Sdimstruct NamedBufferAlloc {
75252723Sdim  StringRef Name;
76252723Sdim  NamedBufferAlloc(StringRef Name) : Name(Name) {}
77252723Sdim};
78210299Sed}
79210299Sed
80252723Sdimvoid *operator new(size_t N, const NamedBufferAlloc &Alloc) {
81252723Sdim  char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
82252723Sdim  CopyStringRef(Mem + N, Alloc.Name);
83252723Sdim  return Mem;
84252723Sdim}
85252723Sdim
86193323Sednamespace {
87210299Sed/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
88193323Sedclass MemoryBufferMem : public MemoryBuffer {
89193323Sedpublic:
90221345Sdim  MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
91221345Sdim    init(InputData.begin(), InputData.end(), RequiresNullTerminator);
92193323Sed  }
93210299Sed
94245431Sdim  virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
95210299Sed     // The name is stored after the class itself.
96210299Sed    return reinterpret_cast<const char*>(this + 1);
97193323Sed  }
98245431Sdim
99245431Sdim  virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
100221345Sdim    return MemoryBuffer_Malloc;
101221345Sdim  }
102193323Sed};
103193323Sed}
104193323Sed
105193323Sed/// getMemBuffer - Open the specified memory range as a MemoryBuffer.  Note
106223017Sdim/// that InputData must be a null terminated if RequiresNullTerminator is true!
107206274SrdivackyMemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
108221345Sdim                                         StringRef BufferName,
109221345Sdim                                         bool RequiresNullTerminator) {
110252723Sdim  return new (NamedBufferAlloc(BufferName))
111252723Sdim      MemoryBufferMem(InputData, RequiresNullTerminator);
112193323Sed}
113193323Sed
114193323Sed/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
115193323Sed/// copying the contents and taking ownership of it.  This has no requirements
116193323Sed/// on EndPtr[0].
117206274SrdivackyMemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData,
118210299Sed                                             StringRef BufferName) {
119210299Sed  MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName);
120210299Sed  if (!Buf) return 0;
121210299Sed  memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
122210299Sed         InputData.size());
123210299Sed  return Buf;
124193323Sed}
125193323Sed
126193323Sed/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
127210299Sed/// that is not initialized.  Note that the caller should initialize the
128210299Sed/// memory allocated by this method.  The memory is owned by the MemoryBuffer
129210299Sed/// object.
130193323SedMemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
131199481Srdivacky                                                  StringRef BufferName) {
132210299Sed  // Allocate space for the MemoryBuffer, the data and the name. It is important
133210299Sed  // that MemoryBuffer and data are aligned so PointerIntPair works with them.
134210299Sed  size_t AlignedStringLen =
135210299Sed    RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1,
136210299Sed                       sizeof(void*)); // TODO: Is sizeof(void*) enough?
137210299Sed  size_t RealLen = AlignedStringLen + Size + 1;
138210299Sed  char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
139210299Sed  if (!Mem) return 0;
140210299Sed
141210299Sed  // The name is stored after the class itself.
142210299Sed  CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName);
143210299Sed
144210299Sed  // The buffer begins after the name and must be aligned.
145210299Sed  char *Buf = Mem + AlignedStringLen;
146210299Sed  Buf[Size] = 0; // Null terminate buffer.
147210299Sed
148221345Sdim  return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
149193323Sed}
150193323Sed
151193323Sed/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
152193323Sed/// is completely initialized to zeros.  Note that the caller should
153193323Sed/// initialize the memory allocated by this method.  The memory is owned by
154193323Sed/// the MemoryBuffer object.
155210299SedMemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
156193323Sed  MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
157193323Sed  if (!SB) return 0;
158210299Sed  memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
159193323Sed  return SB;
160193323Sed}
161193323Sed
162193323Sed
163193323Sed/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
164193323Sed/// if the Filename is "-".  If an error occurs, this returns null and fills
165193323Sed/// in *ErrStr with a reason.  If stdin is empty, this API (unlike getSTDIN)
166193323Sed/// returns an empty buffer.
167218893Sdimerror_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
168218893Sdim                                        OwningPtr<MemoryBuffer> &result,
169218893Sdim                                        int64_t FileSize) {
170199481Srdivacky  if (Filename == "-")
171218893Sdim    return getSTDIN(result);
172218893Sdim  return getFile(Filename, result, FileSize);
173193323Sed}
174193323Sed
175193323Sed//===----------------------------------------------------------------------===//
176193323Sed// MemoryBuffer::getFile implementation.
177193323Sed//===----------------------------------------------------------------------===//
178193323Sed
179193323Sednamespace {
180263509Sdim/// \brief Memory maps a file descriptor using sys::fs::mapped_file_region.
181252723Sdim///
182252723Sdim/// This handles converting the offset into a legal offset on the platform.
183252723Sdimclass MemoryBufferMMapFile : public MemoryBuffer {
184252723Sdim  sys::fs::mapped_file_region MFR;
185210299Sed
186252723Sdim  static uint64_t getLegalMapOffset(uint64_t Offset) {
187252723Sdim    return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
188252723Sdim  }
189221345Sdim
190252723Sdim  static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
191252723Sdim    return Len + (Offset - getLegalMapOffset(Offset));
192252723Sdim  }
193221345Sdim
194252723Sdim  const char *getStart(uint64_t Len, uint64_t Offset) {
195252723Sdim    return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
196193323Sed  }
197245431Sdim
198252723Sdimpublic:
199252723Sdim  MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
200252723Sdim                       uint64_t Offset, error_code EC)
201252723Sdim      : MFR(FD, false, sys::fs::mapped_file_region::readonly,
202252723Sdim            getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
203252723Sdim    if (!EC) {
204252723Sdim      const char *Start = getStart(Len, Offset);
205252723Sdim      init(Start, Start + Len, RequiresNullTerminator);
206252723Sdim    }
207252723Sdim  }
208252723Sdim
209252723Sdim  virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
210252723Sdim    // The name is stored after the class itself.
211252723Sdim    return reinterpret_cast<const char *>(this + 1);
212252723Sdim  }
213252723Sdim
214245431Sdim  virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
215221345Sdim    return MemoryBuffer_MMap;
216221345Sdim  }
217193323Sed};
218193323Sed}
219193323Sed
220263509Sdimstatic error_code getMemoryBufferForStream(int FD,
221245431Sdim                                           StringRef BufferName,
222245431Sdim                                           OwningPtr<MemoryBuffer> &result) {
223245431Sdim  const ssize_t ChunkSize = 4096*4;
224245431Sdim  SmallString<ChunkSize> Buffer;
225245431Sdim  ssize_t ReadBytes;
226245431Sdim  // Read into Buffer until we hit EOF.
227245431Sdim  do {
228245431Sdim    Buffer.reserve(Buffer.size() + ChunkSize);
229245431Sdim    ReadBytes = read(FD, Buffer.end(), ChunkSize);
230245431Sdim    if (ReadBytes == -1) {
231245431Sdim      if (errno == EINTR) continue;
232245431Sdim      return error_code(errno, posix_category());
233245431Sdim    }
234245431Sdim    Buffer.set_size(Buffer.size() + ReadBytes);
235245431Sdim  } while (ReadBytes != 0);
236245431Sdim
237245431Sdim  result.reset(MemoryBuffer::getMemBufferCopy(Buffer, BufferName));
238245431Sdim  return error_code::success();
239245431Sdim}
240245431Sdim
241263509Sdimstatic error_code getFileAux(const char *Filename,
242263509Sdim                             OwningPtr<MemoryBuffer> &result, int64_t FileSize,
243263509Sdim                             bool RequiresNullTerminator);
244263509Sdim
245263509Sdimerror_code MemoryBuffer::getFile(Twine Filename,
246218893Sdim                                 OwningPtr<MemoryBuffer> &result,
247221345Sdim                                 int64_t FileSize,
248221345Sdim                                 bool RequiresNullTerminator) {
249218893Sdim  // Ensure the path is null terminated.
250263509Sdim  SmallString<256> PathBuf;
251263509Sdim  StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf);
252263509Sdim  return getFileAux(NullTerminatedName.data(), result, FileSize,
253263509Sdim                    RequiresNullTerminator);
254210299Sed}
255210299Sed
256263509Sdimstatic error_code getOpenFileImpl(int FD, const char *Filename,
257263509Sdim                                  OwningPtr<MemoryBuffer> &Result,
258263509Sdim                                  uint64_t FileSize, uint64_t MapSize,
259263509Sdim                                  int64_t Offset, bool RequiresNullTerminator);
260245431Sdim
261263509Sdimstatic error_code getFileAux(const char *Filename,
262263509Sdim                             OwningPtr<MemoryBuffer> &result, int64_t FileSize,
263263509Sdim                             bool RequiresNullTerminator) {
264263509Sdim  int FD;
265263509Sdim  error_code EC = sys::fs::openFileForRead(Filename, FD);
266263509Sdim  if (EC)
267263509Sdim    return EC;
268223017Sdim
269263509Sdim  error_code ret = getOpenFileImpl(FD, Filename, result, FileSize, FileSize, 0,
270263509Sdim                                   RequiresNullTerminator);
271218893Sdim  close(FD);
272218893Sdim  return ret;
273218893Sdim}
274218893Sdim
275221345Sdimstatic bool shouldUseMmap(int FD,
276221345Sdim                          size_t FileSize,
277221345Sdim                          size_t MapSize,
278221345Sdim                          off_t Offset,
279221345Sdim                          bool RequiresNullTerminator,
280221345Sdim                          int PageSize) {
281221345Sdim  // We don't use mmap for small files because this can severely fragment our
282221345Sdim  // address space.
283263509Sdim  if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
284221345Sdim    return false;
285221345Sdim
286221345Sdim  if (!RequiresNullTerminator)
287221345Sdim    return true;
288221345Sdim
289221345Sdim
290193323Sed  // If we don't know the file size, use fstat to find out.  fstat on an open
291193323Sed  // file descriptor is cheaper than stat on a random path.
292221345Sdim  // FIXME: this chunk of code is duplicated, but it avoids a fstat when
293221345Sdim  // RequiresNullTerminator = false and MapSize != -1.
294221345Sdim  if (FileSize == size_t(-1)) {
295263509Sdim    sys::fs::file_status Status;
296263509Sdim    error_code EC = sys::fs::status(FD, Status);
297263509Sdim    if (EC)
298263509Sdim      return EC;
299263509Sdim    FileSize = Status.getSize();
300193323Sed  }
301218893Sdim
302221345Sdim  // If we need a null terminator and the end of the map is inside the file,
303221345Sdim  // we cannot use mmap.
304221345Sdim  size_t End = Offset + MapSize;
305221345Sdim  assert(End <= FileSize);
306221345Sdim  if (End != FileSize)
307221345Sdim    return false;
308218893Sdim
309263509Sdim#if defined(_WIN32) || defined(__CYGWIN__)
310263509Sdim  // Don't peek the next page if file is multiple of *physical* pagesize(4k)
311263509Sdim  // but is not multiple of AllocationGranularity(64k),
312263509Sdim  // when a null terminator is required.
313263509Sdim  // FIXME: It's not good to hardcode 4096 here. dwPageSize shows 4096.
314263509Sdim  if ((FileSize & (4096 - 1)) == 0)
315263509Sdim    return false;
316263509Sdim#endif
317263509Sdim
318221345Sdim  // Don't try to map files that are exactly a multiple of the system page size
319221345Sdim  // if we need a null terminator.
320221345Sdim  if ((FileSize & (PageSize -1)) == 0)
321221345Sdim    return false;
322221345Sdim
323221345Sdim  return true;
324221345Sdim}
325221345Sdim
326263509Sdimstatic error_code getOpenFileImpl(int FD, const char *Filename,
327263509Sdim                                  OwningPtr<MemoryBuffer> &result,
328263509Sdim                                  uint64_t FileSize, uint64_t MapSize,
329263509Sdim                                  int64_t Offset, bool RequiresNullTerminator) {
330252723Sdim  static int PageSize = sys::process::get_self()->page_size();
331221345Sdim
332221345Sdim  // Default is to map the full file.
333226890Sdim  if (MapSize == uint64_t(-1)) {
334221345Sdim    // If we don't know the file size, use fstat to find out.  fstat on an open
335221345Sdim    // file descriptor is cheaper than stat on a random path.
336226890Sdim    if (FileSize == uint64_t(-1)) {
337263509Sdim      sys::fs::file_status Status;
338263509Sdim      error_code EC = sys::fs::status(FD, Status);
339263509Sdim      if (EC)
340263509Sdim        return EC;
341245431Sdim
342252723Sdim      // If this not a file or a block device (e.g. it's a named pipe
343252723Sdim      // or character device), we can't trust the size. Create the memory
344245431Sdim      // buffer by copying off the stream.
345263509Sdim      sys::fs::file_type Type = Status.type();
346263509Sdim      if (Type != sys::fs::file_type::regular_file &&
347263509Sdim          Type != sys::fs::file_type::block_file)
348245431Sdim        return getMemoryBufferForStream(FD, Filename, result);
349245431Sdim
350263509Sdim      FileSize = Status.getSize();
351221345Sdim    }
352221345Sdim    MapSize = FileSize;
353221345Sdim  }
354221345Sdim
355221345Sdim  if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
356221345Sdim                    PageSize)) {
357252723Sdim    error_code EC;
358252723Sdim    result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
359252723Sdim        RequiresNullTerminator, FD, MapSize, Offset, EC));
360252723Sdim    if (!EC)
361235633Sdim      return error_code::success();
362193323Sed  }
363193323Sed
364221345Sdim  MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
365193323Sed  if (!Buf) {
366218893Sdim    // Failed to create a buffer. The only way it can fail is if
367218893Sdim    // new(std::nothrow) returns 0.
368218893Sdim    return make_error_code(errc::not_enough_memory);
369193323Sed  }
370193323Sed
371193323Sed  OwningPtr<MemoryBuffer> SB(Buf);
372193323Sed  char *BufPtr = const_cast<char*>(SB->getBufferStart());
373206083Srdivacky
374221345Sdim  size_t BytesLeft = MapSize;
375235633Sdim#ifndef HAVE_PREAD
376221345Sdim  if (lseek(FD, Offset, SEEK_SET) == -1)
377221345Sdim    return error_code(errno, posix_category());
378235633Sdim#endif
379221345Sdim
380193323Sed  while (BytesLeft) {
381235633Sdim#ifdef HAVE_PREAD
382235633Sdim    ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset);
383235633Sdim#else
384193323Sed    ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
385235633Sdim#endif
386206083Srdivacky    if (NumRead == -1) {
387206083Srdivacky      if (errno == EINTR)
388206083Srdivacky        continue;
389206083Srdivacky      // Error while reading.
390218893Sdim      return error_code(errno, posix_category());
391193323Sed    }
392235633Sdim    if (NumRead == 0) {
393235633Sdim      assert(0 && "We got inaccurate FileSize value or fstat reported an "
394235633Sdim                   "invalid file size.");
395235633Sdim      *BufPtr = '\0'; // null-terminate at the actual size.
396235633Sdim      break;
397235633Sdim    }
398206083Srdivacky    BytesLeft -= NumRead;
399206083Srdivacky    BufPtr += NumRead;
400193323Sed  }
401206083Srdivacky
402218893Sdim  result.swap(SB);
403235633Sdim  return error_code::success();
404193323Sed}
405193323Sed
406263509Sdimerror_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
407263509Sdim                                     OwningPtr<MemoryBuffer> &Result,
408263509Sdim                                     uint64_t FileSize,
409263509Sdim                                     bool RequiresNullTerminator) {
410263509Sdim  return getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0,
411263509Sdim                         RequiresNullTerminator);
412263509Sdim}
413263509Sdim
414263509Sdimerror_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename,
415263509Sdim                                          OwningPtr<MemoryBuffer> &Result,
416263509Sdim                                          uint64_t MapSize, int64_t Offset) {
417263509Sdim  return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false);
418263509Sdim}
419263509Sdim
420193323Sed//===----------------------------------------------------------------------===//
421193323Sed// MemoryBuffer::getSTDIN implementation.
422193323Sed//===----------------------------------------------------------------------===//
423193323Sed
424218893Sdimerror_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
425193323Sed  // Read in all of the data from stdin, we cannot mmap stdin.
426199481Srdivacky  //
427199481Srdivacky  // FIXME: That isn't necessarily true, we should try to mmap stdin and
428199481Srdivacky  // fallback if it fails.
429263509Sdim  sys::ChangeStdinToBinary();
430210299Sed
431245431Sdim  return getMemoryBufferForStream(0, "<stdin>", result);
432193323Sed}
433