1193323Sed//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed//  This file implements the MemoryBuffer interface.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#include "llvm/Support/MemoryBuffer.h"
15193323Sed#include "llvm/ADT/OwningPtr.h"
16193323Sed#include "llvm/ADT/SmallString.h"
17234353Sdim#include "llvm/Config/config.h"
18218893Sdim#include "llvm/Support/Errno.h"
19239462Sdim#include "llvm/Support/FileSystem.h"
20249423Sdim#include "llvm/Support/MathExtras.h"
21218893Sdim#include "llvm/Support/Path.h"
22218893Sdim#include "llvm/Support/Process.h"
23218893Sdim#include "llvm/Support/Program.h"
24218893Sdim#include "llvm/Support/system_error.h"
25193323Sed#include <cassert>
26249423Sdim#include <cerrno>
27193323Sed#include <cstdio>
28193323Sed#include <cstring>
29218893Sdim#include <new>
30249423Sdim#include <sys/stat.h>
31193323Sed#include <sys/types.h>
32193323Sed#if !defined(_MSC_VER) && !defined(__MINGW32__)
33193323Sed#include <unistd.h>
34193323Sed#else
35193323Sed#include <io.h>
// Simplistic definitions of these macros for use in getOpenFile.
37249423Sdim#ifndef S_ISREG
38249423Sdim#define S_ISREG(x) (1)
39193323Sed#endif
40249423Sdim#ifndef S_ISBLK
41249423Sdim#define S_ISBLK(x) (0)
42243830Sdim#endif
43249423Sdim#endif
44193323Sedusing namespace llvm;
45193323Sed
46193323Sed//===----------------------------------------------------------------------===//
47193323Sed// MemoryBuffer implementation itself.
48193323Sed//===----------------------------------------------------------------------===//
49193323Sed
50210299SedMemoryBuffer::~MemoryBuffer() { }
51193323Sed
52193323Sed/// init - Initialize this MemoryBuffer as a reference to externally allocated
53193323Sed/// memory, memory that we know is already null terminated.
54221345Sdimvoid MemoryBuffer::init(const char *BufStart, const char *BufEnd,
55221345Sdim                        bool RequiresNullTerminator) {
56221345Sdim  assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
57221345Sdim         "Buffer is not null terminated!");
58193323Sed  BufferStart = BufStart;
59193323Sed  BufferEnd = BufEnd;
60193323Sed}
61193323Sed
62193323Sed//===----------------------------------------------------------------------===//
63193323Sed// MemoryBufferMem implementation.
64193323Sed//===----------------------------------------------------------------------===//
65193323Sed
66210299Sed/// CopyStringRef - Copies contents of a StringRef into a block of memory and
67210299Sed/// null-terminates it.
68210299Sedstatic void CopyStringRef(char *Memory, StringRef Data) {
69210299Sed  memcpy(Memory, Data.data(), Data.size());
70210299Sed  Memory[Data.size()] = 0; // Null terminate string.
71210299Sed}
72210299Sed
73249423Sdimnamespace {
74249423Sdimstruct NamedBufferAlloc {
75249423Sdim  StringRef Name;
76249423Sdim  NamedBufferAlloc(StringRef Name) : Name(Name) {}
77249423Sdim};
78210299Sed}
79210299Sed
80249423Sdimvoid *operator new(size_t N, const NamedBufferAlloc &Alloc) {
81249423Sdim  char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
82249423Sdim  CopyStringRef(Mem + N, Alloc.Name);
83249423Sdim  return Mem;
84249423Sdim}
85249423Sdim
86193323Sednamespace {
87210299Sed/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
88193323Sedclass MemoryBufferMem : public MemoryBuffer {
89193323Sedpublic:
90221345Sdim  MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
91221345Sdim    init(InputData.begin(), InputData.end(), RequiresNullTerminator);
92193323Sed  }
93210299Sed
94243830Sdim  virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
95210299Sed     // The name is stored after the class itself.
96210299Sed    return reinterpret_cast<const char*>(this + 1);
97193323Sed  }
98243830Sdim
99243830Sdim  virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
100221345Sdim    return MemoryBuffer_Malloc;
101221345Sdim  }
102193323Sed};
103193323Sed}
104193323Sed
105193323Sed/// getMemBuffer - Open the specified memory range as a MemoryBuffer.  Note
106223017Sdim/// that InputData must be a null terminated if RequiresNullTerminator is true!
107206274SrdivackyMemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
108221345Sdim                                         StringRef BufferName,
109221345Sdim                                         bool RequiresNullTerminator) {
110249423Sdim  return new (NamedBufferAlloc(BufferName))
111249423Sdim      MemoryBufferMem(InputData, RequiresNullTerminator);
112193323Sed}
113193323Sed
114193323Sed/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
115193323Sed/// copying the contents and taking ownership of it.  This has no requirements
116193323Sed/// on EndPtr[0].
117206274SrdivackyMemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData,
118210299Sed                                             StringRef BufferName) {
119210299Sed  MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName);
120210299Sed  if (!Buf) return 0;
121210299Sed  memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
122210299Sed         InputData.size());
123210299Sed  return Buf;
124193323Sed}
125193323Sed
126193323Sed/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
127210299Sed/// that is not initialized.  Note that the caller should initialize the
128210299Sed/// memory allocated by this method.  The memory is owned by the MemoryBuffer
129210299Sed/// object.
130193323SedMemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
131199481Srdivacky                                                  StringRef BufferName) {
132210299Sed  // Allocate space for the MemoryBuffer, the data and the name. It is important
133210299Sed  // that MemoryBuffer and data are aligned so PointerIntPair works with them.
134210299Sed  size_t AlignedStringLen =
135210299Sed    RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1,
136210299Sed                       sizeof(void*)); // TODO: Is sizeof(void*) enough?
137210299Sed  size_t RealLen = AlignedStringLen + Size + 1;
138210299Sed  char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
139210299Sed  if (!Mem) return 0;
140210299Sed
141210299Sed  // The name is stored after the class itself.
142210299Sed  CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName);
143210299Sed
144210299Sed  // The buffer begins after the name and must be aligned.
145210299Sed  char *Buf = Mem + AlignedStringLen;
146210299Sed  Buf[Size] = 0; // Null terminate buffer.
147210299Sed
148221345Sdim  return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
149193323Sed}
150193323Sed
151193323Sed/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
152193323Sed/// is completely initialized to zeros.  Note that the caller should
153193323Sed/// initialize the memory allocated by this method.  The memory is owned by
154193323Sed/// the MemoryBuffer object.
155210299SedMemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
156193323Sed  MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
157193323Sed  if (!SB) return 0;
158210299Sed  memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
159193323Sed  return SB;
160193323Sed}
161193323Sed
162193323Sed
163193323Sed/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
164193323Sed/// if the Filename is "-".  If an error occurs, this returns null and fills
165193323Sed/// in *ErrStr with a reason.  If stdin is empty, this API (unlike getSTDIN)
166193323Sed/// returns an empty buffer.
167218893Sdimerror_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
168218893Sdim                                        OwningPtr<MemoryBuffer> &result,
169218893Sdim                                        int64_t FileSize) {
170199481Srdivacky  if (Filename == "-")
171218893Sdim    return getSTDIN(result);
172218893Sdim  return getFile(Filename, result, FileSize);
173193323Sed}
174193323Sed
175193323Sed//===----------------------------------------------------------------------===//
176193323Sed// MemoryBuffer::getFile implementation.
177193323Sed//===----------------------------------------------------------------------===//
178193323Sed
179193323Sednamespace {
180263508Sdim/// \brief Memory maps a file descriptor using sys::fs::mapped_file_region.
181249423Sdim///
182249423Sdim/// This handles converting the offset into a legal offset on the platform.
183249423Sdimclass MemoryBufferMMapFile : public MemoryBuffer {
184249423Sdim  sys::fs::mapped_file_region MFR;
185210299Sed
186249423Sdim  static uint64_t getLegalMapOffset(uint64_t Offset) {
187249423Sdim    return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
188249423Sdim  }
189221345Sdim
190249423Sdim  static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
191249423Sdim    return Len + (Offset - getLegalMapOffset(Offset));
192249423Sdim  }
193221345Sdim
194249423Sdim  const char *getStart(uint64_t Len, uint64_t Offset) {
195249423Sdim    return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
196193323Sed  }
197243830Sdim
198249423Sdimpublic:
199249423Sdim  MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
200249423Sdim                       uint64_t Offset, error_code EC)
201249423Sdim      : MFR(FD, false, sys::fs::mapped_file_region::readonly,
202249423Sdim            getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
203249423Sdim    if (!EC) {
204249423Sdim      const char *Start = getStart(Len, Offset);
205249423Sdim      init(Start, Start + Len, RequiresNullTerminator);
206249423Sdim    }
207249423Sdim  }
208249423Sdim
209249423Sdim  virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
210249423Sdim    // The name is stored after the class itself.
211249423Sdim    return reinterpret_cast<const char *>(this + 1);
212249423Sdim  }
213249423Sdim
214243830Sdim  virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
215221345Sdim    return MemoryBuffer_MMap;
216221345Sdim  }
217193323Sed};
218193323Sed}
219193323Sed
220263508Sdimstatic error_code getMemoryBufferForStream(int FD,
221243830Sdim                                           StringRef BufferName,
222243830Sdim                                           OwningPtr<MemoryBuffer> &result) {
223243830Sdim  const ssize_t ChunkSize = 4096*4;
224243830Sdim  SmallString<ChunkSize> Buffer;
225243830Sdim  ssize_t ReadBytes;
226243830Sdim  // Read into Buffer until we hit EOF.
227243830Sdim  do {
228243830Sdim    Buffer.reserve(Buffer.size() + ChunkSize);
229243830Sdim    ReadBytes = read(FD, Buffer.end(), ChunkSize);
230243830Sdim    if (ReadBytes == -1) {
231243830Sdim      if (errno == EINTR) continue;
232243830Sdim      return error_code(errno, posix_category());
233243830Sdim    }
234243830Sdim    Buffer.set_size(Buffer.size() + ReadBytes);
235243830Sdim  } while (ReadBytes != 0);
236243830Sdim
237243830Sdim  result.reset(MemoryBuffer::getMemBufferCopy(Buffer, BufferName));
238243830Sdim  return error_code::success();
239243830Sdim}
240243830Sdim
241263508Sdimstatic error_code getFileAux(const char *Filename,
242263508Sdim                             OwningPtr<MemoryBuffer> &result, int64_t FileSize,
243263508Sdim                             bool RequiresNullTerminator);
244263508Sdim
245263508Sdimerror_code MemoryBuffer::getFile(Twine Filename,
246218893Sdim                                 OwningPtr<MemoryBuffer> &result,
247221345Sdim                                 int64_t FileSize,
248221345Sdim                                 bool RequiresNullTerminator) {
249218893Sdim  // Ensure the path is null terminated.
250263508Sdim  SmallString<256> PathBuf;
251263508Sdim  StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf);
252263508Sdim  return getFileAux(NullTerminatedName.data(), result, FileSize,
253263508Sdim                    RequiresNullTerminator);
254210299Sed}
255210299Sed
256263508Sdimstatic error_code getOpenFileImpl(int FD, const char *Filename,
257263508Sdim                                  OwningPtr<MemoryBuffer> &Result,
258263508Sdim                                  uint64_t FileSize, uint64_t MapSize,
259263508Sdim                                  int64_t Offset, bool RequiresNullTerminator);
260239462Sdim
261263508Sdimstatic error_code getFileAux(const char *Filename,
262263508Sdim                             OwningPtr<MemoryBuffer> &result, int64_t FileSize,
263263508Sdim                             bool RequiresNullTerminator) {
264263508Sdim  int FD;
265263508Sdim  error_code EC = sys::fs::openFileForRead(Filename, FD);
266263508Sdim  if (EC)
267263508Sdim    return EC;
268223017Sdim
269263508Sdim  error_code ret = getOpenFileImpl(FD, Filename, result, FileSize, FileSize, 0,
270263508Sdim                                   RequiresNullTerminator);
271218893Sdim  close(FD);
272218893Sdim  return ret;
273218893Sdim}
274218893Sdim
275221345Sdimstatic bool shouldUseMmap(int FD,
276221345Sdim                          size_t FileSize,
277221345Sdim                          size_t MapSize,
278221345Sdim                          off_t Offset,
279221345Sdim                          bool RequiresNullTerminator,
280221345Sdim                          int PageSize) {
281221345Sdim  // We don't use mmap for small files because this can severely fragment our
282221345Sdim  // address space.
283263508Sdim  if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
284221345Sdim    return false;
285221345Sdim
286221345Sdim  if (!RequiresNullTerminator)
287221345Sdim    return true;
288221345Sdim
289221345Sdim
290193323Sed  // If we don't know the file size, use fstat to find out.  fstat on an open
291193323Sed  // file descriptor is cheaper than stat on a random path.
292221345Sdim  // FIXME: this chunk of code is duplicated, but it avoids a fstat when
293221345Sdim  // RequiresNullTerminator = false and MapSize != -1.
294221345Sdim  if (FileSize == size_t(-1)) {
295263508Sdim    sys::fs::file_status Status;
296263508Sdim    error_code EC = sys::fs::status(FD, Status);
297263508Sdim    if (EC)
298263508Sdim      return EC;
299263508Sdim    FileSize = Status.getSize();
300193323Sed  }
301218893Sdim
302221345Sdim  // If we need a null terminator and the end of the map is inside the file,
303221345Sdim  // we cannot use mmap.
304221345Sdim  size_t End = Offset + MapSize;
305221345Sdim  assert(End <= FileSize);
306221345Sdim  if (End != FileSize)
307221345Sdim    return false;
308218893Sdim
309263508Sdim#if defined(_WIN32) || defined(__CYGWIN__)
310263508Sdim  // Don't peek the next page if file is multiple of *physical* pagesize(4k)
311263508Sdim  // but is not multiple of AllocationGranularity(64k),
312263508Sdim  // when a null terminator is required.
313263508Sdim  // FIXME: It's not good to hardcode 4096 here. dwPageSize shows 4096.
314263508Sdim  if ((FileSize & (4096 - 1)) == 0)
315263508Sdim    return false;
316263508Sdim#endif
317263508Sdim
318221345Sdim  // Don't try to map files that are exactly a multiple of the system page size
319221345Sdim  // if we need a null terminator.
320221345Sdim  if ((FileSize & (PageSize -1)) == 0)
321221345Sdim    return false;
322221345Sdim
323221345Sdim  return true;
324221345Sdim}
325221345Sdim
326263508Sdimstatic error_code getOpenFileImpl(int FD, const char *Filename,
327263508Sdim                                  OwningPtr<MemoryBuffer> &result,
328263508Sdim                                  uint64_t FileSize, uint64_t MapSize,
329263508Sdim                                  int64_t Offset, bool RequiresNullTerminator) {
330249423Sdim  static int PageSize = sys::process::get_self()->page_size();
331221345Sdim
332221345Sdim  // Default is to map the full file.
333226633Sdim  if (MapSize == uint64_t(-1)) {
334221345Sdim    // If we don't know the file size, use fstat to find out.  fstat on an open
335221345Sdim    // file descriptor is cheaper than stat on a random path.
336226633Sdim    if (FileSize == uint64_t(-1)) {
337263508Sdim      sys::fs::file_status Status;
338263508Sdim      error_code EC = sys::fs::status(FD, Status);
339263508Sdim      if (EC)
340263508Sdim        return EC;
341243830Sdim
342249423Sdim      // If this not a file or a block device (e.g. it's a named pipe
343249423Sdim      // or character device), we can't trust the size. Create the memory
344243830Sdim      // buffer by copying off the stream.
345263508Sdim      sys::fs::file_type Type = Status.type();
346263508Sdim      if (Type != sys::fs::file_type::regular_file &&
347263508Sdim          Type != sys::fs::file_type::block_file)
348243830Sdim        return getMemoryBufferForStream(FD, Filename, result);
349243830Sdim
350263508Sdim      FileSize = Status.getSize();
351221345Sdim    }
352221345Sdim    MapSize = FileSize;
353221345Sdim  }
354221345Sdim
355221345Sdim  if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
356221345Sdim                    PageSize)) {
357249423Sdim    error_code EC;
358249423Sdim    result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
359249423Sdim        RequiresNullTerminator, FD, MapSize, Offset, EC));
360249423Sdim    if (!EC)
361234353Sdim      return error_code::success();
362193323Sed  }
363193323Sed
364221345Sdim  MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
365193323Sed  if (!Buf) {
366218893Sdim    // Failed to create a buffer. The only way it can fail is if
367218893Sdim    // new(std::nothrow) returns 0.
368218893Sdim    return make_error_code(errc::not_enough_memory);
369193323Sed  }
370193323Sed
371193323Sed  OwningPtr<MemoryBuffer> SB(Buf);
372193323Sed  char *BufPtr = const_cast<char*>(SB->getBufferStart());
373206083Srdivacky
374221345Sdim  size_t BytesLeft = MapSize;
375234353Sdim#ifndef HAVE_PREAD
376221345Sdim  if (lseek(FD, Offset, SEEK_SET) == -1)
377221345Sdim    return error_code(errno, posix_category());
378234353Sdim#endif
379221345Sdim
380193323Sed  while (BytesLeft) {
381234353Sdim#ifdef HAVE_PREAD
382234353Sdim    ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset);
383234353Sdim#else
384193323Sed    ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
385234353Sdim#endif
386206083Srdivacky    if (NumRead == -1) {
387206083Srdivacky      if (errno == EINTR)
388206083Srdivacky        continue;
389206083Srdivacky      // Error while reading.
390218893Sdim      return error_code(errno, posix_category());
391193323Sed    }
392234353Sdim    if (NumRead == 0) {
393234353Sdim      assert(0 && "We got inaccurate FileSize value or fstat reported an "
394234353Sdim                   "invalid file size.");
395234353Sdim      *BufPtr = '\0'; // null-terminate at the actual size.
396234353Sdim      break;
397234353Sdim    }
398206083Srdivacky    BytesLeft -= NumRead;
399206083Srdivacky    BufPtr += NumRead;
400193323Sed  }
401206083Srdivacky
402218893Sdim  result.swap(SB);
403234353Sdim  return error_code::success();
404193323Sed}
405193323Sed
406263508Sdimerror_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
407263508Sdim                                     OwningPtr<MemoryBuffer> &Result,
408263508Sdim                                     uint64_t FileSize,
409263508Sdim                                     bool RequiresNullTerminator) {
410263508Sdim  return getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0,
411263508Sdim                         RequiresNullTerminator);
412263508Sdim}
413263508Sdim
414263508Sdimerror_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename,
415263508Sdim                                          OwningPtr<MemoryBuffer> &Result,
416263508Sdim                                          uint64_t MapSize, int64_t Offset) {
417263508Sdim  return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false);
418263508Sdim}
419263508Sdim
420193323Sed//===----------------------------------------------------------------------===//
421193323Sed// MemoryBuffer::getSTDIN implementation.
422193323Sed//===----------------------------------------------------------------------===//
423193323Sed
424218893Sdimerror_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
425193323Sed  // Read in all of the data from stdin, we cannot mmap stdin.
426199481Srdivacky  //
427199481Srdivacky  // FIXME: That isn't necessarily true, we should try to mmap stdin and
428199481Srdivacky  // fallback if it fails.
429263508Sdim  sys::ChangeStdinToBinary();
430210299Sed
431243830Sdim  return getMemoryBufferForStream(0, "<stdin>", result);
432193323Sed}
433