1245803Stheraven/*- 2245803Stheraven * Copyright (c) 2013 David Chisnall 3245803Stheraven * All rights reserved. 4245803Stheraven * 5245803Stheraven * This software was developed by SRI International and the University of 6245803Stheraven * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7245803Stheraven * ("CTSRD"), as part of the DARPA CRASH research programme. 8245803Stheraven * 9245803Stheraven * Redistribution and use in source and binary forms, with or without 10245803Stheraven * modification, are permitted provided that the following conditions 11245803Stheraven * are met: 12245803Stheraven * 1. Redistributions of source code must retain the above copyright 13245803Stheraven * notice, this list of conditions and the following disclaimer. 14245803Stheraven * 2. Redistributions in binary form must reproduce the above copyright 15245803Stheraven * notice, this list of conditions and the following disclaimer in the 16245803Stheraven * documentation and/or other materials provided with the distribution. 17245803Stheraven * 18245803Stheraven * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19245803Stheraven * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20245803Stheraven * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21245803Stheraven * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22245803Stheraven * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23245803Stheraven * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24245803Stheraven * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25245803Stheraven * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26245803Stheraven * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27245803Stheraven * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28245803Stheraven * SUCH DAMAGE. 29245803Stheraven * 30245803Stheraven * $FreeBSD$ 31245803Stheraven */ 32245803Stheraven 33245803Stheraven#ifndef _INPUT_BUFFER_HH_ 34245803Stheraven#define _INPUT_BUFFER_HH_ 35245803Stheraven#include "util.hh" 36245803Stheraven#include <assert.h> 37245803Stheraven 38245803Stheravennamespace dtc 39245803Stheraven{ 40245803Stheraven 41245803Stheraven/** 42245803Stheraven * Class encapsulating the input file. Can be used as a const char*, but has 43245803Stheraven * range checking. Attempting to access anything out of range will return a 0 44245803Stheraven * byte. The input buffer can be cheaply copied, without copying the 45245803Stheraven * underlying memory, however it is the user's responsibility to ensure that 46245803Stheraven * such copies do not persist beyond the lifetime of the underlying memory. 47245803Stheraven * 48245803Stheraven * This also contains methods for reporting errors and for consuming the token 49245803Stheraven * stream. 50245803Stheraven */ 51245803Stheravenclass input_buffer 52245803Stheraven{ 53245803Stheraven protected: 54245803Stheraven /** 55245803Stheraven * The buffer. This class doesn't own the buffer, but the 56245803Stheraven * mmap_input_buffer subclass does. 57245803Stheraven */ 58245803Stheraven const char* buffer; 59245803Stheraven /** 60245803Stheraven * The size of the buffer. 61245803Stheraven */ 62245803Stheraven int size; 63245803Stheraven private: 64245803Stheraven /** 65245803Stheraven * The current place in the buffer where we are reading. This class 66245803Stheraven * keeps a separate size, pointer, and cursor so that we can move 67245803Stheraven * forwards and backwards and still have checks that we haven't fallen 68245803Stheraven * off either end. 69245803Stheraven */ 70245803Stheraven int cursor; 71245803Stheraven /** 72245803Stheraven * Private constructor. This is used to create input buffers that 73245803Stheraven * refer to the same memory, but have different cursors. 74245803Stheraven */ 75245803Stheraven input_buffer(const char* b, int s, int c) : buffer(b), size(s), 76245803Stheraven cursor(c) {} 77245803Stheraven /** 78245803Stheraven * Reads forward past any spaces. The DTS format is not whitespace 79245803Stheraven * sensitive and so we want to scan past whitespace when reading it. 80245803Stheraven */ 81245803Stheraven void skip_spaces(); 82245803Stheraven public: 83245803Stheraven /** 84245803Stheraven * Virtual destructor. Does nothing, but exists so that subclasses 85245803Stheraven * that own the memory can run cleanup code for deallocating it. 86245803Stheraven */ 87245803Stheraven virtual ~input_buffer() {}; 88245803Stheraven /** 89245803Stheraven * Constructs an empty buffer. 90245803Stheraven */ 91245803Stheraven input_buffer() : buffer(0), size(0), cursor(0) {} 92245803Stheraven /** 93245803Stheraven * Constructs a new buffer with a specified memory region and size. 94245803Stheraven */ 95245803Stheraven input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){} 96245803Stheraven /** 97245803Stheraven * Returns a new input buffer referring into this input, clamped to the 98245803Stheraven * specified size. If the requested buffer would fall outside the 99245803Stheraven * range of this one, then it returns an empty buffer. 100245803Stheraven * 101245803Stheraven * The returned buffer shares the same underlying storage as the 102245803Stheraven * original. This is intended to be used for splitting up the various 103245803Stheraven * sections of a device tree blob. Requesting a size of 0 will give a 104245803Stheraven * buffer that extends to the end of the available memory. 105245803Stheraven */ 106245803Stheraven input_buffer buffer_from_offset(int offset, int s=0); 107245803Stheraven /** 108245803Stheraven * Returns true if this buffer has no unconsumed space in it. 109245803Stheraven */ 110245803Stheraven inline bool empty() 111245803Stheraven { 112245803Stheraven return cursor >= size; 113245803Stheraven } 114245803Stheraven /** 115245803Stheraven * Dereferencing operator, allows the buffer to be treated as a char* 116245803Stheraven * and dereferenced to give a character. This returns a null byte if 117245803Stheraven * the cursor is out of range. 118245803Stheraven */ 119245803Stheraven inline char operator*() 120245803Stheraven { 121245803Stheraven if (cursor >= size) { return '\0'; } 122245803Stheraven if (cursor < 0) { return '\0'; } 123245803Stheraven return buffer[cursor]; 124245803Stheraven } 125245803Stheraven /** 126245803Stheraven * Array subscripting operator, returns a character at the specified 127245803Stheraven * index offset from the current cursor. The offset may be negative, 128245803Stheraven * to reread characters that have already been read. If the current 129245803Stheraven * cursor plus offset is outside of the range, this returns a nul 130245803Stheraven * byte. 131245803Stheraven */ 132245803Stheraven inline char operator[](int offset) 133245803Stheraven { 134245803Stheraven if (cursor + offset >= size) { return '\0'; } 135245803Stheraven if (cursor + offset < 0) { return '\0'; } 136245803Stheraven return buffer[cursor + offset]; 137245803Stheraven } 138245803Stheraven /** 139245803Stheraven * Increments the cursor, iterating forward in the buffer. 140245803Stheraven */ 141245803Stheraven inline input_buffer &operator++() 142245803Stheraven { 143245803Stheraven cursor++; 144245803Stheraven return *this; 145245803Stheraven } 146245803Stheraven /** 147245803Stheraven * Cast to char* operator. Returns a pointer into the buffer that can 148245803Stheraven * be used for constructing strings. 149245803Stheraven */ 150245803Stheraven inline operator const char*() 151245803Stheraven { 152245803Stheraven if (cursor >= size) { return 0; } 153245803Stheraven if (cursor < 0) { return 0; } 154245803Stheraven return &buffer[cursor]; 155245803Stheraven } 156245803Stheraven /** 157245803Stheraven * Consumes a character. Moves the cursor one character forward if the 158245803Stheraven * next character matches the argument, returning true. If the current 159245803Stheraven * character does not match the argument, returns false. 160245803Stheraven */ 161245803Stheraven inline bool consume(char c) 162245803Stheraven { 163245803Stheraven if ((*this)[0] == c) 164245803Stheraven { 165245803Stheraven ++(*this); 166245803Stheraven return true; 167245803Stheraven } 168245803Stheraven return false; 169245803Stheraven } 170245803Stheraven /** 171245803Stheraven * Consumes a string. If the (null-terminated) string passed as the 172245803Stheraven * argument appears in the input, advances the cursor to the end and 173245803Stheraven * returns true. Returns false if the string does not appear at the 174245803Stheraven * current point in the input. 175245803Stheraven */ 176245803Stheraven bool consume(const char *str); 177245803Stheraven /** 178245803Stheraven * Reads an integer in base 8, 10, or 16. Returns true and advances 179245803Stheraven * the cursor to the end of the integer if the cursor points to an 180245803Stheraven * integer, returns false and does not move the cursor otherwise. 181245803Stheraven * 182245803Stheraven * The parsed value is returned via the argument. 183245803Stheraven */ 184245803Stheraven bool consume_integer(long long &outInt); 185245803Stheraven /** 186245803Stheraven * Template function that consumes a binary value in big-endian format 187245803Stheraven * from the input stream. Returns true and advances the cursor if 188245803Stheraven * there is a value of the correct size. This function assumes that 189245803Stheraven * all values must be natively aligned, and so advances the cursor to 190245803Stheraven * the correct alignment before reading. 191245803Stheraven */ 192245803Stheraven template<typename T> 193245803Stheraven bool consume_binary(T &out) 194245803Stheraven { 195245803Stheraven int align = 0; 196245803Stheraven int type_size = sizeof(T); 197245803Stheraven if (cursor % type_size != 0) 198245803Stheraven { 199245803Stheraven align = type_size - (cursor % type_size); 200245803Stheraven } 201245803Stheraven if (size < cursor + align + type_size) 202245803Stheraven { 203245803Stheraven return false; 204245803Stheraven } 205245803Stheraven cursor += align; 206245803Stheraven assert(cursor % type_size == 0); 207245803Stheraven out = 0; 208245803Stheraven for (int i=0 ; i<type_size ; ++i) 209245803Stheraven { 210245803Stheraven out <<= 8; 211245803Stheraven out |= (((T)buffer[cursor++]) & 0xff); 212245803Stheraven } 213245803Stheraven return true; 214245803Stheraven } 215245803Stheraven /** 216245803Stheraven * Consumes two hex digits and return the resulting byte via the first 217245803Stheraven * argument. If the next two characters are hex digits, returns true 218245803Stheraven * and advances the cursor. If not, then returns false and leaves the 219245803Stheraven * cursor in place. 220245803Stheraven */ 221245803Stheraven bool consume_hex_byte(uint8_t &outByte); 222245803Stheraven /** 223245803Stheraven * Advances the cursor to the start of the next token, skipping 224245803Stheraven * comments and whitespace. If the cursor already points to the start 225245803Stheraven * of a token, then this function does nothing. 226245803Stheraven */ 227245803Stheraven input_buffer &next_token(); 228245803Stheraven /** 229245803Stheraven * Prints a message indicating the location of a parse error. 230245803Stheraven */ 231245803Stheraven void parse_error(const char *msg); 232245803Stheraven /** 233245803Stheraven * Dumps the current cursor value and the unconsumed values in the 234245803Stheraven * input buffer to the standard error. This method is intended solely 235245803Stheraven * for debugging. 236245803Stheraven */ 237245803Stheraven void dump(); 238245803Stheraven}; 239245803Stheraven/** 240245803Stheraven * Explicit specialisation for reading a single byte. 241245803Stheraven */ 242245803Stheraventemplate<> 243245803Stheraveninline bool input_buffer::consume_binary(uint8_t &out) 244245803Stheraven{ 245245803Stheraven if (size < cursor + 1) 246245803Stheraven { 247245803Stheraven return false; 248245803Stheraven } 249245803Stheraven out = buffer[cursor++]; 250245803Stheraven return true; 251245803Stheraven} 252245803Stheraven 253245803Stheraven/** 254245803Stheraven * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 255245803Stheraven * When this object is destroyed, the memory is unmapped. 256245803Stheraven */ 257245803Stheravenstruct mmap_input_buffer : public input_buffer 258245803Stheraven{ 259245803Stheraven /** 260245803Stheraven * Constructs a new buffer from the file passed in as a file 261245803Stheraven * descriptor. 262245803Stheraven */ 263245803Stheraven mmap_input_buffer(int fd); 264245803Stheraven /** 265245803Stheraven * Unmaps the buffer, if one exists. 266245803Stheraven */ 267245803Stheraven virtual ~mmap_input_buffer(); 268245803Stheraven}; 269245803Stheraven/** 270245803Stheraven * Input buffer read from standard input. This is used for reading device tree 271245803Stheraven * blobs and source from standard input. It reads the entire input into 272245803Stheraven * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 273245803Stheraven * are very rarely more than 10KB though, so this is probably not a problem. 274245803Stheraven */ 275245803Stheravenstruct stream_input_buffer : public input_buffer 276245803Stheraven{ 277245803Stheraven /** 278245803Stheraven * The buffer that will store the data read from the standard input. 279245803Stheraven */ 280245803Stheraven std::vector<char> b; 281245803Stheraven /** 282245803Stheraven * Constructs a new buffer from the standard input. 283245803Stheraven */ 284245803Stheraven stream_input_buffer(); 285245803Stheraven}; 286245803Stheraven 287245803Stheraven} // namespace dtc 288245803Stheraven 289245803Stheraven#endif // !_INPUT_BUFFER_HH_ 290