1245803Stheraven/*- 2245803Stheraven * Copyright (c) 2013 David Chisnall 3245803Stheraven * All rights reserved. 4245803Stheraven * 5245803Stheraven * This software was developed by SRI International and the University of 6245803Stheraven * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7245803Stheraven * ("CTSRD"), as part of the DARPA CRASH research programme. 8245803Stheraven * 9245803Stheraven * Redistribution and use in source and binary forms, with or without 10245803Stheraven * modification, are permitted provided that the following conditions 11245803Stheraven * are met: 12245803Stheraven * 1. Redistributions of source code must retain the above copyright 13245803Stheraven * notice, this list of conditions and the following disclaimer. 14245803Stheraven * 2. Redistributions in binary form must reproduce the above copyright 15245803Stheraven * notice, this list of conditions and the following disclaimer in the 16245803Stheraven * documentation and/or other materials provided with the distribution. 17245803Stheraven * 18245803Stheraven * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19245803Stheraven * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20245803Stheraven * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21245803Stheraven * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22245803Stheraven * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23245803Stheraven * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24245803Stheraven * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25245803Stheraven * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26245803Stheraven * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27245803Stheraven * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28245803Stheraven * SUCH DAMAGE. 29245803Stheraven * 30245803Stheraven * $FreeBSD: releng/11.0/usr.bin/dtc/input_buffer.hh 292876 2015-12-29 16:29:42Z theraven $ 31245803Stheraven */ 32245803Stheraven 33245803Stheraven#ifndef _INPUT_BUFFER_HH_ 34245803Stheraven#define _INPUT_BUFFER_HH_ 35245803Stheraven#include "util.hh" 36245803Stheraven#include <assert.h> 37245803Stheraven 38245803Stheravennamespace dtc 39245803Stheraven{ 40245803Stheraven 41292876Stheravennamespace { 42292876Stheravenstruct expression; 43292876Stheraventypedef std::unique_ptr<expression> expression_ptr; 44292876Stheraven} 45292876Stheraven 46245803Stheraven/** 47245803Stheraven * Class encapsulating the input file. Can be used as a const char*, but has 48245803Stheraven * range checking. Attempting to access anything out of range will return a 0 49245803Stheraven * byte. The input buffer can be cheaply copied, without copying the 50245803Stheraven * underlying memory, however it is the user's responsibility to ensure that 51245803Stheraven * such copies do not persist beyond the lifetime of the underlying memory. 52245803Stheraven * 53245803Stheraven * This also contains methods for reporting errors and for consuming the token 54245803Stheraven * stream. 55245803Stheraven */ 56245803Stheravenclass input_buffer 57245803Stheraven{ 58245803Stheraven protected: 59245803Stheraven /** 60245803Stheraven * The buffer. This class doesn't own the buffer, but the 61245803Stheraven * mmap_input_buffer subclass does. 62245803Stheraven */ 63245803Stheraven const char* buffer; 64245803Stheraven /** 65245803Stheraven * The size of the buffer. 66245803Stheraven */ 67245803Stheraven int size; 68245803Stheraven private: 69245803Stheraven /** 70292876Stheraven * Parse an expression. If `stopAtParen` is set, then only parse a number 71292876Stheraven * or a parenthetical expression, otherwise assume that either is the 72292876Stheraven * left-hand side of a binary expression and try to parse the right-hand 73292876Stheraven * side. 74292876Stheraven */ 75292876Stheraven expression_ptr parse_expression(bool stopAtParen=false); 76292876Stheraven /** 77292876Stheraven * Parse a binary expression, having already parsed the right-hand side. 78292876Stheraven */ 79292876Stheraven expression_ptr parse_binary_expression(expression_ptr lhs); 80292876Stheraven /** 81245803Stheraven * The current place in the buffer where we are reading. This class 82245803Stheraven * keeps a separate size, pointer, and cursor so that we can move 83245803Stheraven * forwards and backwards and still have checks that we haven't fallen 84245803Stheraven * off either end. 85245803Stheraven */ 86245803Stheraven int cursor; 87245803Stheraven /** 88245803Stheraven * Private constructor. This is used to create input buffers that 89245803Stheraven * refer to the same memory, but have different cursors. 90245803Stheraven */ 91245803Stheraven input_buffer(const char* b, int s, int c) : buffer(b), size(s), 92245803Stheraven cursor(c) {} 93245803Stheraven /** 94245803Stheraven * Reads forward past any spaces. The DTS format is not whitespace 95245803Stheraven * sensitive and so we want to scan past whitespace when reading it. 96245803Stheraven */ 97245803Stheraven void skip_spaces(); 98245803Stheraven public: 99245803Stheraven /** 100289935Stheraven * Return whether all input has been consumed. 101289935Stheraven */ 102289935Stheraven bool finished() { return cursor >= size; } 103289935Stheraven /** 104245803Stheraven * Virtual destructor. Does nothing, but exists so that subclasses 105245803Stheraven * that own the memory can run cleanup code for deallocating it. 106245803Stheraven */ 107245803Stheraven virtual ~input_buffer() {}; 108245803Stheraven /** 109245803Stheraven * Constructs an empty buffer. 110245803Stheraven */ 111245803Stheraven input_buffer() : buffer(0), size(0), cursor(0) {} 112245803Stheraven /** 113245803Stheraven * Constructs a new buffer with a specified memory region and size. 114245803Stheraven */ 115245803Stheraven input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){} 116245803Stheraven /** 117245803Stheraven * Returns a new input buffer referring into this input, clamped to the 118245803Stheraven * specified size. If the requested buffer would fall outside the 119245803Stheraven * range of this one, then it returns an empty buffer. 120245803Stheraven * 121245803Stheraven * The returned buffer shares the same underlying storage as the 122245803Stheraven * original. This is intended to be used for splitting up the various 123245803Stheraven * sections of a device tree blob. Requesting a size of 0 will give a 124245803Stheraven * buffer that extends to the end of the available memory. 125245803Stheraven */ 126245803Stheraven input_buffer buffer_from_offset(int offset, int s=0); 127245803Stheraven /** 128245803Stheraven * Returns true if this buffer has no unconsumed space in it. 129245803Stheraven */ 130245803Stheraven inline bool empty() 131245803Stheraven { 132245803Stheraven return cursor >= size; 133245803Stheraven } 134245803Stheraven /** 135245803Stheraven * Dereferencing operator, allows the buffer to be treated as a char* 136245803Stheraven * and dereferenced to give a character. This returns a null byte if 137245803Stheraven * the cursor is out of range. 138245803Stheraven */ 139245803Stheraven inline char operator*() 140245803Stheraven { 141245803Stheraven if (cursor >= size) { return '\0'; } 142245803Stheraven if (cursor < 0) { return '\0'; } 143245803Stheraven return buffer[cursor]; 144245803Stheraven } 145245803Stheraven /** 146245803Stheraven * Array subscripting operator, returns a character at the specified 147245803Stheraven * index offset from the current cursor. The offset may be negative, 148245803Stheraven * to reread characters that have already been read. If the current 149245803Stheraven * cursor plus offset is outside of the range, this returns a nul 150245803Stheraven * byte. 151245803Stheraven */ 152245803Stheraven inline char operator[](int offset) 153245803Stheraven { 154245803Stheraven if (cursor + offset >= size) { return '\0'; } 155245803Stheraven if (cursor + offset < 0) { return '\0'; } 156245803Stheraven return buffer[cursor + offset]; 157245803Stheraven } 158245803Stheraven /** 159245803Stheraven * Increments the cursor, iterating forward in the buffer. 160245803Stheraven */ 161245803Stheraven inline input_buffer &operator++() 162245803Stheraven { 163245803Stheraven cursor++; 164245803Stheraven return *this; 165245803Stheraven } 166245803Stheraven /** 167245803Stheraven * Cast to char* operator. Returns a pointer into the buffer that can 168245803Stheraven * be used for constructing strings. 169245803Stheraven */ 170245803Stheraven inline operator const char*() 171245803Stheraven { 172245803Stheraven if (cursor >= size) { return 0; } 173245803Stheraven if (cursor < 0) { return 0; } 174245803Stheraven return &buffer[cursor]; 175245803Stheraven } 176245803Stheraven /** 177245803Stheraven * Consumes a character. Moves the cursor one character forward if the 178245803Stheraven * next character matches the argument, returning true. If the current 179245803Stheraven * character does not match the argument, returns false. 180245803Stheraven */ 181245803Stheraven inline bool consume(char c) 182245803Stheraven { 183245803Stheraven if ((*this)[0] == c) 184245803Stheraven { 185245803Stheraven ++(*this); 186245803Stheraven return true; 187245803Stheraven } 188245803Stheraven return false; 189245803Stheraven } 190245803Stheraven /** 191245803Stheraven * Consumes a string. If the (null-terminated) string passed as the 192245803Stheraven * argument appears in the input, advances the cursor to the end and 193245803Stheraven * returns true. Returns false if the string does not appear at the 194245803Stheraven * current point in the input. 195245803Stheraven */ 196245803Stheraven bool consume(const char *str); 197245803Stheraven /** 198245803Stheraven * Reads an integer in base 8, 10, or 16. Returns true and advances 199245803Stheraven * the cursor to the end of the integer if the cursor points to an 200245803Stheraven * integer, returns false and does not move the cursor otherwise. 201245803Stheraven * 202245803Stheraven * The parsed value is returned via the argument. 203245803Stheraven */ 204289935Stheraven bool consume_integer(unsigned long long &outInt); 205245803Stheraven /** 206292876Stheraven * Reads an arithmetic expression (containing any of the normal C 207292876Stheraven * operators), evaluates it, and returns the result. 208292876Stheraven */ 209292876Stheraven bool consume_integer_expression(unsigned long long &outInt); 210292876Stheraven /** 211245803Stheraven * Template function that consumes a binary value in big-endian format 212245803Stheraven * from the input stream. Returns true and advances the cursor if 213245803Stheraven * there is a value of the correct size. This function assumes that 214245803Stheraven * all values must be natively aligned, and so advances the cursor to 215245803Stheraven * the correct alignment before reading. 216245803Stheraven */ 217245803Stheraven template<typename T> 218245803Stheraven bool consume_binary(T &out) 219245803Stheraven { 220245803Stheraven int align = 0; 221245803Stheraven int type_size = sizeof(T); 222245803Stheraven if (cursor % type_size != 0) 223245803Stheraven { 224245803Stheraven align = type_size - (cursor % type_size); 225245803Stheraven } 226245803Stheraven if (size < cursor + align + type_size) 227245803Stheraven { 228245803Stheraven return false; 229245803Stheraven } 230245803Stheraven cursor += align; 231245803Stheraven assert(cursor % type_size == 0); 232245803Stheraven out = 0; 233245803Stheraven for (int i=0 ; i<type_size ; ++i) 234245803Stheraven { 235245803Stheraven out <<= 8; 236245803Stheraven out |= (((T)buffer[cursor++]) & 0xff); 237245803Stheraven } 238245803Stheraven return true; 239245803Stheraven } 240245803Stheraven /** 241245803Stheraven * Consumes two hex digits and return the resulting byte via the first 242245803Stheraven * argument. If the next two characters are hex digits, returns true 243245803Stheraven * and advances the cursor. If not, then returns false and leaves the 244245803Stheraven * cursor in place. 245245803Stheraven */ 246245803Stheraven bool consume_hex_byte(uint8_t &outByte); 247245803Stheraven /** 248245803Stheraven * Advances the cursor to the start of the next token, skipping 249245803Stheraven * comments and whitespace. If the cursor already points to the start 250245803Stheraven * of a token, then this function does nothing. 251245803Stheraven */ 252245803Stheraven input_buffer &next_token(); 253245803Stheraven /** 254245803Stheraven * Prints a message indicating the location of a parse error. 255245803Stheraven */ 256245803Stheraven void parse_error(const char *msg); 257292876Stheraven#ifndef NDEBUG 258245803Stheraven /** 259245803Stheraven * Dumps the current cursor value and the unconsumed values in the 260245803Stheraven * input buffer to the standard error. This method is intended solely 261245803Stheraven * for debugging. 262245803Stheraven */ 263245803Stheraven void dump(); 264292876Stheraven#endif 265245803Stheraven}; 266245803Stheraven/** 267245803Stheraven * Explicit specialisation for reading a single byte. 268245803Stheraven */ 269245803Stheraventemplate<> 270245803Stheraveninline bool input_buffer::consume_binary(uint8_t &out) 271245803Stheraven{ 272245803Stheraven if (size < cursor + 1) 273245803Stheraven { 274245803Stheraven return false; 275245803Stheraven } 276245803Stheraven out = buffer[cursor++]; 277245803Stheraven return true; 278245803Stheraven} 279245803Stheraven 280245803Stheraven/** 281245803Stheraven * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 282245803Stheraven * When this object is destroyed, the memory is unmapped. 283245803Stheraven */ 284245803Stheravenstruct mmap_input_buffer : public input_buffer 285245803Stheraven{ 286245803Stheraven /** 287245803Stheraven * Constructs a new buffer from the file passed in as a file 288245803Stheraven * descriptor. 289245803Stheraven */ 290245803Stheraven mmap_input_buffer(int fd); 291245803Stheraven /** 292245803Stheraven * Unmaps the buffer, if one exists. 293245803Stheraven */ 294245803Stheraven virtual ~mmap_input_buffer(); 295245803Stheraven}; 296245803Stheraven/** 297245803Stheraven * Input buffer read from standard input. This is used for reading device tree 298245803Stheraven * blobs and source from standard input. It reads the entire input into 299245803Stheraven * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 300245803Stheraven * are very rarely more than 10KB though, so this is probably not a problem. 301245803Stheraven */ 302245803Stheravenstruct stream_input_buffer : public input_buffer 303245803Stheraven{ 304245803Stheraven /** 305245803Stheraven * The buffer that will store the data read from the standard input. 306245803Stheraven */ 307245803Stheraven std::vector<char> b; 308245803Stheraven /** 309245803Stheraven * Constructs a new buffer from the standard input. 310245803Stheraven */ 311245803Stheraven stream_input_buffer(); 312245803Stheraven}; 313245803Stheraven 314245803Stheraven} // namespace dtc 315245803Stheraven 316245803Stheraven#endif // !_INPUT_BUFFER_HH_ 317