1245803Stheraven/*-
2245803Stheraven * Copyright (c) 2013 David Chisnall
3245803Stheraven * All rights reserved.
4245803Stheraven *
5245803Stheraven * This software was developed by SRI International and the University of
6245803Stheraven * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7245803Stheraven * ("CTSRD"), as part of the DARPA CRASH research programme.
8245803Stheraven *
9245803Stheraven * Redistribution and use in source and binary forms, with or without
10245803Stheraven * modification, are permitted provided that the following conditions
11245803Stheraven * are met:
12245803Stheraven * 1. Redistributions of source code must retain the above copyright
13245803Stheraven *    notice, this list of conditions and the following disclaimer.
14245803Stheraven * 2. Redistributions in binary form must reproduce the above copyright
15245803Stheraven *    notice, this list of conditions and the following disclaimer in the
16245803Stheraven *    documentation and/or other materials provided with the distribution.
17245803Stheraven *
18245803Stheraven * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19245803Stheraven * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20245803Stheraven * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21245803Stheraven * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22245803Stheraven * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23245803Stheraven * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24245803Stheraven * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25245803Stheraven * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26245803Stheraven * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27245803Stheraven * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28245803Stheraven * SUCH DAMAGE.
29245803Stheraven *
30245803Stheraven * $FreeBSD$
31245803Stheraven */
32245803Stheraven
33245803Stheraven#ifndef _INPUT_BUFFER_HH_
34245803Stheraven#define _INPUT_BUFFER_HH_
35245803Stheraven#include "util.hh"
36245803Stheraven#include <assert.h>
37245803Stheraven
38245803Stheravennamespace dtc
39245803Stheraven{
40245803Stheraven
/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to read a character that is out of range
 * returns a 0 byte.  The input buffer can be cheaply copied, without copying
 * the underlying memory, however it is the user's responsibility to ensure
 * that such copies do not persist beyond the lifetime of the underlying
 * memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer, in bytes.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	/**
	 * Reads forward past any spaces.  The DTS format is not whitespace
	 * sensitive and so we want to scan past whitespace when reading it.
	 */
	void skip_spaces();
	public:
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() {}
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(0), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Returns true if this buffer has no unconsumed space in it, i.e. the
	 * cursor is at or beyond the end of the buffer.
	 */
	inline bool empty() const
	{
		return cursor >= size;
	}
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	inline char operator*() const
	{
		if (cursor >= size) { return '\0'; }
		if (cursor < 0) { return '\0'; }
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	inline char operator[](int offset) const
	{
		// Widen before adding: cursor + offset evaluated as int could
		// overflow for extreme offsets, which is undefined behaviour and
		// could defeat the range checks below.
		const long long index = static_cast<long long>(cursor) + offset;
		if (index >= size) { return '\0'; }
		if (index < 0) { return '\0'; }
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  The cursor
	 * is not clamped here; reads past the end are caught by the accessors.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Cast to char* operator.  Returns a pointer into the buffer that can
	 * be used for constructing strings, or a null pointer if the cursor is
	 * out of range.
	 */
	inline operator const char*() const
	{
		if (cursor >= size) { return 0; }
		if (cursor < 0) { return 0; }
		return &buffer[cursor];
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 *
	 * Note that an out-of-range read yields a nul byte, so consume('\0')
	 * succeeds (and still advances the cursor) once the input is
	 * exhausted.
	 */
	inline bool consume(char c)
	{
		if ((*this)[0] == c)
		{
			++(*this);
			return true;
		}
		return false;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(long long &outInt);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size; returns false and leaves the
	 * cursor (and the output argument) untouched otherwise.
	 *
	 * This function assumes that all values must be natively aligned, and
	 * so advances the cursor to the correct alignment before reading.  The
	 * alignment is computed from the cursor, i.e. relative to the start of
	 * the buffer rather than from the address of the underlying memory.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = sizeof(T);
		// A negative cursor would make the padding calculation below
		// meaningless; refuse it, as the other accessors do.
		if (cursor < 0)
		{
			return false;
		}
		int align = 0;
		if (cursor % type_size != 0)
		{
			align = type_size - (cursor % type_size);
		}
		// Do the bounds arithmetic in a wider type so that a cursor close
		// to INT_MAX cannot overflow and appear to be in range.
		const long long end =
			static_cast<long long>(cursor) + align + type_size;
		if (size < end)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		// Most significant byte first: shift the bytes read so far up and
		// append the next one, masked so that a signed char cannot
		// sign-extend into the higher bits.
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			out |= (((T)buffer[cursor++]) & 0xff);
		}
		return true;
	}
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Advances the cursor to the start of the next token, skipping
	 * comments and whitespace.  If the cursor already points to the start
	 * of a token, then this function does nothing.
	 */
	input_buffer &next_token();
	/**
	 * Prints a message indicating the location of a parse error.
	 */
	void parse_error(const char *msg);
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
};
239245803Stheraven/**
240245803Stheraven * Explicit specialisation for reading a single byte.
241245803Stheraven */
242245803Stheraventemplate<>
243245803Stheraveninline bool input_buffer::consume_binary(uint8_t &out)
244245803Stheraven{
245245803Stheraven	if (size < cursor + 1)
246245803Stheraven	{
247245803Stheraven		return false;
248245803Stheraven	}
249245803Stheraven	out = buffer[cursor++];
250245803Stheraven	return true;
251245803Stheraven}
252245803Stheraven
253245803Stheraven/**
254245803Stheraven * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
255245803Stheraven * When this object is destroyed, the memory is unmapped.
256245803Stheraven */
257245803Stheravenstruct mmap_input_buffer : public input_buffer
258245803Stheraven{
259245803Stheraven	/**
260245803Stheraven	 * Constructs a new buffer from the file passed in as a file
261245803Stheraven	 * descriptor.
262245803Stheraven	 */
263245803Stheraven	mmap_input_buffer(int fd);
264245803Stheraven	/**
265245803Stheraven	 * Unmaps the buffer, if one exists.
266245803Stheraven	 */
267245803Stheraven	virtual ~mmap_input_buffer();
268245803Stheraven};
269245803Stheraven/**
270245803Stheraven * Input buffer read from standard input.  This is used for reading device tree
271245803Stheraven * blobs and source from standard input.  It reads the entire input into
272245803Stheraven * malloc'd memory, so will be very slow for large inputs.  DTS and DTB files
273245803Stheraven * are very rarely more than 10KB though, so this is probably not a problem.
274245803Stheraven */
275245803Stheravenstruct stream_input_buffer : public input_buffer
276245803Stheraven{
277245803Stheraven	/**
278245803Stheraven	 * The buffer that will store the data read from the standard input.
279245803Stheraven	 */
280245803Stheraven	std::vector<char> b;
281245803Stheraven	/**
282245803Stheraven	 * Constructs a new buffer from the standard input.
283245803Stheraven	 */
284245803Stheraven	stream_input_buffer();
285245803Stheraven};
286245803Stheraven
287245803Stheraven} // namespace dtc
288245803Stheraven
289245803Stheraven#endif // !_INPUT_BUFFER_HH_
290