/*-
 * Copyright (c) 2013 David Chisnall
 * All rights reserved.
 *
 * This software was developed by SRI International and the University of
 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
 * ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/11.0/usr.bin/dtc/input_buffer.hh 292876 2015-12-29 16:29:42Z theraven $
 */
32245803Stheraven
33245803Stheraven#ifndef _INPUT_BUFFER_HH_
34245803Stheraven#define _INPUT_BUFFER_HH_
35245803Stheraven#include "util.hh"
36245803Stheraven#include <assert.h>
37245803Stheraven
38245803Stheravennamespace dtc
39245803Stheraven{
40245803Stheraven
namespace {
/**
 * Forward declaration of the expression node type used by the expression
 * parser declared in input_buffer below.
 *
 * NOTE(review): an unnamed namespace in a header gives every translation unit
 * that includes it its own distinct `expression` type.  It is left unchanged
 * here because the definition of `expression` is not visible from this header
 * and must keep matching this declaration.
 */
struct expression;
/**
 * Owning pointer to an expression.
 */
using expression_ptr = std::unique_ptr<expression>;
}

/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to access anything out of range will return a 0
 * byte.  The input buffer can be cheaply copied, without copying the
 * underlying memory, however it is the user's responsibility to ensure that
 * such copies do not persist beyond the lifetime of the underlying memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer.
	 */
	int size;
	private:
	/**
	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
	 * or a parenthetical expression, otherwise assume that either is the
	 * left-hand side of a binary expression and try to parse the right-hand
	 * side.
	 */
	expression_ptr parse_expression(bool stopAtParen=false);
	/**
	 * Parse a binary expression, having already parsed the left-hand side
	 * (passed in as `lhs`).
	 */
	expression_ptr parse_binary_expression(expression_ptr lhs);
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	/**
	 * Reads forward past any spaces.  The DTS format is not whitespace
	 * sensitive and so we want to scan past whitespace when reading it.
	 */
	void skip_spaces();
	/**
	 * Returns true if `idx` is a valid index into the buffer, i.e. lies
	 * in [0, size).  The index is taken as a long long so that callers
	 * can add an offset to the cursor without overflowing an int first.
	 */
	bool index_in_range(long long idx) const
	{
		return (idx >= 0) && (idx < size);
	}
	public:
	/**
	 * Return whether all input has been consumed, i.e. whether the cursor
	 * has reached (or passed) the end of the buffer.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0) {}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Returns true if this buffer has no unconsumed space in it.  This is
	 * the same test as finished().
	 */
	bool empty() const
	{
		return finished();
	}
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	char operator*() const
	{
		return (*this)[0];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	char operator[](int offset) const
	{
		// Do the addition in a wider type: cursor + offset can exceed the
		// range of int for large offsets, and signed overflow is undefined.
		const long long idx = static_cast<long long>(cursor) + offset;
		return index_in_range(idx) ? buffer[idx] : '\0';
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  The cursor
	 * is not clamped; reads past the end are caught by the accessors.
	 */
	input_buffer &operator++()
	{
		++cursor;
		return *this;
	}
	/**
	 * Cast to char* operator.  Returns a pointer into the buffer that can
	 * be used for constructing strings, or a null pointer if the cursor is
	 * out of range.
	 */
	operator const char*() const
	{
		return index_in_range(cursor) ? &buffer[cursor] : nullptr;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 */
	bool consume(char c)
	{
		if ((*this)[0] != c)
		{
			return false;
		}
		++cursor;
		return true;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result via the argument.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size; returns false and leaves the
	 * cursor untouched otherwise.  This function assumes that all values
	 * must be natively aligned, and so advances the cursor to the correct
	 * alignment before reading.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = static_cast<int>(sizeof(T));
		// Padding needed to bring the cursor up to a multiple of the size.
		const int misalignment = cursor % type_size;
		const int align = (misalignment != 0) ? (type_size - misalignment) : 0;
		// Widen before adding so the bounds check itself cannot overflow.
		const long long end = static_cast<long long>(cursor) + align + type_size;
		if (size < end)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			// Go through unsigned char so that bytes >= 0x80 are not
			// sign-extended into the upper bits of the result.
			out |= static_cast<T>(static_cast<unsigned char>(buffer[cursor++]));
		}
		return true;
	}
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Advances the cursor to the start of the next token, skipping
	 * comments and whitespace.  If the cursor already points to the start
	 * of a token, then this function does nothing.
	 */
	input_buffer &next_token();
	/**
	 * Prints a message indicating the location of a parse error.
	 */
	void parse_error(const char *msg);
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
266245803Stheraven/**
267245803Stheraven * Explicit specialisation for reading a single byte.
268245803Stheraven */
269245803Stheraventemplate<>
270245803Stheraveninline bool input_buffer::consume_binary(uint8_t &out)
271245803Stheraven{
272245803Stheraven	if (size < cursor + 1)
273245803Stheraven	{
274245803Stheraven		return false;
275245803Stheraven	}
276245803Stheraven	out = buffer[cursor++];
277245803Stheraven	return true;
278245803Stheraven}
279245803Stheraven
280245803Stheraven/**
281245803Stheraven * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
282245803Stheraven * When this object is destroyed, the memory is unmapped.
283245803Stheraven */
284245803Stheravenstruct mmap_input_buffer : public input_buffer
285245803Stheraven{
286245803Stheraven	/**
287245803Stheraven	 * Constructs a new buffer from the file passed in as a file
288245803Stheraven	 * descriptor.
289245803Stheraven	 */
290245803Stheraven	mmap_input_buffer(int fd);
291245803Stheraven	/**
292245803Stheraven	 * Unmaps the buffer, if one exists.
293245803Stheraven	 */
294245803Stheraven	virtual ~mmap_input_buffer();
295245803Stheraven};
296245803Stheraven/**
297245803Stheraven * Input buffer read from standard input.  This is used for reading device tree
298245803Stheraven * blobs and source from standard input.  It reads the entire input into
299245803Stheraven * malloc'd memory, so will be very slow for large inputs.  DTS and DTB files
300245803Stheraven * are very rarely more than 10KB though, so this is probably not a problem.
301245803Stheraven */
302245803Stheravenstruct stream_input_buffer : public input_buffer
303245803Stheraven{
304245803Stheraven	/**
305245803Stheraven	 * The buffer that will store the data read from the standard input.
306245803Stheraven	 */
307245803Stheraven	std::vector<char> b;
308245803Stheraven	/**
309245803Stheraven	 * Constructs a new buffer from the standard input.
310245803Stheraven	 */
311245803Stheraven	stream_input_buffer();
312245803Stheraven};
313245803Stheraven
314245803Stheraven} // namespace dtc
315245803Stheraven
316245803Stheraven#endif // !_INPUT_BUFFER_HH_
317