1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013 David Chisnall
5 * All rights reserved.
6 *
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9 * ("CTSRD"), as part of the DARPA CRASH research programme.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * $FreeBSD$
33 */
34
35#ifndef _INPUT_BUFFER_HH_
36#define _INPUT_BUFFER_HH_
37#include "util.hh"
38#include <assert.h>
39#include <stack>
40#include <string>
41#include <unordered_set>
42
43namespace dtc
44{
45
namespace
{
/**
 * Opaque expression node.  Only the name is introduced here; the definition
 * is not visible in this header.
 */
struct expression;
/**
 * Uniquely-owning pointer to an expression node.
 */
using expression_ptr = std::unique_ptr<expression>;
} // anonymous namespace
50
/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to access anything out of range will return a 0
 * byte.  The input buffer can be cheaply copied, without copying the
 * underlying memory, however it is the user's responsibility to ensure that
 * such copies do not persist beyond the lifetime of the underlying memory.
 *
 * This also contains methods for consuming tokens (characters, strings,
 * integers and big-endian binary values) from the input.
 *
 * All accessors that merely inspect the buffer are const-qualified so that
 * they can be used through a const reference.
 */
class input_buffer
{
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	public:
	/**
	 * Returns the file name associated with this buffer.  The base-class
	 * implementation returns a reference to an empty string with static
	 * storage duration; the method is virtual so that subclasses can
	 * return something more useful.
	 */
	virtual const std::string &filename() const
	{
		static std::string s;
		return s;
	}
	/**
	 * Creates an input buffer for the file at `path`.  Defined out of
	 * line.  NOTE(review): `warn` presumably controls whether a diagnostic
	 * is emitted when the file cannot be opened - confirm against the
	 * implementation.
	 */
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed, i.e. whether the cursor
	 * has reached or passed the end of the buffer.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	inline char operator*() const
	{
		if (cursor >= size) { return '\0'; }
		if (cursor < 0) { return '\0'; }
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	inline char operator[](int offset) const
	{
		if (cursor + offset >= size) { return '\0'; }
		if (cursor + offset < 0) { return '\0'; }
		return buffer[cursor + offset];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  No range
	 * check is performed here; reads past the end are caught by the
	 * accessors instead.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Returns a pointer to the first byte of the underlying memory,
	 * irrespective of the cursor position.
	 */
	const char *begin() const
	{
		return buffer;
	}
	/**
	 * Returns a pointer one past the last byte of the underlying memory.
	 */
	const char *end() const
	{
		return buffer + size;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 */
	inline bool consume(char c)
	{
		if (*(*this) == c)
		{
			++(*this);
			return true;
		}
		return false;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * On failure the cursor and `out` are left untouched.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = sizeof(T);
		// A negative cursor can never address a valid value; reject it
		// in the same way that the character accessors do.
		if (cursor < 0)
		{
			return false;
		}
		// Number of padding bytes needed to reach natural alignment.
		int align = 0;
		if (cursor % type_size != 0)
		{
			align = type_size - (cursor % type_size);
		}
		// Both the padding and the value itself must fit in the buffer.
		if (size < cursor + align + type_size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		// The range check above guarantees that all type_size bytes are
		// inside the buffer, so no per-byte bounds check is required.
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			// Mask to the low eight bits so that a negative char does
			// not sign-extend into the bytes already accumulated.
			out |= (static_cast<T>(buffer[cursor++]) & 0xff);
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
261/**
262 * Explicit specialisation for reading a single byte.
263 */
264template<>
265inline bool input_buffer::consume_binary(uint8_t &out)
266{
267	if (size < cursor + 1)
268	{
269		return false;
270	}
271	out = buffer[cursor++];
272	return true;
273}
274
275/**
276 * An input buffer subclass used for parsing DTS files.  This manages a stack
277 * of input buffers to handle /input/ operations.
278 */
279class text_input_buffer
280{
281	std::unordered_set<std::string> defines;
282	/**
283	 * The cursor is the input into the input stream where we are currently reading.
284	 */
285	int cursor = 0;
286	/**
287	 * The current stack of includes.  The current input is always from the top
288	 * of the stack.
289	 */
290	std::stack<std::shared_ptr<input_buffer>> input_stack;
291	/**
292	 *
293	 */
294	const std::vector<std::string> include_paths;
295	/**
296	 * Reads forward past any spaces.  The DTS format is not whitespace
297	 * sensitive and so we want to scan past whitespace when reading it.
298	 */
299	void skip_spaces();
300	/**
301	 * Returns the character immediately after the current one.
302	 *
303	 * This method does not look between files.
304	 */
305	char peek();
306	/**
307	 * If a /include/ token is encountered, then look up the corresponding
308	 * input file, push it onto the input stack, and continue.
309	 */
310	void handle_include();
311	/**
312	 * The base directory for this file.
313	 */
314	const std::string dir;
315	/**
316	 * The file where dependencies should be output.
317	 */
318	FILE *depfile;
319	public:
320	/**
321	 * Construct a new text input buffer with the specified buffer as the start
322	 * of parsing and the specified set of input paths for handling new
323	 * inclusions.
324	 */
325	text_input_buffer(std::unique_ptr<input_buffer> &&b,
326	                  std::unordered_set<std::string> &&d,
327	                  std::vector<std::string> &&i,
328	                  const std::string directory,
329	                  FILE *deps)
330		: defines(d), include_paths(i), dir(directory), depfile(deps)
331	{
332		input_stack.push(std::move(b));
333	}
334	/**
335	 * Skips all characters in the input until the specified character is
336	 * encountered.
337	 */
338	void skip_to(char);
339	/**
340	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
341	 * or a parenthetical expression, otherwise assume that either is the
342	 * left-hand side of a binary expression and try to parse the right-hand
343	 * side.
344	 */
345	expression_ptr parse_expression(bool stopAtParen=false);
346	/**
347	 * Parse a binary expression, having already parsed the right-hand side.
348	 */
349	expression_ptr parse_binary_expression(expression_ptr lhs);
350	/**
351	 * Return whether all input has been consumed.
352	 */
353	bool finished()
354	{
355		return input_stack.empty() ||
356			((input_stack.size() == 1) && input_stack.top()->finished());
357	}
358	/**
359	 * Dereferencing operator.  Returns the current character in the top input buffer.
360	 */
361	inline char operator*()
362	{
363		if (input_stack.empty())
364		{
365			return 0;
366		}
367		return *(*input_stack.top());
368	}
369	/**
370	 * Increments the cursor, iterating forward in the buffer.
371	 */
372	inline text_input_buffer &operator++()
373	{
374		if (input_stack.empty())
375		{
376			return *this;
377		}
378		cursor++;
379		auto &top = *input_stack.top();
380		++top;
381		if (top.finished())
382		{
383			input_stack.pop();
384		}
385		return *this;
386	}
387	/**
388	 * Consumes a character.  Moves the cursor one character forward if the
389	 * next character matches the argument, returning true.  If the current
390	 * character does not match the argument, returns false.
391	 */
392	inline bool consume(char c)
393	{
394		if (*(*this) == c)
395		{
396			++(*this);
397			return true;
398		}
399		return false;
400	}
401	/**
402	 * Consumes a string.  If the (null-terminated) string passed as the
403	 * argument appears in the input, advances the cursor to the end and
404	 * returns true.  Returns false if the string does not appear at the
405	 * current point in the input.
406	 *
407	 * This method does not scan between files.
408	 */
409	bool consume(const char *str)
410	{
411		if (input_stack.empty())
412		{
413			return false;
414		}
415		return input_stack.top()->consume(str);
416	}
417	/**
418	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
419	 * the cursor to the end of the integer if the cursor points to an
420	 * integer, returns false and does not move the cursor otherwise.
421	 *
422	 * The parsed value is returned via the argument.
423	 *
424	 * This method does not scan between files.
425	 */
426	bool consume_integer(unsigned long long &outInt)
427	{
428		if (input_stack.empty())
429		{
430			return false;
431		}
432		return input_stack.top()->consume_integer(outInt);
433	}
434	/**
435	 * Reads an arithmetic expression (containing any of the normal C
436	 * operators), evaluates it, and returns the result.
437	 */
438	bool consume_integer_expression(unsigned long long &outInt);
439	/**
440	 * Consumes two hex digits and return the resulting byte via the first
441	 * argument.  If the next two characters are hex digits, returns true
442	 * and advances the cursor.  If not, then returns false and leaves the
443	 * cursor in place.
444	 *
445	 * This method does not scan between files.
446	 */
447	bool consume_hex_byte(uint8_t &outByte)
448	{
449		if (input_stack.empty())
450		{
451			return false;
452		}
453		return input_stack.top()->consume_hex_byte(outByte);
454	}
455	/**
456	 * Returns the longest string in the input buffer starting at the
457	 * current cursor and composed entirely of characters that are valid in
458	 * node names.
459	*/
460	std::string parse_node_name();
461	/**
462	 * Returns the longest string in the input buffer starting at the
463	 * current cursor and composed entirely of characters that are valid in
464	 * property names.
465	 */
466	std::string parse_property_name();
467	/**
468	 * Parses either a node or a property name.  If is_property is true on
469	 * entry, then only property names are parsed.  If it is false, then it
470	 * will be set, on return, to indicate whether the parsed name is only
471	 * valid as a property.
472	 */
473	std::string parse_node_or_property_name(bool &is_property);
474	/**
475	 * Parses up to a specified character and returns the intervening
476	 * characters as a string.
477	 */
478	std::string parse_to(char);
479	/**
480	 * Advances the cursor to the start of the next token, skipping
481	 * comments and whitespace.  If the cursor already points to the start
482	 * of a token, then this function does nothing.
483	 */
484	text_input_buffer &next_token();
485	/**
486	 * Location in the source file.  This should never be interpreted by
487	 * anything other than error reporting functions of this class.  It will
488	 * eventually become something more complex than an `int`.
489	 */
490	class source_location
491	{
492		friend class text_input_buffer;
493		/**
494		 * The text buffer object that included `b`.
495		 */
496		text_input_buffer &buffer;
497		/**
498		 * The underlying buffer that contains this location.
499		 */
500		std::shared_ptr<input_buffer> b;
501		/**
502		 * The offset within the current buffer of the source location.
503		 */
504		int cursor;
505		source_location(text_input_buffer &buf)
506			: buffer(buf),
507			  b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
508			  cursor(b ? b->cursor : 0) {}
509		public:
510		/**
511		 * Report an error at this location.
512		 */
513		void report_error(const char *msg)
514		{
515			if (b)
516			{
517				buffer.parse_error(msg, *b, cursor);
518			}
519			else
520			{
521				buffer.parse_error(msg);
522			}
523		}
524	};
525	/**
526	 * Returns the current source location.
527	 */
528	source_location location()
529	{
530		return { *this };
531	}
532	/**
533	 * Prints a message indicating the location of a parse error.
534	 */
535	void parse_error(const char *msg);
536	/**
537	 * Reads the contents of a binary file into `b`.  The file name is assumed
538	 * to be relative to one of the include paths.
539	 *
540	 * Returns true if the file exists and can be read, false otherwise.
541	 */
542	bool read_binary_file(const std::string &filename, byte_buffer &b);
543	private:
544	/**
545	 * Prints a message indicating the location of a parse error, given a
546	 * specified location.  This is used when input has already moved beyond
547	 * the location that caused the failure.
548	 */
549	void parse_error(const char *msg, input_buffer &b, int loc);
550};
551
552} // namespace dtc
553
554#endif // !_INPUT_BUFFER_HH_
555