1245803Stheraven/*-
2245803Stheraven * Copyright (c) 2013 David Chisnall
3245803Stheraven * All rights reserved.
4245803Stheraven *
5245803Stheraven * This software was developed by SRI International and the University of
6245803Stheraven * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7245803Stheraven * ("CTSRD"), as part of the DARPA CRASH research programme.
8245803Stheraven *
9245803Stheraven * Redistribution and use in source and binary forms, with or without
10245803Stheraven * modification, are permitted provided that the following conditions
11245803Stheraven * are met:
12245803Stheraven * 1. Redistributions of source code must retain the above copyright
13245803Stheraven *    notice, this list of conditions and the following disclaimer.
14245803Stheraven * 2. Redistributions in binary form must reproduce the above copyright
15245803Stheraven *    notice, this list of conditions and the following disclaimer in the
16245803Stheraven *    documentation and/or other materials provided with the distribution.
17245803Stheraven *
18245803Stheraven * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19245803Stheraven * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20245803Stheraven * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21245803Stheraven * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22245803Stheraven * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23245803Stheraven * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24245803Stheraven * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25245803Stheraven * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26245803Stheraven * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27245803Stheraven * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28245803Stheraven * SUCH DAMAGE.
29245803Stheraven *
30245803Stheraven * $FreeBSD$
31245803Stheraven */
32245803Stheraven
33245803Stheraven#include "input_buffer.hh"
34245839Stheraven#include <ctype.h>
35245839Stheraven#include <limits.h>
36245839Stheraven#include <stdint.h>
37245839Stheraven#include <stdio.h>
38245839Stheraven#include <stdlib.h>
39245803Stheraven#include <string.h>
40245803Stheraven
41245839Stheraven
42245803Stheraven#include <sys/stat.h>
43245803Stheraven#include <sys/mman.h>
44245803Stheraven#include <assert.h>
45245803Stheraven
46247006Suqs#ifndef MAP_PREFAULT_READ
47247006Suqs#define MAP_PREFAULT_READ 0
48247006Suqs#endif
49247006Suqs
50245803Stheravennamespace dtc
51245803Stheraven{
52245803Stheraven
53245803Stheravenvoid
54245803Stheraveninput_buffer::skip_spaces()
55245803Stheraven{
56245803Stheraven	if (cursor >= size) { return; }
57245803Stheraven	if (cursor < 0) { return; }
58245803Stheraven	char c = buffer[cursor];
59245803Stheraven	while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
60245803Stheraven	       || (c == '\v') || (c == '\r'))
61245803Stheraven	{
62245803Stheraven		cursor++;
63245803Stheraven		if (cursor > size)
64245803Stheraven		{
65245803Stheraven			c = '\0';
66245803Stheraven		}
67245803Stheraven		else
68245803Stheraven		{
69245803Stheraven			c = buffer[cursor];
70245803Stheraven		}
71245803Stheraven	}
72245803Stheraven}
73245803Stheraven
74245803Stheraveninput_buffer
75245803Stheraveninput_buffer::buffer_from_offset(int offset, int s)
76245803Stheraven{
77245803Stheraven	if (s == 0)
78245803Stheraven	{
79245803Stheraven		s = size - offset;
80245803Stheraven	}
81245803Stheraven	if (offset > size)
82245803Stheraven	{
83245803Stheraven		return input_buffer();
84245803Stheraven	}
85245803Stheraven	if (s > (size-offset))
86245803Stheraven	{
87245803Stheraven		return input_buffer();
88245803Stheraven	}
89245803Stheraven	return input_buffer(&buffer[offset], s);
90245803Stheraven}
91245803Stheraven
92245803Stheravenbool
93245803Stheraveninput_buffer::consume(const char *str)
94245803Stheraven{
95245803Stheraven	int len = strlen(str);
96245803Stheraven	if (len > size - cursor)
97245803Stheraven	{
98245803Stheraven		return false;
99245803Stheraven	}
100245803Stheraven	else
101245803Stheraven	{
102245803Stheraven		for (int i=0 ; i<len ; ++i)
103245803Stheraven		{
104245803Stheraven			if (str[i] != buffer[cursor + i])
105245803Stheraven			{
106245803Stheraven				return false;
107245803Stheraven			}
108245803Stheraven		}
109245803Stheraven		cursor += len;
110245803Stheraven		return true;
111245803Stheraven	}
112245803Stheraven	return false;
113245803Stheraven}
114245803Stheraven
115245803Stheravenbool
116245803Stheraveninput_buffer::consume_integer(long long &outInt)
117245803Stheraven{
118245803Stheraven	// The first character must be a digit.  Hex and octal strings
119245803Stheraven	// are prefixed by 0 and 0x, respectively.
120245803Stheraven	if (!isdigit((*this)[0]))
121245803Stheraven	{
122245803Stheraven		return false;
123245803Stheraven	}
124245803Stheraven	char *end=0;
125245803Stheraven	outInt = strtoll(&buffer[cursor], &end, 0);
126245803Stheraven	if (end == &buffer[cursor])
127245803Stheraven	{
128245803Stheraven		return false;
129245803Stheraven	}
130245803Stheraven	cursor = end - buffer;
131245803Stheraven	return true;
132245803Stheraven}
133245803Stheraven
134245803Stheravenbool
135245803Stheraveninput_buffer::consume_hex_byte(uint8_t &outByte)
136245803Stheraven{
137245803Stheraven	if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
138245803Stheraven	{
139245803Stheraven		return false;
140245803Stheraven	}
141245803Stheraven	outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
142245803Stheraven	cursor += 2;
143245803Stheraven	return true;
144245803Stheraven}
145245803Stheraven
146245803Stheraveninput_buffer&
147245803Stheraveninput_buffer::next_token()
148245803Stheraven{
149245803Stheraven	int start;
150245803Stheraven	do {
151245803Stheraven		start = cursor;
152245803Stheraven		skip_spaces();
153245803Stheraven		// Parse /* comments
154245803Stheraven		if (((*this)[0] == '/') && ((*this)[1] == '*'))
155245803Stheraven		{
156245803Stheraven			// eat the start of the comment
157245803Stheraven			++(*this);
158245803Stheraven			++(*this);
159245803Stheraven			do {
160245803Stheraven				// Find the ending * of */
161245803Stheraven				while ((**this != '\0') && (**this != '*'))
162245803Stheraven				{
163245803Stheraven					++(*this);
164245803Stheraven				}
165245803Stheraven				// Eat the *
166245803Stheraven				++(*this);
167245803Stheraven			} while ((**this != '\0') && (**this != '/'));
168245803Stheraven			// Eat the /
169245803Stheraven			++(*this);
170245803Stheraven		}
171245803Stheraven		// Parse // comments
172245803Stheraven		if (((*this)[0] == '/') && ((*this)[1] == '/'))
173245803Stheraven		{
174245803Stheraven			// eat the start of the comment
175245803Stheraven			++(*this);
176245803Stheraven			++(*this);
177245803Stheraven			// Find the ending * of */
178245803Stheraven			while (**this != '\n')
179245803Stheraven			{
180245803Stheraven				++(*this);
181245803Stheraven			}
182245803Stheraven			// Eat the \n
183245803Stheraven			++(*this);
184245803Stheraven		}
185245803Stheraven	} while (start != cursor);
186245803Stheraven	return *this;
187245803Stheraven}
188245803Stheraven
189245803Stheravenvoid
190245803Stheraveninput_buffer::parse_error(const char *msg)
191245803Stheraven{
192245803Stheraven	int line_count = 1;
193245803Stheraven	int line_start = 0;
194245803Stheraven	int line_end = cursor;
195245803Stheraven	for (int i=cursor ; i>0 ; --i)
196245803Stheraven	{
197245803Stheraven		if (buffer[i] == '\n')
198245803Stheraven		{
199245803Stheraven			line_count++;
200245803Stheraven			if (line_start == 0)
201245803Stheraven			{
202245803Stheraven				line_start = i+1;
203245803Stheraven			}
204245803Stheraven		}
205245803Stheraven	}
206245803Stheraven	for (int i=cursor+1 ; i<size ; ++i)
207245803Stheraven	{
208245803Stheraven		if (buffer[i] == '\n')
209245803Stheraven		{
210245803Stheraven			line_end = i;
211245803Stheraven			break;
212245803Stheraven		}
213245803Stheraven	}
214245803Stheraven	fprintf(stderr, "Error on line %d: %s\n", line_count, msg);
215245803Stheraven	fwrite(&buffer[line_start], line_end-line_start, 1, stderr);
216245803Stheraven	putc('\n', stderr);
217245803Stheraven	for (int i=0 ; i<(cursor-line_start) ; ++i)
218245803Stheraven	{
219254327Stheraven		char c = (buffer[i+line_start] == '\t') ? '\t' : ' ';
220254327Stheraven		putc(c, stderr);
221245803Stheraven	}
222245803Stheraven	putc('^', stderr);
223245803Stheraven	putc('\n', stderr);
224245803Stheraven}
225245803Stheravenvoid
226245803Stheraveninput_buffer::dump()
227245803Stheraven{
228245803Stheraven	fprintf(stderr, "Current cursor: %d\n", cursor);
229245803Stheraven	fwrite(&buffer[cursor], size-cursor, 1, stderr);
230245803Stheraven}
231245803Stheraven
232245803Stheravenmmap_input_buffer::mmap_input_buffer(int fd) : input_buffer(0, 0)
233245803Stheraven{
234245803Stheraven	struct stat sb;
235245803Stheraven	if (fstat(fd, &sb))
236245803Stheraven	{
237245803Stheraven		perror("Failed to stat file");
238245803Stheraven	}
239245803Stheraven	size = sb.st_size;
240245803Stheraven	buffer = (const char*)mmap(0, size, PROT_READ,
241245803Stheraven		MAP_PREFAULT_READ, fd, 0);
242245803Stheraven	if (buffer == 0)
243245803Stheraven	{
244245803Stheraven		perror("Failed to mmap file");
245245803Stheraven	}
246245803Stheraven}
247245803Stheraven
248245803Stheravenmmap_input_buffer::~mmap_input_buffer()
249245803Stheraven{
250245803Stheraven	if (buffer != 0)
251245803Stheraven	{
252245803Stheraven		munmap((void*)buffer, size);
253245803Stheraven	}
254245803Stheraven}
255245803Stheraven
256245803Stheravenstream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
257245803Stheraven{
258245803Stheraven	int c;
259245803Stheraven	while ((c = fgetc(stdin)) != EOF)
260245803Stheraven	{
261245803Stheraven		b.push_back(c);
262245803Stheraven	}
263245803Stheraven	buffer = b.data();
264245803Stheraven	size = b.size();
265245803Stheraven}
266245803Stheraven
267245803Stheraven} // namespace dtc
268245803Stheraven
269