1245803Stheraven/*- 2245803Stheraven * Copyright (c) 2013 David Chisnall 3245803Stheraven * All rights reserved. 4245803Stheraven * 5245803Stheraven * This software was developed by SRI International and the University of 6245803Stheraven * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7245803Stheraven * ("CTSRD"), as part of the DARPA CRASH research programme. 8245803Stheraven * 9245803Stheraven * Redistribution and use in source and binary forms, with or without 10245803Stheraven * modification, are permitted provided that the following conditions 11245803Stheraven * are met: 12245803Stheraven * 1. Redistributions of source code must retain the above copyright 13245803Stheraven * notice, this list of conditions and the following disclaimer. 14245803Stheraven * 2. Redistributions in binary form must reproduce the above copyright 15245803Stheraven * notice, this list of conditions and the following disclaimer in the 16245803Stheraven * documentation and/or other materials provided with the distribution. 17245803Stheraven * 18245803Stheraven * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19245803Stheraven * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20245803Stheraven * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21245803Stheraven * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22245803Stheraven * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23245803Stheraven * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24245803Stheraven * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25245803Stheraven * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26245803Stheraven * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27245803Stheraven * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28245803Stheraven * SUCH DAMAGE. 29245803Stheraven * 30245803Stheraven * $FreeBSD$ 31245803Stheraven */ 32245803Stheraven 33245803Stheraven#include "input_buffer.hh" 34245839Stheraven#include <ctype.h> 35245839Stheraven#include <limits.h> 36245839Stheraven#include <stdint.h> 37245839Stheraven#include <stdio.h> 38245839Stheraven#include <stdlib.h> 39245803Stheraven#include <string.h> 40245803Stheraven 41245839Stheraven 42245803Stheraven#include <sys/stat.h> 43245803Stheraven#include <sys/mman.h> 44245803Stheraven#include <assert.h> 45245803Stheraven 46247006Suqs#ifndef MAP_PREFAULT_READ 47247006Suqs#define MAP_PREFAULT_READ 0 48247006Suqs#endif 49247006Suqs 50245803Stheravennamespace dtc 51245803Stheraven{ 52245803Stheraven 53245803Stheravenvoid 54245803Stheraveninput_buffer::skip_spaces() 55245803Stheraven{ 56245803Stheraven if (cursor >= size) { return; } 57245803Stheraven if (cursor < 0) { return; } 58245803Stheraven char c = buffer[cursor]; 59245803Stheraven while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f') 60245803Stheraven || (c == '\v') || (c == '\r')) 61245803Stheraven { 62245803Stheraven cursor++; 63245803Stheraven if (cursor > size) 64245803Stheraven { 65245803Stheraven c = '\0'; 66245803Stheraven } 67245803Stheraven else 68245803Stheraven { 69245803Stheraven c = buffer[cursor]; 70245803Stheraven } 71245803Stheraven } 72245803Stheraven} 73245803Stheraven 74245803Stheraveninput_buffer 75245803Stheraveninput_buffer::buffer_from_offset(int offset, int s) 76245803Stheraven{ 77245803Stheraven if (s == 0) 78245803Stheraven { 79245803Stheraven s = size - offset; 80245803Stheraven } 81245803Stheraven if (offset > size) 82245803Stheraven { 83245803Stheraven return input_buffer(); 84245803Stheraven } 85245803Stheraven if (s > (size-offset)) 86245803Stheraven { 87245803Stheraven return input_buffer(); 88245803Stheraven } 89245803Stheraven return input_buffer(&buffer[offset], s); 90245803Stheraven} 91245803Stheraven 92245803Stheravenbool 93245803Stheraveninput_buffer::consume(const char *str) 94245803Stheraven{ 95245803Stheraven int len = strlen(str); 96245803Stheraven if (len > size - cursor) 97245803Stheraven { 98245803Stheraven return false; 99245803Stheraven } 100245803Stheraven else 101245803Stheraven { 102245803Stheraven for (int i=0 ; i<len ; ++i) 103245803Stheraven { 104245803Stheraven if (str[i] != buffer[cursor + i]) 105245803Stheraven { 106245803Stheraven return false; 107245803Stheraven } 108245803Stheraven } 109245803Stheraven cursor += len; 110245803Stheraven return true; 111245803Stheraven } 112245803Stheraven return false; 113245803Stheraven} 114245803Stheraven 115245803Stheravenbool 116245803Stheraveninput_buffer::consume_integer(long long &outInt) 117245803Stheraven{ 118245803Stheraven // The first character must be a digit. Hex and octal strings 119245803Stheraven // are prefixed by 0 and 0x, respectively. 120245803Stheraven if (!isdigit((*this)[0])) 121245803Stheraven { 122245803Stheraven return false; 123245803Stheraven } 124245803Stheraven char *end=0; 125245803Stheraven outInt = strtoll(&buffer[cursor], &end, 0); 126245803Stheraven if (end == &buffer[cursor]) 127245803Stheraven { 128245803Stheraven return false; 129245803Stheraven } 130245803Stheraven cursor = end - buffer; 131245803Stheraven return true; 132245803Stheraven} 133245803Stheraven 134245803Stheravenbool 135245803Stheraveninput_buffer::consume_hex_byte(uint8_t &outByte) 136245803Stheraven{ 137245803Stheraven if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1])) 138245803Stheraven { 139245803Stheraven return false; 140245803Stheraven } 141245803Stheraven outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]); 142245803Stheraven cursor += 2; 143245803Stheraven return true; 144245803Stheraven} 145245803Stheraven 146245803Stheraveninput_buffer& 147245803Stheraveninput_buffer::next_token() 148245803Stheraven{ 149245803Stheraven int start; 150245803Stheraven do { 151245803Stheraven start = cursor; 152245803Stheraven skip_spaces(); 153245803Stheraven // Parse /* comments 154245803Stheraven if (((*this)[0] == '/') && ((*this)[1] == '*')) 155245803Stheraven { 156245803Stheraven // eat the start of the comment 157245803Stheraven ++(*this); 158245803Stheraven ++(*this); 159245803Stheraven do { 160245803Stheraven // Find the ending * of */ 161245803Stheraven while ((**this != '\0') && (**this != '*')) 162245803Stheraven { 163245803Stheraven ++(*this); 164245803Stheraven } 165245803Stheraven // Eat the * 166245803Stheraven ++(*this); 167245803Stheraven } while ((**this != '\0') && (**this != '/')); 168245803Stheraven // Eat the / 169245803Stheraven ++(*this); 170245803Stheraven } 171245803Stheraven // Parse // comments 172245803Stheraven if (((*this)[0] == '/') && ((*this)[1] == '/')) 173245803Stheraven { 174245803Stheraven // eat the start of the comment 175245803Stheraven ++(*this); 176245803Stheraven ++(*this); 177245803Stheraven // Find the ending * of */ 178245803Stheraven while (**this != '\n') 179245803Stheraven { 180245803Stheraven ++(*this); 181245803Stheraven } 182245803Stheraven // Eat the \n 183245803Stheraven ++(*this); 184245803Stheraven } 185245803Stheraven } while (start != cursor); 186245803Stheraven return *this; 187245803Stheraven} 188245803Stheraven 189245803Stheravenvoid 190245803Stheraveninput_buffer::parse_error(const char *msg) 191245803Stheraven{ 192245803Stheraven int line_count = 1; 193245803Stheraven int line_start = 0; 194245803Stheraven int line_end = cursor; 195245803Stheraven for (int i=cursor ; i>0 ; --i) 196245803Stheraven { 197245803Stheraven if (buffer[i] == '\n') 198245803Stheraven { 199245803Stheraven line_count++; 200245803Stheraven if (line_start == 0) 201245803Stheraven { 202245803Stheraven line_start = i+1; 203245803Stheraven } 204245803Stheraven } 205245803Stheraven } 206245803Stheraven for (int i=cursor+1 ; i<size ; ++i) 207245803Stheraven { 208245803Stheraven if (buffer[i] == '\n') 209245803Stheraven { 210245803Stheraven line_end = i; 211245803Stheraven break; 212245803Stheraven } 213245803Stheraven } 214245803Stheraven fprintf(stderr, "Error on line %d: %s\n", line_count, msg); 215245803Stheraven fwrite(&buffer[line_start], line_end-line_start, 1, stderr); 216245803Stheraven putc('\n', stderr); 217245803Stheraven for (int i=0 ; i<(cursor-line_start) ; ++i) 218245803Stheraven { 219254327Stheraven char c = (buffer[i+line_start] == '\t') ? '\t' : ' '; 220254327Stheraven putc(c, stderr); 221245803Stheraven } 222245803Stheraven putc('^', stderr); 223245803Stheraven putc('\n', stderr); 224245803Stheraven} 225245803Stheravenvoid 226245803Stheraveninput_buffer::dump() 227245803Stheraven{ 228245803Stheraven fprintf(stderr, "Current cursor: %d\n", cursor); 229245803Stheraven fwrite(&buffer[cursor], size-cursor, 1, stderr); 230245803Stheraven} 231245803Stheraven 232245803Stheravenmmap_input_buffer::mmap_input_buffer(int fd) : input_buffer(0, 0) 233245803Stheraven{ 234245803Stheraven struct stat sb; 235245803Stheraven if (fstat(fd, &sb)) 236245803Stheraven { 237245803Stheraven perror("Failed to stat file"); 238245803Stheraven } 239245803Stheraven size = sb.st_size; 240245803Stheraven buffer = (const char*)mmap(0, size, PROT_READ, 241245803Stheraven MAP_PREFAULT_READ, fd, 0); 242245803Stheraven if (buffer == 0) 243245803Stheraven { 244245803Stheraven perror("Failed to mmap file"); 245245803Stheraven } 246245803Stheraven} 247245803Stheraven 248245803Stheravenmmap_input_buffer::~mmap_input_buffer() 249245803Stheraven{ 250245803Stheraven if (buffer != 0) 251245803Stheraven { 252245803Stheraven munmap((void*)buffer, size); 253245803Stheraven } 254245803Stheraven} 255245803Stheraven 256245803Stheravenstream_input_buffer::stream_input_buffer() : input_buffer(0, 0) 257245803Stheraven{ 258245803Stheraven int c; 259245803Stheraven while ((c = fgetc(stdin)) != EOF) 260245803Stheraven { 261245803Stheraven b.push_back(c); 262245803Stheraven } 263245803Stheraven buffer = b.data(); 264245803Stheraven size = b.size(); 265245803Stheraven} 266245803Stheraven 267245803Stheraven} // namespace dtc 268245803Stheraven 269