1245803Stheraven/*-
2245803Stheraven * Copyright (c) 2013 David Chisnall
3245803Stheraven * All rights reserved.
4245803Stheraven *
5245803Stheraven * This software was developed by SRI International and the University of
6245803Stheraven * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7245803Stheraven * ("CTSRD"), as part of the DARPA CRASH research programme.
8245803Stheraven *
9245803Stheraven * Redistribution and use in source and binary forms, with or without
10245803Stheraven * modification, are permitted provided that the following conditions
11245803Stheraven * are met:
12245803Stheraven * 1. Redistributions of source code must retain the above copyright
13245803Stheraven *    notice, this list of conditions and the following disclaimer.
14245803Stheraven * 2. Redistributions in binary form must reproduce the above copyright
15245803Stheraven *    notice, this list of conditions and the following disclaimer in the
16245803Stheraven *    documentation and/or other materials provided with the distribution.
17245803Stheraven *
18245803Stheraven * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19245803Stheraven * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20245803Stheraven * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21245803Stheraven * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22245803Stheraven * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23245803Stheraven * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24245803Stheraven * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25245803Stheraven * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26245803Stheraven * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27245803Stheraven * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28245803Stheraven * SUCH DAMAGE.
29245803Stheraven *
30245803Stheraven * $FreeBSD$
31245803Stheraven */
32245803Stheraven
33245803Stheraven#include "string.hh"
34245839Stheraven#include <ctype.h>
35245839Stheraven#include <stdio.h>
36245803Stheraven
37245803Stheravennamespace
38245803Stheraven{
/**
 * Locale-independent ASCII letter predicate.  The predicate is wrapped in a
 * type with a static check() method so that it can be supplied as a template
 * argument to the generic string parsing routine.
 */
struct is_alpha
{
	/**
	 * Returns true if `c` lies in 'a'-'z' or 'A'-'Z', false otherwise.
	 */
	static inline bool check(const char c)
	{
		const bool is_lower = ('a' <= c) && (c <= 'z');
		const bool is_upper = ('A' <= c) && (c <= 'Z');
		return is_lower || is_upper;
	}
};
/**
 * Predicate for the characters permitted in node names: ASCII letters,
 * decimal digits and the punctuation characters ',', '.', '+', '-' and '_'.
 * The predicate is wrapped in a type with a static check() method so that it
 * can be supplied as a template argument to the generic string parsing
 * routine.
 */
struct is_node_name_character
{
	/**
	 * Returns true if `c` may appear in a node name, false otherwise.
	 */
	static inline bool check(const char c)
	{
		// Explicit comparisons are used for the alphanumeric ranges rather
		// than the GNU `case 'a'...'z'` extension, which is not standard C++.
		if (((c >= 'a') && (c <= 'z')) ||
		    ((c >= 'A') && (c <= 'Z')) ||
		    ((c >= '0') && (c <= '9')))
		{
			return true;
		}
		switch (c)
		{
			case ',': case '.': case '+': case '-':
			case '_':
				return true;
			default:
				return false;
		}
	}
};
/**
 * Predicate for the characters permitted in property names: ASCII letters,
 * decimal digits and the punctuation characters ',', '.', '+', '-', '_' and
 * '#'.  The predicate is wrapped in a type with a static check() method so
 * that it can be supplied as a template argument to the generic string
 * parsing routine.
 */
struct is_property_name_character
{
	/**
	 * Returns true if `c` may appear in a property name, false otherwise.
	 */
	static inline bool check(const char c)
	{
		// Explicit comparisons are used for the alphanumeric ranges rather
		// than the GNU `case 'a'...'z'` extension, which is not standard C++.
		if (((c >= 'a') && (c <= 'z')) ||
		    ((c >= 'A') && (c <= 'Z')) ||
		    ((c >= '0') && (c <= '9')))
		{
			return true;
		}
		switch (c)
		{
			case ',': case '.': case '+': case '-':
			case '_': case '#':
				return true;
			default:
				return false;
		}
	}
};
90245803Stheraven
91245803Stheraven}
92245803Stheraven
93245803Stheravennamespace dtc
94245803Stheraven{
95245803Stheraven
96245803Stheraventemplate<class T> string
97245803Stheravenstring::parse(input_buffer &s)
98245803Stheraven{
99245803Stheraven	const char *start = s;
100245803Stheraven	int l=0;
101245803Stheraven	while (T::check(*s)) { l++; ++s; }
102245803Stheraven	return string(start, l);
103245803Stheraven}
104245803Stheraven
105245803Stheravenstring::string(input_buffer &s) : start((const char*)s), length(0)
106245803Stheraven{
107245803Stheraven	while(s[length] != '\0')
108245803Stheraven	{
109245803Stheraven		length++;
110245803Stheraven	}
111245803Stheraven}
112245803Stheraven
113245803Stheravenstring
114245803Stheravenstring::parse_node_name(input_buffer &s)
115245803Stheraven{
116245803Stheraven	return parse<is_node_name_character>(s);
117245803Stheraven}
118245803Stheraven
119245803Stheravenstring
120245803Stheravenstring::parse_property_name(input_buffer &s)
121245803Stheraven{
122245803Stheraven	return parse<is_property_name_character>(s);
123245803Stheraven}
124245803Stheravenstring
125245803Stheravenstring::parse_node_or_property_name(input_buffer &s, bool &is_property)
126245803Stheraven{
127245803Stheraven	if (is_property)
128245803Stheraven	{
129245803Stheraven		return parse_property_name(s);
130245803Stheraven	}
131245803Stheraven	const char *start = s;
132245803Stheraven	int l=0;
133245803Stheraven	while (is_node_name_character::check(*s))
134245803Stheraven	{
135245803Stheraven		l++;
136245803Stheraven		++s;
137245803Stheraven	}
138245803Stheraven	while (is_property_name_character::check(*s))
139245803Stheraven	{
140245803Stheraven		l++;
141245803Stheraven		++s;
142245803Stheraven		is_property = true;
143245803Stheraven	}
144245803Stheraven	return string(start, l);
145245803Stheraven}
146245803Stheraven
147245803Stheravenbool
148245803Stheravenstring::operator==(const string& other) const
149245803Stheraven{
150245803Stheraven	return (length == other.length) &&
151245803Stheraven	       (memcmp(start, other.start, length) == 0);
152245803Stheraven}
153245803Stheraven
154245803Stheravenbool
155245803Stheravenstring::operator==(const char *other) const
156245803Stheraven{
157245803Stheraven	return strncmp(other, start, length) == 0;
158245803Stheraven}
159245803Stheraven
160245803Stheravenbool
161245803Stheravenstring::operator<(const string& other) const
162245803Stheraven{
163245803Stheraven	if (length < other.length) { return true; }
164245803Stheraven	if (length > other.length) { return false; }
165245803Stheraven	return memcmp(start, other.start, length) < 0;
166245803Stheraven}
167245803Stheraven
168245803Stheravenvoid
169245803Stheravenstring::push_to_buffer(byte_buffer &buffer, bool escapes)
170245803Stheraven{
171245803Stheraven	for (int i=0 ; i<length ; ++i)
172245803Stheraven	{
173245803Stheraven		uint8_t c = start[i];
174245803Stheraven		if (escapes && c == '\\' && i+1 < length)
175245803Stheraven		{
176245803Stheraven			c = start[++i];
177245803Stheraven			switch (c)
178245803Stheraven			{
179245803Stheraven				// For now, we just ignore invalid escape sequences.
180245803Stheraven				default:
181245803Stheraven				case '"':
182245803Stheraven				case '\'':
183245803Stheraven				case '\\':
184245803Stheraven					break;
185245803Stheraven				case 'a':
186245803Stheraven					c = '\a';
187245803Stheraven					break;
188245803Stheraven				case 'b':
189245803Stheraven					c = '\b';
190245803Stheraven					break;
191245803Stheraven				case 't':
192245803Stheraven					c = '\t';
193245803Stheraven					break;
194245803Stheraven				case 'n':
195245803Stheraven					c = '\n';
196245803Stheraven					break;
197245803Stheraven				case 'v':
198245803Stheraven					c = '\v';
199245803Stheraven					break;
200245803Stheraven				case 'f':
201245803Stheraven					c = '\f';
202245803Stheraven					break;
203245803Stheraven				case 'r':
204245803Stheraven					c = '\r';
205245803Stheraven					break;
206245803Stheraven				case '0'...'7':
207245803Stheraven				{
208245803Stheraven					int v = digittoint(c);
209245803Stheraven					if (i+1 < length && start[i+1] <= '7' && start[i+1] >= '0')
210245803Stheraven					{
211245803Stheraven						v <<= 3;
212245803Stheraven						v |= digittoint(start[i+1]);
213245803Stheraven						i++;
214245803Stheraven						if (i+1 < length && start[i+1] <= '7' && start[i+1] >= '0')
215245803Stheraven						{
216245803Stheraven							v <<= 3;
217245803Stheraven							v |= digittoint(start[i+1]);
218245803Stheraven						}
219245803Stheraven					}
220245803Stheraven					c = (uint8_t)v;
221245803Stheraven					break;
222245803Stheraven				}
223245803Stheraven				case 'x':
224245803Stheraven				{
225245803Stheraven					++i;
226245803Stheraven					if (i >= length)
227245803Stheraven					{
228245803Stheraven						break;
229245803Stheraven					}
230245803Stheraven					int v = digittoint(start[i]);
231245803Stheraven					if (i+1 < length && ishexdigit(start[i+1]))
232245803Stheraven					{
233245803Stheraven						v <<= 4;
234245803Stheraven						v |= digittoint(start[++i]);
235245803Stheraven					}
236245803Stheraven					c = (uint8_t)v;
237245803Stheraven					break;
238245803Stheraven				}
239245803Stheraven			}
240245803Stheraven		}
241245803Stheraven		buffer.push_back(c);
242245803Stheraven	}
243245803Stheraven}
244245803Stheraven
245245803Stheravenvoid
246245803Stheravenstring::print(FILE *file)
247245803Stheraven{
248245803Stheraven	fwrite(start, length, 1, file);
249245803Stheraven}
250245803Stheraven
251245803Stheravenvoid
252245803Stheravenstring::dump()
253245803Stheraven{
254245803Stheraven	print(stderr);
255245803Stheraven}
256245803Stheraven
257245803Stheraven} // namespace dtc
258245803Stheraven
259