string.cc revision 245803
1245803Stheraven/*-
2245803Stheraven * Copyright (c) 2013 David Chisnall
3245803Stheraven * All rights reserved.
4245803Stheraven *
5245803Stheraven * This software was developed by SRI International and the University of
6245803Stheraven * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7245803Stheraven * ("CTSRD"), as part of the DARPA CRASH research programme.
8245803Stheraven *
9245803Stheraven * Redistribution and use in source and binary forms, with or without
10245803Stheraven * modification, are permitted provided that the following conditions
11245803Stheraven * are met:
12245803Stheraven * 1. Redistributions of source code must retain the above copyright
13245803Stheraven *    notice, this list of conditions and the following disclaimer.
14245803Stheraven * 2. Redistributions in binary form must reproduce the above copyright
15245803Stheraven *    notice, this list of conditions and the following disclaimer in the
16245803Stheraven *    documentation and/or other materials provided with the distribution.
17245803Stheraven *
18245803Stheraven * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19245803Stheraven * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20245803Stheraven * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21245803Stheraven * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22245803Stheraven * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23245803Stheraven * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24245803Stheraven * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25245803Stheraven * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26245803Stheraven * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27245803Stheraven * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28245803Stheraven * SUCH DAMAGE.
29245803Stheraven *
30245803Stheraven * $FreeBSD: head/usr.bin/dtc/string.cc 245803 2013-01-22 17:49:51Z theraven $
31245803Stheraven */
32245803Stheraven
33245803Stheraven#include "string.hh"
34245803Stheraven
35245803Stheravennamespace
36245803Stheraven{
/**
 * Locale-independent replacement for isalpha().  Device tree sources are
 * ASCII, so only the two ASCII letter ranges are accepted.  The predicate is
 * wrapped in a struct with a static check() method so that it can be passed
 * as a template argument to the generic string-parsing routine.
 */
struct is_alpha
{
	/**
	 * Returns true if `c` is an ASCII letter (A-Z or a-z), false otherwise.
	 */
	static inline bool check(const char c)
	{
		if ((c >= 'A') && (c <= 'Z'))
		{
			return true;
		}
		return (c >= 'a') && (c <= 'z');
	}
};
/**
 * Check whether a character is in the set allowed for node names: ASCII
 * letters, decimal digits, and the punctuation characters ',', '.', '+', '-'
 * and '_'.  This is a class so that it can be used with a template function
 * for parsing strings.
 *
 * The range tests are written as ordinary comparisons rather than with case
 * ranges, because case ranges are a compiler extension and not standard C++.
 * Like the rest of this file, the comparisons assume an ASCII source
 * character set.
 */
struct is_node_name_character
{
	/**
	 * Returns true if `c` may appear in a node name, false otherwise.
	 */
	static inline bool check(const char c)
	{
		if (((c >= 'a') && (c <= 'z')) ||
		    ((c >= 'A') && (c <= 'Z')) ||
		    ((c >= '0') && (c <= '9')))
		{
			return true;
		}
		switch (c)
		{
			case ',': case '.': case '+': case '-':
			case '_':
				return true;
			default:
				return false;
		}
	}
};
/**
 * Check whether a character is in the set allowed for property names: ASCII
 * letters, decimal digits, and the punctuation characters ',', '.', '+', '-',
 * '_' and '#'.  This is a class so that it can be used with a template
 * function for parsing strings.
 *
 * The range tests are written as ordinary comparisons rather than with case
 * ranges, because case ranges are a compiler extension and not standard C++.
 * Like the rest of this file, the comparisons assume an ASCII source
 * character set.
 */
struct is_property_name_character
{
	/**
	 * Returns true if `c` may appear in a property name, false otherwise.
	 */
	static inline bool check(const char c)
	{
		if (((c >= 'a') && (c <= 'z')) ||
		    ((c >= 'A') && (c <= 'Z')) ||
		    ((c >= '0') && (c <= '9')))
		{
			return true;
		}
		switch (c)
		{
			case ',': case '.': case '+': case '-':
			case '_': case '#':
				return true;
			default:
				return false;
		}
	}
};
88245803Stheraven
89245803Stheraven}
90245803Stheraven
91245803Stheravennamespace dtc
92245803Stheraven{
93245803Stheraven
94245803Stheraventemplate<class T> string
95245803Stheravenstring::parse(input_buffer &s)
96245803Stheraven{
97245803Stheraven	const char *start = s;
98245803Stheraven	int l=0;
99245803Stheraven	while (T::check(*s)) { l++; ++s; }
100245803Stheraven	return string(start, l);
101245803Stheraven}
102245803Stheraven
103245803Stheravenstring::string(input_buffer &s) : start((const char*)s), length(0)
104245803Stheraven{
105245803Stheraven	while(s[length] != '\0')
106245803Stheraven	{
107245803Stheraven		length++;
108245803Stheraven	}
109245803Stheraven}
110245803Stheraven
111245803Stheravenstring
112245803Stheravenstring::parse_node_name(input_buffer &s)
113245803Stheraven{
114245803Stheraven	return parse<is_node_name_character>(s);
115245803Stheraven}
116245803Stheraven
117245803Stheravenstring
118245803Stheravenstring::parse_property_name(input_buffer &s)
119245803Stheraven{
120245803Stheraven	return parse<is_property_name_character>(s);
121245803Stheraven}
122245803Stheravenstring
123245803Stheravenstring::parse_node_or_property_name(input_buffer &s, bool &is_property)
124245803Stheraven{
125245803Stheraven	if (is_property)
126245803Stheraven	{
127245803Stheraven		return parse_property_name(s);
128245803Stheraven	}
129245803Stheraven	const char *start = s;
130245803Stheraven	int l=0;
131245803Stheraven	while (is_node_name_character::check(*s))
132245803Stheraven	{
133245803Stheraven		l++;
134245803Stheraven		++s;
135245803Stheraven	}
136245803Stheraven	while (is_property_name_character::check(*s))
137245803Stheraven	{
138245803Stheraven		l++;
139245803Stheraven		++s;
140245803Stheraven		is_property = true;
141245803Stheraven	}
142245803Stheraven	return string(start, l);
143245803Stheraven}
144245803Stheraven
145245803Stheravenbool
146245803Stheravenstring::operator==(const string& other) const
147245803Stheraven{
148245803Stheraven	return (length == other.length) &&
149245803Stheraven	       (memcmp(start, other.start, length) == 0);
150245803Stheraven}
151245803Stheraven
152245803Stheravenbool
153245803Stheravenstring::operator==(const char *other) const
154245803Stheraven{
155245803Stheraven	return strncmp(other, start, length) == 0;
156245803Stheraven}
157245803Stheraven
158245803Stheravenbool
159245803Stheravenstring::operator<(const string& other) const
160245803Stheraven{
161245803Stheraven	if (length < other.length) { return true; }
162245803Stheraven	if (length > other.length) { return false; }
163245803Stheraven	return memcmp(start, other.start, length) < 0;
164245803Stheraven}
165245803Stheraven
166245803Stheravenvoid
167245803Stheravenstring::push_to_buffer(byte_buffer &buffer, bool escapes)
168245803Stheraven{
169245803Stheraven	for (int i=0 ; i<length ; ++i)
170245803Stheraven	{
171245803Stheraven		uint8_t c = start[i];
172245803Stheraven		if (escapes && c == '\\' && i+1 < length)
173245803Stheraven		{
174245803Stheraven			c = start[++i];
175245803Stheraven			switch (c)
176245803Stheraven			{
177245803Stheraven				// For now, we just ignore invalid escape sequences.
178245803Stheraven				default:
179245803Stheraven				case '"':
180245803Stheraven				case '\'':
181245803Stheraven				case '\\':
182245803Stheraven					break;
183245803Stheraven				case 'a':
184245803Stheraven					c = '\a';
185245803Stheraven					break;
186245803Stheraven				case 'b':
187245803Stheraven					c = '\b';
188245803Stheraven					break;
189245803Stheraven				case 't':
190245803Stheraven					c = '\t';
191245803Stheraven					break;
192245803Stheraven				case 'n':
193245803Stheraven					c = '\n';
194245803Stheraven					break;
195245803Stheraven				case 'v':
196245803Stheraven					c = '\v';
197245803Stheraven					break;
198245803Stheraven				case 'f':
199245803Stheraven					c = '\f';
200245803Stheraven					break;
201245803Stheraven				case 'r':
202245803Stheraven					c = '\r';
203245803Stheraven					break;
204245803Stheraven				case '0'...'7':
205245803Stheraven				{
206245803Stheraven					int v = digittoint(c);
207245803Stheraven					if (i+1 < length && start[i+1] <= '7' && start[i+1] >= '0')
208245803Stheraven					{
209245803Stheraven						v <<= 3;
210245803Stheraven						v |= digittoint(start[i+1]);
211245803Stheraven						i++;
212245803Stheraven						if (i+1 < length && start[i+1] <= '7' && start[i+1] >= '0')
213245803Stheraven						{
214245803Stheraven							v <<= 3;
215245803Stheraven							v |= digittoint(start[i+1]);
216245803Stheraven						}
217245803Stheraven					}
218245803Stheraven					c = (uint8_t)v;
219245803Stheraven					break;
220245803Stheraven				}
221245803Stheraven				case 'x':
222245803Stheraven				{
223245803Stheraven					++i;
224245803Stheraven					if (i >= length)
225245803Stheraven					{
226245803Stheraven						break;
227245803Stheraven					}
228245803Stheraven					int v = digittoint(start[i]);
229245803Stheraven					if (i+1 < length && ishexdigit(start[i+1]))
230245803Stheraven					{
231245803Stheraven						v <<= 4;
232245803Stheraven						v |= digittoint(start[++i]);
233245803Stheraven					}
234245803Stheraven					c = (uint8_t)v;
235245803Stheraven					break;
236245803Stheraven				}
237245803Stheraven			}
238245803Stheraven		}
239245803Stheraven		buffer.push_back(c);
240245803Stheraven	}
241245803Stheraven}
242245803Stheraven
243245803Stheravenvoid
244245803Stheravenstring::print(FILE *file)
245245803Stheraven{
246245803Stheraven	fwrite(start, length, 1, file);
247245803Stheraven}
248245803Stheraven
249245803Stheravenvoid
250245803Stheravenstring::dump()
251245803Stheraven{
252245803Stheraven	print(stderr);
253245803Stheraven}
254245803Stheraven
255245803Stheraven} // namespace dtc
256245803Stheraven
257