string.cc revision 245803
1245803Stheraven/*- 2245803Stheraven * Copyright (c) 2013 David Chisnall 3245803Stheraven * All rights reserved. 4245803Stheraven * 5245803Stheraven * This software was developed by SRI International and the University of 6245803Stheraven * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7245803Stheraven * ("CTSRD"), as part of the DARPA CRASH research programme. 8245803Stheraven * 9245803Stheraven * Redistribution and use in source and binary forms, with or without 10245803Stheraven * modification, are permitted provided that the following conditions 11245803Stheraven * are met: 12245803Stheraven * 1. Redistributions of source code must retain the above copyright 13245803Stheraven * notice, this list of conditions and the following disclaimer. 14245803Stheraven * 2. Redistributions in binary form must reproduce the above copyright 15245803Stheraven * notice, this list of conditions and the following disclaimer in the 16245803Stheraven * documentation and/or other materials provided with the distribution. 17245803Stheraven * 18245803Stheraven * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19245803Stheraven * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20245803Stheraven * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21245803Stheraven * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22245803Stheraven * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23245803Stheraven * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24245803Stheraven * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25245803Stheraven * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26245803Stheraven * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27245803Stheraven * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28245803Stheraven * SUCH DAMAGE. 29245803Stheraven * 30245803Stheraven * $FreeBSD: head/usr.bin/dtc/string.cc 245803 2013-01-22 17:49:51Z theraven $ 31245803Stheraven */ 32245803Stheraven 33245803Stheraven#include "string.hh" 34245803Stheraven 35245803Stheravennamespace 36245803Stheraven{ 37245803Stheraven/** 38245803Stheraven * The source files are ASCII, so we provide a non-locale-aware version of 39245803Stheraven * isalpha. This is a class so that it can be used with a template function 40245803Stheraven * for parsing strings. 41245803Stheraven */ 42245803Stheravenstruct is_alpha 43245803Stheraven{ 44245803Stheraven static inline bool check(const char c) 45245803Stheraven { 46245803Stheraven return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && 47245803Stheraven (c <= 'Z')); 48245803Stheraven } 49245803Stheraven}; 50245803Stheraven/** 51245803Stheraven * Check whether a character is in the set allowed for node names. This is a 52245803Stheraven * class so that it can be used with a template function for parsing strings. 53245803Stheraven */ 54245803Stheravenstruct is_node_name_character 55245803Stheraven{ 56245803Stheraven static inline bool check(const char c) 57245803Stheraven { 58245803Stheraven switch(c) 59245803Stheraven { 60245803Stheraven default: 61245803Stheraven return false; 62245803Stheraven case 'a'...'z': case 'A'...'Z': case '0'...'9': 63245803Stheraven case ',': case '.': case '+': case '-': 64245803Stheraven case '_': 65245803Stheraven return true; 66245803Stheraven } 67245803Stheraven } 68245803Stheraven}; 69245803Stheraven/** 70245803Stheraven * Check whether a character is in the set allowed for property names. This is 71245803Stheraven * a class so that it can be used with a template function for parsing strings. 72245803Stheraven */ 73245803Stheravenstruct is_property_name_character 74245803Stheraven{ 75245803Stheraven static inline bool check(const char c) 76245803Stheraven { 77245803Stheraven switch(c) 78245803Stheraven { 79245803Stheraven default: 80245803Stheraven return false; 81245803Stheraven case 'a'...'z': case 'A'...'Z': case '0'...'9': 82245803Stheraven case ',': case '.': case '+': case '-': 83245803Stheraven case '_': case '#': 84245803Stheraven return true; 85245803Stheraven } 86245803Stheraven } 87245803Stheraven}; 88245803Stheraven 89245803Stheraven} 90245803Stheraven 91245803Stheravennamespace dtc 92245803Stheraven{ 93245803Stheraven 94245803Stheraventemplate<class T> string 95245803Stheravenstring::parse(input_buffer &s) 96245803Stheraven{ 97245803Stheraven const char *start = s; 98245803Stheraven int l=0; 99245803Stheraven while (T::check(*s)) { l++; ++s; } 100245803Stheraven return string(start, l); 101245803Stheraven} 102245803Stheraven 103245803Stheravenstring::string(input_buffer &s) : start((const char*)s), length(0) 104245803Stheraven{ 105245803Stheraven while(s[length] != '\0') 106245803Stheraven { 107245803Stheraven length++; 108245803Stheraven } 109245803Stheraven} 110245803Stheraven 111245803Stheravenstring 112245803Stheravenstring::parse_node_name(input_buffer &s) 113245803Stheraven{ 114245803Stheraven return parse<is_node_name_character>(s); 115245803Stheraven} 116245803Stheraven 117245803Stheravenstring 118245803Stheravenstring::parse_property_name(input_buffer &s) 119245803Stheraven{ 120245803Stheraven return parse<is_property_name_character>(s); 121245803Stheraven} 122245803Stheravenstring 123245803Stheravenstring::parse_node_or_property_name(input_buffer &s, bool &is_property) 124245803Stheraven{ 125245803Stheraven if (is_property) 126245803Stheraven { 127245803Stheraven return parse_property_name(s); 128245803Stheraven } 129245803Stheraven const char *start = s; 130245803Stheraven int l=0; 131245803Stheraven while (is_node_name_character::check(*s)) 132245803Stheraven { 133245803Stheraven l++; 134245803Stheraven ++s; 135245803Stheraven } 136245803Stheraven while (is_property_name_character::check(*s)) 137245803Stheraven { 138245803Stheraven l++; 139245803Stheraven ++s; 140245803Stheraven is_property = true; 141245803Stheraven } 142245803Stheraven return string(start, l); 143245803Stheraven} 144245803Stheraven 145245803Stheravenbool 146245803Stheravenstring::operator==(const string& other) const 147245803Stheraven{ 148245803Stheraven return (length == other.length) && 149245803Stheraven (memcmp(start, other.start, length) == 0); 150245803Stheraven} 151245803Stheraven 152245803Stheravenbool 153245803Stheravenstring::operator==(const char *other) const 154245803Stheraven{ 155245803Stheraven return strncmp(other, start, length) == 0; 156245803Stheraven} 157245803Stheraven 158245803Stheravenbool 159245803Stheravenstring::operator<(const string& other) const 160245803Stheraven{ 161245803Stheraven if (length < other.length) { return true; } 162245803Stheraven if (length > other.length) { return false; } 163245803Stheraven return memcmp(start, other.start, length) < 0; 164245803Stheraven} 165245803Stheraven 166245803Stheravenvoid 167245803Stheravenstring::push_to_buffer(byte_buffer &buffer, bool escapes) 168245803Stheraven{ 169245803Stheraven for (int i=0 ; i<length ; ++i) 170245803Stheraven { 171245803Stheraven uint8_t c = start[i]; 172245803Stheraven if (escapes && c == '\\' && i+1 < length) 173245803Stheraven { 174245803Stheraven c = start[++i]; 175245803Stheraven switch (c) 176245803Stheraven { 177245803Stheraven // For now, we just ignore invalid escape sequences. 178245803Stheraven default: 179245803Stheraven case '"': 180245803Stheraven case '\'': 181245803Stheraven case '\\': 182245803Stheraven break; 183245803Stheraven case 'a': 184245803Stheraven c = '\a'; 185245803Stheraven break; 186245803Stheraven case 'b': 187245803Stheraven c = '\b'; 188245803Stheraven break; 189245803Stheraven case 't': 190245803Stheraven c = '\t'; 191245803Stheraven break; 192245803Stheraven case 'n': 193245803Stheraven c = '\n'; 194245803Stheraven break; 195245803Stheraven case 'v': 196245803Stheraven c = '\v'; 197245803Stheraven break; 198245803Stheraven case 'f': 199245803Stheraven c = '\f'; 200245803Stheraven break; 201245803Stheraven case 'r': 202245803Stheraven c = '\r'; 203245803Stheraven break; 204245803Stheraven case '0'...'7': 205245803Stheraven { 206245803Stheraven int v = digittoint(c); 207245803Stheraven if (i+1 < length && start[i+1] <= '7' && start[i+1] >= '0') 208245803Stheraven { 209245803Stheraven v <<= 3; 210245803Stheraven v |= digittoint(start[i+1]); 211245803Stheraven i++; 212245803Stheraven if (i+1 < length && start[i+1] <= '7' && start[i+1] >= '0') 213245803Stheraven { 214245803Stheraven v <<= 3; 215245803Stheraven v |= digittoint(start[i+1]); 216245803Stheraven } 217245803Stheraven } 218245803Stheraven c = (uint8_t)v; 219245803Stheraven break; 220245803Stheraven } 221245803Stheraven case 'x': 222245803Stheraven { 223245803Stheraven ++i; 224245803Stheraven if (i >= length) 225245803Stheraven { 226245803Stheraven break; 227245803Stheraven } 228245803Stheraven int v = digittoint(start[i]); 229245803Stheraven if (i+1 < length && ishexdigit(start[i+1])) 230245803Stheraven { 231245803Stheraven v <<= 4; 232245803Stheraven v |= digittoint(start[++i]); 233245803Stheraven } 234245803Stheraven c = (uint8_t)v; 235245803Stheraven break; 236245803Stheraven } 237245803Stheraven } 238245803Stheraven } 239245803Stheraven buffer.push_back(c); 240245803Stheraven } 241245803Stheraven} 242245803Stheraven 243245803Stheravenvoid 244245803Stheravenstring::print(FILE *file) 245245803Stheraven{ 246245803Stheraven fwrite(start, length, 1, file); 247245803Stheraven} 248245803Stheraven 249245803Stheravenvoid 250245803Stheravenstring::dump() 251245803Stheraven{ 252245803Stheraven print(stderr); 253245803Stheraven} 254245803Stheraven 255245803Stheraven} // namespace dtc 256245803Stheraven 257