1254721Semaste//===-- StringExtractor.cpp -------------------------------------*- C++ -*-===// 2254721Semaste// 3254721Semaste// The LLVM Compiler Infrastructure 4254721Semaste// 5254721Semaste// This file is distributed under the University of Illinois Open Source 6254721Semaste// License. See LICENSE.TXT for details. 7254721Semaste// 8254721Semaste//===----------------------------------------------------------------------===// 9254721Semaste 10254721Semaste#include "Utility/StringExtractor.h" 11254721Semaste 12254721Semaste// C Includes 13254721Semaste#include <stdlib.h> 14254721Semaste 15254721Semaste// C++ Includes 16254721Semaste// Other libraries and framework includes 17254721Semaste// Project includes 18254721Semaste 19254721Semastestatic const uint8_t 20254721Semasteg_hex_ascii_to_hex_integer[256] = { 21254721Semaste 22254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 23254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 24254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 25254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 26254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 27254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 28254721Semaste 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 29254721Semaste 0x8, 0x9, 255, 255, 255, 255, 255, 255, 30254721Semaste 255, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 255, 31254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 32254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 33254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 34254721Semaste 255, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 255, 35254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 36254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 37254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 38254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 39254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 40254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 41254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 42254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 43254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 44254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 45254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 46254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 47254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 48254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 49254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 50254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 51254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 52254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 53254721Semaste 255, 255, 255, 255, 255, 255, 255, 255, 54254721Semaste}; 55254721Semaste 56254721Semastestatic inline int 57254721Semastexdigit_to_sint (char ch) 58254721Semaste{ 59254721Semaste if (ch >= 'a' && ch <= 'f') 60254721Semaste return 10 + ch - 'a'; 61254721Semaste if (ch >= 'A' && ch <= 'F') 62254721Semaste return 10 + ch - 'A'; 63254721Semaste return ch - '0'; 64254721Semaste} 65254721Semaste 66254721Semastestatic inline unsigned int 67254721Semastexdigit_to_uint (uint8_t ch) 68254721Semaste{ 69254721Semaste if (ch >= 'a' && ch <= 'f') 70254721Semaste return 10u + ch - 'a'; 71254721Semaste if (ch >= 'A' && ch <= 'F') 72254721Semaste return 10u + ch - 'A'; 73254721Semaste return ch - '0'; 74254721Semaste} 75254721Semaste 76254721Semaste//---------------------------------------------------------------------- 77254721Semaste// StringExtractor constructor 78254721Semaste//---------------------------------------------------------------------- 79254721SemasteStringExtractor::StringExtractor() : 80254721Semaste m_packet(), 81254721Semaste m_index (0) 82254721Semaste{ 83254721Semaste} 84254721Semaste 85254721Semaste 86254721SemasteStringExtractor::StringExtractor(const char *packet_cstr) : 87254721Semaste m_packet(), 88254721Semaste m_index (0) 89254721Semaste{ 90254721Semaste if (packet_cstr) 91254721Semaste m_packet.assign (packet_cstr); 92254721Semaste} 93254721Semaste 94254721Semaste 95254721Semaste//---------------------------------------------------------------------- 96254721Semaste// StringExtractor copy constructor 97254721Semaste//---------------------------------------------------------------------- 98254721SemasteStringExtractor::StringExtractor(const StringExtractor& rhs) : 99254721Semaste m_packet (rhs.m_packet), 100254721Semaste m_index (rhs.m_index) 101254721Semaste{ 102254721Semaste 103254721Semaste} 104254721Semaste 105254721Semaste//---------------------------------------------------------------------- 106254721Semaste// StringExtractor assignment operator 107254721Semaste//---------------------------------------------------------------------- 108254721Semasteconst StringExtractor& 109254721SemasteStringExtractor::operator=(const StringExtractor& rhs) 110254721Semaste{ 111254721Semaste if (this != &rhs) 112254721Semaste { 113254721Semaste m_packet = rhs.m_packet; 114254721Semaste m_index = rhs.m_index; 115254721Semaste 116254721Semaste } 117254721Semaste return *this; 118254721Semaste} 119254721Semaste 120254721Semaste//---------------------------------------------------------------------- 121254721Semaste// Destructor 122254721Semaste//---------------------------------------------------------------------- 123254721SemasteStringExtractor::~StringExtractor() 124254721Semaste{ 125254721Semaste} 126254721Semaste 127254721Semaste 128254721Semastechar 129254721SemasteStringExtractor::GetChar (char fail_value) 130254721Semaste{ 131254721Semaste if (m_index < m_packet.size()) 132254721Semaste { 133254721Semaste char ch = m_packet[m_index]; 134254721Semaste ++m_index; 135254721Semaste return ch; 136254721Semaste } 137254721Semaste m_index = UINT64_MAX; 138254721Semaste return fail_value; 139254721Semaste} 140254721Semaste 141254721Semaste//---------------------------------------------------------------------- 142254721Semaste// Extract an unsigned character from two hex ASCII chars in the packet 143254721Semaste// string 144254721Semaste//---------------------------------------------------------------------- 145254721Semasteuint8_t 146254721SemasteStringExtractor::GetHexU8 (uint8_t fail_value, bool set_eof_on_fail) 147254721Semaste{ 148263367Semaste if (GetBytesLeft() >= 2) 149254721Semaste { 150263367Semaste const uint8_t hi_nibble = g_hex_ascii_to_hex_integer[static_cast<uint8_t>(m_packet[m_index])]; 151263367Semaste const uint8_t lo_nibble = g_hex_ascii_to_hex_integer[static_cast<uint8_t>(m_packet[m_index+1])]; 152254721Semaste if (hi_nibble < 16 && lo_nibble < 16) 153254721Semaste { 154254721Semaste m_index += 2; 155254721Semaste return (hi_nibble << 4) + lo_nibble; 156254721Semaste } 157254721Semaste } 158254721Semaste if (set_eof_on_fail || m_index >= m_packet.size()) 159254721Semaste m_index = UINT64_MAX; 160254721Semaste return fail_value; 161254721Semaste} 162254721Semaste 163254721Semasteuint32_t 164254721SemasteStringExtractor::GetU32 (uint32_t fail_value, int base) 165254721Semaste{ 166254721Semaste if (m_index < m_packet.size()) 167254721Semaste { 168254721Semaste char *end = NULL; 169254721Semaste const char *start = m_packet.c_str(); 170263363Semaste const char *cstr = start + m_index; 171263363Semaste uint32_t result = ::strtoul (cstr, &end, base); 172254721Semaste 173263363Semaste if (end && end != cstr) 174254721Semaste { 175254721Semaste m_index = end - start; 176254721Semaste return result; 177254721Semaste } 178254721Semaste } 179254721Semaste return fail_value; 180254721Semaste} 181254721Semaste 182263363Semasteint32_t 183263363SemasteStringExtractor::GetS32 (int32_t fail_value, int base) 184263363Semaste{ 185263363Semaste if (m_index < m_packet.size()) 186263363Semaste { 187263363Semaste char *end = NULL; 188263363Semaste const char *start = m_packet.c_str(); 189263363Semaste const char *cstr = start + m_index; 190263363Semaste int32_t result = ::strtol (cstr, &end, base); 191263363Semaste 192263363Semaste if (end && end != cstr) 193263363Semaste { 194263363Semaste m_index = end - start; 195263363Semaste return result; 196263363Semaste } 197263363Semaste } 198263363Semaste return fail_value; 199263363Semaste} 200254721Semaste 201263363Semaste 202263363Semasteuint64_t 203263363SemasteStringExtractor::GetU64 (uint64_t fail_value, int base) 204263363Semaste{ 205263363Semaste if (m_index < m_packet.size()) 206263363Semaste { 207263363Semaste char *end = NULL; 208263363Semaste const char *start = m_packet.c_str(); 209263363Semaste const char *cstr = start + m_index; 210263363Semaste uint64_t result = ::strtoull (cstr, &end, base); 211263363Semaste 212263363Semaste if (end && end != cstr) 213263363Semaste { 214263363Semaste m_index = end - start; 215263363Semaste return result; 216263363Semaste } 217263363Semaste } 218263363Semaste return fail_value; 219263363Semaste} 220263363Semaste 221263363Semasteint64_t 222263363SemasteStringExtractor::GetS64 (int64_t fail_value, int base) 223263363Semaste{ 224263363Semaste if (m_index < m_packet.size()) 225263363Semaste { 226263363Semaste char *end = NULL; 227263363Semaste const char *start = m_packet.c_str(); 228263363Semaste const char *cstr = start + m_index; 229263363Semaste int64_t result = ::strtoll (cstr, &end, base); 230263363Semaste 231263363Semaste if (end && end != cstr) 232263363Semaste { 233263363Semaste m_index = end - start; 234263363Semaste return result; 235263363Semaste } 236263363Semaste } 237263363Semaste return fail_value; 238263363Semaste} 239263363Semaste 240263363Semaste 241254721Semasteuint32_t 242254721SemasteStringExtractor::GetHexMaxU32 (bool little_endian, uint32_t fail_value) 243254721Semaste{ 244254721Semaste uint32_t result = 0; 245254721Semaste uint32_t nibble_count = 0; 246254721Semaste 247254721Semaste if (little_endian) 248254721Semaste { 249254721Semaste uint32_t shift_amount = 0; 250254721Semaste while (m_index < m_packet.size() && ::isxdigit (m_packet[m_index])) 251254721Semaste { 252254721Semaste // Make sure we don't exceed the size of a uint32_t... 253254721Semaste if (nibble_count >= (sizeof(uint32_t) * 2)) 254254721Semaste { 255254721Semaste m_index = UINT64_MAX; 256254721Semaste return fail_value; 257254721Semaste } 258254721Semaste 259254721Semaste uint8_t nibble_lo; 260254721Semaste uint8_t nibble_hi = xdigit_to_sint (m_packet[m_index]); 261254721Semaste ++m_index; 262254721Semaste if (m_index < m_packet.size() && ::isxdigit (m_packet[m_index])) 263254721Semaste { 264254721Semaste nibble_lo = xdigit_to_sint (m_packet[m_index]); 265254721Semaste ++m_index; 266254721Semaste result |= ((uint32_t)nibble_hi << (shift_amount + 4)); 267254721Semaste result |= ((uint32_t)nibble_lo << shift_amount); 268254721Semaste nibble_count += 2; 269254721Semaste shift_amount += 8; 270254721Semaste } 271254721Semaste else 272254721Semaste { 273254721Semaste result |= ((uint32_t)nibble_hi << shift_amount); 274254721Semaste nibble_count += 1; 275254721Semaste shift_amount += 4; 276254721Semaste } 277254721Semaste 278254721Semaste } 279254721Semaste } 280254721Semaste else 281254721Semaste { 282254721Semaste while (m_index < m_packet.size() && ::isxdigit (m_packet[m_index])) 283254721Semaste { 284254721Semaste // Make sure we don't exceed the size of a uint32_t... 285254721Semaste if (nibble_count >= (sizeof(uint32_t) * 2)) 286254721Semaste { 287254721Semaste m_index = UINT64_MAX; 288254721Semaste return fail_value; 289254721Semaste } 290254721Semaste 291254721Semaste uint8_t nibble = xdigit_to_sint (m_packet[m_index]); 292254721Semaste // Big Endian 293254721Semaste result <<= 4; 294254721Semaste result |= nibble; 295254721Semaste 296254721Semaste ++m_index; 297254721Semaste ++nibble_count; 298254721Semaste } 299254721Semaste } 300254721Semaste return result; 301254721Semaste} 302254721Semaste 303254721Semasteuint64_t 304254721SemasteStringExtractor::GetHexMaxU64 (bool little_endian, uint64_t fail_value) 305254721Semaste{ 306254721Semaste uint64_t result = 0; 307254721Semaste uint32_t nibble_count = 0; 308254721Semaste 309254721Semaste if (little_endian) 310254721Semaste { 311254721Semaste uint32_t shift_amount = 0; 312254721Semaste while (m_index < m_packet.size() && ::isxdigit (m_packet[m_index])) 313254721Semaste { 314254721Semaste // Make sure we don't exceed the size of a uint64_t... 315254721Semaste if (nibble_count >= (sizeof(uint64_t) * 2)) 316254721Semaste { 317254721Semaste m_index = UINT64_MAX; 318254721Semaste return fail_value; 319254721Semaste } 320254721Semaste 321254721Semaste uint8_t nibble_lo; 322254721Semaste uint8_t nibble_hi = xdigit_to_sint (m_packet[m_index]); 323254721Semaste ++m_index; 324254721Semaste if (m_index < m_packet.size() && ::isxdigit (m_packet[m_index])) 325254721Semaste { 326254721Semaste nibble_lo = xdigit_to_sint (m_packet[m_index]); 327254721Semaste ++m_index; 328254721Semaste result |= ((uint64_t)nibble_hi << (shift_amount + 4)); 329254721Semaste result |= ((uint64_t)nibble_lo << shift_amount); 330254721Semaste nibble_count += 2; 331254721Semaste shift_amount += 8; 332254721Semaste } 333254721Semaste else 334254721Semaste { 335254721Semaste result |= ((uint64_t)nibble_hi << shift_amount); 336254721Semaste nibble_count += 1; 337254721Semaste shift_amount += 4; 338254721Semaste } 339254721Semaste 340254721Semaste } 341254721Semaste } 342254721Semaste else 343254721Semaste { 344254721Semaste while (m_index < m_packet.size() && ::isxdigit (m_packet[m_index])) 345254721Semaste { 346254721Semaste // Make sure we don't exceed the size of a uint64_t... 347254721Semaste if (nibble_count >= (sizeof(uint64_t) * 2)) 348254721Semaste { 349254721Semaste m_index = UINT64_MAX; 350254721Semaste return fail_value; 351254721Semaste } 352254721Semaste 353254721Semaste uint8_t nibble = xdigit_to_sint (m_packet[m_index]); 354254721Semaste // Big Endian 355254721Semaste result <<= 4; 356254721Semaste result |= nibble; 357254721Semaste 358254721Semaste ++m_index; 359254721Semaste ++nibble_count; 360254721Semaste } 361254721Semaste } 362254721Semaste return result; 363254721Semaste} 364254721Semaste 365254721Semastesize_t 366254721SemasteStringExtractor::GetHexBytes (void *dst_void, size_t dst_len, uint8_t fail_fill_value) 367254721Semaste{ 368254721Semaste uint8_t *dst = (uint8_t*)dst_void; 369254721Semaste size_t bytes_extracted = 0; 370254721Semaste while (bytes_extracted < dst_len && GetBytesLeft ()) 371254721Semaste { 372254721Semaste dst[bytes_extracted] = GetHexU8 (fail_fill_value); 373254721Semaste if (IsGood()) 374254721Semaste ++bytes_extracted; 375254721Semaste else 376254721Semaste break; 377254721Semaste } 378254721Semaste 379254721Semaste for (size_t i = bytes_extracted; i < dst_len; ++i) 380254721Semaste dst[i] = fail_fill_value; 381254721Semaste 382254721Semaste return bytes_extracted; 383254721Semaste} 384254721Semaste 385254721Semaste 386254721Semaste// Consume ASCII hex nibble character pairs until we have decoded byte_size 387254721Semaste// bytes of data. 388254721Semaste 389254721Semasteuint64_t 390254721SemasteStringExtractor::GetHexWithFixedSize (uint32_t byte_size, bool little_endian, uint64_t fail_value) 391254721Semaste{ 392254721Semaste if (byte_size <= 8 && GetBytesLeft() >= byte_size * 2) 393254721Semaste { 394254721Semaste uint64_t result = 0; 395254721Semaste uint32_t i; 396254721Semaste if (little_endian) 397254721Semaste { 398254721Semaste // Little Endian 399254721Semaste uint32_t shift_amount; 400254721Semaste for (i = 0, shift_amount = 0; 401254721Semaste i < byte_size && IsGood(); 402254721Semaste ++i, shift_amount += 8) 403254721Semaste { 404254721Semaste result |= ((uint64_t)GetHexU8() << shift_amount); 405254721Semaste } 406254721Semaste } 407254721Semaste else 408254721Semaste { 409254721Semaste // Big Endian 410254721Semaste for (i = 0; i < byte_size && IsGood(); ++i) 411254721Semaste { 412254721Semaste result <<= 8; 413254721Semaste result |= GetHexU8(); 414254721Semaste } 415254721Semaste } 416254721Semaste } 417254721Semaste m_index = UINT64_MAX; 418254721Semaste return fail_value; 419254721Semaste} 420254721Semaste 421254721Semastesize_t 422254721SemasteStringExtractor::GetHexByteString (std::string &str) 423254721Semaste{ 424254721Semaste str.clear(); 425254721Semaste char ch; 426254721Semaste while ((ch = GetHexU8()) != '\0') 427254721Semaste str.append(1, ch); 428254721Semaste return str.size(); 429254721Semaste} 430254721Semaste 431263363Semastesize_t 432263363SemasteStringExtractor::GetHexByteStringTerminatedBy (std::string &str, 433263363Semaste char terminator) 434263363Semaste{ 435263363Semaste str.clear(); 436263363Semaste char ch; 437263363Semaste while ((ch = GetHexU8(0,false)) != '\0') 438263363Semaste str.append(1, ch); 439263363Semaste if (Peek() && *Peek() == terminator) 440263363Semaste return str.size(); 441263363Semaste str.clear(); 442263363Semaste return str.size(); 443263363Semaste} 444263363Semaste 445254721Semastebool 446254721SemasteStringExtractor::GetNameColonValue (std::string &name, std::string &value) 447254721Semaste{ 448254721Semaste // Read something in the form of NNNN:VVVV; where NNNN is any character 449254721Semaste // that is not a colon, followed by a ':' character, then a value (one or 450254721Semaste // more ';' chars), followed by a ';' 451254721Semaste if (m_index < m_packet.size()) 452254721Semaste { 453254721Semaste const size_t colon_idx = m_packet.find (':', m_index); 454254721Semaste if (colon_idx != std::string::npos) 455254721Semaste { 456254721Semaste const size_t semicolon_idx = m_packet.find (';', colon_idx); 457254721Semaste if (semicolon_idx != std::string::npos) 458254721Semaste { 459254721Semaste name.assign (m_packet, m_index, colon_idx - m_index); 460254721Semaste value.assign (m_packet, colon_idx + 1, semicolon_idx - (colon_idx + 1)); 461254721Semaste m_index = semicolon_idx + 1; 462254721Semaste return true; 463254721Semaste } 464254721Semaste } 465254721Semaste } 466254721Semaste m_index = UINT64_MAX; 467254721Semaste return false; 468254721Semaste} 469