1227825Stheraven//===-------------------------- regex.cpp ---------------------------------===// 2227825Stheraven// 3227825Stheraven// The LLVM Compiler Infrastructure 4227825Stheraven// 5227825Stheraven// This file is dual licensed under the MIT and the University of Illinois Open 6227825Stheraven// Source Licenses. See LICENSE.TXT for details. 7227825Stheraven// 8227825Stheraven//===----------------------------------------------------------------------===// 9227825Stheraven 10227825Stheraven#include "regex" 11227825Stheraven#include "algorithm" 12227825Stheraven#include "iterator" 13227825Stheraven 14227825Stheraven_LIBCPP_BEGIN_NAMESPACE_STD 15227825Stheraven 16227825Stheravenstatic 17227825Stheravenconst char* 18227825Stheravenmake_error_type_string(regex_constants::error_type ecode) 19227825Stheraven{ 20227825Stheraven switch (ecode) 21227825Stheraven { 22227825Stheraven case regex_constants::error_collate: 23227825Stheraven return "The expression contained an invalid collating element name."; 24227825Stheraven case regex_constants::error_ctype: 25227825Stheraven return "The expression contained an invalid character class name."; 26227825Stheraven case regex_constants::error_escape: 27227825Stheraven return "The expression contained an invalid escaped character, or a " 28227825Stheraven "trailing escape."; 29227825Stheraven case regex_constants::error_backref: 30227825Stheraven return "The expression contained an invalid back reference."; 31227825Stheraven case regex_constants::error_brack: 32227825Stheraven return "The expression contained mismatched [ and ]."; 33227825Stheraven case regex_constants::error_paren: 34227825Stheraven return "The expression contained mismatched ( and )."; 35227825Stheraven case regex_constants::error_brace: 36227825Stheraven return "The expression contained mismatched { and }."; 37227825Stheraven case regex_constants::error_badbrace: 38227825Stheraven return "The expression contained an invalid range in a {} expression."; 39227825Stheraven case regex_constants::error_range: 40227825Stheraven return "The expression contained an invalid character range, " 41227825Stheraven "such as [b-a] in most encodings."; 42227825Stheraven case regex_constants::error_space: 43227825Stheraven return "There was insufficient memory to convert the expression into " 44227825Stheraven "a finite state machine."; 45227825Stheraven case regex_constants::error_badrepeat: 46227825Stheraven return "One of *?+{ was not preceded by a valid regular expression."; 47227825Stheraven case regex_constants::error_complexity: 48227825Stheraven return "The complexity of an attempted match against a regular " 49227825Stheraven "expression exceeded a pre-set level."; 50227825Stheraven case regex_constants::error_stack: 51227825Stheraven return "There was insufficient memory to determine whether the regular " 52227825Stheraven "expression could match the specified character sequence."; 53227825Stheraven case regex_constants::__re_err_grammar: 54227825Stheraven return "An invalid regex grammar has been requested."; 55227825Stheraven case regex_constants::__re_err_empty: 56227825Stheraven return "An empty regex is not allowed in the POSIX grammar."; 57227825Stheraven default: 58227825Stheraven break; 59227825Stheraven } 60227825Stheraven return "Unknown error type"; 61227825Stheraven} 62227825Stheraven 63227825Stheravenregex_error::regex_error(regex_constants::error_type ecode) 64227825Stheraven : runtime_error(make_error_type_string(ecode)), 65227825Stheraven __code_(ecode) 66227825Stheraven{} 67227825Stheraven 68227825Stheravenregex_error::~regex_error() throw() {} 69227825Stheraven 70227825Stheravennamespace { 71227825Stheraven 72232950Stheraven#pragma clang diagnostic push 73232950Stheraven#pragma clang diagnostic ignored "-Wpadded" 74232950Stheraven 75227825Stheravenstruct collationnames 76227825Stheraven{ 77227825Stheraven const char* elem_; 78227825Stheraven char char_; 79227825Stheraven}; 80227825Stheraven 81232950Stheraven#pragma clang diagnostic pop 82232950Stheraven 83227825Stheravenconst collationnames collatenames[] = 84227825Stheraven{ 85227825Stheraven {"A", 0x41}, 86227825Stheraven {"B", 0x42}, 87227825Stheraven {"C", 0x43}, 88227825Stheraven {"D", 0x44}, 89227825Stheraven {"E", 0x45}, 90227825Stheraven {"F", 0x46}, 91227825Stheraven {"G", 0x47}, 92227825Stheraven {"H", 0x48}, 93227825Stheraven {"I", 0x49}, 94227825Stheraven {"J", 0x4a}, 95227825Stheraven {"K", 0x4b}, 96227825Stheraven {"L", 0x4c}, 97227825Stheraven {"M", 0x4d}, 98227825Stheraven {"N", 0x4e}, 99227825Stheraven {"NUL", 0x00}, 100227825Stheraven {"O", 0x4f}, 101227825Stheraven {"P", 0x50}, 102227825Stheraven {"Q", 0x51}, 103227825Stheraven {"R", 0x52}, 104227825Stheraven {"S", 0x53}, 105227825Stheraven {"T", 0x54}, 106227825Stheraven {"U", 0x55}, 107227825Stheraven {"V", 0x56}, 108227825Stheraven {"W", 0x57}, 109227825Stheraven {"X", 0x58}, 110227825Stheraven {"Y", 0x59}, 111227825Stheraven {"Z", 0x5a}, 112227825Stheraven {"a", 0x61}, 113227825Stheraven {"alert", 0x07}, 114227825Stheraven {"ampersand", 0x26}, 115227825Stheraven {"apostrophe", 0x27}, 116227825Stheraven {"asterisk", 0x2a}, 117227825Stheraven {"b", 0x62}, 118227825Stheraven {"backslash", 0x5c}, 119227825Stheraven {"backspace", 0x08}, 120227825Stheraven {"c", 0x63}, 121227825Stheraven {"carriage-return", 0x0d}, 122227825Stheraven {"circumflex", 0x5e}, 123227825Stheraven {"circumflex-accent", 0x5e}, 124227825Stheraven {"colon", 0x3a}, 125227825Stheraven {"comma", 0x2c}, 126227825Stheraven {"commercial-at", 0x40}, 127227825Stheraven {"d", 0x64}, 128227825Stheraven {"dollar-sign", 0x24}, 129227825Stheraven {"e", 0x65}, 130227825Stheraven {"eight", 0x38}, 131227825Stheraven {"equals-sign", 0x3d}, 132227825Stheraven {"exclamation-mark", 0x21}, 133227825Stheraven {"f", 0x66}, 134227825Stheraven {"five", 0x35}, 135227825Stheraven {"form-feed", 0x0c}, 136227825Stheraven {"four", 0x34}, 137227825Stheraven {"full-stop", 0x2e}, 138227825Stheraven {"g", 0x67}, 139227825Stheraven {"grave-accent", 0x60}, 140227825Stheraven {"greater-than-sign", 0x3e}, 141227825Stheraven {"h", 0x68}, 142227825Stheraven {"hyphen", 0x2d}, 143227825Stheraven {"hyphen-minus", 0x2d}, 144227825Stheraven {"i", 0x69}, 145227825Stheraven {"j", 0x6a}, 146227825Stheraven {"k", 0x6b}, 147227825Stheraven {"l", 0x6c}, 148227825Stheraven {"left-brace", 0x7b}, 149227825Stheraven {"left-curly-bracket", 0x7b}, 150227825Stheraven {"left-parenthesis", 0x28}, 151227825Stheraven {"left-square-bracket", 0x5b}, 152227825Stheraven {"less-than-sign", 0x3c}, 153227825Stheraven {"low-line", 0x5f}, 154227825Stheraven {"m", 0x6d}, 155227825Stheraven {"n", 0x6e}, 156227825Stheraven {"newline", 0x0a}, 157227825Stheraven {"nine", 0x39}, 158227825Stheraven {"number-sign", 0x23}, 159227825Stheraven {"o", 0x6f}, 160227825Stheraven {"one", 0x31}, 161227825Stheraven {"p", 0x70}, 162227825Stheraven {"percent-sign", 0x25}, 163227825Stheraven {"period", 0x2e}, 164227825Stheraven {"plus-sign", 0x2b}, 165227825Stheraven {"q", 0x71}, 166227825Stheraven {"question-mark", 0x3f}, 167227825Stheraven {"quotation-mark", 0x22}, 168227825Stheraven {"r", 0x72}, 169227825Stheraven {"reverse-solidus", 0x5c}, 170227825Stheraven {"right-brace", 0x7d}, 171227825Stheraven {"right-curly-bracket", 0x7d}, 172227825Stheraven {"right-parenthesis", 0x29}, 173227825Stheraven {"right-square-bracket", 0x5d}, 174227825Stheraven {"s", 0x73}, 175227825Stheraven {"semicolon", 0x3b}, 176227825Stheraven {"seven", 0x37}, 177227825Stheraven {"six", 0x36}, 178227825Stheraven {"slash", 0x2f}, 179227825Stheraven {"solidus", 0x2f}, 180227825Stheraven {"space", 0x20}, 181227825Stheraven {"t", 0x74}, 182227825Stheraven {"tab", 0x09}, 183227825Stheraven {"three", 0x33}, 184227825Stheraven {"tilde", 0x7e}, 185227825Stheraven {"two", 0x32}, 186227825Stheraven {"u", 0x75}, 187227825Stheraven {"underscore", 0x5f}, 188227825Stheraven {"v", 0x76}, 189227825Stheraven {"vertical-line", 0x7c}, 190227825Stheraven {"vertical-tab", 0x0b}, 191227825Stheraven {"w", 0x77}, 192227825Stheraven {"x", 0x78}, 193227825Stheraven {"y", 0x79}, 194227825Stheraven {"z", 0x7a}, 195227825Stheraven {"zero", 0x30} 196227825Stheraven}; 197227825Stheraven 198232950Stheraven#pragma clang diagnostic push 199232950Stheraven#pragma clang diagnostic ignored "-Wpadded" 200232950Stheraven 201227825Stheravenstruct classnames 202227825Stheraven{ 203227825Stheraven const char* elem_; 204227825Stheraven ctype_base::mask mask_; 205227825Stheraven}; 206227825Stheraven 207232950Stheraven#pragma clang diagnostic pop 208232950Stheraven 209227825Stheravenconst classnames ClassNames[] = 210227825Stheraven{ 211227825Stheraven {"alnum", ctype_base::alnum}, 212227825Stheraven {"alpha", ctype_base::alpha}, 213227825Stheraven {"blank", ctype_base::blank}, 214227825Stheraven {"cntrl", ctype_base::cntrl}, 215227825Stheraven {"d", ctype_base::digit}, 216227825Stheraven {"digit", ctype_base::digit}, 217227825Stheraven {"graph", ctype_base::graph}, 218227825Stheraven {"lower", ctype_base::lower}, 219227825Stheraven {"print", ctype_base::print}, 220227825Stheraven {"punct", ctype_base::punct}, 221227825Stheraven {"s", ctype_base::space}, 222227825Stheraven {"space", ctype_base::space}, 223227825Stheraven {"upper", ctype_base::upper}, 224227825Stheraven {"w", regex_traits<char>::__regex_word}, 225227825Stheraven {"xdigit", ctype_base::xdigit} 226227825Stheraven}; 227227825Stheraven 228227825Stheravenstruct use_strcmp 229227825Stheraven{ 230227825Stheraven bool operator()(const collationnames& x, const char* y) 231227825Stheraven {return strcmp(x.elem_, y) < 0;} 232227825Stheraven bool operator()(const classnames& x, const char* y) 233227825Stheraven {return strcmp(x.elem_, y) < 0;} 234227825Stheraven}; 235227825Stheraven 236227825Stheraven} 237227825Stheraven 238227825Stheravenstring 239227825Stheraven__get_collation_name(const char* s) 240227825Stheraven{ 241227825Stheraven const collationnames* i = 242227825Stheraven _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); 243227825Stheraven string r; 244227825Stheraven if (i != end(collatenames) && strcmp(s, i->elem_) == 0) 245227825Stheraven r = char(i->char_); 246227825Stheraven return r; 247227825Stheraven} 248227825Stheraven 249227825Stheravenctype_base::mask 250227825Stheraven__get_classname(const char* s, bool __icase) 251227825Stheraven{ 252227825Stheraven const classnames* i = 253227825Stheraven _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); 254227825Stheraven ctype_base::mask r = 0; 255227825Stheraven if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) 256227825Stheraven { 257227825Stheraven r = i->mask_; 258227825Stheraven if (r == regex_traits<char>::__regex_word) 259227825Stheraven r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; 260227825Stheraven else if (__icase) 261227825Stheraven { 262227825Stheraven if (r & (ctype_base::lower | ctype_base::upper)) 263227825Stheraven r |= ctype_base::alpha; 264227825Stheraven } 265227825Stheraven } 266227825Stheraven return r; 267227825Stheraven} 268227825Stheraven 269227825Stheraventemplate <> 270227825Stheravenvoid 271227825Stheraven__match_any_but_newline<char>::__exec(__state& __s) const 272227825Stheraven{ 273227825Stheraven if (__s.__current_ != __s.__last_) 274227825Stheraven { 275227825Stheraven switch (*__s.__current_) 276227825Stheraven { 277227825Stheraven case '\r': 278227825Stheraven case '\n': 279227825Stheraven __s.__do_ = __state::__reject; 280227825Stheraven __s.__node_ = nullptr; 281227825Stheraven break; 282227825Stheraven default: 283227825Stheraven __s.__do_ = __state::__accept_and_consume; 284227825Stheraven ++__s.__current_; 285227825Stheraven __s.__node_ = this->first(); 286227825Stheraven break; 287227825Stheraven } 288227825Stheraven } 289227825Stheraven else 290227825Stheraven { 291227825Stheraven __s.__do_ = __state::__reject; 292227825Stheraven __s.__node_ = nullptr; 293227825Stheraven } 294227825Stheraven} 295227825Stheraven 296227825Stheraventemplate <> 297227825Stheravenvoid 298227825Stheraven__match_any_but_newline<wchar_t>::__exec(__state& __s) const 299227825Stheraven{ 300227825Stheraven if (__s.__current_ != __s.__last_) 301227825Stheraven { 302227825Stheraven switch (*__s.__current_) 303227825Stheraven { 304227825Stheraven case '\r': 305227825Stheraven case '\n': 306227825Stheraven case 0x2028: 307227825Stheraven case 0x2029: 308227825Stheraven __s.__do_ = __state::__reject; 309227825Stheraven __s.__node_ = nullptr; 310227825Stheraven break; 311227825Stheraven default: 312227825Stheraven __s.__do_ = __state::__accept_and_consume; 313227825Stheraven ++__s.__current_; 314227825Stheraven __s.__node_ = this->first(); 315227825Stheraven break; 316227825Stheraven } 317227825Stheraven } 318227825Stheraven else 319227825Stheraven { 320227825Stheraven __s.__do_ = __state::__reject; 321227825Stheraven __s.__node_ = nullptr; 322227825Stheraven } 323227825Stheraven} 324227825Stheraven 325227825Stheraven_LIBCPP_END_NAMESPACE_STD 326