1227825Stheraven//===-------------------------- regex.cpp ---------------------------------===// 2227825Stheraven// 3227825Stheraven// The LLVM Compiler Infrastructure 4227825Stheraven// 5227825Stheraven// This file is dual licensed under the MIT and the University of Illinois Open 6227825Stheraven// Source Licenses. See LICENSE.TXT for details. 7227825Stheraven// 8227825Stheraven//===----------------------------------------------------------------------===// 9227825Stheraven 10227825Stheraven#include "regex" 11227825Stheraven#include "algorithm" 12227825Stheraven#include "iterator" 13227825Stheraven 14227825Stheraven_LIBCPP_BEGIN_NAMESPACE_STD 15227825Stheraven 16227825Stheravenstatic 17227825Stheravenconst char* 18227825Stheravenmake_error_type_string(regex_constants::error_type ecode) 19227825Stheraven{ 20227825Stheraven switch (ecode) 21227825Stheraven { 22227825Stheraven case regex_constants::error_collate: 23227825Stheraven return "The expression contained an invalid collating element name."; 24227825Stheraven case regex_constants::error_ctype: 25227825Stheraven return "The expression contained an invalid character class name."; 26227825Stheraven case regex_constants::error_escape: 27227825Stheraven return "The expression contained an invalid escaped character, or a " 28227825Stheraven "trailing escape."; 29227825Stheraven case regex_constants::error_backref: 30227825Stheraven return "The expression contained an invalid back reference."; 31227825Stheraven case regex_constants::error_brack: 32227825Stheraven return "The expression contained mismatched [ and ]."; 33227825Stheraven case regex_constants::error_paren: 34227825Stheraven return "The expression contained mismatched ( and )."; 35227825Stheraven case regex_constants::error_brace: 36227825Stheraven return "The expression contained mismatched { and }."; 37227825Stheraven case regex_constants::error_badbrace: 38227825Stheraven return "The expression contained an invalid range in a {} expression."; 39227825Stheraven case regex_constants::error_range: 40227825Stheraven return "The expression contained an invalid character range, " 41227825Stheraven "such as [b-a] in most encodings."; 42227825Stheraven case regex_constants::error_space: 43227825Stheraven return "There was insufficient memory to convert the expression into " 44227825Stheraven "a finite state machine."; 45227825Stheraven case regex_constants::error_badrepeat: 46227825Stheraven return "One of *?+{ was not preceded by a valid regular expression."; 47227825Stheraven case regex_constants::error_complexity: 48227825Stheraven return "The complexity of an attempted match against a regular " 49227825Stheraven "expression exceeded a pre-set level."; 50227825Stheraven case regex_constants::error_stack: 51227825Stheraven return "There was insufficient memory to determine whether the regular " 52227825Stheraven "expression could match the specified character sequence."; 53227825Stheraven case regex_constants::__re_err_grammar: 54227825Stheraven return "An invalid regex grammar has been requested."; 55227825Stheraven case regex_constants::__re_err_empty: 56227825Stheraven return "An empty regex is not allowed in the POSIX grammar."; 57227825Stheraven default: 58227825Stheraven break; 59227825Stheraven } 60227825Stheraven return "Unknown error type"; 61227825Stheraven} 62227825Stheraven 63227825Stheravenregex_error::regex_error(regex_constants::error_type ecode) 64227825Stheraven : runtime_error(make_error_type_string(ecode)), 65227825Stheraven __code_(ecode) 66227825Stheraven{} 67227825Stheraven 68227825Stheravenregex_error::~regex_error() throw() {} 69227825Stheraven 70227825Stheravennamespace { 71227825Stheraven 72278724Sdim#if defined(__clang__) 73232950Stheraven#pragma clang diagnostic push 74232950Stheraven#pragma clang diagnostic ignored "-Wpadded" 75278724Sdim#endif 76232950Stheraven 77227825Stheravenstruct collationnames 78227825Stheraven{ 79227825Stheraven const char* elem_; 80227825Stheraven char char_; 81227825Stheraven}; 82227825Stheraven 83278724Sdim#if defined(__clang__) 84232950Stheraven#pragma clang diagnostic pop 85278724Sdim#endif 86232950Stheraven 87227825Stheravenconst collationnames collatenames[] = 88227825Stheraven{ 89227825Stheraven {"A", 0x41}, 90227825Stheraven {"B", 0x42}, 91227825Stheraven {"C", 0x43}, 92227825Stheraven {"D", 0x44}, 93227825Stheraven {"E", 0x45}, 94227825Stheraven {"F", 0x46}, 95227825Stheraven {"G", 0x47}, 96227825Stheraven {"H", 0x48}, 97227825Stheraven {"I", 0x49}, 98227825Stheraven {"J", 0x4a}, 99227825Stheraven {"K", 0x4b}, 100227825Stheraven {"L", 0x4c}, 101227825Stheraven {"M", 0x4d}, 102227825Stheraven {"N", 0x4e}, 103227825Stheraven {"NUL", 0x00}, 104227825Stheraven {"O", 0x4f}, 105227825Stheraven {"P", 0x50}, 106227825Stheraven {"Q", 0x51}, 107227825Stheraven {"R", 0x52}, 108227825Stheraven {"S", 0x53}, 109227825Stheraven {"T", 0x54}, 110227825Stheraven {"U", 0x55}, 111227825Stheraven {"V", 0x56}, 112227825Stheraven {"W", 0x57}, 113227825Stheraven {"X", 0x58}, 114227825Stheraven {"Y", 0x59}, 115227825Stheraven {"Z", 0x5a}, 116227825Stheraven {"a", 0x61}, 117227825Stheraven {"alert", 0x07}, 118227825Stheraven {"ampersand", 0x26}, 119227825Stheraven {"apostrophe", 0x27}, 120227825Stheraven {"asterisk", 0x2a}, 121227825Stheraven {"b", 0x62}, 122227825Stheraven {"backslash", 0x5c}, 123227825Stheraven {"backspace", 0x08}, 124227825Stheraven {"c", 0x63}, 125227825Stheraven {"carriage-return", 0x0d}, 126227825Stheraven {"circumflex", 0x5e}, 127227825Stheraven {"circumflex-accent", 0x5e}, 128227825Stheraven {"colon", 0x3a}, 129227825Stheraven {"comma", 0x2c}, 130227825Stheraven {"commercial-at", 0x40}, 131227825Stheraven {"d", 0x64}, 132227825Stheraven {"dollar-sign", 0x24}, 133227825Stheraven {"e", 0x65}, 134227825Stheraven {"eight", 0x38}, 135227825Stheraven {"equals-sign", 0x3d}, 136227825Stheraven {"exclamation-mark", 0x21}, 137227825Stheraven {"f", 0x66}, 138227825Stheraven {"five", 0x35}, 139227825Stheraven {"form-feed", 0x0c}, 140227825Stheraven {"four", 0x34}, 141227825Stheraven {"full-stop", 0x2e}, 142227825Stheraven {"g", 0x67}, 143227825Stheraven {"grave-accent", 0x60}, 144227825Stheraven {"greater-than-sign", 0x3e}, 145227825Stheraven {"h", 0x68}, 146227825Stheraven {"hyphen", 0x2d}, 147227825Stheraven {"hyphen-minus", 0x2d}, 148227825Stheraven {"i", 0x69}, 149227825Stheraven {"j", 0x6a}, 150227825Stheraven {"k", 0x6b}, 151227825Stheraven {"l", 0x6c}, 152227825Stheraven {"left-brace", 0x7b}, 153227825Stheraven {"left-curly-bracket", 0x7b}, 154227825Stheraven {"left-parenthesis", 0x28}, 155227825Stheraven {"left-square-bracket", 0x5b}, 156227825Stheraven {"less-than-sign", 0x3c}, 157227825Stheraven {"low-line", 0x5f}, 158227825Stheraven {"m", 0x6d}, 159227825Stheraven {"n", 0x6e}, 160227825Stheraven {"newline", 0x0a}, 161227825Stheraven {"nine", 0x39}, 162227825Stheraven {"number-sign", 0x23}, 163227825Stheraven {"o", 0x6f}, 164227825Stheraven {"one", 0x31}, 165227825Stheraven {"p", 0x70}, 166227825Stheraven {"percent-sign", 0x25}, 167227825Stheraven {"period", 0x2e}, 168227825Stheraven {"plus-sign", 0x2b}, 169227825Stheraven {"q", 0x71}, 170227825Stheraven {"question-mark", 0x3f}, 171227825Stheraven {"quotation-mark", 0x22}, 172227825Stheraven {"r", 0x72}, 173227825Stheraven {"reverse-solidus", 0x5c}, 174227825Stheraven {"right-brace", 0x7d}, 175227825Stheraven {"right-curly-bracket", 0x7d}, 176227825Stheraven {"right-parenthesis", 0x29}, 177227825Stheraven {"right-square-bracket", 0x5d}, 178227825Stheraven {"s", 0x73}, 179227825Stheraven {"semicolon", 0x3b}, 180227825Stheraven {"seven", 0x37}, 181227825Stheraven {"six", 0x36}, 182227825Stheraven {"slash", 0x2f}, 183227825Stheraven {"solidus", 0x2f}, 184227825Stheraven {"space", 0x20}, 185227825Stheraven {"t", 0x74}, 186227825Stheraven {"tab", 0x09}, 187227825Stheraven {"three", 0x33}, 188227825Stheraven {"tilde", 0x7e}, 189227825Stheraven {"two", 0x32}, 190227825Stheraven {"u", 0x75}, 191227825Stheraven {"underscore", 0x5f}, 192227825Stheraven {"v", 0x76}, 193227825Stheraven {"vertical-line", 0x7c}, 194227825Stheraven {"vertical-tab", 0x0b}, 195227825Stheraven {"w", 0x77}, 196227825Stheraven {"x", 0x78}, 197227825Stheraven {"y", 0x79}, 198227825Stheraven {"z", 0x7a}, 199227825Stheraven {"zero", 0x30} 200227825Stheraven}; 201227825Stheraven 202278724Sdim#if defined(__clang__) 203232950Stheraven#pragma clang diagnostic push 204232950Stheraven#pragma clang diagnostic ignored "-Wpadded" 205278724Sdim#endif 206232950Stheraven 207227825Stheravenstruct classnames 208227825Stheraven{ 209227825Stheraven const char* elem_; 210278724Sdim regex_traits<char>::char_class_type mask_; 211227825Stheraven}; 212227825Stheraven 213278724Sdim#if defined(__clang__) 214232950Stheraven#pragma clang diagnostic pop 215278724Sdim#endif 216232950Stheraven 217227825Stheravenconst classnames ClassNames[] = 218227825Stheraven{ 219227825Stheraven {"alnum", ctype_base::alnum}, 220227825Stheraven {"alpha", ctype_base::alpha}, 221227825Stheraven {"blank", ctype_base::blank}, 222227825Stheraven {"cntrl", ctype_base::cntrl}, 223227825Stheraven {"d", ctype_base::digit}, 224227825Stheraven {"digit", ctype_base::digit}, 225227825Stheraven {"graph", ctype_base::graph}, 226227825Stheraven {"lower", ctype_base::lower}, 227227825Stheraven {"print", ctype_base::print}, 228227825Stheraven {"punct", ctype_base::punct}, 229227825Stheraven {"s", ctype_base::space}, 230227825Stheraven {"space", ctype_base::space}, 231227825Stheraven {"upper", ctype_base::upper}, 232227825Stheraven {"w", regex_traits<char>::__regex_word}, 233227825Stheraven {"xdigit", ctype_base::xdigit} 234227825Stheraven}; 235227825Stheraven 236227825Stheravenstruct use_strcmp 237227825Stheraven{ 238227825Stheraven bool operator()(const collationnames& x, const char* y) 239227825Stheraven {return strcmp(x.elem_, y) < 0;} 240227825Stheraven bool operator()(const classnames& x, const char* y) 241227825Stheraven {return strcmp(x.elem_, y) < 0;} 242227825Stheraven}; 243227825Stheraven 244227825Stheraven} 245227825Stheraven 246227825Stheravenstring 247227825Stheraven__get_collation_name(const char* s) 248227825Stheraven{ 249227825Stheraven const collationnames* i = 250227825Stheraven _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); 251227825Stheraven string r; 252227825Stheraven if (i != end(collatenames) && strcmp(s, i->elem_) == 0) 253227825Stheraven r = char(i->char_); 254227825Stheraven return r; 255227825Stheraven} 256227825Stheraven 257278724Sdimregex_traits<char>::char_class_type 258227825Stheraven__get_classname(const char* s, bool __icase) 259227825Stheraven{ 260227825Stheraven const classnames* i = 261227825Stheraven _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); 262278724Sdim regex_traits<char>::char_class_type r = 0; 263227825Stheraven if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) 264227825Stheraven { 265227825Stheraven r = i->mask_; 266227825Stheraven if (r == regex_traits<char>::__regex_word) 267227825Stheraven r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; 268227825Stheraven else if (__icase) 269227825Stheraven { 270227825Stheraven if (r & (ctype_base::lower | ctype_base::upper)) 271227825Stheraven r |= ctype_base::alpha; 272227825Stheraven } 273227825Stheraven } 274227825Stheraven return r; 275227825Stheraven} 276227825Stheraven 277227825Stheraventemplate <> 278227825Stheravenvoid 279227825Stheraven__match_any_but_newline<char>::__exec(__state& __s) const 280227825Stheraven{ 281227825Stheraven if (__s.__current_ != __s.__last_) 282227825Stheraven { 283227825Stheraven switch (*__s.__current_) 284227825Stheraven { 285227825Stheraven case '\r': 286227825Stheraven case '\n': 287227825Stheraven __s.__do_ = __state::__reject; 288227825Stheraven __s.__node_ = nullptr; 289227825Stheraven break; 290227825Stheraven default: 291227825Stheraven __s.__do_ = __state::__accept_and_consume; 292227825Stheraven ++__s.__current_; 293227825Stheraven __s.__node_ = this->first(); 294227825Stheraven break; 295227825Stheraven } 296227825Stheraven } 297227825Stheraven else 298227825Stheraven { 299227825Stheraven __s.__do_ = __state::__reject; 300227825Stheraven __s.__node_ = nullptr; 301227825Stheraven } 302227825Stheraven} 303227825Stheraven 304227825Stheraventemplate <> 305227825Stheravenvoid 306227825Stheraven__match_any_but_newline<wchar_t>::__exec(__state& __s) const 307227825Stheraven{ 308227825Stheraven if (__s.__current_ != __s.__last_) 309227825Stheraven { 310227825Stheraven switch (*__s.__current_) 311227825Stheraven { 312227825Stheraven case '\r': 313227825Stheraven case '\n': 314227825Stheraven case 0x2028: 315227825Stheraven case 0x2029: 316227825Stheraven __s.__do_ = __state::__reject; 317227825Stheraven __s.__node_ = nullptr; 318227825Stheraven break; 319227825Stheraven default: 320227825Stheraven __s.__do_ = __state::__accept_and_consume; 321227825Stheraven ++__s.__current_; 322227825Stheraven __s.__node_ = this->first(); 323227825Stheraven break; 324227825Stheraven } 325227825Stheraven } 326227825Stheraven else 327227825Stheraven { 328227825Stheraven __s.__do_ = __state::__reject; 329227825Stheraven __s.__node_ = nullptr; 330227825Stheraven } 331227825Stheraven} 332227825Stheraven 333227825Stheraven_LIBCPP_END_NAMESPACE_STD 334