161452Sdfr//===-------------------------- regex.cpp ---------------------------------===// 261452Sdfr// 361452Sdfr// The LLVM Compiler Infrastructure 461452Sdfr// 561452Sdfr// This file is dual licensed under the MIT and the University of Illinois Open 661452Sdfr// Source Licenses. See LICENSE.TXT for details. 761452Sdfr// 861452Sdfr//===----------------------------------------------------------------------===// 961452Sdfr 1061452Sdfr#include "regex" 1161452Sdfr#include "algorithm" 1261452Sdfr#include "iterator" 1361452Sdfr 1461452Sdfr_LIBCPP_BEGIN_NAMESPACE_STD 1561452Sdfr 1661452Sdfrstatic 1761452Sdfrconst char* 1861452Sdfrmake_error_type_string(regex_constants::error_type ecode) 1961452Sdfr{ 2061452Sdfr switch (ecode) 2161452Sdfr { 2261452Sdfr case regex_constants::error_collate: 2361452Sdfr return "The expression contained an invalid collating element name."; 2461452Sdfr case regex_constants::error_ctype: 2561452Sdfr return "The expression contained an invalid character class name."; 2661452Sdfr case regex_constants::error_escape: 27116192Sobrien return "The expression contained an invalid escaped character, or a " 28116192Sobrien "trailing escape."; 29116192Sobrien case regex_constants::error_backref: 3061452Sdfr return "The expression contained an invalid back reference."; 3161452Sdfr case regex_constants::error_brack: 3261452Sdfr return "The expression contained mismatched [ and ]."; 3361452Sdfr case regex_constants::error_paren: 3461452Sdfr return "The expression contained mismatched ( and )."; 3561452Sdfr case regex_constants::error_brace: 36129878Sphk return "The expression contained mismatched { and }."; 3761452Sdfr case regex_constants::error_badbrace: 3861452Sdfr return "The expression contained an invalid range in a {} expression."; 3961452Sdfr case regex_constants::error_range: 4061452Sdfr return "The expression contained an invalid character range, " 4161452Sdfr "such as [b-a] in most encodings."; 4276827Salfred case regex_constants::error_space: 4369927Sjhb return "There was insufficient memory to convert the expression into " 4461452Sdfr "a finite state machine."; 45173573Sjhb case regex_constants::error_badrepeat: 46173573Sjhb return "One of *?+{ was not preceded by a valid regular expression."; 47173573Sjhb case regex_constants::error_complexity: 48119288Simp return "The complexity of an attempted match against a regular " 49119288Simp "expression exceeded a pre-set level."; 5061452Sdfr case regex_constants::error_stack: 5161452Sdfr return "There was insufficient memory to determine whether the regular " 5261452Sdfr "expression could match the specified character sequence."; 5361452Sdfr case regex_constants::__re_err_grammar: 5461452Sdfr return "An invalid regex grammar has been requested."; 5561452Sdfr case regex_constants::__re_err_empty: 5661452Sdfr return "An empty regex is not allowed in the POSIX grammar."; 5761452Sdfr default: 5861452Sdfr break; 5961452Sdfr } 6061452Sdfr return "Unknown error type"; 6161452Sdfr} 6261452Sdfr 6361452Sdfrregex_error::regex_error(regex_constants::error_type ecode) 6470185Sassar : runtime_error(make_error_type_string(ecode)), 6561452Sdfr __code_(ecode) 6661452Sdfr{} 6761452Sdfr 6861452Sdfrregex_error::~regex_error() throw() {} 6961452Sdfr 7061452Sdfrnamespace { 7161452Sdfr 7261452Sdfr#pragma clang diagnostic push 73126080Sphk#pragma clang diagnostic ignored "-Wpadded" 74126080Sphk 75111815Sphkstruct collationnames 76111815Sphk{ 77111815Sphk const char* elem_; 78111815Sphk char char_; 79111815Sphk}; 8061452Sdfr 8161452Sdfr#pragma clang diagnostic pop 8261452Sdfr 8361452Sdfrconst collationnames collatenames[] = 8461452Sdfr{ 8561452Sdfr {"A", 0x41}, 8661452Sdfr {"B", 0x42}, 8761501Sdfr {"C", 0x43}, 8861501Sdfr {"D", 0x44}, 8961501Sdfr {"E", 0x45}, 90133852Sobrien {"F", 0x46}, 9161501Sdfr {"G", 0x47}, 9261452Sdfr {"H", 0x48}, 9361501Sdfr {"I", 0x49}, 9461452Sdfr {"J", 0x4a}, 9561452Sdfr {"K", 0x4b}, 9661452Sdfr {"L", 0x4c}, 9761452Sdfr {"M", 0x4d}, 98153561Sjhb {"N", 0x4e}, 9961452Sdfr {"NUL", 0x00}, 10061452Sdfr {"O", 0x4f}, 101153561Sjhb {"P", 0x50}, 102153561Sjhb {"Q", 0x51}, 103153561Sjhb {"R", 0x52}, 10461452Sdfr {"S", 0x53}, 10561452Sdfr {"T", 0x54}, 10661452Sdfr {"U", 0x55}, 10761452Sdfr {"V", 0x56}, 10861452Sdfr {"W", 0x57}, 10961452Sdfr {"X", 0x58}, 11061452Sdfr {"Y", 0x59}, 11161452Sdfr {"Z", 0x5a}, 11261452Sdfr {"a", 0x61}, 11361452Sdfr {"alert", 0x07}, 11461452Sdfr {"ampersand", 0x26}, 11561452Sdfr {"apostrophe", 0x27}, 11661452Sdfr {"asterisk", 0x2a}, 11761452Sdfr {"b", 0x62}, 11861452Sdfr {"backslash", 0x5c}, 11961452Sdfr {"backspace", 0x08}, 12061452Sdfr {"c", 0x63}, 121182068Simp {"carriage-return", 0x0d}, 122182068Simp {"circumflex", 0x5e}, 12361452Sdfr {"circumflex-accent", 0x5e}, 12461452Sdfr {"colon", 0x3a}, 12561452Sdfr {"comma", 0x2c}, 12661452Sdfr {"commercial-at", 0x40}, 12761452Sdfr {"d", 0x64}, 12861452Sdfr {"dollar-sign", 0x24}, 12961452Sdfr {"e", 0x65}, 13061452Sdfr {"eight", 0x38}, 13161452Sdfr {"equals-sign", 0x3d}, 13261452Sdfr {"exclamation-mark", 0x21}, 13361452Sdfr {"f", 0x66}, 13461452Sdfr {"five", 0x35}, 13561452Sdfr {"form-feed", 0x0c}, 13661452Sdfr {"four", 0x34}, 13761452Sdfr {"full-stop", 0x2e}, 13861452Sdfr {"g", 0x67}, 13961452Sdfr {"grave-accent", 0x60}, 14061452Sdfr {"greater-than-sign", 0x3e}, 14161452Sdfr {"h", 0x68}, 14261452Sdfr {"hyphen", 0x2d}, 14361452Sdfr {"hyphen-minus", 0x2d}, 14461452Sdfr {"i", 0x69}, 14561452Sdfr {"j", 0x6a}, 14661452Sdfr {"k", 0x6b}, 14761452Sdfr {"l", 0x6c}, 14861452Sdfr {"left-brace", 0x7b}, 14961452Sdfr {"left-curly-bracket", 0x7b}, 15061452Sdfr {"left-parenthesis", 0x28}, 151122513Sanholt {"left-square-bracket", 0x5b}, 152122513Sanholt {"less-than-sign", 0x3c}, 153122513Sanholt {"low-line", 0x5f}, 154122513Sanholt {"m", 0x6d}, 155122513Sanholt {"n", 0x6e}, 15661452Sdfr {"newline", 0x0a}, 15761452Sdfr {"nine", 0x39}, 15861452Sdfr {"number-sign", 0x23}, 15961452Sdfr {"o", 0x6f}, 16061452Sdfr {"one", 0x31}, 16161452Sdfr {"p", 0x70}, 16261452Sdfr {"percent-sign", 0x25}, 16361452Sdfr {"period", 0x2e}, 16461452Sdfr {"plus-sign", 0x2b}, 16561452Sdfr {"q", 0x71}, 16661452Sdfr {"question-mark", 0x3f}, 16761452Sdfr {"quotation-mark", 0x22}, 16861452Sdfr {"r", 0x72}, 16961452Sdfr {"reverse-solidus", 0x5c}, 17061452Sdfr {"right-brace", 0x7d}, 17161452Sdfr {"right-curly-bracket", 0x7d}, 17261452Sdfr {"right-parenthesis", 0x29}, 17361452Sdfr {"right-square-bracket", 0x5d}, 17461452Sdfr {"s", 0x73}, 17561452Sdfr {"semicolon", 0x3b}, 17661452Sdfr {"seven", 0x37}, 17761452Sdfr {"six", 0x36}, 17861452Sdfr {"slash", 0x2f}, 17961452Sdfr {"solidus", 0x2f}, 18061452Sdfr {"space", 0x20}, 18161452Sdfr {"t", 0x74}, 18261452Sdfr {"tab", 0x09}, 18361452Sdfr {"three", 0x33}, 184163362Stanimura {"tilde", 0x7e}, 18561452Sdfr {"two", 0x32}, 18661452Sdfr {"u", 0x75}, 18761452Sdfr {"underscore", 0x5f}, 18861452Sdfr {"v", 0x76}, 18961452Sdfr {"vertical-line", 0x7c}, 19061452Sdfr {"vertical-tab", 0x0b}, 19161452Sdfr {"w", 0x77}, 19261452Sdfr {"x", 0x78}, 19361452Sdfr {"y", 0x79}, 19461452Sdfr {"z", 0x7a}, 19561452Sdfr {"zero", 0x30} 19661452Sdfr}; 197171433Sanholt 198171433Sanholt#pragma clang diagnostic push 199171433Sanholt#pragma clang diagnostic ignored "-Wpadded" 200171433Sanholt 201171433Sanholtstruct classnames 202171433Sanholt{ 203171433Sanholt const char* elem_; 204171433Sanholt ctype_base::mask mask_; 205171433Sanholt}; 206171433Sanholt 207171433Sanholt#pragma clang diagnostic pop 208171433Sanholt 209171433Sanholtconst classnames ClassNames[] = 210171433Sanholt{ 21161452Sdfr {"alnum", ctype_base::alnum}, 21261452Sdfr {"alpha", ctype_base::alpha}, 21361452Sdfr {"blank", ctype_base::blank}, 21461452Sdfr {"cntrl", ctype_base::cntrl}, 215171433Sanholt {"d", ctype_base::digit}, 216163362Stanimura {"digit", ctype_base::digit}, 21761452Sdfr {"graph", ctype_base::graph}, 21861452Sdfr {"lower", ctype_base::lower}, 219171433Sanholt {"print", ctype_base::print}, 220171433Sanholt {"punct", ctype_base::punct}, 22161452Sdfr {"s", ctype_base::space}, 222171433Sanholt {"space", ctype_base::space}, 223171433Sanholt {"upper", ctype_base::upper}, 224171433Sanholt {"w", regex_traits<char>::__regex_word}, 225171433Sanholt {"xdigit", ctype_base::xdigit} 226171433Sanholt}; 22761452Sdfr 22861452Sdfrstruct use_strcmp 22961452Sdfr{ 23061452Sdfr bool operator()(const collationnames& x, const char* y) 23161452Sdfr {return strcmp(x.elem_, y) < 0;} 23261452Sdfr bool operator()(const classnames& x, const char* y) 23361452Sdfr {return strcmp(x.elem_, y) < 0;} 23461452Sdfr}; 23561452Sdfr 23661452Sdfr} 23761452Sdfr 23861452Sdfrstring 23961452Sdfr__get_collation_name(const char* s) 24061452Sdfr{ 24161452Sdfr const collationnames* i = 24261452Sdfr _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); 24361452Sdfr string r; 24461452Sdfr if (i != end(collatenames) && strcmp(s, i->elem_) == 0) 24561452Sdfr r = char(i->char_); 246129579Smux return r; 24761452Sdfr} 24861452Sdfr 24961452Sdfrctype_base::mask 25061452Sdfr__get_classname(const char* s, bool __icase) 25161452Sdfr{ 25261452Sdfr const classnames* i = 25361452Sdfr _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); 25461452Sdfr ctype_base::mask r = 0; 25561452Sdfr if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) 25661452Sdfr { 25761452Sdfr r = i->mask_; 25861452Sdfr if (r == regex_traits<char>::__regex_word) 25961452Sdfr r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; 26061452Sdfr else if (__icase) 26161452Sdfr { 26261452Sdfr if (r & (ctype_base::lower | ctype_base::upper)) 26361452Sdfr r |= ctype_base::alpha; 26461452Sdfr } 265173203Sjhb } 266173203Sjhb return r; 26761452Sdfr} 26861452Sdfr 269153562Sjhbtemplate <> 270153562Sjhbvoid 271173203Sjhb__match_any_but_newline<char>::__exec(__state& __s) const 272173203Sjhb{ 273173203Sjhb if (__s.__current_ != __s.__last_) 274173203Sjhb { 275173203Sjhb switch (*__s.__current_) 276173203Sjhb { 277173203Sjhb case '\r': 278171433Sanholt case '\n': 279171433Sanholt __s.__do_ = __state::__reject; 280129579Smux __s.__node_ = nullptr; 28161452Sdfr break; 282173203Sjhb default: 283173203Sjhb __s.__do_ = __state::__accept_and_consume; 284173203Sjhb ++__s.__current_; 285173203Sjhb __s.__node_ = this->first(); 286173203Sjhb break; 287173203Sjhb } 288173203Sjhb } 289173203Sjhb else 29061452Sdfr { 29161452Sdfr __s.__do_ = __state::__reject; 29261452Sdfr __s.__node_ = nullptr; 293171433Sanholt } 294171433Sanholt} 295171433Sanholt 296171433Sanholttemplate <> 297171433Sanholtvoid 298171433Sanholt__match_any_but_newline<wchar_t>::__exec(__state& __s) const 299171433Sanholt{ 300171433Sanholt if (__s.__current_ != __s.__last_) 301171433Sanholt { 302171433Sanholt switch (*__s.__current_) 303171433Sanholt { 304171433Sanholt case '\r': 305171433Sanholt case '\n': 306171433Sanholt case 0x2028: 307171433Sanholt case 0x2029: 308171433Sanholt __s.__do_ = __state::__reject; 309171433Sanholt __s.__node_ = nullptr; 310171433Sanholt break; 311171433Sanholt default: 312171433Sanholt __s.__do_ = __state::__accept_and_consume; 313171433Sanholt ++__s.__current_; 314171433Sanholt __s.__node_ = this->first(); 315171433Sanholt break; 316171433Sanholt } 317171433Sanholt } 318171433Sanholt else 319171433Sanholt { 320171433Sanholt __s.__do_ = __state::__reject; 321121440Sjhb __s.__node_ = nullptr; 322121440Sjhb } 323121440Sjhb} 324121440Sjhb 325121440Sjhb_LIBCPP_END_NAMESPACE_STD 326121440Sjhb