| //===-------------------------- regex.cpp ---------------------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is dual licensed under the MIT and the University of Illinois Open |
| // Source Licenses. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "regex" |
| #include "algorithm" |
| #include "iterator" |
| |
| _LIBCPP_BEGIN_NAMESPACE_STD |
| |
| static |
| const char* |
| make_error_type_string(regex_constants::error_type ecode) |
| { |
| switch (ecode) |
| { |
| case regex_constants::error_collate: |
| return "The expression contained an invalid collating element name."; |
| case regex_constants::error_ctype: |
| return "The expression contained an invalid character class name."; |
| case regex_constants::error_escape: |
| return "The expression contained an invalid escaped character, or a " |
| "trailing escape."; |
| case regex_constants::error_backref: |
| return "The expression contained an invalid back reference."; |
| case regex_constants::error_brack: |
| return "The expression contained mismatched [ and ]."; |
| case regex_constants::error_paren: |
| return "The expression contained mismatched ( and )."; |
| case regex_constants::error_brace: |
| return "The expression contained mismatched { and }."; |
| case regex_constants::error_badbrace: |
| return "The expression contained an invalid range in a {} expression."; |
| case regex_constants::error_range: |
| return "The expression contained an invalid character range, " |
| "such as [b-a] in most encodings."; |
| case regex_constants::error_space: |
| return "There was insufficient memory to convert the expression into " |
| "a finite state machine."; |
| case regex_constants::error_badrepeat: |
| return "One of *?+{ was not preceded by a valid regular expression."; |
| case regex_constants::error_complexity: |
| return "The complexity of an attempted match against a regular " |
| "expression exceeded a pre-set level."; |
| case regex_constants::error_stack: |
| return "There was insufficient memory to determine whether the regular " |
| "expression could match the specified character sequence."; |
| case regex_constants::__re_err_grammar: |
| return "An invalid regex grammar has been requested."; |
| case regex_constants::__re_err_empty: |
| return "An empty regex is not allowed in the POSIX grammar."; |
| default: |
| break; |
| } |
| return "Unknown error type"; |
| } |
| |
| regex_error::regex_error(regex_constants::error_type ecode) |
| : runtime_error(make_error_type_string(ecode)), |
| __code_(ecode) |
| {} |
| |
| regex_error::~regex_error() throw() {} |
| |
| namespace { |
| |
| struct collationnames |
| { |
| const char* elem_; |
| char char_; |
| }; |
| |
| const collationnames collatenames[] = |
| { |
| {"A", 0x41}, |
| {"B", 0x42}, |
| {"C", 0x43}, |
| {"D", 0x44}, |
| {"E", 0x45}, |
| {"F", 0x46}, |
| {"G", 0x47}, |
| {"H", 0x48}, |
| {"I", 0x49}, |
| {"J", 0x4a}, |
| {"K", 0x4b}, |
| {"L", 0x4c}, |
| {"M", 0x4d}, |
| {"N", 0x4e}, |
| {"NUL", 0x00}, |
| {"O", 0x4f}, |
| {"P", 0x50}, |
| {"Q", 0x51}, |
| {"R", 0x52}, |
| {"S", 0x53}, |
| {"T", 0x54}, |
| {"U", 0x55}, |
| {"V", 0x56}, |
| {"W", 0x57}, |
| {"X", 0x58}, |
| {"Y", 0x59}, |
| {"Z", 0x5a}, |
| {"a", 0x61}, |
| {"alert", 0x07}, |
| {"ampersand", 0x26}, |
| {"apostrophe", 0x27}, |
| {"asterisk", 0x2a}, |
| {"b", 0x62}, |
| {"backslash", 0x5c}, |
| {"backspace", 0x08}, |
| {"c", 0x63}, |
| {"carriage-return", 0x0d}, |
| {"circumflex", 0x5e}, |
| {"circumflex-accent", 0x5e}, |
| {"colon", 0x3a}, |
| {"comma", 0x2c}, |
| {"commercial-at", 0x40}, |
| {"d", 0x64}, |
| {"dollar-sign", 0x24}, |
| {"e", 0x65}, |
| {"eight", 0x38}, |
| {"equals-sign", 0x3d}, |
| {"exclamation-mark", 0x21}, |
| {"f", 0x66}, |
| {"five", 0x35}, |
| {"form-feed", 0x0c}, |
| {"four", 0x34}, |
| {"full-stop", 0x2e}, |
| {"g", 0x67}, |
| {"grave-accent", 0x60}, |
| {"greater-than-sign", 0x3e}, |
| {"h", 0x68}, |
| {"hyphen", 0x2d}, |
| {"hyphen-minus", 0x2d}, |
| {"i", 0x69}, |
| {"j", 0x6a}, |
| {"k", 0x6b}, |
| {"l", 0x6c}, |
| {"left-brace", 0x7b}, |
| {"left-curly-bracket", 0x7b}, |
| {"left-parenthesis", 0x28}, |
| {"left-square-bracket", 0x5b}, |
| {"less-than-sign", 0x3c}, |
| {"low-line", 0x5f}, |
| {"m", 0x6d}, |
| {"n", 0x6e}, |
| {"newline", 0x0a}, |
| {"nine", 0x39}, |
| {"number-sign", 0x23}, |
| {"o", 0x6f}, |
| {"one", 0x31}, |
| {"p", 0x70}, |
| {"percent-sign", 0x25}, |
| {"period", 0x2e}, |
| {"plus-sign", 0x2b}, |
| {"q", 0x71}, |
| {"question-mark", 0x3f}, |
| {"quotation-mark", 0x22}, |
| {"r", 0x72}, |
| {"reverse-solidus", 0x5c}, |
| {"right-brace", 0x7d}, |
| {"right-curly-bracket", 0x7d}, |
| {"right-parenthesis", 0x29}, |
| {"right-square-bracket", 0x5d}, |
| {"s", 0x73}, |
| {"semicolon", 0x3b}, |
| {"seven", 0x37}, |
| {"six", 0x36}, |
| {"slash", 0x2f}, |
| {"solidus", 0x2f}, |
| {"space", 0x20}, |
| {"t", 0x74}, |
| {"tab", 0x09}, |
| {"three", 0x33}, |
| {"tilde", 0x7e}, |
| {"two", 0x32}, |
| {"u", 0x75}, |
| {"underscore", 0x5f}, |
| {"v", 0x76}, |
| {"vertical-line", 0x7c}, |
| {"vertical-tab", 0x0b}, |
| {"w", 0x77}, |
| {"x", 0x78}, |
| {"y", 0x79}, |
| {"z", 0x7a}, |
| {"zero", 0x30} |
| }; |
| |
| struct classnames |
| { |
| const char* elem_; |
| ctype_base::mask mask_; |
| }; |
| |
| const classnames ClassNames[] = |
| { |
| {"alnum", ctype_base::alnum}, |
| {"alpha", ctype_base::alpha}, |
| {"blank", ctype_base::blank}, |
| {"cntrl", ctype_base::cntrl}, |
| {"d", ctype_base::digit}, |
| {"digit", ctype_base::digit}, |
| {"graph", ctype_base::graph}, |
| {"lower", ctype_base::lower}, |
| {"print", ctype_base::print}, |
| {"punct", ctype_base::punct}, |
| {"s", ctype_base::space}, |
| {"space", ctype_base::space}, |
| {"upper", ctype_base::upper}, |
| {"w", regex_traits<char>::__regex_word}, |
| {"xdigit", ctype_base::xdigit} |
| }; |
| |
| struct use_strcmp |
| { |
| bool operator()(const collationnames& x, const char* y) |
| {return strcmp(x.elem_, y) < 0;} |
| bool operator()(const classnames& x, const char* y) |
| {return strcmp(x.elem_, y) < 0;} |
| }; |
| |
| } |
| |
| string |
| __get_collation_name(const char* s) |
| { |
| const collationnames* i = |
| _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); |
| string r; |
| if (i != end(collatenames) && strcmp(s, i->elem_) == 0) |
| r = char(i->char_); |
| return r; |
| } |
| |
| ctype_base::mask |
| __get_classname(const char* s, bool __icase) |
| { |
| const classnames* i = |
| _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); |
| ctype_base::mask r = 0; |
| if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) |
| { |
| r = i->mask_; |
| if (r == regex_traits<char>::__regex_word) |
| r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; |
| else if (__icase) |
| { |
| if (r & (ctype_base::lower | ctype_base::upper)) |
| r |= ctype_base::alpha; |
| } |
| } |
| return r; |
| } |
| |
| template <> |
| void |
| __match_any_but_newline<char>::__exec(__state& __s) const |
| { |
| if (__s.__current_ != __s.__last_) |
| { |
| switch (*__s.__current_) |
| { |
| case '\r': |
| case '\n': |
| __s.__do_ = __state::__reject; |
| __s.__node_ = nullptr; |
| break; |
| default: |
| __s.__do_ = __state::__accept_and_consume; |
| ++__s.__current_; |
| __s.__node_ = this->first(); |
| break; |
| } |
| } |
| else |
| { |
| __s.__do_ = __state::__reject; |
| __s.__node_ = nullptr; |
| } |
| } |
| |
| template <> |
| void |
| __match_any_but_newline<wchar_t>::__exec(__state& __s) const |
| { |
| if (__s.__current_ != __s.__last_) |
| { |
| switch (*__s.__current_) |
| { |
| case '\r': |
| case '\n': |
| case 0x2028: |
| case 0x2029: |
| __s.__do_ = __state::__reject; |
| __s.__node_ = nullptr; |
| break; |
| default: |
| __s.__do_ = __state::__accept_and_consume; |
| ++__s.__current_; |
| __s.__node_ = this->first(); |
| break; |
| } |
| } |
| else |
| { |
| __s.__do_ = __state::__reject; |
| __s.__node_ = nullptr; |
| } |
| } |
| |
| _LIBCPP_END_NAMESPACE_STD |