diff --git a/.github/workflows/tools.yml b/.github/workflows/tools.yml index f5091a872a6b7e..52dc634ee846a2 100644 --- a/.github/workflows/tools.yml +++ b/.github/workflows/tools.yml @@ -161,6 +161,16 @@ jobs: cat temp-output tail -n1 temp-output | grep "NEW_VERSION=" >> "$GITHUB_ENV" || true rm temp-output + - id: ada + subsystem: deps + label: dependencies + run: | + NEW_VERSION=$(gh api repos/ada-url/ada/releases/latest -q '.tag_name|ltrimstr("v")') + CURRENT_VERSION=$(grep "#define ADA_VERSION" ./deps/ada/ada.h | sed -n "s/^.*VERSION \(.*\)/\1/p") + if [ "$NEW_VERSION" != "$CURRENT_VERSION" ]; then + echo "NEW_VERSION=$NEW_VERSION" >> $GITHUB_ENV + ./tools/dep_updaters/update-ada.sh "$NEW_VERSION" + fi steps: - uses: actions/checkout@v3 with: diff --git a/LICENSE b/LICENSE index f8fa687202dcb9..26221cb042bdd1 100644 --- a/LICENSE +++ b/LICENSE @@ -1338,6 +1338,28 @@ The externally maintained libraries used by Node.js are: CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +- ada, located at deps/ada, is licensed as follows: + """ + Copyright 2023 Ada authors + + Permission is hereby granted, free of charge, to any person obtaining a copy of + this software and associated documentation files (the "Software"), to deal in + the Software without restriction, including without limitation the rights to + use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + """ + - npm, located at deps/npm, is licensed as follows: """ The npm application diff --git a/Makefile b/Makefile index a6485b87f69767..0be0659d372d2d 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ with-code-cache test-code-cache: out/Makefile: config.gypi common.gypi node.gyp \ deps/uv/uv.gyp deps/llhttp/llhttp.gyp deps/zlib/zlib.gyp \ - deps/simdutf/simdutf.gyp \ + deps/simdutf/simdutf.gyp deps/ada/ada.gyp \ tools/v8_gypfiles/toolchain.gypi tools/v8_gypfiles/features.gypi \ tools/v8_gypfiles/inspector.gypi tools/v8_gypfiles/v8.gyp $(PYTHON) tools/gyp_node.py -f make diff --git a/deps/ada/LICENSE-MIT b/deps/ada/LICENSE-MIT new file mode 100644 index 00000000000000..9f3e50e0db7856 --- /dev/null +++ b/deps/ada/LICENSE-MIT @@ -0,0 +1,18 @@ +Copyright 2023 Ada authors + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/ada/README.md b/deps/ada/README.md new file mode 100644 index 00000000000000..018262c9519f0a --- /dev/null +++ b/deps/ada/README.md @@ -0,0 +1,12 @@ +# ada + +This project implements WHATWG URL specification in a performant way. + +The source is pulled from: https://github.com/ada-url/ada + +Active development occurs in the default branch (currently named `main`). + +## Updating + +See [tools/dep_updaters/README.md#ada](../../tools/dep_updaters/README.md#ada) +for instructions. diff --git a/deps/ada/ada.cpp b/deps/ada/ada.cpp new file mode 100644 index 00000000000000..8b2cdd38ad0bb1 --- /dev/null +++ b/deps/ada/ada.cpp @@ -0,0 +1,2760 @@ +/* auto-generated on 2023-02-26 15:07:41 -0500. Do not edit! */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=ada.cpp +/* begin file src/ada.cpp */ +#include "ada.h" +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=checkers.cpp +/* begin file src/checkers.cpp */ +#include + +namespace ada::checkers { + + ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept { + size_t last_dot = view.rfind('.'); + if(last_dot == view.size() - 1) { + view.remove_suffix(1); + last_dot = view.rfind('.'); + } + std::string_view number = (last_dot == std::string_view::npos) ? view : view.substr(last_dot+1); + if(number.empty()) { return false; } + /** Optimization opportunity: we have basically identified the last number of the + ipv4 if we return true here. We might as well parse it and have at least one + number parsed when we get to parse_ipv4. 
*/ + if(std::all_of(number.begin(), number.end(), ada::checkers::is_digit)) { return true; } + return (checkers::has_hex_prefix(number) && std::all_of(number.begin()+2, number.end(), ada::unicode::is_lowercase_hex)); + } + + + // for use with path_signature, we include all characters that need percent encoding. + static constexpr uint8_t path_signature_table[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + static_assert(path_signature_table[uint8_t('?')] == 1); + static_assert(path_signature_table[uint8_t('`')] == 1); + static_assert(path_signature_table[uint8_t('{')] == 1); + static_assert(path_signature_table[uint8_t('}')] == 1); + // + static_assert(path_signature_table[uint8_t(' ')] == 1); + static_assert(path_signature_table[uint8_t('?')] == 1); + static_assert(path_signature_table[uint8_t('"')] == 1); + static_assert(path_signature_table[uint8_t('#')] == 1); + static_assert(path_signature_table[uint8_t('<')] == 1); + static_assert(path_signature_table[uint8_t('>')] == 1); + // + static_assert(path_signature_table[0] == 1); + static_assert(path_signature_table[31] == 1); + static_assert(path_signature_table[127] == 1); + static_assert(path_signature_table[128] == 1); + static_assert(path_signature_table[255] == 1); + + 
/**
 * Verifies the length limits of a dot-separated domain name.
 *
 * The whole name may be at most 253 characters long, or 254 when it ends with
 * a trailing dot. Every label (the text between two dots) must contain between
 * 1 and 63 characters.
 *
 * @param input the domain to check.
 * @return true when all limits are respected; false otherwise, including for
 *         an empty input (it contains no valid label).
 */
ada_really_inline constexpr bool verify_dns_length(std::string_view input) noexcept {
  // back() on an empty view is undefined behaviour, so reject it up front.
  if (input.empty()) { return false; }

  // A trailing dot is not counted against the 253 character limit.
  const size_t max_size = (input.back() == '.') ? 254 : 253;
  if (input.size() > max_size) { return false; }

  size_t start = 0;
  while (start < input.size()) {
    auto dot_location = input.find('.', start);
    // If not found, the current label runs to the end of the domain.
    if (dot_location == std::string_view::npos) { dot_location = input.size(); }

    const auto label_size = dot_location - start;
    if (label_size > 63 || label_size == 0) { return false; }

    start = dot_location + 1;
  }

  return true;
}
/**
 * Converts the ASCII upper-case letters 'A'..'Z' of a buffer to lower case,
 * in place, processing eight bytes at a time.
 *
 * @param input  buffer to modify; must hold at least `length` bytes.
 * @param length number of bytes to process.
 * @return true when no byte had its high bit set (the buffer is pure ASCII).
 *         When false is returned the buffer has still been modified, and bytes
 *         next to a non-ASCII byte may have been altered by carries.
 */
constexpr bool to_lower_ascii(char * input, size_t length) noexcept {
  // Unsigned literal: broadcast(0x80) does not fit a signed 64-bit integer.
  auto broadcast = [](uint8_t v) -> uint64_t { return 0x0101010101010101ull * v; };
  uint64_t broadcast_80 = broadcast(0x80);
  // Adding (128 - 'A') sets the high bit of a byte exactly when byte >= 'A'.
  uint64_t broadcast_Ap = broadcast(128 - 'A');
  // Adding (128 - 'Z' - 1) sets the high bit exactly when byte > 'Z'. Without
  // the "- 1" the letter 'Z' itself would be excluded and never lowercased.
  uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
  uint64_t non_ascii = 0;
  size_t i = 0;

  for (; i + 7 < length; i += 8) {
    uint64_t word{};
    memcpy(&word, input + i, sizeof(word));
    non_ascii |= (word & broadcast_80);
    // The XOR has the high bit set only for bytes in ['A', 'Z']; shifting it
    // right by two yields 0x20, the bit that separates upper from lower case.
    word ^= (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
    memcpy(input + i, &word, sizeof(word));
  }
  if (i < length) {
    // Tail of fewer than eight bytes: the unused bytes of `word` stay zero and
    // are never written back.
    uint64_t word{};
    memcpy(&word, input + i, length - i);
    non_ascii |= (word & broadcast_80);
    word ^= (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
    memcpy(input + i, &word, length - i);
  }
  return non_ascii == 0;
}
xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor3 = word ^ mask3; + running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); + } + return running; + } + + // A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR, U+0020 SPACE, U+0023 (#), + // U+002F (/), U+003A (:), U+003C (<), U+003E (>), U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), + // U+005E (^), or U+007C (|). + constexpr static bool is_forbidden_host_code_point_table[] = { + 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + static_assert(sizeof(is_forbidden_host_code_point_table) == 256); + + ada_really_inline constexpr bool is_forbidden_host_code_point(const char c) noexcept { + return is_forbidden_host_code_point_table[uint8_t(c)]; + } + + static_assert(unicode::is_forbidden_host_code_point('\0')); + static_assert(unicode::is_forbidden_host_code_point('\t')); + static_assert(unicode::is_forbidden_host_code_point('\n')); + static_assert(unicode::is_forbidden_host_code_point('\r')); + static_assert(unicode::is_forbidden_host_code_point(' ')); + static_assert(unicode::is_forbidden_host_code_point('#')); + static_assert(unicode::is_forbidden_host_code_point('/')); + 
static_assert(unicode::is_forbidden_host_code_point(':')); + static_assert(unicode::is_forbidden_host_code_point('?')); + static_assert(unicode::is_forbidden_host_code_point('@')); + static_assert(unicode::is_forbidden_host_code_point('[')); + static_assert(unicode::is_forbidden_host_code_point('?')); + static_assert(unicode::is_forbidden_host_code_point('<')); + static_assert(unicode::is_forbidden_host_code_point('>')); + static_assert(unicode::is_forbidden_host_code_point('\\')); + static_assert(unicode::is_forbidden_host_code_point(']')); + static_assert(unicode::is_forbidden_host_code_point('^')); + static_assert(unicode::is_forbidden_host_code_point('|')); + +constexpr static uint8_t is_forbidden_domain_code_point_table[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + + static_assert(sizeof(is_forbidden_domain_code_point_table) == 256); + + ada_really_inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept { + return is_forbidden_domain_code_point_table[uint8_t(c)]; + } + + ada_really_inline constexpr bool contains_forbidden_domain_code_point(char * input, size_t length) noexcept { + size_t i = 0; + uint8_t accumulator{}; + for(; i + 4 <= length; i+=4) { + accumulator |= 
is_forbidden_domain_code_point_table[uint8_t(input[i])]; + accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i+1])]; + accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i+2])]; + accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i+3])]; + } + for(; i < length; i++) { + accumulator |= is_forbidden_domain_code_point_table[uint8_t(input[i])]; + } + return accumulator; + } + + static_assert(unicode::is_forbidden_domain_code_point('%')); + static_assert(unicode::is_forbidden_domain_code_point('\x7f')); + static_assert(unicode::is_forbidden_domain_code_point('\0')); + static_assert(unicode::is_forbidden_domain_code_point('\t')); + static_assert(unicode::is_forbidden_domain_code_point('\n')); + static_assert(unicode::is_forbidden_domain_code_point('\r')); + static_assert(unicode::is_forbidden_domain_code_point(' ')); + static_assert(unicode::is_forbidden_domain_code_point('#')); + static_assert(unicode::is_forbidden_domain_code_point('/')); + static_assert(unicode::is_forbidden_domain_code_point(':')); + static_assert(unicode::is_forbidden_domain_code_point('?')); + static_assert(unicode::is_forbidden_domain_code_point('@')); + static_assert(unicode::is_forbidden_domain_code_point('[')); + static_assert(unicode::is_forbidden_domain_code_point('?')); + static_assert(unicode::is_forbidden_domain_code_point('<')); + static_assert(unicode::is_forbidden_domain_code_point('>')); + static_assert(unicode::is_forbidden_domain_code_point('\\')); + static_assert(unicode::is_forbidden_domain_code_point(']')); + static_assert(unicode::is_forbidden_domain_code_point('^')); + static_assert(unicode::is_forbidden_domain_code_point('|')); + + constexpr static bool is_alnum_plus_table[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + static_assert(sizeof(is_alnum_plus_table) == 256); + + ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept { + return is_alnum_plus_table[uint8_t(c)]; + // A table is almost surely much faster than the + // following under most compilers: return + // return (std::isalnum(c) || c == '+' || c == '-' || c == '.'); + } + static_assert(unicode::is_alnum_plus('+')); + static_assert(unicode::is_alnum_plus('-')); + static_assert(unicode::is_alnum_plus('.')); + static_assert(unicode::is_alnum_plus('0')); + static_assert(unicode::is_alnum_plus('1')); + static_assert(unicode::is_alnum_plus('a')); + static_assert(unicode::is_alnum_plus('b')); + + ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c<= 'f'); + } + + ada_really_inline constexpr bool is_c0_control_or_space(const char c) noexcept { + return (unsigned char) c <= ' '; + } + + ada_really_inline constexpr bool is_ascii_tab_or_newline(const char c) noexcept { + return c == '\t' || c == '\n' || c == '\r'; + } + + constexpr std::string_view table_is_double_dot_path_segment[] = {"..", "%2e.", ".%2e", "%2e%2e"}; + + ada_really_inline ada_constexpr bool is_double_dot_path_segment(std::string_view input) noexcept { + // This will catch most cases: + // The length must be 2,4 or 6. + // We divide by two and require + // that the result be between 1 and 3 inclusively. 
/**
 * Returns the numeric value (0-15) of a hexadecimal digit.
 *
 * The caller is expected to pass a valid hexadecimal digit ('0'-'9', 'A'-'F'
 * or 'a'-'f'); no validation is performed here.
 */
unsigned constexpr convert_hex_to_binary(const char c) noexcept {
  // this code can be optimized.
  if (c > '9') {
    // Letter digit: pick the base that matches its case.
    const char base = (c < 'a') ? 'A' : 'a';
    return 10 + (c - base);
  }
  return c - '0';
}
+ + for (;pointer != input.end(); pointer++) { + if (character_sets::bit_at(character_set, *pointer)) { + result.append(character_sets::hex + uint8_t(*pointer) * 4, 3); + } else { + result += *pointer; + } + } + + return result; + } + + + bool percent_encode(const std::string_view input, const uint8_t character_set[], std::string &out) { + auto pointer = std::find_if(input.begin(), input.end(), [character_set](const char c) { + return character_sets::bit_at(character_set, c); + }); + // Optimization: Don't iterate if percent encode is not required + if (pointer == input.end()) { return false; } + out.clear(); + out.append(input.data(), std::distance(input.begin(), pointer)); + + for (;pointer != input.end(); pointer++) { + if (character_sets::bit_at(character_set, *pointer)) { + out.append(character_sets::hex + uint8_t(*pointer) * 4, 3); + } else { + out += *pointer; + } + } + return true; + } + + bool to_ascii(std::optional& out, const std::string_view plain, const bool be_strict, size_t first_percent) { + std::string percent_decoded_buffer; + std::string_view input = plain; + if(first_percent != std::string_view::npos) { + percent_decoded_buffer = unicode::percent_decode(plain, first_percent); + input = percent_decoded_buffer; + } +#if ADA_HAS_ICU + out = std::string(255, 0); + + UErrorCode status = U_ZERO_ERROR; + uint32_t options = UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_NONTRANSITIONAL_TO_ASCII; + + if (be_strict) { + options |= UIDNA_USE_STD3_RULES; + } + + UIDNA* uidna = uidna_openUTS46(options, &status); + if (U_FAILURE(status)) { + return false; + } + + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + // RFC 1035 section 2.3.4. + // The domain name must be at most 255 octets. + // It cannot contain a label longer than 63 octets. + // Thus we should never need more than 255 octets, if we + // do the domain name is in error. 
+ int32_t length = uidna_nameToASCII_UTF8(uidna, + input.data(), + int32_t(input.length()), + out.value().data(), 255, + &info, + &status); + + if (status == U_BUFFER_OVERFLOW_ERROR) { + status = U_ZERO_ERROR; + out.value().resize(length); + // When be_strict is true, this should not be allowed! + length = uidna_nameToASCII_UTF8(uidna, + input.data(), + int32_t(input.length()), + out.value().data(), length, + &info, + &status); + } + + // A label contains hyphen-minus ('-') in the third and fourth positions. + info.errors &= ~UIDNA_ERROR_HYPHEN_3_4; + // A label starts with a hyphen-minus ('-'). + info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; + // A label ends with a hyphen-minus ('-'). + info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; + + if (!be_strict) { // This seems to violate RFC 1035 section 2.3.4. + // A non-final domain name label (or the whole domain name) is empty. + info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; + // A domain name label is longer than 63 bytes. + info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; + // A domain name is longer than 255 bytes in its storage form. + info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + } + + uidna_close(uidna); + + if (U_FAILURE(status) || info.errors != 0 || length == 0) { + out = std::nullopt; + return false; + } + out.value().resize(length); // we possibly want to call :shrink_to_fit otherwise we use 255 bytes. + out.value().shrink_to_fit(); +#elif defined(_WIN32) && ADA_WINDOWS_TO_ASCII_FALLBACK + (void)be_strict; // unused. + // Fallback on the system if ICU is not available. + // Windows function assumes UTF-16. 
+ std::unique_ptr buffer(new char16_t[input.size()]); + auto convert = [](const char* buf, size_t len, char16_t* utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t* start{utf16_output}; + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while(pos < final_pos) { + *utf16_output++ = char16_t(buf[pos]); + pos++; + } + continue; + } + } + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if(pos + 1 >= len) { return 0; } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } + // range check + uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { return 0; } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. 
+ if(pos + 2 >= len) { return 0; } // minimal bound checking + + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return 0; + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if(pos + 3 >= len) { return 0; } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { return 0; } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { return 0; } + + // range check + uint32_t code_point = + (leading_byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { return 0; } + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + return 0; + } + } + return int(utf16_output - start); + }; + size_t codepoints = convert(input.data(), input.size(), buffer.get()); + if(codepoints == 0) { + out = std::nullopt; + return false; + } + int required_buffer_size = IdnToAscii(IDN_ALLOW_UNASSIGNED, (LPCWSTR)buffer.get(), codepoints, NULL, 0); + + if(required_buffer_size == 0) { + out = std::nullopt; + return false; + } + + out = std::string(required_buffer_size, 0); + std::unique_ptr ascii_buffer(new char16_t[required_buffer_size]); + + required_buffer_size = IdnToAscii(IDN_ALLOW_UNASSIGNED, (LPCWSTR)buffer.get(), codepoints, 
/**
 * Finds the first longest run of zero pieces in an IPv6 address.
 *
 * @param address         the eight 16-bit pieces of the address.
 * @param compress        in/out: set to the index where the selected run starts.
 * @param compress_length in/out: set to the length of the selected run.
 *
 * Both outputs are only updated when a run strictly longer than the incoming
 * `compress_length` is found, so callers should initialise them (the visible
 * caller passes 0 for both). When several runs share the maximum length, the
 * first one wins.
 */
void find_longest_sequence_of_ipv6_pieces(const std::array<uint16_t, 8>& address, size_t& compress, size_t& compress_length) noexcept {
  for (size_t i = 0; i < 8; i++) {
    if (address[i] != 0) { continue; }
    // Measure the run of zeros starting at i.
    size_t next = i + 1;
    while (next != 8 && address[next] == 0) { ++next; }
    const size_t count = next - i;
    if (compress_length < count) {
      compress_length = count;
      compress = i;
    }
    // The run reached the end of the address: nothing left to scan.
    if (next == 8) { break; }
    // Skip the run we just measured, whether or not it was selected; its
    // sub-runs can never be longer. address[next] is non-zero and the loop
    // increment steps past it.
    i = next;
  }
}
/**
 * Serializes an IPv4 address as dotted decimal text.
 *
 * Only the low 32 bits of `address` are used: the most significant of those
 * four bytes is written first, e.g. 0x7f000001 becomes "127.0.0.1".
 */
std::string ipv4(const uint64_t address) noexcept {
  // "255.255.255.255" is the longest possible output: 15 characters.
  std::string text(15, '\0');
  char *cursor = text.data();
  char *const limit = text.data() + text.size();
  for (int shift = 24; shift >= 0; shift -= 8) {
    // Every byte but the first is preceded by a dot.
    if (shift != 24) { *cursor++ = '.'; }
    cursor = std::to_chars(cursor, limit, uint8_t(address >> shift)).ptr;
  }
  // Trim the buffer down to what was actually written.
  text.resize(cursor - text.data());
  return text;
}
+ std::string tmp_buffer; + std::string_view internal_input; + if(unicode::has_tabs_or_newline(input)) { + tmp_buffer = input; + helpers::remove_ascii_tab_or_newline(tmp_buffer); + internal_input = tmp_buffer; + } else { + internal_input = input; + } + std::string path; + if(internal_input.empty()) { + path = "/"; + } else if((internal_input[0] == '/') ||(internal_input[0] == '\\')){ + helpers::parse_prepared_path(internal_input.substr(1), ada::scheme::type::FILE, path); + } else { + helpers::parse_prepared_path(internal_input, ada::scheme::type::FILE, path); + } + return "file://" + path; + } + + ada_warn_unused std::string to_string(ada::encoding_type type) { + switch(type) { + case ada::encoding_type::UTF8 : return "UTF-8"; + case ada::encoding_type::UTF_16LE : return "UTF-16LE"; + case ada::encoding_type::UTF_16BE : return "UTF-16BE"; + default: unreachable(); + } + } + +} // namespace ada +/* end file src/implementation.cpp */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=helpers.cpp +/* begin file src/helpers.cpp */ + +#include +#include +#include +#include + +namespace ada::helpers { + + template + void encode_json(std::string_view view, out_iter out) { + // trivial implementation. could be faster. 
+ const char * hexvalues = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"; + for(uint8_t c : view) { + if(c == '\\') { + *out++ = '\\'; *out++ = '\\'; + } else if(c == '"') { + *out++ = '\\'; *out++ = '"'; + } else if(c <= 0x1f) { + *out++ = '\\'; *out++= 'u'; *out++= '0'; *out++= '0'; + *out++ = hexvalues[2*c]; + *out++ = hexvalues[2*c+1]; + } else { + *out++ = c; + } + } + } + + ada_unused std::string get_state(ada::state s) { + switch (s) { + case ada::state::AUTHORITY: return "Authority"; + case ada::state::SCHEME_START: return "Scheme Start"; + case ada::state::SCHEME: return "Scheme"; + case ada::state::HOST: return "Host"; + case ada::state::NO_SCHEME: return "No Scheme"; + case ada::state::FRAGMENT: return "Fragment"; + case ada::state::RELATIVE_SCHEME: return "Relative Scheme"; + case ada::state::RELATIVE_SLASH: return "Relative Slash"; + case ada::state::FILE: return "File"; + case ada::state::FILE_HOST: return "File Host"; + case ada::state::FILE_SLASH: return "File Slash"; + case ada::state::PATH_OR_AUTHORITY: return "Path or Authority"; + case ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES: return "Special Authority Ignore Slashes"; + case ada::state::SPECIAL_AUTHORITY_SLASHES: return "Special Authority Slashes"; + case ada::state::SPECIAL_RELATIVE_OR_AUTHORITY: return "Special Relative or Authority"; + case ada::state::QUERY: return "Query"; + case ada::state::PATH: return "Path"; + case ada::state::PATH_START: return "Path Start"; + case ada::state::OPAQUE_PATH: return "Opaque Path"; + case ada::state::PORT: return "Port"; + default: return "unknown state"; + } + } + + ada_really_inline std::optional prune_fragment(std::string_view& input) noexcept { + // compiles down to 20--30 instructions including a class to memchr (C function). + // this function should be quite fast. 
+ size_t location_of_first = input.find('#'); + if(location_of_first == std::string_view::npos) { return std::nullopt; } + std::string_view fragment = input; + fragment.remove_prefix(location_of_first+1); + input.remove_suffix(input.size() - location_of_first); + return fragment; + } + + // Shortens path by removing its last '/'-delimited item, if any. + // For the FILE scheme, a path whose only item is a normalized Windows drive letter is left untouched. + ada_really_inline void shorten_path(std::string& path, ada::scheme::type type) noexcept { + // An empty path has no item to remove; returning here also keeps the drive-letter check below from looking past the end of the string. + if (path.empty()) { return; } + size_t first_delimiter = path.find_first_of('/', 1); + + // Let path be url’s path. + // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return. + if (type == ada::scheme::type::FILE && first_delimiter == std::string_view::npos) { + // There is no second '/', so the only item is whatever follows the first character. + if (checkers::is_normalized_windows_drive_letter(std::string_view(path).substr(1))) { + return; + } + } + + // Remove path’s last item, if any. + // rfind yields npos when path holds no '/'; erase(npos) would throw out of this noexcept function. + const size_t last_delimiter = path.rfind('/'); + if (last_delimiter != std::string::npos) { + path.erase(last_delimiter); + } + } + + ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept { + // if this ever becomes a performance issue, we could use an approach similar to has_tabs_or_newline + input.erase(std::remove_if(input.begin(), input.end(), [](char c) { + return ada::unicode::is_ascii_tab_or_newline(c); + }), input.end()); + } + + ada_really_inline std::string_view substring(std::string_view input, size_t pos) noexcept { + ada_log("substring(", input, " [", input.size() ,"bytes],", pos, ")"); + return pos > input.size() ? std::string_view() : input.substr(pos); + } + + // Reverse the byte order.
+ ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept { + // performance: this often compiles to a single instruction (e.g., bswap) + return ((((val) & 0xff00000000000000ull) >> 56) | + (((val) & 0x00ff000000000000ull) >> 40) | + (((val) & 0x0000ff0000000000ull) >> 24) | + (((val) & 0x000000ff00000000ull) >> 8 ) | + (((val) & 0x00000000ff000000ull) << 8 ) | + (((val) & 0x0000000000ff0000ull) << 24) | + (((val) & 0x000000000000ff00ull) << 40) | + (((val) & 0x00000000000000ffull) << 56)); + } + + ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept { + // performance: under little-endian systems (most systems), this function + // is free (just returns the input). +#if ADA_IS_BIG_ENDIAN + return swap_bytes(val); +#else + return val; // unchanged (trivial) +#endif + } + + // starting at index location, this finds the next location of a character + // :, /, \\, ? or [. If none is found, view.size() is returned. + // For use within get_host_delimiter_location. + ada_really_inline size_t find_next_host_delimiter_special(std::string_view view, size_t location) noexcept { + // performance: if you plan to call find_next_host_delimiter more than once, + // you *really* want find_next_host_delimiter to be inlined, because + // otherwise, the constants may get reloaded each time (bad). + auto has_zero_byte = [](uint64_t v) { + return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); + }; + auto index_of_first_set_byte = [](uint64_t v) { + return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; + }; + auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + size_t i = location; + uint64_t mask1 = broadcast(':'); + uint64_t mask2 = broadcast('/'); + uint64_t mask3 = broadcast('\\'); + uint64_t mask4 = broadcast('?'); + uint64_t mask5 = broadcast('['); + // This loop will get autovectorized under many optimizing compilers, + // so you get actually SIMD! 
+ for (; i + 7 < view.size(); i += 8) { + uint64_t word{}; + // performance: the next memcpy translates into a single CPU instruction. + memcpy(&word, view.data() + i, sizeof(word)); + // performance: on little-endian systems (most systems), this next line is free. + word = swap_bytes_if_big_endian(word); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor3 = word ^ mask3; + uint64_t xor4 = word ^ mask4; + uint64_t xor5 = word ^ mask5; + uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4) | has_zero_byte(xor5); + if(is_match) { + return i + index_of_first_set_byte(is_match); + } + } + if (i < view.size()) { + uint64_t word{}; + // performance: the next memcpy translates into a function call, but + // that is difficult to avoid. Might be a bit expensive. + memcpy(&word, view.data() + i, view.size() - i); + word = swap_bytes_if_big_endian(word); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor3 = word ^ mask3; + uint64_t xor4 = word ^ mask4; + uint64_t xor5 = word ^ mask5; + uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4) | has_zero_byte(xor5); + if(is_match) { + return i + index_of_first_set_byte(is_match); + } + } + return view.size(); + } + + // starting at index location, this finds the next location of a character + // :, /, ? or [. If none is found, view.size() is returned. + // For use within get_host_delimiter_location. + ada_really_inline size_t find_next_host_delimiter(std::string_view view, size_t location) noexcept { + // performance: if you plan to call find_next_host_delimiter more than once, + // you *really* want find_next_host_delimiter to be inlined, because + // otherwise, the constants may get reloaded each time (bad). 
+ auto has_zero_byte = [](uint64_t v) { + return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); + }; + auto index_of_first_set_byte = [](uint64_t v) { + return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; + }; + auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + size_t i = location; + uint64_t mask1 = broadcast(':'); + uint64_t mask2 = broadcast('/'); + uint64_t mask4 = broadcast('?'); + uint64_t mask5 = broadcast('['); + // This loop will get autovectorized under many optimizing compilers, + // so you get actually SIMD! + for (; i + 7 < view.size(); i += 8) { + uint64_t word{}; + // performance: the next memcpy translates into a single CPU instruction. + memcpy(&word, view.data() + i, sizeof(word)); + // performance: on little-endian systems (most systems), this next line is free. + word = swap_bytes_if_big_endian(word); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor4 = word ^ mask4; + uint64_t xor5 = word ^ mask5; + uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor4) | has_zero_byte(xor5); + if(is_match) { + return i + index_of_first_set_byte(is_match); + } + } + if (i < view.size()) { + uint64_t word{}; + // performance: the next memcpy translates into a function call, but + // that is difficult to avoid. Might be a bit expensive. + memcpy(&word, view.data() + i, view.size() - i); + // performance: on little-endian systems (most systems), this next line is free. 
+ word = swap_bytes_if_big_endian(word); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor4 = word ^ mask4; + uint64_t xor5 = word ^ mask5; + uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor4) | has_zero_byte(xor5); + if(is_match) { + return i + index_of_first_set_byte(is_match); + } + } + return view.size(); + } + + // Finds where the host ends inside view and truncates view to that position. + // Text from a '[' up to its matching ']' is skipped; if no ']' follows, the whole view is kept. + // Returns {location, found_colon}: the end offset, and whether the character found there is ':'. + ada_really_inline std::pair get_host_delimiter_location(const bool is_special, std::string_view& view) noexcept { + /** + * The spec at https://url.spec.whatwg.org/#hostname-state expects us to compute + * a variable called insideBrackets but this variable is only used once, to check + * whether a ':' character was found outside brackets. + * Exact text: + * "Otherwise, if c is U+003A (:) and insideBrackets is false, then:". + * It is conceptually simpler and arguably more efficient to just return a Boolean + * indicating whether ':' was found outside brackets. + */ + const size_t view_size = view.size(); + size_t location = 0; + bool found_colon = false; + /** + * Performance analysis: + * + * We are basically seeking the end of the hostname which can be indicated + * by the end of the view, or by one of the characters ':', '/', '?', '\\' (where '\\' is only + * applicable for special URLs). However, these must appear outside a bracket range. E.g., + * if you have [something?]fd: then the '?' does not count. + * + * So we can skip ahead to the next delimiter, as long as we include '[' in the set of delimiters, + * and that we handle it first. + * + * So the trick is to have a fast function that locates the next delimiter. Unless we find '[', + * then it only needs to be called once! Ideally, such a function would be provided by the C++ + * standard library, but it seems that find_first_of is not very fast, so we are forced to roll + * our own. + * + * We do not break into two loops for speed, but for clarity. + */ + if(is_special) { + // We move to the next delimiter.
+ location = find_next_host_delimiter_special(view, location); + // Unless we find '[' then we are only going to have to call + // find_next_host_delimiter_special once. + for (;location < view_size; location = find_next_host_delimiter_special(view, location)) { + if (view[location] == '[') { + location = view.find(']', location); + if (location == std::string_view::npos) { + // performance: view.find might get translated to a memchr, which + // has no notion of std::string_view::npos, so the code does not + // reflect the assembly. + location = view_size; + break; + } + } else { + found_colon = view[location] == ':'; + break; + } + } + } else { + // We move to the next delimiter. + location = find_next_host_delimiter(view, location); + // Unless we find '[' then we are only going to have to call + // find_next_host_delimiter once. + for (;location < view_size; location = find_next_host_delimiter(view, location)) { + if (view[location] == '[') { + location = view.find(']', location); + if (location == std::string_view::npos) { + // performance: view.find might get translated to a memchr, which + // has no notion of std::string_view::npos, so the code does not + // reflect the assembly. + location = view_size; + break; + } + } else { + found_colon = view[location] == ':'; + break; + } + } + } + // performance: remove_suffix may translate into a single instruction.
+ view.remove_suffix(view_size - location); + return {location, found_colon}; + } + + ada_really_inline void trim_c0_whitespace(std::string_view& input) noexcept { + while(!input.empty() && ada::unicode::is_c0_control_or_space(input.front())) { input.remove_prefix(1); } + while(!input.empty() && ada::unicode::is_c0_control_or_space(input.back())) { input.remove_suffix(1); } + } + + + ada_really_inline bool parse_prepared_path(std::string_view input, ada::scheme::type type, std::string& path) { + ada_log("parse_path ", input); + uint8_t accumulator = checkers::path_signature(input); + // Let us first detect a trivial case. + // If it is special, we check that we have no dot, no %, no \ and no + // character needing percent encoding. Otherwise, we check that we have no %, + // no dot, and no character needing percent encoding. + bool special = type != ada::scheme::NOT_SPECIAL; + bool trivial_path = + (special ? (accumulator == 0) : ((accumulator & 0b11111101) == 0)) && + (type != ada::scheme::type::FILE); + if (trivial_path) { + ada_log("parse_path trivial"); + path += '/'; + path += input; + return true; + } + // We are going to need to look a bit at the path, but let us see if we can + // ignore percent encoding *and* backslashes *and* percent characters. + // Except for the trivial case, this is likely to capture 99% of paths out + // there. + bool fast_path = (special && (accumulator & 0b11111011) == 0) && + (type != ada::scheme::type::FILE); + if (fast_path) { + ada_log("parse_path fast"); + // Here we don't need to worry about \ or percent encoding. + // We also do not have a file protocol. We might have dots, however, + // but dots must as appear as '.', and they cannot be encoded because + // the symbol '%' is not present. + size_t previous_location = 0; // We start at 0.
+ do { + size_t new_location = input.find('/', previous_location); + //std::string_view path_view = input; + // We process the last segment separately: + if (new_location == std::string_view::npos) { + std::string_view path_view = input.substr(previous_location); + if (path_view == "..") { // The path ends with .. + // e.g., if you receive ".." with an empty path, you go to "/". + if(path.empty()) { path = '/'; return true; } + // Fast case where we have nothing to do: + if(path.back() == '/') { return true; } + // If you have the path "/joe/myfriend", + // then you delete 'myfriend'. + path.resize(path.rfind('/') + 1); + return true; + } + path += '/'; + if (path_view != ".") { + path.append(path_view); + } + return true; + } else { + // This is a non-final segment. + std::string_view path_view = input.substr(previous_location, new_location - previous_location); + previous_location = new_location + 1; + if (path_view == "..") { + if(!path.empty()) { path.erase(path.rfind('/')); } + } else if (path_view != ".") { + path += '/'; + path.append(path_view); + } + } + } while (true); + } else { + ada_log("parse_path slow"); + // we have reached the general case + bool needs_percent_encoding = (accumulator & 1); + std::string path_buffer_tmp; + do { + size_t location = (special && (accumulator & 2)) + ? input.find_first_of("/\\") + : input.find('/'); + std::string_view path_view = input; + if (location != std::string_view::npos) { + path_view.remove_suffix(path_view.size() - location); + input.remove_prefix(location + 1); + } + // path_buffer is either path_view or it might point at a percent encoded temporary file. + std::string_view path_buffer = + (needs_percent_encoding + && ada::unicode::percent_encode(path_view, character_sets::PATH_PERCENT_ENCODE, path_buffer_tmp)) ? 
+ path_buffer_tmp : + path_view; + if (unicode::is_double_dot_path_segment(path_buffer)) { + helpers::shorten_path(path, type); + if (location == std::string_view::npos) { + path += '/'; + } + } else if (unicode::is_single_dot_path_segment(path_buffer) && + (location == std::string_view::npos)) { + path += '/'; + } + // Otherwise, if path_buffer is not a single-dot path segment, then: + else if (!unicode::is_single_dot_path_segment(path_buffer)) { + // If url’s scheme is "file", url’s path is empty, and path_buffer is a + // Windows drive letter, then replace the second code point in + // path_buffer with U+003A (:). + if (type == ada::scheme::type::FILE && path.empty() && + checkers::is_windows_drive_letter(path_buffer)) { + path += '/'; + path += path_buffer[0]; + path += ':'; + path_buffer.remove_prefix(2); + path.append(path_buffer); + } else { + // Append path_buffer to url’s path. + path += '/'; + path.append(path_buffer); + } + } + if (location == std::string_view::npos) { + return true; + } + } while (true); + } + } + + ada_really_inline void strip_trailing_spaces_from_opaque_path(ada::url& url) noexcept { + if (!url.has_opaque_path) return; + if (url.fragment.has_value()) return; + if (url.query.has_value()) return; + while (!url.path.empty() && url.path.back() == ' ') { url.path.resize(url.path.size()-1); } + } + + ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept { + auto has_zero_byte = [](uint64_t v) { + return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); + }; + auto index_of_first_set_byte = [](uint64_t v) { + return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; + }; + auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + size_t i = 0; + uint64_t mask1 = broadcast('@'); + uint64_t mask2 = broadcast('/'); + uint64_t mask3 = broadcast('?'); + uint64_t mask4 = broadcast('\\'); + + for (; i + 7 < view.size(); i += 8) { + uint64_t word{}; + memcpy(&word, 
view.data() + i, sizeof(word)); + word = swap_bytes_if_big_endian(word); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor3 = word ^ mask3; + uint64_t xor4 = word ^ mask4; + uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4); + if (is_match) { + return i + index_of_first_set_byte(is_match); + } + } + + if (i < view.size()) { + uint64_t word{}; + memcpy(&word, view.data() + i, view.size() - i); + word = swap_bytes_if_big_endian(word); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor3 = word ^ mask3; + uint64_t xor4 = word ^ mask4; + uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4); + if (is_match) { + return i + index_of_first_set_byte(is_match); + } + } + + return view.size(); + } + + ada_really_inline size_t find_authority_delimiter(std::string_view view) noexcept { + auto has_zero_byte = [](uint64_t v) { + return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); + }; + auto index_of_first_set_byte = [](uint64_t v) { + return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; + }; + auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + size_t i = 0; + uint64_t mask1 = broadcast('@'); + uint64_t mask2 = broadcast('/'); + uint64_t mask3 = broadcast('?'); + + for (; i + 7 < view.size(); i += 8) { + uint64_t word{}; + memcpy(&word, view.data() + i, sizeof(word)); + word = swap_bytes_if_big_endian(word); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + uint64_t xor3 = word ^ mask3; + uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); + if (is_match) { + return i + index_of_first_set_byte(is_match); + } + } + + if (i < view.size()) { + uint64_t word{}; + memcpy(&word, view.data() + i, view.size() - i); + word = swap_bytes_if_big_endian(word); + uint64_t xor1 = word ^ mask1; + uint64_t xor2 = word ^ mask2; + 
uint64_t xor3 = word ^ mask3; + uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); + if (is_match) { + return i + index_of_first_set_byte(is_match); + } + } + + return view.size(); + } +} // namespace ada::helpers + +namespace ada { + ada_warn_unused std::string to_string(ada::state state) { + return ada::helpers::get_state(state); + } +} +/* end file src/helpers.cpp */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=url.cpp +/* begin file src/url.cpp */ + +#include +#include +#include + +namespace ada { + ada_really_inline bool url::parse_path(std::string_view input) { + ada_log("parse_path ", input); + std::string tmp_buffer; + std::string_view internal_input; + if(unicode::has_tabs_or_newline(input)) { + tmp_buffer = input; + // Optimization opportunity: Instead of copying and then pruning, we could just directly + // build the string from user_input. + helpers::remove_ascii_tab_or_newline(tmp_buffer); + internal_input = tmp_buffer; + } else { + internal_input = input; + } + + // If url is special, then: + if (is_special()) { + if(internal_input.empty()) { + path = "/"; + } else if((internal_input[0] == '/') ||(internal_input[0] == '\\')){ + return helpers::parse_prepared_path(internal_input.substr(1), get_scheme_type(), path); + } else { + return helpers::parse_prepared_path(internal_input, get_scheme_type(), path); + } + } else if (!internal_input.empty()) { + if(internal_input[0] == '/') { + return helpers::parse_prepared_path(internal_input.substr(1), get_scheme_type(), path); + } else { + return helpers::parse_prepared_path(internal_input, get_scheme_type(), path); + } + } else { + if(!host.has_value()) { + path = "/"; + } + } + return true; + } + + bool url::parse_opaque_host(std::string_view input) { + ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]"); + if (std::any_of(input.begin(), input.end(), ada::unicode::is_forbidden_host_code_point)) { + return is_valid = false; + } + 
+ // Return the result of running UTF-8 percent-encode on input using the C0 control percent-encode set. + host = ada::unicode::percent_encode(input, ada::character_sets::C0_CONTROL_PERCENT_ENCODE); + return true; + } + + bool url::parse_ipv4(std::string_view input) { + ada_log("parse_ipv4 ", input, "[", input.size(), " bytes]"); + if(input.back()=='.') { + input.remove_suffix(1); + } + size_t digit_count{0}; + int pure_decimal_count = 0; // entries that are decimal + std::string_view original_input = input; // we might use this if pure_decimal_count == 4. + uint64_t ipv4{0}; + // we could unroll for better performance? + for(;(digit_count < 4) && !(input.empty()); digit_count++) { + uint32_t segment_result{}; // If any number exceeds 32 bits, we have an error. + bool is_hex = checkers::has_hex_prefix(input); + if(is_hex && ((input.length() == 2)|| ((input.length() > 2) && (input[2]=='.')))) { + // special case + segment_result = 0; + input.remove_prefix(2); + } else { + std::from_chars_result r; + if(is_hex) { + r = std::from_chars(input.data() + 2, input.data() + input.size(), segment_result, 16); + } else if ((input.length() >= 2) && input[0] == '0' && checkers::is_digit(input[1])) { + r = std::from_chars(input.data() + 1, input.data() + input.size(), segment_result, 8); + } else { + pure_decimal_count++; + r = std::from_chars(input.data(), input.data() + input.size(), segment_result, 10); + } + if (r.ec != std::errc()) { return is_valid = false; } + input.remove_prefix(r.ptr-input.data()); + } + if(input.empty()) { + // We have the last value. + // At this stage, ipv4 contains digit_count*8 bits. + // So we have 32-digit_count*8 bits left. + if(segment_result > (uint64_t(1)<<(32-digit_count*8))) { return is_valid = false; } + ipv4 <<=(32-digit_count*8); + ipv4 |= segment_result; + goto final; + } else { + // There is more, so that the value must no be larger than 255 + // and we must have a '.'. 
+ if ((segment_result>255) || (input[0]!='.')) { return is_valid = false; } + ipv4 <<=8; + ipv4 |= segment_result; + input.remove_prefix(1); // remove '.' + } + } + if((digit_count != 4) || (!input.empty())) {return is_valid = false; } + final: + // We could also check r.ptr to see where the parsing ended. + if(pure_decimal_count == 4) { + host = original_input; // The original input was already all decimal and we validated it. + } else { + host = ada::serializers::ipv4(ipv4); // We have to reserialize the address. + } + return true; + } + + bool url::parse_ipv6(std::string_view input) { + ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]"); + + if(input.empty()) { return is_valid = false; } + // Let address be a new IPv6 address whose IPv6 pieces are all 0. + std::array address{}; + + // Let pieceIndex be 0. + int piece_index = 0; + + // Let compress be null. + std::optional compress{}; + + // Let pointer be a pointer for input. + std::string_view::iterator pointer = input.begin(); + + // If c is U+003A (:), then: + if (input[0] == ':') { + // If remaining does not start with U+003A (:), validation error, return failure. + if(input.size() == 1 || input[1] != ':') { + ada_log("parse_ipv6 starts with : but the rest does not start with :"); + return is_valid = false; + } + + // Increase pointer by 2. + pointer += 2; + + // Increase pieceIndex by 1 and then set compress to pieceIndex. + compress = ++piece_index; + } + + // While c is not the EOF code point: + while (pointer != input.end()) { + // If pieceIndex is 8, validation error, return failure. + if (piece_index == 8) { + ada_log("parse_ipv6 piece_index == 8"); + return is_valid = false; + } + + // If c is U+003A (:), then: + if (*pointer == ':') { + // If compress is non-null, validation error, return failure. 
+ if (compress.has_value()) { + ada_log("parse_ipv6 compress is non-null"); + return is_valid = false; + } + + // Increase pointer and pieceIndex by 1, set compress to pieceIndex, and then continue. + pointer++; + compress = ++piece_index; + continue; + } + + // Let value and length be 0. + uint16_t value = 0, length = 0; + + // While length is less than 4 and c is an ASCII hex digit, + // set value to value × 0x10 + c interpreted as hexadecimal number, and increase pointer and length by 1. + while (length < 4 && pointer != input.end() && unicode::is_ascii_hex_digit(*pointer)) { + // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int + value = uint16_t(value * 0x10 + unicode::convert_hex_to_binary(*pointer)); + pointer++; + length++; + } + + // If c is U+002E (.), then: + if (pointer != input.end() && *pointer == '.') { + // If length is 0, validation error, return failure. + if (length == 0) { + ada_log("parse_ipv6 length is 0"); + return is_valid = false; + } + + // Decrease pointer by length. + pointer -= length; + + // If pieceIndex is greater than 6, validation error, return failure. + if (piece_index > 6) { + ada_log("parse_ipv6 piece_index > 6"); + return is_valid = false; + } + + // Let numbersSeen be 0. + int numbers_seen = 0; + + // While c is not the EOF code point: + while (pointer != input.end()) { + // Let ipv4Piece be null. + std::optional ipv4_piece{}; + + // If numbersSeen is greater than 0, then: + if (numbers_seen > 0) { + // If c is a U+002E (.) and numbersSeen is less than 4, then increase pointer by 1. + if (*pointer == '.' && numbers_seen < 4) { + pointer++; + } + // Otherwise, validation error, return failure. + else { + ada_log("parse_ipv6 Otherwise, validation error, return failure"); + return is_valid = false; + } + } + + // If c is not an ASCII digit, validation error, return failure. 
+ if (pointer == input.end() || !checkers::is_digit(*pointer)) { + ada_log("parse_ipv6 If c is not an ASCII digit, validation error, return failure"); + return is_valid = false; + } + + // While c is an ASCII digit: + while (pointer != input.end() && checkers::is_digit(*pointer)) { + // Let number be c interpreted as decimal number. + int number = *pointer - '0'; + + // If ipv4Piece is null, then set ipv4Piece to number. + if (!ipv4_piece.has_value()) { + ipv4_piece = number; + } + // Otherwise, if ipv4Piece is 0, validation error, return failure. + else if (ipv4_piece == 0) { + ada_log("parse_ipv6 if ipv4Piece is 0, validation error"); + return is_valid = false; + } + // Otherwise, set ipv4Piece to ipv4Piece × 10 + number. + else { + ipv4_piece = *ipv4_piece * 10 + number; + } + + // If ipv4Piece is greater than 255, validation error, return failure. + if (ipv4_piece > 255) { + ada_log("parse_ipv6 ipv4_piece > 255"); + return is_valid = false; + } + + // Increase pointer by 1. + pointer++; + } + + // Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece. + // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int + address[piece_index] = uint16_t(address[piece_index] * 0x100 + *ipv4_piece); + + // Increase numbersSeen by 1. + numbers_seen++; + + // If numbersSeen is 2 or 4, then increase pieceIndex by 1. + if (numbers_seen == 2 || numbers_seen == 4) { + piece_index++; + } + } + + // If numbersSeen is not 4, validation error, return failure. + if (numbers_seen != 4) { + return is_valid = false; + } + + // Break. + break; + } + // Otherwise, if c is U+003A (:): + else if ((pointer != input.end()) && (*pointer == ':')) { + // Increase pointer by 1. + pointer++; + + // If c is the EOF code point, validation error, return failure. 
+ if (pointer == input.end()) { + ada_log("parse_ipv6 If c is the EOF code point, validation error, return failure"); + return is_valid = false; + } + } + // Otherwise, if c is not the EOF code point, validation error, return failure. + else if (pointer != input.end()) { + ada_log("parse_ipv6 Otherwise, if c is not the EOF code point, validation error, return failure"); + return is_valid = false; + } + + // Set address[pieceIndex] to value. + address[piece_index] = value; + + // Increase pieceIndex by 1. + piece_index++; + } + + // If compress is non-null, then: + if (compress.has_value()) { + // Let swaps be pieceIndex − compress. + int swaps = piece_index - *compress; + + // Set pieceIndex to 7. + piece_index = 7; + + // While pieceIndex is not 0 and swaps is greater than 0, + // swap address[pieceIndex] with address[compress + swaps − 1], and then decrease both pieceIndex and swaps by 1. + while (piece_index != 0 && swaps > 0) { + std::swap(address[piece_index], address[*compress + swaps - 1]); + piece_index--; + swaps--; + } + } + // Otherwise, if compress is null and pieceIndex is not 8, validation error, return failure. + else if (piece_index != 8) { + ada_log("parse_ipv6 if compress is null and pieceIndex is not 8, validation error, return failure"); + return is_valid = false; + } + host = ada::serializers::ipv6(address); + ada_log("parse_ipv6 ", *host); + return true; + } + + ada_really_inline bool url::parse_host(std::string_view input) { + ada_log("parse_host ", input, "[", input.size(), " bytes]"); + if(input.empty()) { return is_valid = false; } // technically unnecessary. + // If input starts with U+005B ([), then: + if (input[0] == '[') { + // If input does not end with U+005D (]), validation error, return failure. + if (input.back() != ']') { + return is_valid = false; + } + ada_log("parse_host ipv6"); + + // Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed. 
+ input.remove_prefix(1); + input.remove_suffix(1); + return parse_ipv6(input); + } + + // If isNotSpecial is true, then return the result of opaque-host parsing input. + if (!is_special()) { + return parse_opaque_host(input); + } + // Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input. + // Let asciiDomain be the result of running domain to ASCII with domain and false. + // The most common case is an ASCII input, in which case we do not need to call the expensive 'to_ascii' + // if a few conditions are met: no '%' and no 'xn-' subsequence. + std::string buffer = std::string(input); + // This next function checks that the result is ascii, but we are going + // to check anyhow with is_forbidden. + // bool is_ascii = + unicode::to_lower_ascii(buffer.data(), buffer.size()); + bool is_forbidden = unicode::contains_forbidden_domain_code_point(buffer.data(), buffer.size()); + if (is_forbidden == 0 && buffer.find("xn-") == std::string_view::npos) { + // fast path + host = std::move(buffer); + if (checkers::is_ipv4(host.value())) { + ada_log("parse_host fast path ipv4"); + return parse_ipv4(host.value()); + } + ada_log("parse_host fast path ", *host); + return true; + } + ada_log("parse_host calling to_ascii"); + is_valid = ada::unicode::to_ascii(host, input, false, input.find('%')); + if (!is_valid) { + ada_log("parse_host to_ascii returns false"); + return is_valid = false; + } + + if(std::any_of(host.value().begin(), host.value().end(), ada::unicode::is_forbidden_domain_code_point)) { + host = std::nullopt; + return is_valid = false; + } + + // If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain. 
+ if(checkers::is_ipv4(host.value())) { + ada_log("parse_host got ipv4", *host); + return parse_ipv4(host.value()); + } + + return true; + } + + template + ada_really_inline bool url::parse_scheme(const std::string_view input) { + auto parsed_type = ada::scheme::get_scheme_type(input); + bool is_input_special = (parsed_type != ada::scheme::NOT_SPECIAL); + /** + * In the common case, we will immediately recognize a special scheme (e.g., http, https), + * in which case, we can go really fast. + **/ + if(is_input_special) { // fast path!!! + if (has_state_override) { + // If url’s scheme is not a special scheme and buffer is a special scheme, then return. + if (is_special() != is_input_special) { + return true; + } + + // If url includes credentials or has a non-null port, and buffer is "file", then return. + if ((includes_credentials() || port.has_value()) && parsed_type == ada::scheme::type::FILE) { + return true; + } + + // If url’s scheme is "file" and its host is an empty host, then return. + // An empty host is the empty string. + if (get_scheme_type() == ada::scheme::type::FILE && host.has_value() && host.value().empty()) { + return true; + } + } + + type = parsed_type; + + if (has_state_override) { + // This is uncommon. + uint16_t urls_scheme_port = get_special_port(); + + if (urls_scheme_port) { + // If url’s port is url’s scheme’s default port, then set url’s port to null. + if (port.has_value() && *port == urls_scheme_port) { + port = std::nullopt; + } + } + } + } else { // slow path + std::string _buffer = std::string(input); + // Next function is only valid if the input is ASCII and returns false + // otherwise, but it seems that we always have ascii content so we do not need + // to check the return value. + //bool is_ascii = + unicode::to_lower_ascii(_buffer.data(), _buffer.size()); + + if (has_state_override) { + // If url’s scheme is a special scheme and buffer is not a special scheme, then return. 
+ // If url’s scheme is not a special scheme and buffer is a special scheme, then return. + if (is_special() != ada::scheme::is_special(_buffer)) { + return true; + } + + // If url includes credentials or has a non-null port, and buffer is "file", then return. + if ((includes_credentials() || port.has_value()) && _buffer == "file") { + return true; + } + + // If url’s scheme is "file" and its host is an empty host, then return. + // An empty host is the empty string. + if (get_scheme_type() == ada::scheme::type::FILE && host.has_value() && host.value().empty()) { + return true; + } + } + + set_scheme(std::move(_buffer)); + + if (has_state_override) { + // This is uncommon. + uint16_t urls_scheme_port = get_special_port(); + + if (urls_scheme_port) { + // If url’s port is url’s scheme’s default port, then set url’s port to null. + if (port.has_value() && *port == urls_scheme_port) { + port = std::nullopt; + } + } + } + } + + return true; + } + + std::string url::to_string() const { + if (!is_valid) { + return "null"; + } + std::string answer; + auto back = std::back_insert_iterator(answer); + answer.append("{\n"); + answer.append("\t\"scheme\":\""); + helpers::encode_json(get_scheme(), back); + answer.append("\",\n"); + if(includes_credentials()) { + answer.append("\t\"username\":\""); + helpers::encode_json(username, back); + answer.append("\",\n"); + answer.append("\t\"password\":\""); + helpers::encode_json(password, back); + answer.append("\",\n"); + } + if(host.has_value()) { + answer.append("\t\"host\":\""); + helpers::encode_json(host.value(), back); + answer.append("\",\n"); + } + if(port.has_value()) { + answer.append("\t\"port\":\""); + answer.append(std::to_string(port.value())); + answer.append("\",\n"); + } + answer.append("\t\"path\":\""); + helpers::encode_json(path, back); + answer.append("\",\n"); + answer.append("\t\"opaque path\":"); + answer.append((has_opaque_path ? 
"true" : "false")); + if(query.has_value()) { + answer.append(",\n"); + answer.append("\t\"query\":\""); + helpers::encode_json(query.value(), back); + answer.append("\""); + } + if(fragment.has_value()) { + answer.append(",\n"); + answer.append("\t\"fragment\":\""); + helpers::encode_json(fragment.value(), back); + answer.append("\""); + } + answer.append("\n}"); + return answer; + } + + [[nodiscard]] bool url::has_valid_domain() const noexcept { + if(!host.has_value()) { return false; } + return checkers::verify_dns_length(host.value()); + } +} // namespace ada +/* end file src/url.cpp */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=url-getters.cpp +/* begin file src/url-getters.cpp */ +/** + * @file url-getters.cpp + * Includes all the getters of `ada::url` + */ + +#include +#include + +namespace ada { + + [[nodiscard]] std::string url::get_href() const noexcept { + std::string output = get_protocol(); + size_t url_delimiter_count = std::count(path.begin(), path.end(), '/'); + + if (host.has_value()) { + output += "//"; + if (includes_credentials()) { + output += get_username(); + if (!get_password().empty()) { + output += ":" + get_password(); + } + output += "@"; + } + + output += get_host(); + } else if (!has_opaque_path && url_delimiter_count > 1 && path.length() >= 2 && path[0] == '/' && path[1] == '/') { + // If url’s host is null, url does not have an opaque path, url’s path’s size is greater than 1, + // and url’s path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output. + output += "/."; + } + + output += get_pathname() + // If query is non-null, then set this’s query object’s list to the result of parsing query. + + (query.has_value() ? "?" + query.value() : "") + // If url’s fragment is non-null, then append U+0023 (#), followed by url’s fragment, to output. + + (fragment.has_value() ? 
"#" + fragment.value() : ""); + return output; + } + + [[nodiscard]] std::string url::get_origin() const noexcept { + if (is_special()) { + // Return a new opaque origin. + if (get_scheme_type() == scheme::FILE) { return "null"; } + + return get_protocol() + "//" + get_host(); + } + + if (get_scheme() == "blob") { + if (path.length() > 0) { + url path_result = ada::parser::parse_url(get_pathname()); + if (path_result.is_valid) { + if (path_result.is_special()) { + return path_result.get_protocol() + "//" + path_result.get_host(); + } + } + } + } + + // Return a new opaque origin. + return "null"; + } + + [[nodiscard]] std::string url::get_protocol() const noexcept { + return std::string(get_scheme()) + ":"; + } + + [[nodiscard]] std::string url::get_host() const noexcept { + // If url’s host is null, then return the empty string. + // If url’s port is null, return url’s host, serialized. + // Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized. + if (!host.has_value()) { return ""; } + return host.value() + (port.has_value() ? ":" + get_port() : ""); + } + + [[nodiscard]] std::string url::get_hostname() const noexcept { + return host.value_or(""); + } + + [[nodiscard]] std::string url::get_pathname() const noexcept { + return path; + } + + [[nodiscard]] std::string url::get_search() const noexcept { + // If this’s URL’s query is either null or the empty string, then return the empty string. + // Return U+003F (?), followed by this’s URL’s query. + return (!query.has_value() || (query.value().empty())) ? "" : "?" + query.value(); + } + + [[nodiscard]] std::string url::get_username() const noexcept { + return username; + } + + [[nodiscard]] std::string url::get_password() const noexcept { + return password; + } + + [[nodiscard]] std::string url::get_port() const noexcept { + return port.has_value() ? 
std::to_string(port.value()) : ""; + } + + [[nodiscard]] std::string url::get_hash() const noexcept { + // If this’s URL’s fragment is either null or the empty string, then return the empty string. + // Return U+0023 (#), followed by this’s URL’s fragment. + return (!fragment.has_value() || (fragment.value().empty())) ? "" : "#" + fragment.value(); + } + +} // namespace ada +/* end file src/url-getters.cpp */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=url-setters.cpp +/* begin file src/url-setters.cpp */ +/** + * @file url-setters.cpp + * Includes all the setters of `ada::url` + */ + +#include +#include + +namespace ada { + + bool url::set_username(const std::string_view input) { + if (cannot_have_credentials_or_port()) { return false; } + username = ada::unicode::percent_encode(input, character_sets::USERINFO_PERCENT_ENCODE); + return true; + } + + bool url::set_password(const std::string_view input) { + if (cannot_have_credentials_or_port()) { return false; } + password = ada::unicode::percent_encode(input, character_sets::USERINFO_PERCENT_ENCODE); + return true; + } + + bool url::set_port(const std::string_view input) { + if (cannot_have_credentials_or_port()) { return false; } + std::string trimmed(input); + helpers::remove_ascii_tab_or_newline(trimmed); + if (trimmed.empty()) { port = std::nullopt; return true; } + // Input should not start with control characters. + if (ada::unicode::is_c0_control_or_space(trimmed.front())) { return false; } + // Input should contain at least one ascii digit. + if (input.find_first_of("0123456789") == std::string_view::npos) { return false; } + + // Revert changes if parse_port fails. 
+ std::optional previous_port = port; + parse_port(trimmed); + if (is_valid) { return true; } + port = previous_port; + is_valid = true; + return false; + } + + void url::set_hash(const std::string_view input) { + if (input.empty()) { + fragment = std::nullopt; + helpers::strip_trailing_spaces_from_opaque_path(*this); + return; + } + + std::string new_value; + new_value = input[0] == '#' ? input.substr(1) : input; + helpers::remove_ascii_tab_or_newline(new_value); + fragment = unicode::percent_encode(new_value, ada::character_sets::FRAGMENT_PERCENT_ENCODE); + return; + } + + void url::set_search(const std::string_view input) { + if (input.empty()) { + query = std::nullopt; + helpers::strip_trailing_spaces_from_opaque_path(*this); + return; + } + + std::string new_value; + new_value = input[0] == '?' ? input.substr(1) : input; + helpers::remove_ascii_tab_or_newline(new_value); + + auto query_percent_encode_set = is_special() ? + ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE : + ada::character_sets::QUERY_PERCENT_ENCODE; + + query = ada::unicode::percent_encode(std::string_view(new_value), query_percent_encode_set); + } + + bool url::set_pathname(const std::string_view input) { + if (has_opaque_path) { return false; } + path = ""; + return parse_path(input); + } + + bool url::set_host_or_hostname(const std::string_view input, bool override_hostname) { + if (has_opaque_path) { return false; } + + std::optional previous_host = host; + std::optional previous_port = port; + + size_t host_end_pos = input.find('#'); + std::string _host(input.data(), host_end_pos != std::string_view::npos ? host_end_pos : input.size()); + helpers::remove_ascii_tab_or_newline(_host); + std::string_view new_host(_host); + + // If url's scheme is "file", then set state to file host state, instead of host state. 
+ if (get_scheme_type() != ada::scheme::type::FILE) { + std::string_view host_view(_host.data(), _host.length()); + auto [location,found_colon] = helpers::get_host_delimiter_location(is_special(), host_view); + + // Otherwise, if c is U+003A (:) and insideBrackets is false, then: + // Note: the 'found_colon' value is true if and only if a colon was encountered + // while not inside brackets. + if (found_colon) { + if (override_hostname) { return false; } + std::string_view buffer = new_host.substr(location+1); + if (!buffer.empty()) { set_port(buffer); } + } + // If url is special and host_view is the empty string, validation error, return failure. + // Otherwise, if state override is given, host_view is the empty string, + // and either url includes credentials or url’s port is non-null, return. + else if (host_view.empty() && (is_special() || includes_credentials() || port.has_value())) { + return false; + } + + // Let host be the result of host parsing host_view with url is not special. + if (host_view.empty()) { + host = ""; + return true; + } + + bool succeeded = parse_host(host_view); + if (!succeeded) { + host = previous_host; + port = previous_port; + } + return succeeded; + } + + size_t location = new_host.find_first_of("/\\?"); + if (location != std::string_view::npos) { new_host.remove_suffix(new_host.length() - location); } + + if (new_host.empty()) { + // Set url’s host to the empty string. + host = ""; + } + else { + // Let host be the result of host parsing buffer with url is not special. + if (!parse_host(new_host)) { + host = previous_host; + port = previous_port; + return false; + } + + // If host is "localhost", then set host to the empty string. 
+ if (host.has_value() && host.value() == "localhost") { + host = ""; + } + } + return true; + } + + bool url::set_host(const std::string_view input) { + return set_host_or_hostname(input, false); + } + + bool url::set_hostname(const std::string_view input) { + return set_host_or_hostname(input, true); + } + + bool url::set_protocol(const std::string_view input) { + std::string view(input); + helpers::remove_ascii_tab_or_newline(view); + if (view.empty()) { return true; } + + // Schemes should start with alpha values. + if (!checkers::is_alpha(view[0])) { return false; } + + view.append(":"); + + std::string::iterator pointer = std::find_if_not(view.begin(), view.end(), unicode::is_alnum_plus); + + if (pointer != view.end() && *pointer == ':') { + return parse_scheme(std::string_view(view.data(), pointer - view.begin())); + } + return false; + } + + bool url::set_href(const std::string_view input) { + ada::result out = ada::parse(input); + + if (out) { + username = out->username; + password = out->password; + host = out->host; + port = out->port; + path = out->path; + query = out->query; + fragment = out->fragment; + type = out->type; + non_special_scheme = out->non_special_scheme; + has_opaque_path = out->has_opaque_path; + } + + return out.has_value(); + } + +} // namespace ada +/* end file src/url-setters.cpp */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=parser.cpp +/* begin file src/parser.cpp */ + +#include + +#include +#include + +namespace ada::parser { + + url parse_url(std::string_view user_input, + const ada::url* base_url, + ada::encoding_type encoding) { + ada_log("ada::parser::parse_url('", user_input, + "' [", user_input.size()," bytes],", (base_url != nullptr ? base_url->to_string() : "null"), + ",", ada::to_string(encoding), ")"); + + ada::state state = ada::state::SCHEME_START; + ada::url url = ada::url(); + + // If we are provided with an invalid base, or the optional_url was invalid, + // we must return. 
+ if(base_url != nullptr) { url.is_valid &= base_url->is_valid; } + if(!url.is_valid) { return url; } + + std::string tmp_buffer; + std::string_view internal_input; + if(unicode::has_tabs_or_newline(user_input)) { + tmp_buffer = user_input; + // Optimization opportunity: Instead of copying and then pruning, we could just directly + // build the string from user_input. + helpers::remove_ascii_tab_or_newline(tmp_buffer); + internal_input = tmp_buffer; + } else { + internal_input = user_input; + } + + // Leading and trailing control characters are uncommon and easy to deal with (no performance concern). + std::string_view url_data = internal_input; + helpers::trim_c0_whitespace(url_data); + + // Optimization opportunity. Most websites do not have fragment. + std::optional fragment = helpers::prune_fragment(url_data); + if(fragment.has_value()) { + url.fragment = unicode::percent_encode(*fragment, + ada::character_sets::FRAGMENT_PERCENT_ENCODE); + } + + // Here url_data no longer has its fragment. + // We are going to access the data from url_data (it is immutable). + // At any given time, we are pointing at byte 'input_position' in url_data. + // The input_position variable should range from 0 to input_size. + // It is illegal to access url_data at input_size. + size_t input_position = 0; + const size_t input_size = url_data.size(); + // Keep running the following state machine by switching on state. + // If after a run pointer points to the EOF code point, go to the next step. + // Otherwise, increase pointer by 1 and continue with the state machine. + // We never decrement input_position. + while(input_position <= input_size) { + switch (state) { + case ada::state::SCHEME_START: { + ada_log("SCHEME_START ", helpers::substring(url_data, input_position)); + // If c is an ASCII alpha, append c, lowercased, to buffer, and set state to scheme state. 
+ if ((input_position != input_size) && checkers::is_alpha(url_data[input_position])) { + state = ada::state::SCHEME; + input_position++; + } else { + // Otherwise, if state override is not given, set state to no scheme state and decrease pointer by 1. + state = ada::state::NO_SCHEME; + } + break; + } + case ada::state::SCHEME: { + ada_log("SCHEME ", helpers::substring(url_data, input_position)); + // If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E (.), append c, lowercased, to buffer. + while((input_position != input_size) && (ada::unicode::is_alnum_plus(url_data[input_position]))) { + input_position++; + } + // Otherwise, if c is U+003A (:), then: + if ((input_position != input_size) && (url_data[input_position] == ':')) { + ada_log("SCHEME the scheme should be ", url_data.substr(0,input_position)); + if(!url.parse_scheme(url_data.substr(0,input_position))) { return url; } + ada_log("SCHEME the scheme is ", url.get_scheme()); + + // If url’s scheme is "file", then: + if (url.get_scheme_type() == ada::scheme::type::FILE) { + // Set state to file state. + state = ada::state::FILE; + } + // Otherwise, if url is special, base is non-null, and base’s scheme is url’s scheme: + // Note: Doing base_url->scheme is unsafe if base_url != nullptr is false. + else if (url.is_special() && base_url != nullptr && base_url->get_scheme_type() == url.get_scheme_type()) { + // Set state to special relative or authority state. + state = ada::state::SPECIAL_RELATIVE_OR_AUTHORITY; + } + // Otherwise, if url is special, set state to special authority slashes state. + else if (url.is_special()) { + state = ada::state::SPECIAL_AUTHORITY_SLASHES; + } + // Otherwise, if remaining starts with an U+002F (/), set state to path or authority state + // and increase pointer by 1. 
+ else if (input_position + 1 < input_size && url_data[input_position + 1] == '/') { + state = ada::state::PATH_OR_AUTHORITY; + input_position++; + } + // Otherwise, set url’s path to the empty string and set state to opaque path state. + else { + state = ada::state::OPAQUE_PATH; + } + } + // Otherwise, if state override is not given, set buffer to the empty string, state to no scheme state, + // and start over (from the first code point in input). + else { + state = ada::state::NO_SCHEME; + input_position = 0; + break; + } + input_position++; + break; + } + case ada::state::NO_SCHEME: { + ada_log("NO_SCHEME ", helpers::substring(url_data, input_position)); + // If base is null, or base has an opaque path and c is not U+0023 (#), validation error, return failure. + // SCHEME state updates the state to NO_SCHEME and validates url_data is not empty. + if (base_url == nullptr || (base_url->has_opaque_path && url_data[input_position] != '#')) { + ada_log("NO_SCHEME validation error"); + url.is_valid = false; + return url; + } + // Otherwise, if base has an opaque path and c is U+0023 (#), + // set url’s scheme to base’s scheme, url’s path to base’s path, url’s query to base’s query, + // url’s fragment to the empty string, and set state to fragment state. + else if (base_url->has_opaque_path && url.fragment.has_value() && input_position == input_size) { + ada_log("NO_SCHEME opaque base with fragment"); + url.copy_scheme(*base_url); + url.path = base_url->path; + url.has_opaque_path = base_url->has_opaque_path; + url.query = base_url->query; + return url; + } + // Otherwise, if base’s scheme is not "file", set state to relative state and decrease pointer by 1. + else if (base_url->get_scheme_type() != ada::scheme::type::FILE) { + ada_log("NO_SCHEME non-file relative path"); + state = ada::state::RELATIVE_SCHEME; + } + // Otherwise, set state to file state and decrease pointer by 1. 
+ else { + ada_log("NO_SCHEME file base type"); + state = ada::state::FILE; + } + break; + } + case ada::state::AUTHORITY: { + ada_log("AUTHORITY ", helpers::substring(url_data, input_position)); + // most URLs have no @. Having no @ tells us that we don't have to worry about AUTHORITY. Of course, + // we could have @ and still not have to worry about AUTHORITY. + // TODO: Instead of just collecting a bool, collect the location of the '@' and do something useful with it. + // TODO: We could do various processing early on, using a single pass over the string to collect + // information about it, e.g., telling us whether there is a @ and if so, where (or how many). + const bool contains_ampersand = (url_data.find('@', input_position) != std::string_view::npos); + + if(!contains_ampersand) { + state = ada::state::HOST; + break; + } + bool at_sign_seen{false}; + bool password_token_seen{false}; + do { + std::string_view view = helpers::substring(url_data, input_position); + size_t location = url.is_special() ? helpers::find_authority_delimiter_special(view) : helpers::find_authority_delimiter(view); + std::string_view authority_view(view.data(), location); + size_t end_of_authority = input_position + authority_view.size(); + // If c is U+0040 (@), then: + if ((end_of_authority != input_size) && (url_data[end_of_authority] == '@')) { + // If atSignSeen is true, then prepend "%40" to buffer. 
+ if (at_sign_seen) { + if (password_token_seen) { + url.password += "%40"; + } else { + url.username += "%40"; + } + } + + at_sign_seen = true; + + if (!password_token_seen) { + size_t password_token_location = authority_view.find(':'); + password_token_seen = password_token_location != std::string_view::npos; + + if (!password_token_seen) { + url.username += unicode::percent_encode(authority_view, character_sets::USERINFO_PERCENT_ENCODE); + } else { + url.username += unicode::percent_encode(authority_view.substr(0,password_token_location), character_sets::USERINFO_PERCENT_ENCODE); + url.password += unicode::percent_encode(authority_view.substr(password_token_location+1), character_sets::USERINFO_PERCENT_ENCODE); + } + } + else { + url.password += unicode::percent_encode(authority_view, character_sets::USERINFO_PERCENT_ENCODE); + } + } + // Otherwise, if one of the following is true: + // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) + // - url is special and c is U+005C (\) + else if (end_of_authority == input_size || url_data[end_of_authority] == '/' || url_data[end_of_authority] == '?' || (url.is_special() && url_data[end_of_authority] == '\\')) { + // If atSignSeen is true and authority_view is the empty string, validation error, return failure. + if (at_sign_seen && authority_view.empty()) { + url.is_valid = false; + return url; + } + state = ada::state::HOST; + break; + } + if(end_of_authority == input_size) { return url; } + input_position = end_of_authority + 1; + } while(true); + + break; + } + case ada::state::SPECIAL_RELATIVE_OR_AUTHORITY: { + ada_log("SPECIAL_RELATIVE_OR_AUTHORITY ", helpers::substring(url_data, input_position)); + + // If c is U+002F (/) and remaining starts with U+002F (/), + // then set state to special authority ignore slashes state and increase pointer by 1. 
+ std::string_view view = helpers::substring(url_data, input_position); + if (ada::checkers::begins_with(view, "//")) { + state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES; + input_position += 2; + } else { + // Otherwise, validation error, set state to relative state and decrease pointer by 1. + state = ada::state::RELATIVE_SCHEME; + } + + break; + } + case ada::state::PATH_OR_AUTHORITY: { + ada_log("PATH_OR_AUTHORITY ", helpers::substring(url_data, input_position)); + + // If c is U+002F (/), then set state to authority state. + if ((input_position != input_size) && (url_data[input_position] == '/')) { + state = ada::state::AUTHORITY; + input_position++; + } else { + // Otherwise, set state to path state, and decrease pointer by 1. + state = ada::state::PATH; + } + + break; + } + case ada::state::RELATIVE_SCHEME: { + ada_log("RELATIVE_SCHEME ", helpers::substring(url_data, input_position)); + + // Set url’s scheme to base’s scheme. + url.copy_scheme(*base_url); + + // If c is U+002F (/), then set state to relative slash state. + if ((input_position != input_size) && (url_data[input_position] == '/')) { + ada_log("RELATIVE_SCHEME if c is U+002F (/), then set state to relative slash state"); + state = ada::state::RELATIVE_SLASH; + } else if (url.is_special() && (input_position != input_size) && (url_data[input_position] == '\\')) { + // Otherwise, if url is special and c is U+005C (\), validation error, set state to relative slash state. + ada_log("RELATIVE_SCHEME if url is special and c is U+005C, validation error, set state to relative slash state"); + state = ada::state::RELATIVE_SLASH; + } else { + ada_log("RELATIVE_SCHEME otherwise"); + // Set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host, + // url’s port to base’s port, url’s path to a clone of base’s path, and url’s query to base’s query. 
+ url.username = base_url->username; + url.password = base_url->password; + url.host = base_url->host; + url.port = base_url->port; + url.path = base_url->path; + url.has_opaque_path = base_url->has_opaque_path; + url.query = base_url->query; + + // If c is U+003F (?), then set url’s query to the empty string, and state to query state. + if ((input_position != input_size) && (url_data[input_position] == '?')) { + state = ada::state::QUERY; + } + // Otherwise, if c is not the EOF code point: + else if (input_position != input_size) { + // Set url’s query to null. + url.query = std::nullopt; + + // Shorten url’s path. + helpers::shorten_path(url.path, url.get_scheme_type()); + + // Set state to path state and decrease pointer by 1. + state = ada::state::PATH; + break; + } + } + input_position++; + break; + } + case ada::state::RELATIVE_SLASH: { + ada_log("RELATIVE_SLASH ", helpers::substring(url_data, input_position)); + + // If url is special and c is U+002F (/) or U+005C (\), then: + if (url.is_special() && (input_position != input_size) && (url_data[input_position] == '/' || url_data[input_position] =='\\')) { + // Set state to special authority ignore slashes state. + state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES; + } + // Otherwise, if c is U+002F (/), then set state to authority state. + else if ((input_position != input_size) && (url_data[input_position] == '/')) { + state = ada::state::AUTHORITY; + } + // Otherwise, set + // - url’s username to base’s username, + // - url’s password to base’s password, + // - url’s host to base’s host, + // - url’s port to base’s port, + // - state to path state, and then, decrease pointer by 1. 
+ else { + url.username = base_url->username; + url.password = base_url->password; + url.host = base_url->host; + url.port = base_url->port; + state = ada::state::PATH; + break; + } + + input_position++; + break; + } + case ada::state::SPECIAL_AUTHORITY_SLASHES: { + ada_log("SPECIAL_AUTHORITY_SLASHES ", helpers::substring(url_data, input_position)); + + // If c is U+002F (/) and remaining starts with U+002F (/), + // then set state to special authority ignore slashes state and increase pointer by 1. + state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES; + std::string_view view = helpers::substring(url_data, input_position); + if (ada::checkers::begins_with(view, "//")) { + input_position += 2; + } + + [[fallthrough]]; + } + case ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES: { + ada_log("SPECIAL_AUTHORITY_IGNORE_SLASHES ", helpers::substring(url_data, input_position)); + + // If c is neither U+002F (/) nor U+005C (\), then set state to authority state and decrease pointer by 1. + while ((input_position != input_size) && ((url_data[input_position] == '/') || (url_data[input_position] == '\\'))) { + input_position++; + } + state = ada::state::AUTHORITY; + + break; + } + case ada::state::QUERY: { + ada_log("QUERY ", helpers::substring(url_data, input_position)); + // If encoding is not UTF-8 and one of the following is true: + // - url is not special + // - url’s scheme is "ws" or "wss" + if (encoding != ada::encoding_type::UTF8) { + if (!url.is_special() || url.get_scheme_type() == ada::scheme::type::WS || url.get_scheme_type() == ada::scheme::type::WSS) { + // then set encoding to UTF-8. + encoding = ada::encoding_type::UTF8; + } + } + // Let queryPercentEncodeSet be the special-query percent-encode set if url is special; + // otherwise the query percent-encode set. + auto query_percent_encode_set = url.is_special() ? 
+ ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE : + ada::character_sets::QUERY_PERCENT_ENCODE; + + // Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, + // and append the result to url’s query. + url.query = ada::unicode::percent_encode(helpers::substring(url_data, input_position), query_percent_encode_set); + + return url; + } + case ada::state::HOST: { + ada_log("HOST ", helpers::substring(url_data, input_position)); + + std::string_view host_view = helpers::substring(url_data, input_position); + auto [location, found_colon] = helpers::get_host_delimiter_location(url.is_special(), host_view); + input_position = (location != std::string_view::npos) ? input_position + location : input_size; + // Otherwise, if c is U+003A (:) and insideBrackets is false, then: + // Note: the 'found_colon' value is true if and only if a colon was encountered + // while not inside brackets. + if (found_colon) { + // If buffer is the empty string, validation error, return failure. + // Let host be the result of host parsing buffer with url is not special. + ada_log("HOST parsing ", host_view); + if(!url.parse_host(host_view)) { return url; } + ada_log("HOST parsing results in ", url.host.has_value() ? "none" : url.host.value()); + // Set url’s host to host, buffer to the empty string, and state to port state. + state = ada::state::PORT; + input_position++; + } + // Otherwise, if one of the following is true: + // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) + // - url is special and c is U+005C (\) + // The get_host_delimiter_location function either brings us to + // the colon outside of the bracket, or to one of those characters. + else { + + // If url is special and host_view is the empty string, validation error, return failure. + if (url.is_special() && host_view.empty()) { + url.is_valid = false; + return url; + } + + // Let host be the result of host parsing host_view with url is not special. 
+ if (host_view.empty()) { + url.host = ""; + } else { + if(!url.parse_host(host_view)) { return url; } + } + // Set url’s host to host, and state to path start state. + state = ada::state::PATH_START; + } + + break; + } + case ada::state::OPAQUE_PATH: { + ada_log("OPAQUE_PATH ", helpers::substring(url_data, input_position)); + std::string_view view = helpers::substring(url_data, input_position); + // If c is U+003F (?), then set url’s query to the empty string and state to query state. + size_t location = view.find('?'); + if(location != std::string_view::npos) { + view.remove_suffix(view.size() - location); + state = ada::state::QUERY; + input_position += location + 1; + } else { + input_position = input_size + 1; + } + url.has_opaque_path = true; + url.path = unicode::percent_encode(view, character_sets::C0_CONTROL_PERCENT_ENCODE); + break; + } + case ada::state::PORT: { + ada_log("PORT ", helpers::substring(url_data, input_position)); + std::string_view port_view = helpers::substring(url_data, input_position); + size_t consumed_bytes = url.parse_port(port_view, true); + input_position += consumed_bytes; + if(!url.is_valid) { return url; } + state = state::PATH_START; + [[fallthrough]]; + } + case ada::state::PATH_START: { + ada_log("PATH_START ", helpers::substring(url_data, input_position)); + + // If url is special, then: + if (url.is_special()) { + // Set state to path state. + state = ada::state::PATH; + + // Optimization: Avoiding going into PATH state improves the performance of urls ending with /. + if (input_position == input_size) { + url.path = "/"; + return url; + } + // If c is neither U+002F (/) nor U+005C (\), then decrease pointer by 1. + // We know that (input_position == input_size) is impossible here, because of the previous if-check. 
+ if ((url_data[input_position] != '/') && (url_data[input_position] != '\\')) { + break; + } + } + // Otherwise, if state override is not given and c is U+003F (?), + // set url’s query to the empty string and state to query state. + else if ((input_position != input_size) && (url_data[input_position] == '?')) { + state = ada::state::QUERY; + } + // Otherwise, if c is not the EOF code point: + else if (input_position != input_size) { + // Set state to path state. + state = ada::state::PATH; + + // If c is not U+002F (/), then decrease pointer by 1. + if (url_data[input_position] != '/') { + break; + } + } + + input_position++; + break; + } + case ada::state::PATH: { + std::string_view view = helpers::substring(url_data, input_position); + ada_log("PATH ", helpers::substring(url_data, input_position)); + + // Most time, we do not need percent encoding. + // Furthermore, we can immediately locate the '?'. + size_t locofquestionmark = view.find('?'); + if(locofquestionmark != std::string_view::npos) { + state = ada::state::QUERY; + view.remove_suffix(view.size()-locofquestionmark); + input_position += locofquestionmark + 1; + } else { + input_position = input_size + 1; + } + if(!helpers::parse_prepared_path(view, url.get_scheme_type(), url.path)) { return url; } + break; + } + case ada::state::FILE_SLASH: { + ada_log("FILE_SLASH ", helpers::substring(url_data, input_position)); + + // If c is U+002F (/) or U+005C (\), then: + if ((input_position != input_size) && (url_data[input_position] == '/' || url_data[input_position] == '\\')) { + ada_log("FILE_SLASH c is U+002F or U+005C"); + // Set state to file host state. + state = ada::state::FILE_HOST; + input_position++; + } else { + ada_log("FILE_SLASH otherwise"); + // If base is non-null and base’s scheme is "file", then: + // Note: it is unsafe to do base_url->scheme unless you know that + // base_url_has_value() is true. 
+ if (base_url != nullptr && base_url->get_scheme_type() == ada::scheme::type::FILE) { + // Set url’s host to base’s host. + url.host = base_url->host; + + // If the code point substring from pointer to the end of input does not start with + // a Windows drive letter and base’s path[0] is a normalized Windows drive letter, + // then append base’s path[0] to url’s path. + if (!base_url->path.empty()) { + if (!checkers::is_windows_drive_letter(helpers::substring(url_data, input_position))) { + std::string_view first_base_url_path = base_url->path; + first_base_url_path.remove_prefix(1); + size_t loc = first_base_url_path.find('/'); + if(loc != std::string_view::npos) { + first_base_url_path.remove_suffix(first_base_url_path.size() - loc); + } + if (checkers::is_normalized_windows_drive_letter(first_base_url_path)) { + url.path += '/'; + url.path += first_base_url_path; + } + } + } + } + + // Set state to path state, and decrease pointer by 1. + state = ada::state::PATH; + } + + break; + } + case ada::state::FILE_HOST: { + std::string_view view = helpers::substring(url_data, input_position); + ada_log("FILE_HOST ", helpers::substring(url_data, input_position)); + + size_t location = view.find_first_of("/\\?"); + std::string_view file_host_buffer(view.data(), (location != std::string_view::npos) ? location : view.size()); + + if (checkers::is_windows_drive_letter(file_host_buffer)) { + state = ada::state::PATH; + } else if (file_host_buffer.empty()) { + // Set url’s host to the empty string. + url.host = ""; + // Set state to path start state. + state = ada::state::PATH_START; + } else { + size_t consumed_bytes = file_host_buffer.size(); + input_position += consumed_bytes; + // Let host be the result of host parsing buffer with url is not special. + if(!url.parse_host(file_host_buffer)) { return url; } + + // If host is "localhost", then set host to the empty string. 
+ if (url.host.has_value() && url.host.value() == "localhost") { + url.host = ""; + } + + // Set buffer to the empty string and state to path start state. + state = ada::state::PATH_START; + } + + break; + } + case ada::state::FILE: { + ada_log("FILE ", helpers::substring(url_data, input_position)); + std::string_view file_view = helpers::substring(url_data, input_position); + + // Set url’s scheme to "file". + url.set_scheme("file"); + + // Set url’s host to the empty string. + url.host = ""; + + // If c is U+002F (/) or U+005C (\), then: + if (input_position != input_size && (url_data[input_position] == '/' || url_data[input_position] == '\\')) { + ada_log("FILE c is U+002F or U+005C"); + // Set state to file slash state. + state = ada::state::FILE_SLASH; + } + // Otherwise, if base is non-null and base’s scheme is "file": + else if (base_url != nullptr && base_url->get_scheme_type() == ada::scheme::type::FILE) { + // Set url’s host to base’s host, url’s path to a clone of base’s path, and url’s query to base’s query. + ada_log("FILE base non-null"); + url.host = base_url->host; + url.path = base_url->path; + url.has_opaque_path = base_url->has_opaque_path; + url.query = base_url->query; + + // If c is U+003F (?), then set url’s query to the empty string and state to query state. + if (input_position != input_size && url_data[input_position] == '?') { + state = ada::state::QUERY; + } + // Otherwise, if c is not the EOF code point: + else if (input_position != input_size) { + // Set url’s query to null. + url.query = std::nullopt; + + // If the code point substring from pointer to the end of input does not start with a + // Windows drive letter, then shorten url’s path. + if (!checkers::is_windows_drive_letter(file_view)) { + helpers::shorten_path(url.path, url.get_scheme_type()); + } + // Otherwise: + else { + // Set url’s path to an empty list. + url.path.clear(); + url.has_opaque_path = true; + } + + // Set state to path state and decrease pointer by 1. 
+ state = ada::state::PATH; + break; + } + } + // Otherwise, set state to path state, and decrease pointer by 1. + else { + ada_log("FILE go to path"); + state = ada::state::PATH; + break; + } + + input_position++; + break; + } + default: + ada::unreachable(); + } + } + ada_log("returning ", url.to_string()); + return url; + } + +} // namespace ada::parser +/* end file src/parser.cpp */ +/* end file src/ada.cpp */ diff --git a/deps/ada/ada.gyp b/deps/ada/ada.gyp new file mode 100644 index 00000000000000..1171e8750755e1 --- /dev/null +++ b/deps/ada/ada.gyp @@ -0,0 +1,32 @@ +{ + 'variables': { + 'v8_enable_i18n_support%': 1, + }, + 'targets': [ + { + 'target_name': 'ada', + 'type': 'static_library', + 'include_dirs': ['.'], + 'direct_dependent_settings': { + 'include_dirs': ['.'], + }, + 'sources': ['ada.cpp'], + 'conditions': [ + ['v8_enable_i18n_support==0', { + 'defines': ['ADA_HAS_ICU=0'], + }], + ['v8_enable_i18n_support==1', { + 'dependencies': [ + '<(icu_gyp_path):icui18n', + '<(icu_gyp_path):icuuc', + ], + }], + ['OS=="win" and v8_enable_i18n_support==1', { + 'dependencies': [ + '<(icu_gyp_path):icudata', + ], + }], + ] + }, + ] +} diff --git a/deps/ada/ada.h b/deps/ada/ada.h new file mode 100644 index 00000000000000..9916f41fd23b28 --- /dev/null +++ b/deps/ada/ada.h @@ -0,0 +1,4423 @@ +/* auto-generated on 2023-02-26 15:07:41 -0500. Do not edit! */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada.h +/* begin file include/ada.h */ +/** + * @file ada.h + * @brief Includes all definitions for Ada. + */ +#ifndef ADA_H +#define ADA_H + +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/character_sets-inl.h +/* begin file include/ada/character_sets-inl.h */ +/** + * @file character_sets-inl.h + * @brief Definitions of the character sets used by unicode functions. 
+ * @author Node.js + * @see https://github.com/nodejs/node/blob/main/src/node_url_tables.cc + */ +#ifndef ADA_CHARACTER_SETS_INL_H +#define ADA_CHARACTER_SETS_INL_H + +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/character_sets.h +/* begin file include/ada/character_sets.h */ +/** + * @file character_sets.h + * @brief Declaration of the character sets used by unicode functions. + * @author Node.js + * @see https://github.com/nodejs/node/blob/main/src/node_url_tables.cc + */ +#ifndef ADA_CHARACTER_SETS_H +#define ADA_CHARACTER_SETS_H + +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/common_defs.h +/* begin file include/ada/common_defs.h */ +/** + * @file common_defs.h + * @brief Common definitions for cross-platform compiler support. + */ +#ifndef ADA_COMMON_DEFS_H +#define ADA_COMMON_DEFS_H + +#ifdef _MSC_VER +#define ADA_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + */ +#ifdef __clang__ +// clang under visual studio +#define ADA_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define ADA_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. 
+  #define ADA_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name);
+  #define ADA_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
+  // Wraps `block` between a begin/end marker pair carrying the same name.
+  #define ADA_DEBUG_BLOCK(name, block) ADA_BEGIN_DEBUG_BLOCK(name); block; ADA_END_DEBUG_BLOCK(name);
+#else
+  // No markers are emitted on other compilers, but the wrapped code must still run.
+  #define ADA_BEGIN_DEBUG_BLOCK(name)
+  #define ADA_END_DEBUG_BLOCK(name)
+  #define ADA_DEBUG_BLOCK(name, block) block;
+#endif
+
+// Align to N-byte boundary.
+// The mask arithmetic below is only correct when n is a power of two.
+#define ADA_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
+#define ADA_ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
+
+#define ADA_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
+
+#if defined(ADA_REGULAR_VISUAL_STUDIO)
+
+  #define ada_really_inline __forceinline
+  #define ada_never_inline __declspec(noinline)
+
+  #define ada_unused
+  #define ada_warn_unused
+
+  // No branch-prediction hint here: the macros only forward their argument.
+  // The argument is parenthesised so that the expansion stays a single operand.
+  #ifndef ada_likely
+  #define ada_likely(x) (x)
+  #endif
+  #ifndef ada_unlikely
+  #define ada_unlikely(x) (x)
+  #endif
+
+  #define ADA_PUSH_DISABLE_WARNINGS __pragma(warning( push ))
+  #define ADA_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 ))
+  #define ADA_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
+  // Get rid of Intellisense-only warnings (Code Analysis)
+  // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
+  #ifdef __has_include
+  #if __has_include(<CppCoreCheck\Warnings.h>)
+  #include <CppCoreCheck\Warnings.h>
+  #define ADA_DISABLE_UNDESIRED_WARNINGS ADA_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
+  #endif
+  #endif
+
+  #ifndef ADA_DISABLE_UNDESIRED_WARNINGS
+  #define ADA_DISABLE_UNDESIRED_WARNINGS
+  #endif
+
+  #define ADA_DISABLE_DEPRECATED_WARNING ADA_DISABLE_VS_WARNING(4996)
+  #define ADA_DISABLE_STRICT_OVERFLOW_WARNING
+  #define ADA_POP_DISABLE_WARNINGS __pragma(warning( pop ))
+
+#else // ADA_REGULAR_VISUAL_STUDIO
+
+  #define ada_really_inline inline __attribute__((always_inline))
+  #define ada_never_inline inline __attribute__((noinline))
+
+  #define ada_unused __attribute__((unused))
+  #define ada_warn_unused __attribute__((warn_unused_result))
+
+  #ifndef ada_likely
+  #define ada_likely(x) __builtin_expect(!!(x), 1)
+  #endif
+  #ifndef ada_unlikely
+  #define ada_unlikely(x) __builtin_expect(!!(x), 0)
+  #endif
+
+  #define ADA_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")
+  // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary
+  #define ADA_PUSH_DISABLE_ALL_WARNINGS ADA_PUSH_DISABLE_WARNINGS \
+    ADA_DISABLE_GCC_WARNING(-Weffc++) \
+    ADA_DISABLE_GCC_WARNING(-Wall) \
+    ADA_DISABLE_GCC_WARNING(-Wconversion) \
+    ADA_DISABLE_GCC_WARNING(-Wextra) \
+    ADA_DISABLE_GCC_WARNING(-Wattributes) \
+    ADA_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \
+    ADA_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \
+    ADA_DISABLE_GCC_WARNING(-Wreturn-type) \
+    ADA_DISABLE_GCC_WARNING(-Wshadow) \
+    ADA_DISABLE_GCC_WARNING(-Wunused-parameter) \
+    ADA_DISABLE_GCC_WARNING(-Wunused-variable)
+  #define ADA_PRAGMA(P) _Pragma(#P)
+  #define ADA_DISABLE_GCC_WARNING(WARNING) ADA_PRAGMA(GCC diagnostic ignored #WARNING)
+  #if defined(ADA_CLANG_VISUAL_STUDIO)
+  #define ADA_DISABLE_UNDESIRED_WARNINGS ADA_DISABLE_GCC_WARNING(-Wmicrosoft-include)
+  #else
+  #define ADA_DISABLE_UNDESIRED_WARNINGS
+  #endif
+  #define ADA_DISABLE_DEPRECATED_WARNING ADA_DISABLE_GCC_WARNING(-Wdeprecated-declarations)
+  #define ADA_DISABLE_STRICT_OVERFLOW_WARNING ADA_DISABLE_GCC_WARNING(-Wstrict-overflow)
+  #define ADA_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop")
+
+#endif // ADA_REGULAR_VISUAL_STUDIO
+
+#if defined(ADA_VISUAL_STUDIO)
+  /**
+   * It does not matter here whether you are using
+   * the regular visual studio or clang under visual
+   * studio.
+   */
+  #if ADA_USING_LIBRARY
+  #define ADA_DLLIMPORTEXPORT __declspec(dllimport)
+  #else
+  #define ADA_DLLIMPORTEXPORT __declspec(dllexport)
+  #endif
+#else
+  #define ADA_DLLIMPORTEXPORT
+#endif
+
+/// If EXPR is an error, returns it.
+#define ADA_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
+
+// __has_cpp_attribute is part of C++20
+#if !defined(__has_cpp_attribute)
+#define __has_cpp_attribute(x) 0
+#endif
+
+
+#if __has_cpp_attribute(gnu::noinline)
+#define ADA_ATTRIBUTE_NOINLINE [[gnu::noinline]]
+#else
+#define ADA_ATTRIBUTE_NOINLINE
+#endif
+
+namespace ada {
+  /**
+   * Marks a code path that the caller guarantees is never executed.
+   * Uses the compiler's unreachable intrinsic on GCC-compatible compilers and
+   * on MSVC; on any other compiler the body is empty.
+   */
+  [[noreturn]] inline void unreachable() {
+#ifdef __GNUC__
+    __builtin_unreachable();
+#elif defined(_MSC_VER)
+    __assume(false);
+#else
+#endif
+  }
+}
+
+
+
+#if defined(__GNUC__) && !defined(__clang__)
+#if __GNUC__ <= 8
+#define ADA_OLD_GCC 1
+#endif // __GNUC__ <= 8
+#endif // defined(__GNUC__) && !defined(__clang__)
+
+#if ADA_OLD_GCC
+#define ada_constexpr
+#else
+#define ada_constexpr constexpr
+#endif
+
+  #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+    #define ADA_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+  #elif defined(_WIN32)
+    #define ADA_IS_BIG_ENDIAN 0
+  #else
+    #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+      #include <machine/endian.h>
+    #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
+      #include <sys/byteorder.h>
+    #else // defined(__APPLE__) || defined(__FreeBSD__)
+
+      #ifdef __has_include
+        #if __has_include(<endian.h>)
+          #include <endian.h>
+        #endif //__has_include(<endian.h>)
+      #endif //__has_include
+
+    #endif // defined(__APPLE__) || defined(__FreeBSD__)
+
+
+    // Assume little-endian when the byte-order macros are still unavailable.
+    // A single #if/#elif/#else chain guarantees ADA_IS_BIG_ENDIAN is defined
+    // exactly once, whichever branch is taken.
+    #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+      #define ADA_IS_BIG_ENDIAN 0
+    #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+      #define ADA_IS_BIG_ENDIAN 0
+    #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+      #define ADA_IS_BIG_ENDIAN 1
+    #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+  #endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+
+
+#ifndef ADA_HAS_ICU
+#if __has_include(<unicode/uidna.h>)
+#define ADA_HAS_ICU 1
+#else
+#define ADA_HAS_ICU 0
+#endif // __has_include(<unicode/uidna.h>)
+#endif // ADA_HAS_ICU
+
+#if ADA_HAS_ICU
+// NOTE(review): these three ICU header names are presumed; confirm against upstream ada.
+#include <unicode/utypes.h>
+#include <unicode/uidna.h>
+#include <unicode/utf8.h>
+#endif // ADA_HAS_ICU
+
+#define ADA_WINDOWS_TO_ASCII_FALLBACK 0 // we never use anything but ICU. No fallback.
+
+#endif // ADA_COMMON_DEFS_H
+/* end file include/ada/common_defs.h */
+#include <cstdint>
+
+/**
+ * @namespace ada::character_sets
+ * @brief Includes the definitions for unicode character sets.
+ */
+namespace ada::character_sets {
+  ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i);
+} // namespace ada::character_sets
+
+#endif // ADA_CHARACTER_SETS_H
+/* end file include/ada/character_sets.h */
+
+namespace ada::character_sets {
+
+  // Each byte value owns a 4-byte slot ("%XX" plus a NUL terminator), so the
+  // percent-encoded form of byte b starts at offset b * 4.
+  constexpr char hex[1024] =
+    "%00\0%01\0%02\0%03\0%04\0%05\0%06\0%07\0"
+    "%08\0%09\0%0A\0%0B\0%0C\0%0D\0%0E\0%0F\0"
+    "%10\0%11\0%12\0%13\0%14\0%15\0%16\0%17\0"
+    "%18\0%19\0%1A\0%1B\0%1C\0%1D\0%1E\0%1F\0"
+    "%20\0%21\0%22\0%23\0%24\0%25\0%26\0%27\0"
+    "%28\0%29\0%2A\0%2B\0%2C\0%2D\0%2E\0%2F\0"
+    "%30\0%31\0%32\0%33\0%34\0%35\0%36\0%37\0"
+    "%38\0%39\0%3A\0%3B\0%3C\0%3D\0%3E\0%3F\0"
+    "%40\0%41\0%42\0%43\0%44\0%45\0%46\0%47\0"
+    "%48\0%49\0%4A\0%4B\0%4C\0%4D\0%4E\0%4F\0"
+    "%50\0%51\0%52\0%53\0%54\0%55\0%56\0%57\0"
+    "%58\0%59\0%5A\0%5B\0%5C\0%5D\0%5E\0%5F\0"
+    "%60\0%61\0%62\0%63\0%64\0%65\0%66\0%67\0"
+    "%68\0%69\0%6A\0%6B\0%6C\0%6D\0%6E\0%6F\0"
+    "%70\0%71\0%72\0%73\0%74\0%75\0%76\0%77\0"
+    "%78\0%79\0%7A\0%7B\0%7C\0%7D\0%7E\0%7F\0"
+    "%80\0%81\0%82\0%83\0%84\0%85\0%86\0%87\0"
+    "%88\0%89\0%8A\0%8B\0%8C\0%8D\0%8E\0%8F\0"
"%90\0%91\0%92\0%93\0%94\0%95\0%96\0%97\0" + "%98\0%99\0%9A\0%9B\0%9C\0%9D\0%9E\0%9F\0" + "%A0\0%A1\0%A2\0%A3\0%A4\0%A5\0%A6\0%A7\0" + "%A8\0%A9\0%AA\0%AB\0%AC\0%AD\0%AE\0%AF\0" + "%B0\0%B1\0%B2\0%B3\0%B4\0%B5\0%B6\0%B7\0" + "%B8\0%B9\0%BA\0%BB\0%BC\0%BD\0%BE\0%BF\0" + "%C0\0%C1\0%C2\0%C3\0%C4\0%C5\0%C6\0%C7\0" + "%C8\0%C9\0%CA\0%CB\0%CC\0%CD\0%CE\0%CF\0" + "%D0\0%D1\0%D2\0%D3\0%D4\0%D5\0%D6\0%D7\0" + "%D8\0%D9\0%DA\0%DB\0%DC\0%DD\0%DE\0%DF\0" + "%E0\0%E1\0%E2\0%E3\0%E4\0%E5\0%E6\0%E7\0" + "%E8\0%E9\0%EA\0%EB\0%EC\0%ED\0%EE\0%EF\0" + "%F0\0%F1\0%F2\0%F3\0%F4\0%F5\0%F6\0%F7\0" + "%F8\0%F9\0%FA\0%FB\0%FC\0%FD\0%FE\0%FF"; + + constexpr uint8_t C0_CONTROL_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 
7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t SPECIAL_QUERY_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x80, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 
| 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 
E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t QUERY_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 
| 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 
55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t USERINFO_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 
0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + 
// C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + constexpr uint8_t PATH_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 
0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 + }; + + ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i) { + return !!(a[i >> 3] & (1 << (i & 7))); + } + +} // namespace ada::character_sets + +#endif // ADA_CHARACTER_SETS_H +/* end file include/ada/character_sets-inl.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/checkers-inl.h +/* begin file include/ada/checkers-inl.h */ +/** + * @file checkers-inl.h + * @brief Definitions for URL specific checkers used within Ada. 
+ */ +#ifndef ADA_CHECKERS_INL_H +#define ADA_CHECKERS_INL_H + + +#include +#include + +namespace ada::checkers { + + inline bool has_hex_prefix_unsafe(std::string_view input) { + // This is actually efficient code, see has_hex_prefix for the assembly. + uint32_t value_one = 1; + bool is_little_endian = (reinterpret_cast(&value_one)[0] == 1); + uint16_t word0x{}; + std::memcpy(&word0x, "0x", 2); // we would use bit_cast in C++20 and the function could be constexpr. + uint16_t two_first_bytes{}; + std::memcpy(&two_first_bytes, input.data(),2); + if(is_little_endian) { two_first_bytes |= 0x2000; } else { two_first_bytes |= 0x020; } + return two_first_bytes == word0x; + } + + inline bool has_hex_prefix(std::string_view input) { + return input.size() >=2 && has_hex_prefix_unsafe(input); + } + + constexpr bool is_digit(char x) noexcept { return (x >= '0') & (x <= '9'); } + + constexpr char to_lower(char x) noexcept { return (x | 0x20); } + + constexpr bool is_alpha(char x) noexcept { return (to_lower(x) >= 'a') && (to_lower(x) <= 'z'); } + + inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept { + return input.size() >= 2 && (is_alpha(input[0]) && ((input[1] == ':') || (input[1] == '|'))) + && ((input.size() == 2) || (input[2] == '/' || input[2] == '\\' || input[2] == '?' 
|| input[2] == '#')); + } + + inline constexpr bool is_normalized_windows_drive_letter(std::string_view input) noexcept { + return input.size() >= 2 && (is_alpha(input[0]) && (input[1] == ':')); + } + + ada_really_inline constexpr bool begins_with(std::string_view view, std::string_view prefix) { + // in C++20, you have view.starts_with(prefix) + return view.size() >= prefix.size() && (view.substr(0, prefix.size()) == prefix); + } + +} // namespace ada::checkers + +#endif // ADA_CHECKERS_INL_H +/* end file include/ada/checkers-inl.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/log.h +/* begin file include/ada/log.h */ +/** + * @file log.h + * @brief Includes the definitions for logging. + * @private Excluded from docs through the doxygen file. + */ +#ifndef ADA_LOG_H +#define ADA_LOG_H + +#include +// To enable logging, set ADA_LOGGING to 1: +#ifndef ADA_LOGGING +#define ADA_LOGGING 0 +#endif + +namespace ada { + +/** + * Private function used for logging messages. + * @private + */ +template +ada_really_inline void inner_log([[maybe_unused]] T t) { +#if ADA_LOGGING + std::cout << t << std::endl; +#endif +} + + +/** + * Private function used for logging messages. + * @private + */ +template +ada_really_inline void inner_log([[maybe_unused]] T t, [[maybe_unused]] Args... args) { +#if ADA_LOGGING + std::cout << t; + inner_log(args...) ; +#endif +} + + +/** + * Log a message. + * @private + */ +template +ada_really_inline void log([[maybe_unused]] T t, [[maybe_unused]] Args... args) { +#if ADA_LOGGING + std::cout << "ADA_LOG: " << t; + inner_log(args...) ; +#endif +} + +/** + * Log a message. + * @private + */ +template +ada_really_inline void log([[maybe_unused]] T t) { +#if ADA_LOGGING + std::cout << "ADA_LOG: " << t << std::endl; +#endif + +} +} + +#if ADA_LOGGING + +#ifndef ada_log +#define ada_log(...) do { \ + ada::log(__VA_ARGS__); \ +} while(0) +#endif // ada_log +#else +#define ada_log(...) 
+#endif // ADA_LOGGING + +#endif // ADA_LOG_H +/* end file include/ada/log.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/encoding_type.h +/* begin file include/ada/encoding_type.h */ +/** + * @file encoding_type.h + * @brief Definition for supported encoding types. + */ +#ifndef ADA_ENCODING_TYPE_H +#define ADA_ENCODING_TYPE_H + +#include + +namespace ada { + + /** + * This specification defines three encodings with the same names as encoding schemes defined + * in the Unicode standard: UTF-8, UTF-16LE, and UTF-16BE. + * + * @see https://encoding.spec.whatwg.org/#encodings + */ + enum class encoding_type { + UTF8, + UTF_16LE, + UTF_16BE, + }; + + /** + * Convert a encoding_type to string. + */ + ada_warn_unused std::string to_string(encoding_type type); + +} // ada namespace + +#endif // ADA_ENCODING_TYPE_H +/* end file include/ada/encoding_type.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/helpers.h +/* begin file include/ada/helpers.h */ +/** + * @file helpers.h + * @brief Definitions for helper functions used within Ada. + */ +#ifndef ADA_HELPERS_H +#define ADA_HELPERS_H + +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/url.h +/* begin file include/ada/url.h */ +/** + * @file url.h + * @brief Declaration for the URL + */ +#ifndef ADA_URL_H +#define ADA_URL_H + +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/checkers.h +/* begin file include/ada/checkers.h */ +/** + * @file checkers.h + * @brief Declarations for URL specific checkers used within Ada. + */ +#ifndef ADA_CHECKERS_H +#define ADA_CHECKERS_H + + +#include +#include + +/** + * @namespace ada::checkers + * @brief Includes the definitions for validation functions + */ +namespace ada::checkers { + + /** + * Assuming that x is an ASCII letter, this function returns the lower case equivalent. 
+ * @details More likely to be inlined by the compiler and constexpr. + */ + constexpr char to_lower(char x) noexcept; + + /** + * Returns true if the character is an ASCII letter. Equivalent to std::isalpha but + * more likely to be inlined by the compiler. + * + * @attention std::isalpha is not constexpr generally. + */ + constexpr bool is_alpha(char x) noexcept; + + /** + * Check whether a string starts with 0x or 0X. The function is only + * safe if input.size() >=2. + * + * @see has_hex_prefix + */ + inline bool has_hex_prefix_unsafe(std::string_view input); + /** + * Check whether a string starts with 0x or 0X. + */ + inline bool has_hex_prefix(std::string_view input); + + /** + * Check whether x is an ASCII digit. More likely to be inlined than std::isdigit. + */ + constexpr bool is_digit(char x) noexcept; + + /** + * @details A string starts with a Windows drive letter if all of the following are true: + * + * - its length is greater than or equal to 2 + * - its first two code points are a Windows drive letter + * - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#). + * + * https://url.spec.whatwg.org/#start-with-a-windows-drive-letter + */ + inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept; + + /** + * @details A normalized Windows drive letter is a Windows drive letter of which the second code point is U+003A (:). + */ + inline constexpr bool is_normalized_windows_drive_letter(std::string_view input) noexcept; + + /** + * @warning Will be removed when Ada supports C++20. + */ + ada_really_inline constexpr bool begins_with(std::string_view view, std::string_view prefix); + + /** + * Returns true if an input is an ipv4 address. + */ + ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept; + + /** + * Returns a bitset. If the first bit is set, then at least one character needs + * percent encoding. If the second bit is set, a \\ is found. 
If the third bit is set + * then we have a dot. If the fourth bit is set, then we have a percent character. + */ + ada_really_inline constexpr uint8_t path_signature(std::string_view input) noexcept; + + /** + * Returns true if the length of the domain name and its labels are according to the specifications. + * The length of the domain must be 255 octets (253 characters not including the last 2 which are the empty + * label reserved at the end). When the empty label is included (a dot at the end), the domain name can have + * 254 characters. The length of a label must be at least 1 and at most 63 characters. + * @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034 + * @see https://www.unicode.org/reports/tr46/#ToASCII + */ + ada_really_inline constexpr bool verify_dns_length(std::string_view input) noexcept; + +} // namespace ada::checkers + +#endif //ADA_CHECKERS_H +/* end file include/ada/checkers.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/scheme.h +/* begin file include/ada/scheme.h */ +/** + * @file scheme.h + * @brief Declarations for the URL scheme. + */ +#ifndef ADA_SCHEME_H +#define ADA_SCHEME_H + + +#include +#include +#include + +/** + * @namespace ada::scheme + * @brief Includes the scheme declarations + */ +namespace ada::scheme { + + /** + * Type of the scheme as an enum. + * Using strings to represent a scheme type is not ideal because + * checking for types involves string comparisons. It is faster to use + * a simple integer. + */ + enum type { + HTTP = 0, + NOT_SPECIAL = 1, + HTTPS = 2, + WS = 3, + FTP = 4, + WSS = 5, + FILE = 6 + }; + + /** + * A special scheme is an ASCII string that is listed in the first column of the following table. + * The default port for a special scheme is listed in the second column on the same row. + * The default port for any other ASCII string is null. 
+ * + * @see https://url.spec.whatwg.org/#url-miscellaneous + * @param scheme + * @return True if scheme is a special scheme + */ + ada_really_inline constexpr bool is_special(std::string_view scheme); + + /** + * A special scheme is an ASCII string that is listed in the first column of the following table. + * The default port for a special scheme is listed in the second column on the same row. + * The default port for any other ASCII string is null. + * + * @see https://url.spec.whatwg.org/#url-miscellaneous + * @param scheme + * @return The special port + */ + constexpr uint16_t get_special_port(std::string_view scheme) noexcept; + + /** + * Returns the port number of a special scheme. + * @see https://url.spec.whatwg.org/#special-scheme + */ + constexpr uint16_t get_special_port(ada::scheme::type type) noexcept; + /** + * Returns the scheme of an input, or NOT_SPECIAL if it's not a special scheme defined by the spec. + */ + constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept; + +} // namespace ada::scheme + +#endif // ADA_SCHEME_H +/* end file include/ada/scheme.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/serializers.h +/* begin file include/ada/serializers.h */ +/** + * @file serializers.h + * @brief Definitions for the URL serializers. + */ +#ifndef ADA_SERIALIZERS_H +#define ADA_SERIALIZERS_H + + +#include +#include +#include + +/** + * @namespace ada::serializers + * @brief Includes the definitions for URL serializers + */ +namespace ada::serializers { + + /** + * Finds and returns the longest sequence of 0 values in an ipv6 input. + */ + void find_longest_sequence_of_ipv6_pieces(const std::array& address, size_t& compress, size_t& compress_length) noexcept; + + /** + * Serializes an ipv6 address. + * @details An IPv6 address is a 128-bit unsigned integer that identifies a network address. 
+ * @see https://url.spec.whatwg.org/#concept-ipv6-serializer + */ + std::string ipv6(const std::array& address) noexcept; + + /** + * Serializes an ipv4 address. + * @details An IPv4 address is a 32-bit unsigned integer that identifies a network address. + * @see https://url.spec.whatwg.org/#concept-ipv4-serializer + */ + std::string ipv4(const uint64_t address) noexcept; + +} // namespace ada::serializers + +#endif // ADA_SERIALIZERS_H +/* end file include/ada/serializers.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/unicode.h +/* begin file include/ada/unicode.h */ +/** + * @file unicode.h + * @brief Definitions for all unicode specific functions. + */ +#ifndef ADA_UNICODE_H +#define ADA_UNICODE_H + +#include +#include + +/** + * @namespace ada::unicode + * @brief Includes the definitions for unicode operations + */ +namespace ada::unicode { + + /** + * We receive a UTF-8 string representing a domain name. + * If the string is percent encoded, we apply percent decoding. + * + * Given a domain, we need to identify its labels. + * They are separated by label-separators: + * + * U+002E ( . ) FULL STOP + * U+FF0E ( . ) FULLWIDTH FULL STOP + * U+3002 ( 。 ) IDEOGRAPHIC FULL STOP + * U+FF61 ( 。 ) HALFWIDTH IDEOGRAPHIC FULL STOP + * + * They are all mapped to U+002E. + * + * We process each label into a string that should not exceed 63 octets. + * If the string is already punycode (starts with "xn--"), then we must + * scan it to look for unallowed code points. + * Otherwise, if the string is not pure ASCII, we need to transcode it + * to punycode by following RFC 3454 which requires us to + * - Map characters (see section 3), + * - Normalize (see section 4), + * - Reject forbidden characters, + * - Check for right-to-left characters and if so, check all requirements (see section 6), + * - Optionally reject based on unassigned code points (section 7). 
+ * + * The Unicode standard provides a table of code points with a mapping, a list of + * forbidden code points and so forth. This table is subject to change and will + * vary based on the implementation. For Unicode 15, the table is at + * https://www.unicode.org/Public/idna/15.0.0/IdnaMappingTable.txt + * If you use ICU, they parse this table and map it to code using a Python script. + * + * The resulting strings should not exceed 255 octets according to RFC 1035 section 2.3.4. + * ICU checks for label size and domain size, but if we pass "be_strict = false", these + * errors are ignored. + * + * @see https://url.spec.whatwg.org/#concept-domain-to-ascii + * + */ + bool to_ascii(std::optional& out, std::string_view plain, bool be_strict, size_t first_percent); + + /** + * Checks if the input has tab or newline characters. + * + * @attention The has_tabs_or_newline function is a bottleneck and it is simple enough that compilers + * like GCC can 'autovectorize' it. + */ + ada_really_inline constexpr bool has_tabs_or_newline(std::string_view user_input) noexcept; + + /** + * Checks if the input is a forbidden host code point. + * @see https://url.spec.whatwg.org/#forbidden-host-code-point + */ + ada_really_inline constexpr bool is_forbidden_host_code_point(const char c) noexcept; + + + /** + * Checks if the input contains a forbidden domain code point. + * @see https://url.spec.whatwg.org/#forbidden-domain-code-point + */ + ada_really_inline constexpr bool contains_forbidden_domain_code_point(char * input, size_t length) noexcept; + + /** + * Checks if the input is a forbidden domain code point. + * @see https://url.spec.whatwg.org/#forbidden-domain-code-point + */ + ada_really_inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept; + + /** + * Checks if the input is alphanumeric, '+', '-' or '.' 
+ */ + ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept; + + /** + * @details An ASCII hex digit is an ASCII upper hex digit or ASCII lower hex digit. + * An ASCII upper hex digit is an ASCII digit or a code point in the range U+0041 (A) to U+0046 (F), inclusive. + * An ASCII lower hex digit is an ASCII digit or a code point in the range U+0061 (a) to U+0066 (f), inclusive. + */ + ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept; + + /** + * Checks if the input is a C0 control or space character. + * + * @details A C0 control or space is a C0 control or U+0020 SPACE. + * A C0 control is a code point in the range U+0000 NULL to U+001F INFORMATION SEPARATOR ONE, inclusive. + */ + ada_really_inline constexpr bool is_c0_control_or_space(const char c) noexcept; + + /** + * Checks if the input is an ASCII tab or newline character. + * + * @details An ASCII tab or newline is U+0009 TAB, U+000A LF, or U+000D CR. + */ + ada_really_inline constexpr bool is_ascii_tab_or_newline(const char c) noexcept; + + /** + * @details A double-dot path segment must be ".." or an ASCII case-insensitive match for ".%2e", "%2e.", or "%2e%2e". + */ + ada_really_inline ada_constexpr bool is_double_dot_path_segment(const std::string_view input) noexcept; + + /** + * @details A single-dot path segment must be "." or an ASCII case-insensitive match for "%2e". + */ + ada_really_inline constexpr bool is_single_dot_path_segment(const std::string_view input) noexcept; + + /** + * @details ipv4 character might contain 0-9 or a-f character ranges. + */ + ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept; + + /** + * @details Convert hex to binary. + */ + unsigned constexpr convert_hex_to_binary(char c) noexcept; + + /** + * first_percent should be = input.find('%') + * + * @todo It would be faster as noexcept maybe, but it could be unsafe since building the returned std::string can throw. 
+ * @author Node.js + * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L245 + * @see https://encoding.spec.whatwg.org/#utf-8-decode-without-bom + */ + std::string percent_decode(const std::string_view input, size_t first_percent); + + /** + * Returns a percent-encoding string whether percent encoding was needed or not. + * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 + */ + std::string percent_encode(const std::string_view input, const uint8_t character_set[]); + + /** + * Returns true if percent encoding was needed, in which case, we store + * the percent-encoded content in 'out'. Otherwise, out is left unchanged. + * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 + */ + bool percent_encode(const std::string_view input, const uint8_t character_set[], std::string& out); + + /** + * Lowers the string in-place, assuming that the content is ASCII. + * Return true if the content was ASCII. + */ + constexpr bool to_lower_ascii(char * input, size_t length) noexcept; +} // namespace ada::unicode + +#endif // ADA_UNICODE_H +/* end file include/ada/unicode.h */ + +#include +#include +#include +#include +#include +#include + +namespace ada { + /** + * @brief Generic URL struct. + * + * @details To disambiguate from a valid URL string it can also be referred to as a URL record. + * A URL is a struct that represents a universal identifier. + * @see https://url.spec.whatwg.org/#url-representation + */ + struct url { + url() = default; + url(const url &u) = default; + url(url &&u) noexcept = default; + url &operator=(url &&u) noexcept = default; + url &operator=(const url &u) = default; + ADA_ATTRIBUTE_NOINLINE ~url() = default; + + /** + * @private + * A URL’s username is an ASCII string identifying a username. It is initially the empty string. + */ + std::string username{}; + + /** + * @private + * A URL’s password is an ASCII string identifying a password. It is initially the empty string. 
+ */ + std::string password{}; + + /** + * @private + * A URL’s host is null or a host. It is initially null. + */ + std::optional host{}; + + /** + * @private + * A URL’s port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null. + */ + std::optional port{}; + + /** + * @private + * A URL’s path is either an ASCII string or a list of zero or more ASCII strings, usually identifying a location. + */ + std::string path{}; + + /** + * @private + * A URL’s query is either null or an ASCII string. It is initially null. + */ + std::optional query{}; + + /** + * @private + * A URL’s fragment is either null or an ASCII string that can be used for further processing on the resource + * the URL’s other components identify. It is initially null. + */ + std::optional fragment{}; + + /** + * @see https://url.spec.whatwg.org/#dom-url-href + * @see https://url.spec.whatwg.org/#concept-url-serializer + */ + [[nodiscard]] std::string get_href() const noexcept; + + /** + * The origin getter steps are to return the serialization of this’s URL’s origin. [HTML] + * @see https://url.spec.whatwg.org/#concept-url-origin + */ + [[nodiscard]] std::string get_origin() const noexcept; + + /** + * The protocol getter steps are to return this’s URL’s scheme, followed by U+003A (:). + * @see https://url.spec.whatwg.org/#dom-url-protocol + */ + [[nodiscard]] std::string get_protocol() const noexcept; + + /** + * Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized. + * @see https://url.spec.whatwg.org/#dom-url-host + */ + [[nodiscard]] std::string get_host() const noexcept; + + /** + * Return this’s URL’s host, serialized. + * @see https://url.spec.whatwg.org/#dom-url-hostname + */ + [[nodiscard]] std::string get_hostname() const noexcept; + + /** + * The pathname getter steps are to return the result of URL path serializing this’s URL. 
+ * @see https://url.spec.whatwg.org/#dom-url-pathname + */ + [[nodiscard]] std::string get_pathname() const noexcept; + + /** + * Return U+003F (?), followed by this’s URL’s query. + * @see https://url.spec.whatwg.org/#dom-url-search + */ + [[nodiscard]] std::string get_search() const noexcept; + + /** + * The username getter steps are to return this’s URL’s username. + * @see https://url.spec.whatwg.org/#dom-url-username + */ + [[nodiscard]] std::string get_username() const noexcept; + + /** + * @return Returns true on successful operation. + * @see https://url.spec.whatwg.org/#dom-url-username + */ + bool set_username(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-password + */ + bool set_password(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-port + */ + bool set_port(const std::string_view input); + + /** + * This function always succeeds. + * @see https://url.spec.whatwg.org/#dom-url-hash + */ + void set_hash(const std::string_view input); + + /** + * This function always succeeds. + * @see https://url.spec.whatwg.org/#dom-url-search + */ + void set_search(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-pathname + */ + bool set_pathname(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-host + */ + bool set_host(const std::string_view input); + + /** + * @return Returns true on success. + * @see https://url.spec.whatwg.org/#dom-url-hostname + */ + bool set_hostname(const std::string_view input); + + /** + * @return Returns true on success. 
+ * @see https://url.spec.whatwg.org/#dom-url-protocol + */ + bool set_protocol(const std::string_view input); + + /** + * @see https://url.spec.whatwg.org/#dom-url-href + */ + bool set_href(const std::string_view input); + + /** + * @private + * + * Sets the host or hostname according to override condition. + * Return true on success. + * @see https://url.spec.whatwg.org/#hostname-state + */ + bool set_host_or_hostname(std::string_view input, bool override_hostname); + + /** + * The password getter steps are to return this’s URL’s password. + * @see https://url.spec.whatwg.org/#dom-url-password + */ + [[nodiscard]] std::string get_password() const noexcept; + + /** + * Return this’s URL’s port, serialized. + * @see https://url.spec.whatwg.org/#dom-url-port + */ + [[nodiscard]] std::string get_port() const noexcept; + + /** + * Return U+0023 (#), followed by this’s URL’s fragment. + * @see https://url.spec.whatwg.org/#dom-url-hash + */ + [[nodiscard]] std::string get_hash() const noexcept; + + /** + * Returns true if this URL has a valid domain as per RFC 1034 and + * corresponding specifications. Among other things, it requires + * that the domain string has fewer than 255 octets. + */ + [[nodiscard]] bool has_valid_domain() const noexcept; + + /** + * Used for returning the validity from the result of the URL parser. + */ + bool is_valid{true}; + + /** + * A URL has an opaque path if its path is a string. + */ + bool has_opaque_path{false}; + + /** + * A URL includes credentials if its username or password is not the empty string. + */ + [[nodiscard]] ada_really_inline bool includes_credentials() const noexcept; + + /** + * A URL is special if its scheme is a special scheme. A URL is not special if its scheme is not a special scheme. + */ + [[nodiscard]] ada_really_inline bool is_special() const noexcept; + + /** + * @private + * + * Return the 'special port' if the URL is special and not 'file'. + * Returns 0 otherwise. 
+ */ + [[nodiscard]] inline uint16_t get_special_port() const; + + /** + * @private + * + * Return the scheme type. Note that it is faster to do + * get_scheme_type() == ada::scheme::type::FILE than to do + * get_scheme() == "file", since the former is a direct integer comparison, + * while the other involves a (cheap) string test. + */ + [[nodiscard]] ada_really_inline ada::scheme::type get_scheme_type() const noexcept; + + /** + * @private + * + * Get the default port if the url's scheme has one, returns 0 otherwise. + */ + [[nodiscard]] ada_really_inline uint16_t scheme_default_port() const noexcept; + /** + * @private + * + * A URL cannot have a username/password/port if its host is null or the empty string, or its scheme is "file". + */ + [[nodiscard]] inline bool cannot_have_credentials_or_port() const; + + /** + * @private + * + * Parse a port (16-bit decimal digit) from the provided input. + * We assume that the input does not contain spaces or tabs + * within the ASCII digits. + * It returns how many bytes were consumed when a number is successfully parsed. + * @return On failure, it returns zero. + * @see https://url.spec.whatwg.org/#host-parsing + */ + ada_really_inline size_t parse_port(std::string_view view, bool check_trailing_content = false) noexcept; + + /** + * @private + * + * Return a string representing the scheme. Note that get_scheme_type() should often be used instead. + * @see https://url.spec.whatwg.org/#dom-url-protocol + */ + [[nodiscard]] inline std::string_view get_scheme() const noexcept; + /** + * Set the scheme for this URL. The provided scheme should be a valid + * scheme string, be lower-cased, not contain spaces or tabs. It should + * have no spurious trailing or leading content. + */ + inline void set_scheme(std::string&& new_scheme) noexcept; + + /** + * @private + * + * Take the scheme from another URL. The scheme string is moved from the + * provided url. 
+ */ + inline void copy_scheme(ada::url&& u) noexcept; + + /** + * @private + * + * Take the scheme from another URL. The scheme string is copied from the + * provided url. + */ + inline void copy_scheme(const ada::url& u); + + /** + * @private + * + * Parse the host from the provided input. We assume that + * the input does not contain spaces or tabs. Control + * characters and spaces are not trimmed (they should have + * been removed if needed). + * Return true on success. + * @see https://url.spec.whatwg.org/#host-parsing + */ + [[nodiscard]] ada_really_inline bool parse_host(std::string_view input); + + /** + * @private + * + * Parse the path from the provided input. + * Return true on success. Control characters not + * trimmed from the ends (they should have + * been removed if needed). + * + * The input is expected to be UTF-8. + * + * @see https://url.spec.whatwg.org/ + */ + [[nodiscard]] ada_really_inline bool parse_path(const std::string_view input); + + /** + * @private + */ + template + [[nodiscard]] ada_really_inline bool parse_scheme(const std::string_view input); + + /** + * Returns a JSON string representation of this URL. + */ + std::string to_string() const; + + private: + + /** + * @private + * + * Return true on success. + * @see https://url.spec.whatwg.org/#concept-ipv4-parser + */ + [[nodiscard]] bool parse_ipv4(std::string_view input); + + /** + * @private + * + * Return true on success. + * @see https://url.spec.whatwg.org/#concept-ipv6-parser + */ + [[nodiscard]] bool parse_ipv6(std::string_view input); + + /** + * @private + * + * Return true on success. + * @see https://url.spec.whatwg.org/#concept-opaque-host-parser + */ + [[nodiscard]] bool parse_opaque_host(std::string_view input); + + /** + * @private + */ + ada::scheme::type type{ada::scheme::type::NOT_SPECIAL}; + + /** + * @private + * + * A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a + * URL for further processing after parsing. 
It is initially the empty string. + * We only set non_special_scheme when the scheme is non-special, otherwise we avoid constructing + * string. + * + * Special schemes are stored in ada::scheme::details::is_special_list so we typically do not need + * to store them in each url instance. + */ + std::string non_special_scheme{}; + + }; // struct url + + + inline std::ostream& operator<<(std::ostream& out, const ada::url& u); +} // namespace ada + +#endif // ADA_URL_H +/* end file include/ada/url.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/state.h +/* begin file include/ada/state.h */ +/** + * @file state.h + * @brief Definitions for the states of the URL state machine. + */ +#ifndef ADA_STATE_H +#define ADA_STATE_H + + +#include + +namespace ada { + + /** + * @see https://url.spec.whatwg.org/#url-parsing + */ + enum class state { + AUTHORITY, + SCHEME_START, + SCHEME, + HOST, + NO_SCHEME, + FRAGMENT, + RELATIVE_SCHEME, + RELATIVE_SLASH, + FILE, + FILE_HOST, + FILE_SLASH, + PATH_OR_AUTHORITY, + SPECIAL_AUTHORITY_IGNORE_SLASHES, + SPECIAL_AUTHORITY_SLASHES, + SPECIAL_RELATIVE_OR_AUTHORITY, + QUERY, + PATH, + PATH_START, + OPAQUE_PATH, + PORT, + }; + + /** + * Stringify a URL state machine state. + */ + ada_warn_unused std::string to_string(ada::state s); + +} // ada namespace + +#endif // ADA_STATE_H +/* end file include/ada/state.h */ + +#include +#include + +/** + * @namespace ada::helpers + * @brief Includes the definitions for helper functions + */ +namespace ada::helpers { + + /** + * This function is used to prune a fragment from a url, and returning the removed string if input has fragment. + * + * @details prune_fragment seeks the first '#' and returns everything after it as a + * string_view, and modifies (in place) the input so that it points at everything + * before the '#'. If no '#' is found, the input is left unchanged and std::nullopt is returned. 
+ * + * @attention The function is non-allocating and it does not throw. + * @returns Note that the returned string_view might be empty! + */ + ada_really_inline std::optional prune_fragment(std::string_view& input) noexcept; + + /** + * Defined by the URL specification, shorten a URLs paths. + * @see https://url.spec.whatwg.org/#shorten-a-urls-path + */ + ada_really_inline void shorten_path(std::string& path, ada::scheme::type type) noexcept; + + + /** + * @private + * + * Parse the path from the provided input and append to the existing + * (possibly empty) path. The input cannot contain tabs and spaces: it + * is the user's responsibility to check. + * + * The input is expected to be UTF-8. + * + * @return true on success. + * @see https://url.spec.whatwg.org/ + */ + ada_really_inline bool parse_prepared_path(const std::string_view input, ada::scheme::type type, std::string& path); + + /** + * Remove and mutate all ASCII tab or newline characters from an input. + */ + ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept; + + /** + * Return the substring from input going from index pos to the end. If pos > input.size(), + * it returns an empty string_view. This function cannot throw. + */ + ada_really_inline std::string_view substring(std::string_view input, size_t pos) noexcept; + + /** + * Returns a host's delimiter location depending on the state of the instance, and + * whether a colon was found outside brackets. + * Used by the host parser. + */ + ada_really_inline std::pair get_host_delimiter_location(const bool is_special, std::string_view& view) noexcept; + + /** + * Removes leading and trailing C0 control and whitespace characters from string. 
+ */ + ada_really_inline void trim_c0_whitespace(std::string_view& input) noexcept; + + /** + * @see https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path + */ + ada_really_inline void strip_trailing_spaces_from_opaque_path(ada::url& url) noexcept; + + /** + * Reverse the order of the bytes. + */ + ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept; + + /** + * Reverse the order of the bytes but only if the system is big endian + */ + ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept; + + /** + * Finds the delimiter of a view in authority state. + */ + ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept; + + /** + * Finds the delimiter of a view in authority state. + */ + ada_really_inline size_t find_authority_delimiter(std::string_view view) noexcept; + +} // namespace ada::helpers + +#endif // ADA_HELPERS_H +/* end file include/ada/helpers.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/parser.h +/* begin file include/ada/parser.h */ +/** + * @file parser.h + * @brief Definitions for the parser. + */ +#ifndef ADA_PARSER_H +#define ADA_PARSER_H + +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/expected.h +/* begin file include/ada/expected.h */ +/** + * @file expected.h + * @brief Definitions for std::expected + * @private Excluded from docs through the doxygen file. + */ +/// +// expected - An implementation of std::expected with extensions +// Written in 2017 by Sy Brand (tartanllama@gmail.com, @TartanLlama) +// +// Documentation available at http://tl.tartanllama.xyz/ +// +// To the extent possible under law, the author(s) have dedicated all +// copyright and related and neighboring rights to this software to the +// public domain worldwide. This software is distributed without any warranty. 
+// +// You should have received a copy of the CC0 Public Domain Dedication +// along with this software. If not, see +// . +/// + +#ifndef TL_EXPECTED_HPP +#define TL_EXPECTED_HPP + +#define TL_EXPECTED_VERSION_MAJOR 1 +#define TL_EXPECTED_VERSION_MINOR 0 +#define TL_EXPECTED_VERSION_PATCH 1 + +#include +#include +#include +#include + +#if defined(__EXCEPTIONS) || defined(_CPPUNWIND) +#define TL_EXPECTED_EXCEPTIONS_ENABLED +#endif + +#if (defined(_MSC_VER) && _MSC_VER == 1900) +#define TL_EXPECTED_MSVC2015 +#define TL_EXPECTED_MSVC2015_CONSTEXPR +#else +#define TL_EXPECTED_MSVC2015_CONSTEXPR constexpr +#endif + +#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \ + !defined(__clang__)) +#define TL_EXPECTED_GCC49 +#endif + +#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 4 && \ + !defined(__clang__)) +#define TL_EXPECTED_GCC54 +#endif + +#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 5 && \ + !defined(__clang__)) +#define TL_EXPECTED_GCC55 +#endif + +#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \ + !defined(__clang__)) +// GCC < 5 doesn't support overloading on const&& for member functions + +#define TL_EXPECTED_NO_CONSTRR +// GCC < 5 doesn't support some standard C++11 type traits +#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ + std::has_trivial_copy_constructor +#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ + std::has_trivial_copy_assign + +// This one will be different for GCC 5.7 if it's ever supported +#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \ + std::is_trivially_destructible + +// GCC 5 < v < 8 has a bug in is_trivially_copy_constructible which breaks +// std::vector for non-copyable types +#elif (defined(__GNUC__) && __GNUC__ < 8 && !defined(__clang__)) +#ifndef TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX +#define TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX +namespace tl { +namespace detail { +template +struct is_trivially_copy_constructible + : 
std::is_trivially_copy_constructible {}; +#ifdef _GLIBCXX_VECTOR +template +struct is_trivially_copy_constructible> : std::false_type {}; +#endif +} // namespace detail +} // namespace tl +#endif + +#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ + tl::detail::is_trivially_copy_constructible +#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ + std::is_trivially_copy_assignable +#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \ + std::is_trivially_destructible +#else +#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ + std::is_trivially_copy_constructible +#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ + std::is_trivially_copy_assignable +#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \ + std::is_trivially_destructible +#endif + +#if __cplusplus > 201103L +#define TL_EXPECTED_CXX14 +#endif + +#ifdef TL_EXPECTED_GCC49 +#define TL_EXPECTED_GCC49_CONSTEXPR +#else +#define TL_EXPECTED_GCC49_CONSTEXPR constexpr +#endif + +#if (__cplusplus == 201103L || defined(TL_EXPECTED_MSVC2015) || \ + defined(TL_EXPECTED_GCC49)) +#define TL_EXPECTED_11_CONSTEXPR +#else +#define TL_EXPECTED_11_CONSTEXPR constexpr +#endif + +namespace tl { +template class expected; + +#ifndef TL_MONOSTATE_INPLACE_MUTEX +#define TL_MONOSTATE_INPLACE_MUTEX +class monostate {}; + +struct in_place_t { + explicit in_place_t() = default; +}; +static constexpr in_place_t in_place{}; +#endif + +template class unexpected { +public: + static_assert(!std::is_same::value, "E must not be void"); + + unexpected() = delete; + constexpr explicit unexpected(const E &e) : m_val(e) {} + + constexpr explicit unexpected(E &&e) : m_val(std::move(e)) {} + + template ::value>::type * = nullptr> + constexpr explicit unexpected(Args &&...args) + : m_val(std::forward(args)...) {} + template < + class U, class... Args, + typename std::enable_if &, Args &&...>::value>::type * = nullptr> + constexpr explicit unexpected(std::initializer_list l, Args &&...args) + : m_val(l, std::forward(args)...) 
{} + + constexpr const E &value() const & { return m_val; } + TL_EXPECTED_11_CONSTEXPR E &value() & { return m_val; } + TL_EXPECTED_11_CONSTEXPR E &&value() && { return std::move(m_val); } + constexpr const E &&value() const && { return std::move(m_val); } + +private: + E m_val; +}; + +#ifdef __cpp_deduction_guides +template unexpected(E) -> unexpected; +#endif + +template +constexpr bool operator==(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() == rhs.value(); +} +template +constexpr bool operator!=(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() != rhs.value(); +} +template +constexpr bool operator<(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() < rhs.value(); +} +template +constexpr bool operator<=(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() <= rhs.value(); +} +template +constexpr bool operator>(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() > rhs.value(); +} +template +constexpr bool operator>=(const unexpected &lhs, const unexpected &rhs) { + return lhs.value() >= rhs.value(); +} + +template +unexpected::type> make_unexpected(E &&e) { + return unexpected::type>(std::forward(e)); +} + +struct unexpect_t { + unexpect_t() = default; +}; +static constexpr unexpect_t unexpect{}; + +namespace detail { +template +[[noreturn]] TL_EXPECTED_11_CONSTEXPR void throw_exception(E &&e) { +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + throw std::forward(e); +#else +#ifdef _MSC_VER + __assume(0); +#else + __builtin_unreachable(); +#endif +#endif +} + +#ifndef TL_TRAITS_MUTEX +#define TL_TRAITS_MUTEX +// C++14-style aliases for brevity +template using remove_const_t = typename std::remove_const::type; +template +using remove_reference_t = typename std::remove_reference::type; +template using decay_t = typename std::decay::type; +template +using enable_if_t = typename std::enable_if::type; +template +using conditional_t = typename std::conditional::type; + +// 
std::conjunction from C++17 +template struct conjunction : std::true_type {}; +template struct conjunction : B {}; +template +struct conjunction + : std::conditional, B>::type {}; + +#if defined(_LIBCPP_VERSION) && __cplusplus == 201103L +#define TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND +#endif + +// In C++11 mode, there's an issue in libc++'s std::mem_fn +// which results in a hard-error when using it in a noexcept expression +// in some cases. This is a check to workaround the common failing case. +#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND +template +struct is_pointer_to_non_const_member_func : std::false_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; +template +struct is_pointer_to_non_const_member_func + : std::true_type {}; + +template struct is_const_or_const_ref : std::false_type {}; +template struct is_const_or_const_ref : std::true_type {}; +template struct is_const_or_const_ref : std::true_type {}; +#endif + +// std::invoke from C++17 +// https://stackoverflow.com/questions/38288042/c11-14-invoke-workaround +template < + typename Fn, typename... 
Args, +#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND + typename = enable_if_t::value && + is_const_or_const_ref::value)>, +#endif + typename = enable_if_t>::value>, int = 0> +constexpr auto invoke(Fn &&f, Args &&...args) noexcept( + noexcept(std::mem_fn(f)(std::forward(args)...))) + -> decltype(std::mem_fn(f)(std::forward(args)...)) { + return std::mem_fn(f)(std::forward(args)...); +} + +template >::value>> +constexpr auto invoke(Fn &&f, Args &&...args) noexcept( + noexcept(std::forward(f)(std::forward(args)...))) + -> decltype(std::forward(f)(std::forward(args)...)) { + return std::forward(f)(std::forward(args)...); +} + +// std::invoke_result from C++17 +template struct invoke_result_impl; + +template +struct invoke_result_impl< + F, + decltype(detail::invoke(std::declval(), std::declval()...), void()), + Us...> { + using type = + decltype(detail::invoke(std::declval(), std::declval()...)); +}; + +template +using invoke_result = invoke_result_impl; + +template +using invoke_result_t = typename invoke_result::type; + +#if defined(_MSC_VER) && _MSC_VER <= 1900 +// TODO make a version which works with MSVC 2015 +template struct is_swappable : std::true_type {}; + +template struct is_nothrow_swappable : std::true_type {}; +#else +// https://stackoverflow.com/questions/26744589/what-is-a-proper-way-to-implement-is-swappable-to-test-for-the-swappable-concept +namespace swap_adl_tests { +// if swap ADL finds this then it would call std::swap otherwise (same +// signature) +struct tag {}; + +template tag swap(T &, T &); +template tag swap(T (&a)[N], T (&b)[N]); + +// helper functions to test if an unqualified swap is possible, and if it +// becomes std::swap +template std::false_type can_swap(...) 
noexcept(false); +template (), std::declval()))> +std::true_type can_swap(int) noexcept(noexcept(swap(std::declval(), + std::declval()))); + +template std::false_type uses_std(...); +template +std::is_same(), std::declval())), tag> +uses_std(int); + +template +struct is_std_swap_noexcept + : std::integral_constant::value && + std::is_nothrow_move_assignable::value> {}; + +template +struct is_std_swap_noexcept : is_std_swap_noexcept {}; + +template +struct is_adl_swap_noexcept + : std::integral_constant(0))> {}; +} // namespace swap_adl_tests + +template +struct is_swappable + : std::integral_constant< + bool, + decltype(detail::swap_adl_tests::can_swap(0))::value && + (!decltype(detail::swap_adl_tests::uses_std(0))::value || + (std::is_move_assignable::value && + std::is_move_constructible::value))> {}; + +template +struct is_swappable + : std::integral_constant< + bool, + decltype(detail::swap_adl_tests::can_swap(0))::value && + (!decltype(detail::swap_adl_tests::uses_std( + 0))::value || + is_swappable::value)> {}; + +template +struct is_nothrow_swappable + : std::integral_constant< + bool, + is_swappable::value && + ((decltype(detail::swap_adl_tests::uses_std(0))::value && + detail::swap_adl_tests::is_std_swap_noexcept::value) || + (!decltype(detail::swap_adl_tests::uses_std(0))::value && + detail::swap_adl_tests::is_adl_swap_noexcept::value))> {}; +#endif +#endif + +// Trait for checking if a type is a tl::expected +template struct is_expected_impl : std::false_type {}; +template +struct is_expected_impl> : std::true_type {}; +template using is_expected = is_expected_impl>; + +template +using expected_enable_forward_value = detail::enable_if_t< + std::is_constructible::value && + !std::is_same, in_place_t>::value && + !std::is_same, detail::decay_t>::value && + !std::is_same, detail::decay_t>::value>; + +template +using expected_enable_from_other = detail::enable_if_t< + std::is_constructible::value && + std::is_constructible::value && + !std::is_constructible 
&>::value && + !std::is_constructible &&>::value && + !std::is_constructible &>::value && + !std::is_constructible &&>::value && + !std::is_convertible &, T>::value && + !std::is_convertible &&, T>::value && + !std::is_convertible &, T>::value && + !std::is_convertible &&, T>::value>; + +template +using is_void_or = conditional_t::value, std::true_type, U>; + +template +using is_copy_constructible_or_void = + is_void_or>; + +template +using is_move_constructible_or_void = + is_void_or>; + +template +using is_copy_assignable_or_void = is_void_or>; + +template +using is_move_assignable_or_void = is_void_or>; + +} // namespace detail + +namespace detail { +struct no_init_t {}; +static constexpr no_init_t no_init{}; + +// Implements the storage of the values, and ensures that the destructor is +// trivial if it can be. +// +// This specialization is for where neither `T` or `E` is trivially +// destructible, so the destructors must be called on destruction of the +// `expected` +template ::value, + bool = std::is_trivially_destructible::value> +struct expected_storage_base { + constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {} + constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {} + + template ::value> * = + nullptr> + constexpr expected_storage_base(in_place_t, Args &&...args) + : m_val(std::forward(args)...), m_has_val(true) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected_storage_base(in_place_t, std::initializer_list il, + Args &&...args) + : m_val(il, std::forward(args)...), m_has_val(true) {} + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() { + if 
(m_has_val) { + m_val.~T(); + } else { + m_unexpect.~unexpected(); + } + } + union { + T m_val; + unexpected m_unexpect; + char m_no_init; + }; + bool m_has_val; +}; + +// This specialization is for when both `T` and `E` are trivially-destructible, +// so the destructor of the `expected` can be trivial. +template struct expected_storage_base { + constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {} + constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {} + + template ::value> * = + nullptr> + constexpr expected_storage_base(in_place_t, Args &&...args) + : m_val(std::forward(args)...), m_has_val(true) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected_storage_base(in_place_t, std::initializer_list il, + Args &&...args) + : m_val(il, std::forward(args)...), m_has_val(true) {} + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() = default; + union { + T m_val; + unexpected m_unexpect; + char m_no_init; + }; + bool m_has_val; +}; + +// T is trivial, E is not. 
+template struct expected_storage_base { + constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {} + TL_EXPECTED_MSVC2015_CONSTEXPR expected_storage_base(no_init_t) + : m_no_init(), m_has_val(false) {} + + template ::value> * = + nullptr> + constexpr expected_storage_base(in_place_t, Args &&...args) + : m_val(std::forward(args)...), m_has_val(true) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected_storage_base(in_place_t, std::initializer_list il, + Args &&...args) + : m_val(il, std::forward(args)...), m_has_val(true) {} + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() { + if (!m_has_val) { + m_unexpect.~unexpected(); + } + } + + union { + T m_val; + unexpected m_unexpect; + char m_no_init; + }; + bool m_has_val; +}; + +// E is trivial, T is not. 
+template struct expected_storage_base { + constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {} + constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {} + + template ::value> * = + nullptr> + constexpr expected_storage_base(in_place_t, Args &&...args) + : m_val(std::forward(args)...), m_has_val(true) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected_storage_base(in_place_t, std::initializer_list il, + Args &&...args) + : m_val(il, std::forward(args)...), m_has_val(true) {} + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() { + if (m_has_val) { + m_val.~T(); + } + } + union { + T m_val; + unexpected m_unexpect; + char m_no_init; + }; + bool m_has_val; +}; + +// `T` is `void`, `E` is trivially-destructible +template struct expected_storage_base { + #if __GNUC__ <= 5 + //no constexpr for GCC 4/5 bug + #else + TL_EXPECTED_MSVC2015_CONSTEXPR + #endif + expected_storage_base() : m_has_val(true) {} + + constexpr expected_storage_base(no_init_t) : m_val(), m_has_val(false) {} + + constexpr expected_storage_base(in_place_t) : m_has_val(true) {} + + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() = default; + struct dummy {}; + union { + unexpected m_unexpect; + dummy m_val; + }; + bool m_has_val; +}; + +// `T` is 
`void`, `E` is not trivially-destructible +template struct expected_storage_base { + constexpr expected_storage_base() : m_dummy(), m_has_val(true) {} + constexpr expected_storage_base(no_init_t) : m_dummy(), m_has_val(false) {} + + constexpr expected_storage_base(in_place_t) : m_dummy(), m_has_val(true) {} + + template ::value> * = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args &&...args) + : m_unexpect(std::forward(args)...), m_has_val(false) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected_storage_base(unexpect_t, + std::initializer_list il, + Args &&...args) + : m_unexpect(il, std::forward(args)...), m_has_val(false) {} + + ~expected_storage_base() { + if (!m_has_val) { + m_unexpect.~unexpected(); + } + } + + union { + unexpected m_unexpect; + char m_dummy; + }; + bool m_has_val; +}; + +// This base class provides some handy member functions which can be used in +// further derived classes +template +struct expected_operations_base : expected_storage_base { + using expected_storage_base::expected_storage_base; + + template void construct(Args &&...args) noexcept { + new (std::addressof(this->m_val)) T(std::forward(args)...); + this->m_has_val = true; + } + + template void construct_with(Rhs &&rhs) noexcept { + new (std::addressof(this->m_val)) T(std::forward(rhs).get()); + this->m_has_val = true; + } + + template void construct_error(Args &&...args) noexcept { + new (std::addressof(this->m_unexpect)) + unexpected(std::forward(args)...); + this->m_has_val = false; + } + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + + // These assign overloads ensure that the most efficient assignment + // implementation is used while maintaining the strong exception guarantee. + // The problematic case is where rhs has a value, but *this does not. + // + // This overload handles the case where we can just copy-construct `T` + // directly into place without throwing. 
+ template ::value> + * = nullptr> + void assign(const expected_operations_base &rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~unexpected(); + construct(rhs.get()); + } else { + assign_common(rhs); + } + } + + // This overload handles the case where we can attempt to create a copy of + // `T`, then no-throw move it into place if the copy was successful. + template ::value && + std::is_nothrow_move_constructible::value> + * = nullptr> + void assign(const expected_operations_base &rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + T tmp = rhs.get(); + geterr().~unexpected(); + construct(std::move(tmp)); + } else { + assign_common(rhs); + } + } + + // This overload is the worst-case, where we have to move-construct the + // unexpected value into temporary storage, then try to copy the T into place. + // If the construction succeeds, then everything is fine, but if it throws, + // then we move the old unexpected value back into place before rethrowing the + // exception. + template ::value && + !std::is_nothrow_move_constructible::value> + * = nullptr> + void assign(const expected_operations_base &rhs) { + if (!this->m_has_val && rhs.m_has_val) { + auto tmp = std::move(geterr()); + geterr().~unexpected(); + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + construct(rhs.get()); + } catch (...) 
{ + geterr() = std::move(tmp); + throw; + } +#else + construct(rhs.get()); +#endif + } else { + assign_common(rhs); + } + } + + // These overloads do the same as above, but for rvalues + template ::value> + * = nullptr> + void assign(expected_operations_base &&rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~unexpected(); + construct(std::move(rhs).get()); + } else { + assign_common(std::move(rhs)); + } + } + + template ::value> + * = nullptr> + void assign(expected_operations_base &&rhs) { + if (!this->m_has_val && rhs.m_has_val) { + auto tmp = std::move(geterr()); + geterr().~unexpected(); +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + construct(std::move(rhs).get()); + } catch (...) { + geterr() = std::move(tmp); + throw; + } +#else + construct(std::move(rhs).get()); +#endif + } else { + assign_common(std::move(rhs)); + } + } + +#else + + // If exceptions are disabled then we can just copy-construct + void assign(const expected_operations_base &rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~unexpected(); + construct(rhs.get()); + } else { + assign_common(rhs); + } + } + + void assign(expected_operations_base &&rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~unexpected(); + construct(std::move(rhs).get()); + } else { + assign_common(rhs); + } + } + +#endif + + // The common part of move/copy assigning + template void assign_common(Rhs &&rhs) { + if (this->m_has_val) { + if (rhs.m_has_val) { + get() = std::forward(rhs).get(); + } else { + destroy_val(); + construct_error(std::forward(rhs).geterr()); + } + } else { + if (!rhs.m_has_val) { + geterr() = std::forward(rhs).geterr(); + } + } + } + + bool has_value() const { return this->m_has_val; } + + TL_EXPECTED_11_CONSTEXPR T &get() & { return this->m_val; } + constexpr const T &get() const & { return this->m_val; } + TL_EXPECTED_11_CONSTEXPR T &&get() && { return std::move(this->m_val); } +#ifndef TL_EXPECTED_NO_CONSTRR + constexpr const T 
&&get() const && { return std::move(this->m_val); } +#endif + + TL_EXPECTED_11_CONSTEXPR unexpected &geterr() & { + return this->m_unexpect; + } + constexpr const unexpected &geterr() const & { return this->m_unexpect; } + TL_EXPECTED_11_CONSTEXPR unexpected &&geterr() && { + return std::move(this->m_unexpect); + } +#ifndef TL_EXPECTED_NO_CONSTRR + constexpr const unexpected &&geterr() const && { + return std::move(this->m_unexpect); + } +#endif + + TL_EXPECTED_11_CONSTEXPR void destroy_val() { get().~T(); } +}; + +// This base class provides some handy member functions which can be used in +// further derived classes +template +struct expected_operations_base : expected_storage_base { + using expected_storage_base::expected_storage_base; + + template void construct() noexcept { this->m_has_val = true; } + + // This function doesn't use its argument, but needs it so that code in + // levels above this can work independently of whether T is void + template void construct_with(Rhs &&) noexcept { + this->m_has_val = true; + } + + template void construct_error(Args &&...args) noexcept { + new (std::addressof(this->m_unexpect)) + unexpected(std::forward(args)...); + this->m_has_val = false; + } + + template void assign(Rhs &&rhs) noexcept { + if (!this->m_has_val) { + if (rhs.m_has_val) { + geterr().~unexpected(); + construct(); + } else { + geterr() = std::forward(rhs).geterr(); + } + } else { + if (!rhs.m_has_val) { + construct_error(std::forward(rhs).geterr()); + } + } + } + + bool has_value() const { return this->m_has_val; } + + TL_EXPECTED_11_CONSTEXPR unexpected &geterr() & { + return this->m_unexpect; + } + constexpr const unexpected &geterr() const & { return this->m_unexpect; } + TL_EXPECTED_11_CONSTEXPR unexpected &&geterr() && { + return std::move(this->m_unexpect); + } +#ifndef TL_EXPECTED_NO_CONSTRR + constexpr const unexpected &&geterr() const && { + return std::move(this->m_unexpect); + } +#endif + + TL_EXPECTED_11_CONSTEXPR void destroy_val() { + // 
no-op + } +}; + +// This class manages conditionally having a trivial copy constructor +// This specialization is for when T and E are trivially copy constructible +template :: + value &&TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(E)::value> +struct expected_copy_base : expected_operations_base { + using expected_operations_base::expected_operations_base; +}; + +// This specialization is for when T or E are not trivially copy constructible +template +struct expected_copy_base : expected_operations_base { + using expected_operations_base::expected_operations_base; + + expected_copy_base() = default; + expected_copy_base(const expected_copy_base &rhs) + : expected_operations_base(no_init) { + if (rhs.has_value()) { + this->construct_with(rhs); + } else { + this->construct_error(rhs.geterr()); + } + } + + expected_copy_base(expected_copy_base &&rhs) = default; + expected_copy_base &operator=(const expected_copy_base &rhs) = default; + expected_copy_base &operator=(expected_copy_base &&rhs) = default; +}; + +// This class manages conditionally having a trivial move constructor +// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it +// doesn't implement an analogue to std::is_trivially_move_constructible. 
We +// have to make do with a non-trivial move constructor even if T is trivially +// move constructible +#ifndef TL_EXPECTED_GCC49 +template >::value + &&std::is_trivially_move_constructible::value> +struct expected_move_base : expected_copy_base { + using expected_copy_base::expected_copy_base; +}; +#else +template struct expected_move_base; +#endif +template +struct expected_move_base : expected_copy_base { + using expected_copy_base::expected_copy_base; + + expected_move_base() = default; + expected_move_base(const expected_move_base &rhs) = default; + + expected_move_base(expected_move_base &&rhs) noexcept( + std::is_nothrow_move_constructible::value) + : expected_copy_base(no_init) { + if (rhs.has_value()) { + this->construct_with(std::move(rhs)); + } else { + this->construct_error(std::move(rhs.geterr())); + } + } + expected_move_base &operator=(const expected_move_base &rhs) = default; + expected_move_base &operator=(expected_move_base &&rhs) = default; +}; + +// This class manages conditionally having a trivial copy assignment operator +template >::value + &&TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(E)::value + &&TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(E)::value + &&TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(E)::value> +struct expected_copy_assign_base : expected_move_base { + using expected_move_base::expected_move_base; +}; + +template +struct expected_copy_assign_base : expected_move_base { + using expected_move_base::expected_move_base; + + expected_copy_assign_base() = default; + expected_copy_assign_base(const expected_copy_assign_base &rhs) = default; + + expected_copy_assign_base(expected_copy_assign_base &&rhs) = default; + expected_copy_assign_base &operator=(const expected_copy_assign_base &rhs) { + this->assign(rhs); + return *this; + } + expected_copy_assign_base & + operator=(expected_copy_assign_base &&rhs) = default; +}; + +// This class manages conditionally having a trivial move assignment operator +// Unfortunately there's no way to 
achieve this in GCC < 5 AFAIK, since it +// doesn't implement an analogue to std::is_trivially_move_assignable. We have +// to make do with a non-trivial move assignment operator even if T is trivially +// move assignable +#ifndef TL_EXPECTED_GCC49 +template , + std::is_trivially_move_constructible, + std::is_trivially_move_assignable>>:: + value &&std::is_trivially_destructible::value + &&std::is_trivially_move_constructible::value + &&std::is_trivially_move_assignable::value> +struct expected_move_assign_base : expected_copy_assign_base { + using expected_copy_assign_base::expected_copy_assign_base; +}; +#else +template struct expected_move_assign_base; +#endif + +template +struct expected_move_assign_base + : expected_copy_assign_base { + using expected_copy_assign_base::expected_copy_assign_base; + + expected_move_assign_base() = default; + expected_move_assign_base(const expected_move_assign_base &rhs) = default; + + expected_move_assign_base(expected_move_assign_base &&rhs) = default; + + expected_move_assign_base & + operator=(const expected_move_assign_base &rhs) = default; + + expected_move_assign_base & + operator=(expected_move_assign_base &&rhs) noexcept( + std::is_nothrow_move_constructible::value + &&std::is_nothrow_move_assignable::value) { + this->assign(std::move(rhs)); + return *this; + } +}; + +// expected_delete_ctor_base will conditionally delete copy and move +// constructors depending on whether T is copy/move constructible +template ::value && + std::is_copy_constructible::value), + bool EnableMove = (is_move_constructible_or_void::value && + std::is_move_constructible::value)> +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = default; + expected_delete_ctor_base & + operator=(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base & + 
operator=(expected_delete_ctor_base &&) noexcept = default; +}; + +template +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = delete; + expected_delete_ctor_base & + operator=(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base & + operator=(expected_delete_ctor_base &&) noexcept = default; +}; + +template +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base &) = delete; + expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = default; + expected_delete_ctor_base & + operator=(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base & + operator=(expected_delete_ctor_base &&) noexcept = default; +}; + +template +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base &) = delete; + expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = delete; + expected_delete_ctor_base & + operator=(const expected_delete_ctor_base &) = default; + expected_delete_ctor_base & + operator=(expected_delete_ctor_base &&) noexcept = default; +}; + +// expected_delete_assign_base will conditionally delete copy and move +// constructors depending on whether T and E are copy/move constructible + +// assignable +template ::value && + std::is_copy_constructible::value && + is_copy_assignable_or_void::value && + std::is_copy_assignable::value), + bool EnableMove = (is_move_constructible_or_void::value && + std::is_move_constructible::value && + is_move_assignable_or_void::value && + std::is_move_assignable::value)> +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base &) = default; + 
expected_delete_assign_base(expected_delete_assign_base &&) noexcept = + default; + expected_delete_assign_base & + operator=(const expected_delete_assign_base &) = default; + expected_delete_assign_base & + operator=(expected_delete_assign_base &&) noexcept = default; +}; + +template +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base &) = default; + expected_delete_assign_base(expected_delete_assign_base &&) noexcept = + default; + expected_delete_assign_base & + operator=(const expected_delete_assign_base &) = default; + expected_delete_assign_base & + operator=(expected_delete_assign_base &&) noexcept = delete; +}; + +template +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base &) = default; + expected_delete_assign_base(expected_delete_assign_base &&) noexcept = + default; + expected_delete_assign_base & + operator=(const expected_delete_assign_base &) = delete; + expected_delete_assign_base & + operator=(expected_delete_assign_base &&) noexcept = default; +}; + +template +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base &) = default; + expected_delete_assign_base(expected_delete_assign_base &&) noexcept = + default; + expected_delete_assign_base & + operator=(const expected_delete_assign_base &) = delete; + expected_delete_assign_base & + operator=(expected_delete_assign_base &&) noexcept = delete; +}; + +// This is needed to be able to construct the expected_default_ctor_base which +// follows, while still conditionally deleting the default constructor. +struct default_constructor_tag { + explicit constexpr default_constructor_tag() = default; +}; + +// expected_default_ctor_base will ensure that expected has a deleted default +// consturctor if T is not default constructible. 
+// This specialization is for when T is default constructible +template ::value || std::is_void::value> +struct expected_default_ctor_base { + constexpr expected_default_ctor_base() noexcept = default; + constexpr expected_default_ctor_base( + expected_default_ctor_base const &) noexcept = default; + constexpr expected_default_ctor_base(expected_default_ctor_base &&) noexcept = + default; + expected_default_ctor_base & + operator=(expected_default_ctor_base const &) noexcept = default; + expected_default_ctor_base & + operator=(expected_default_ctor_base &&) noexcept = default; + + constexpr explicit expected_default_ctor_base(default_constructor_tag) {} +}; + +// This specialization is for when T is not default constructible +template struct expected_default_ctor_base { + constexpr expected_default_ctor_base() noexcept = delete; + constexpr expected_default_ctor_base( + expected_default_ctor_base const &) noexcept = default; + constexpr expected_default_ctor_base(expected_default_ctor_base &&) noexcept = + default; + expected_default_ctor_base & + operator=(expected_default_ctor_base const &) noexcept = default; + expected_default_ctor_base & + operator=(expected_default_ctor_base &&) noexcept = default; + + constexpr explicit expected_default_ctor_base(default_constructor_tag) {} +}; +} // namespace detail + +template class bad_expected_access : public std::exception { +public: + explicit bad_expected_access(E e) : m_val(std::move(e)) {} + + virtual const char *what() const noexcept override { + return "Bad expected access"; + } + + const E &error() const & { return m_val; } + E &error() & { return m_val; } + const E &&error() const && { return std::move(m_val); } + E &&error() && { return std::move(m_val); } + +private: + E m_val; +}; + +/// An `expected` object is an object that contains the storage for +/// another object and manages the lifetime of this contained object `T`. 
+/// Alternatively it could contain the storage for another unexpected object +/// `E`. The contained object may not be initialized after the expected object +/// has been initialized, and may not be destroyed before the expected object +/// has been destroyed. The initialization state of the contained object is +/// tracked by the expected object. +template +class expected : private detail::expected_move_assign_base, + private detail::expected_delete_ctor_base, + private detail::expected_delete_assign_base, + private detail::expected_default_ctor_base { + static_assert(!std::is_reference::value, "T must not be a reference"); + static_assert(!std::is_same::type>::value, + "T must not be in_place_t"); + static_assert(!std::is_same::type>::value, + "T must not be unexpect_t"); + static_assert( + !std::is_same>::type>::value, + "T must not be unexpected"); + static_assert(!std::is_reference::value, "E must not be a reference"); + + T *valptr() { return std::addressof(this->m_val); } + const T *valptr() const { return std::addressof(this->m_val); } + unexpected *errptr() { return std::addressof(this->m_unexpect); } + const unexpected *errptr() const { + return std::addressof(this->m_unexpect); + } + + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &val() { + return this->m_val; + } + TL_EXPECTED_11_CONSTEXPR unexpected &err() { return this->m_unexpect; } + + template ::value> * = nullptr> + constexpr const U &val() const { + return this->m_val; + } + constexpr const unexpected &err() const { return this->m_unexpect; } + + using impl_base = detail::expected_move_assign_base; + using ctor_base = detail::expected_default_ctor_base; + +public: + typedef T value_type; + typedef E error_type; + typedef unexpected unexpected_type; + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template TL_EXPECTED_11_CONSTEXPR auto and_then(F &&f) & { + return and_then_impl(*this, 
std::forward(f)); + } + template TL_EXPECTED_11_CONSTEXPR auto and_then(F &&f) && { + return and_then_impl(std::move(*this), std::forward(f)); + } + template constexpr auto and_then(F &&f) const & { + return and_then_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template constexpr auto and_then(F &&f) const && { + return and_then_impl(std::move(*this), std::forward(f)); + } +#endif + +#else + template + TL_EXPECTED_11_CONSTEXPR auto + and_then(F &&f) & -> decltype(and_then_impl(std::declval(), + std::forward(f))) { + return and_then_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR auto + and_then(F &&f) && -> decltype(and_then_impl(std::declval(), + std::forward(f))) { + return and_then_impl(std::move(*this), std::forward(f)); + } + template + constexpr auto and_then(F &&f) const & -> decltype(and_then_impl( + std::declval(), std::forward(f))) { + return and_then_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr auto and_then(F &&f) const && -> decltype(and_then_impl( + std::declval(), std::forward(f))) { + return and_then_impl(std::move(*this), std::forward(f)); + } +#endif +#endif + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template TL_EXPECTED_11_CONSTEXPR auto map(F &&f) & { + return expected_map_impl(*this, std::forward(f)); + } + template TL_EXPECTED_11_CONSTEXPR auto map(F &&f) && { + return expected_map_impl(std::move(*this), std::forward(f)); + } + template constexpr auto map(F &&f) const & { + return expected_map_impl(*this, std::forward(f)); + } + template constexpr auto map(F &&f) const && { + return expected_map_impl(std::move(*this), std::forward(f)); + } +#else + template + TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl( + std::declval(), std::declval())) + map(F &&f) & { + return expected_map_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR 
decltype(expected_map_impl(std::declval(), + std::declval())) + map(F &&f) && { + return expected_map_impl(std::move(*this), std::forward(f)); + } + template + constexpr decltype(expected_map_impl(std::declval(), + std::declval())) + map(F &&f) const & { + return expected_map_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr decltype(expected_map_impl(std::declval(), + std::declval())) + map(F &&f) const && { + return expected_map_impl(std::move(*this), std::forward(f)); + } +#endif +#endif + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template TL_EXPECTED_11_CONSTEXPR auto transform(F &&f) & { + return expected_map_impl(*this, std::forward(f)); + } + template TL_EXPECTED_11_CONSTEXPR auto transform(F &&f) && { + return expected_map_impl(std::move(*this), std::forward(f)); + } + template constexpr auto transform(F &&f) const & { + return expected_map_impl(*this, std::forward(f)); + } + template constexpr auto transform(F &&f) const && { + return expected_map_impl(std::move(*this), std::forward(f)); + } +#else + template + TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl( + std::declval(), std::declval())) + transform(F &&f) & { + return expected_map_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(std::declval(), + std::declval())) + transform(F &&f) && { + return expected_map_impl(std::move(*this), std::forward(f)); + } + template + constexpr decltype(expected_map_impl(std::declval(), + std::declval())) + transform(F &&f) const & { + return expected_map_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr decltype(expected_map_impl(std::declval(), + std::declval())) + transform(F &&f) const && { + return expected_map_impl(std::move(*this), std::forward(f)); + } +#endif +#endif + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + 
!defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template TL_EXPECTED_11_CONSTEXPR auto map_error(F &&f) & { + return map_error_impl(*this, std::forward(f)); + } + template TL_EXPECTED_11_CONSTEXPR auto map_error(F &&f) && { + return map_error_impl(std::move(*this), std::forward(f)); + } + template constexpr auto map_error(F &&f) const & { + return map_error_impl(*this, std::forward(f)); + } + template constexpr auto map_error(F &&f) const && { + return map_error_impl(std::move(*this), std::forward(f)); + } +#else + template + TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval(), + std::declval())) + map_error(F &&f) & { + return map_error_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval(), + std::declval())) + map_error(F &&f) && { + return map_error_impl(std::move(*this), std::forward(f)); + } + template + constexpr decltype(map_error_impl(std::declval(), + std::declval())) + map_error(F &&f) const & { + return map_error_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr decltype(map_error_impl(std::declval(), + std::declval())) + map_error(F &&f) const && { + return map_error_impl(std::move(*this), std::forward(f)); + } +#endif +#endif + template expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) & { + return or_else_impl(*this, std::forward(f)); + } + + template expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) && { + return or_else_impl(std::move(*this), std::forward(f)); + } + + template expected constexpr or_else(F &&f) const & { + return or_else_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template expected constexpr or_else(F &&f) const && { + return or_else_impl(std::move(*this), std::forward(f)); + } +#endif + constexpr expected() = default; + constexpr expected(const expected &rhs) = default; + constexpr expected(expected &&rhs) = default; + expected &operator=(const expected &rhs) = default; + expected 
&operator=(expected &&rhs) = default; + + template ::value> * = + nullptr> + constexpr expected(in_place_t, Args &&...args) + : impl_base(in_place, std::forward(args)...), + ctor_base(detail::default_constructor_tag{}) {} + + template &, Args &&...>::value> * = nullptr> + constexpr expected(in_place_t, std::initializer_list il, Args &&...args) + : impl_base(in_place, il, std::forward(args)...), + ctor_base(detail::default_constructor_tag{}) {} + + template ::value> * = + nullptr, + detail::enable_if_t::value> * = + nullptr> + explicit constexpr expected(const unexpected &e) + : impl_base(unexpect, e.value()), + ctor_base(detail::default_constructor_tag{}) {} + + template < + class G = E, + detail::enable_if_t::value> * = + nullptr, + detail::enable_if_t::value> * = nullptr> + constexpr expected(unexpected const &e) + : impl_base(unexpect, e.value()), + ctor_base(detail::default_constructor_tag{}) {} + + template < + class G = E, + detail::enable_if_t::value> * = nullptr, + detail::enable_if_t::value> * = nullptr> + explicit constexpr expected(unexpected &&e) noexcept( + std::is_nothrow_constructible::value) + : impl_base(unexpect, std::move(e.value())), + ctor_base(detail::default_constructor_tag{}) {} + + template < + class G = E, + detail::enable_if_t::value> * = nullptr, + detail::enable_if_t::value> * = nullptr> + constexpr expected(unexpected &&e) noexcept( + std::is_nothrow_constructible::value) + : impl_base(unexpect, std::move(e.value())), + ctor_base(detail::default_constructor_tag{}) {} + + template ::value> * = + nullptr> + constexpr explicit expected(unexpect_t, Args &&...args) + : impl_base(unexpect, std::forward(args)...), + ctor_base(detail::default_constructor_tag{}) {} + + template &, Args &&...>::value> * = nullptr> + constexpr explicit expected(unexpect_t, std::initializer_list il, + Args &&...args) + : impl_base(unexpect, il, std::forward(args)...), + ctor_base(detail::default_constructor_tag{}) {} + + template ::value && + 
std::is_convertible::value)> * = + nullptr, + detail::expected_enable_from_other + * = nullptr> + explicit TL_EXPECTED_11_CONSTEXPR expected(const expected &rhs) + : ctor_base(detail::default_constructor_tag{}) { + if (rhs.has_value()) { + this->construct(*rhs); + } else { + this->construct_error(rhs.error()); + } + } + + template ::value && + std::is_convertible::value)> * = + nullptr, + detail::expected_enable_from_other + * = nullptr> + TL_EXPECTED_11_CONSTEXPR expected(const expected &rhs) + : ctor_base(detail::default_constructor_tag{}) { + if (rhs.has_value()) { + this->construct(*rhs); + } else { + this->construct_error(rhs.error()); + } + } + + template < + class U, class G, + detail::enable_if_t::value && + std::is_convertible::value)> * = nullptr, + detail::expected_enable_from_other * = nullptr> + explicit TL_EXPECTED_11_CONSTEXPR expected(expected &&rhs) + : ctor_base(detail::default_constructor_tag{}) { + if (rhs.has_value()) { + this->construct(std::move(*rhs)); + } else { + this->construct_error(std::move(rhs.error())); + } + } + + template < + class U, class G, + detail::enable_if_t<(std::is_convertible::value && + std::is_convertible::value)> * = nullptr, + detail::expected_enable_from_other * = nullptr> + TL_EXPECTED_11_CONSTEXPR expected(expected &&rhs) + : ctor_base(detail::default_constructor_tag{}) { + if (rhs.has_value()) { + this->construct(std::move(*rhs)); + } else { + this->construct_error(std::move(rhs.error())); + } + } + + template < + class U = T, + detail::enable_if_t::value> * = nullptr, + detail::expected_enable_forward_value * = nullptr> + explicit TL_EXPECTED_MSVC2015_CONSTEXPR expected(U &&v) + : expected(in_place, std::forward(v)) {} + + template < + class U = T, + detail::enable_if_t::value> * = nullptr, + detail::expected_enable_forward_value * = nullptr> + TL_EXPECTED_MSVC2015_CONSTEXPR expected(U &&v) + : expected(in_place, std::forward(v)) {} + + template < + class U = T, class G = T, + detail::enable_if_t::value> * = + 
nullptr, + detail::enable_if_t::value> * = nullptr, + detail::enable_if_t< + (!std::is_same, detail::decay_t>::value && + !detail::conjunction, + std::is_same>>::value && + std::is_constructible::value && + std::is_assignable::value && + std::is_nothrow_move_constructible::value)> * = nullptr> + expected &operator=(U &&v) { + if (has_value()) { + val() = std::forward(v); + } else { + err().~unexpected(); + ::new (valptr()) T(std::forward(v)); + this->m_has_val = true; + } + + return *this; + } + + template < + class U = T, class G = T, + detail::enable_if_t::value> * = + nullptr, + detail::enable_if_t::value> * = nullptr, + detail::enable_if_t< + (!std::is_same, detail::decay_t>::value && + !detail::conjunction, + std::is_same>>::value && + std::is_constructible::value && + std::is_assignable::value && + std::is_nothrow_move_constructible::value)> * = nullptr> + expected &operator=(U &&v) { + if (has_value()) { + val() = std::forward(v); + } else { + auto tmp = std::move(err()); + err().~unexpected(); + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (valptr()) T(std::forward(v)); + this->m_has_val = true; + } catch (...) 
{ + err() = std::move(tmp); + throw; + } +#else + ::new (valptr()) T(std::forward(v)); + this->m_has_val = true; +#endif + } + + return *this; + } + + template ::value && + std::is_assignable::value> * = nullptr> + expected &operator=(const unexpected &rhs) { + if (!has_value()) { + err() = rhs; + } else { + this->destroy_val(); + ::new (errptr()) unexpected(rhs); + this->m_has_val = false; + } + + return *this; + } + + template ::value && + std::is_move_assignable::value> * = nullptr> + expected &operator=(unexpected &&rhs) noexcept { + if (!has_value()) { + err() = std::move(rhs); + } else { + this->destroy_val(); + ::new (errptr()) unexpected(std::move(rhs)); + this->m_has_val = false; + } + + return *this; + } + + template ::value> * = nullptr> + void emplace(Args &&...args) { + if (has_value()) { + val().~T(); + } else { + err().~unexpected(); + this->m_has_val = true; + } + ::new (valptr()) T(std::forward(args)...); + } + + template ::value> * = nullptr> + void emplace(Args &&...args) { + if (has_value()) { + val().~T(); + ::new (valptr()) T(std::forward(args)...); + } else { + auto tmp = std::move(err()); + err().~unexpected(); + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (valptr()) T(std::forward(args)...); + this->m_has_val = true; + } catch (...) 
{ + err() = std::move(tmp); + throw; + } +#else + ::new (valptr()) T(std::forward(args)...); + this->m_has_val = true; +#endif + } + } + + template &, Args &&...>::value> * = nullptr> + void emplace(std::initializer_list il, Args &&...args) { + if (has_value()) { + T t(il, std::forward(args)...); + val() = std::move(t); + } else { + err().~unexpected(); + ::new (valptr()) T(il, std::forward(args)...); + this->m_has_val = true; + } + } + + template &, Args &&...>::value> * = nullptr> + void emplace(std::initializer_list il, Args &&...args) { + if (has_value()) { + T t(il, std::forward(args)...); + val() = std::move(t); + } else { + auto tmp = std::move(err()); + err().~unexpected(); + +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (valptr()) T(il, std::forward(args)...); + this->m_has_val = true; + } catch (...) { + err() = std::move(tmp); + throw; + } +#else + ::new (valptr()) T(il, std::forward(args)...); + this->m_has_val = true; +#endif + } + } + +private: + using t_is_void = std::true_type; + using t_is_not_void = std::false_type; + using t_is_nothrow_move_constructible = std::true_type; + using move_constructing_t_can_throw = std::false_type; + using e_is_nothrow_move_constructible = std::true_type; + using move_constructing_e_can_throw = std::false_type; + + void swap_where_both_have_value(expected & /*rhs*/, t_is_void) noexcept { + // swapping void is a no-op + } + + void swap_where_both_have_value(expected &rhs, t_is_not_void) { + using std::swap; + swap(val(), rhs.val()); + } + + void swap_where_only_one_has_value(expected &rhs, t_is_void) noexcept( + std::is_nothrow_move_constructible::value) { + ::new (errptr()) unexpected_type(std::move(rhs.err())); + rhs.err().~unexpected_type(); + std::swap(this->m_has_val, rhs.m_has_val); + } + + void swap_where_only_one_has_value(expected &rhs, t_is_not_void) { + swap_where_only_one_has_value_and_t_is_not_void( + rhs, typename std::is_nothrow_move_constructible::type{}, + typename 
std::is_nothrow_move_constructible::type{}); + } + + void swap_where_only_one_has_value_and_t_is_not_void( + expected &rhs, t_is_nothrow_move_constructible, + e_is_nothrow_move_constructible) noexcept { + auto temp = std::move(val()); + val().~T(); + ::new (errptr()) unexpected_type(std::move(rhs.err())); + rhs.err().~unexpected_type(); + ::new (rhs.valptr()) T(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); + } + + void swap_where_only_one_has_value_and_t_is_not_void( + expected &rhs, t_is_nothrow_move_constructible, + move_constructing_e_can_throw) { + auto temp = std::move(val()); + val().~T(); +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (errptr()) unexpected_type(std::move(rhs.err())); + rhs.err().~unexpected_type(); + ::new (rhs.valptr()) T(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); + } catch (...) { + val() = std::move(temp); + throw; + } +#else + ::new (errptr()) unexpected_type(std::move(rhs.err())); + rhs.err().~unexpected_type(); + ::new (rhs.valptr()) T(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); +#endif + } + + void swap_where_only_one_has_value_and_t_is_not_void( + expected &rhs, move_constructing_t_can_throw, + e_is_nothrow_move_constructible) { + auto temp = std::move(rhs.err()); + rhs.err().~unexpected_type(); +#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED + try { + ::new (rhs.valptr()) T(std::move(val())); + val().~T(); + ::new (errptr()) unexpected_type(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); + } catch (...) 
{ + rhs.err() = std::move(temp); + throw; + } +#else + ::new (rhs.valptr()) T(std::move(val())); + val().~T(); + ::new (errptr()) unexpected_type(std::move(temp)); + std::swap(this->m_has_val, rhs.m_has_val); +#endif + } + +public: + template + detail::enable_if_t::value && + detail::is_swappable::value && + (std::is_nothrow_move_constructible::value || + std::is_nothrow_move_constructible::value)> + swap(expected &rhs) noexcept( + std::is_nothrow_move_constructible::value + &&detail::is_nothrow_swappable::value + &&std::is_nothrow_move_constructible::value + &&detail::is_nothrow_swappable::value) { + if (has_value() && rhs.has_value()) { + swap_where_both_have_value(rhs, typename std::is_void::type{}); + } else if (!has_value() && rhs.has_value()) { + rhs.swap(*this); + } else if (has_value()) { + swap_where_only_one_has_value(rhs, typename std::is_void::type{}); + } else { + using std::swap; + swap(err(), rhs.err()); + } + } + + constexpr const T *operator->() const { return valptr(); } + TL_EXPECTED_11_CONSTEXPR T *operator->() { return valptr(); } + + template ::value> * = nullptr> + constexpr const U &operator*() const & { + return val(); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &operator*() & { + return val(); + } + template ::value> * = nullptr> + constexpr const U &&operator*() const && { + return std::move(val()); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &&operator*() && { + return std::move(val()); + } + + constexpr bool has_value() const noexcept { return this->m_has_val; } + constexpr explicit operator bool() const noexcept { return this->m_has_val; } + + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR const U &value() const & { + if (!has_value()) + detail::throw_exception(bad_expected_access(err().value())); + return val(); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &value() & { + if (!has_value()) + detail::throw_exception(bad_expected_access(err().value())); + return 
val(); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR const U &&value() const && { + if (!has_value()) + detail::throw_exception(bad_expected_access(std::move(err()).value())); + return std::move(val()); + } + template ::value> * = nullptr> + TL_EXPECTED_11_CONSTEXPR U &&value() && { + if (!has_value()) + detail::throw_exception(bad_expected_access(std::move(err()).value())); + return std::move(val()); + } + + constexpr const E &error() const & { return err().value(); } + TL_EXPECTED_11_CONSTEXPR E &error() & { return err().value(); } + constexpr const E &&error() const && { return std::move(err().value()); } + TL_EXPECTED_11_CONSTEXPR E &&error() && { return std::move(err().value()); } + + template constexpr T value_or(U &&v) const & { + static_assert(std::is_copy_constructible::value && + std::is_convertible::value, + "T must be copy-constructible and convertible to from U&&"); + return bool(*this) ? **this : static_cast(std::forward(v)); + } + template TL_EXPECTED_11_CONSTEXPR T value_or(U &&v) && { + static_assert(std::is_move_constructible::value && + std::is_convertible::value, + "T must be move-constructible and convertible to from U&&"); + return bool(*this) ? std::move(**this) : static_cast(std::forward(v)); + } +}; + +namespace detail { +template using exp_t = typename detail::decay_t::value_type; +template using err_t = typename detail::decay_t::error_type; +template using ret_t = expected>; + +#ifdef TL_EXPECTED_CXX14 +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval()))> +constexpr auto and_then_impl(Exp &&exp, F &&f) { + static_assert(detail::is_expected::value, "F must return an expected"); + + return exp.has_value() + ? 
detail::invoke(std::forward(f), *std::forward(exp)) + : Ret(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval()))> +constexpr auto and_then_impl(Exp &&exp, F &&f) { + static_assert(detail::is_expected::value, "F must return an expected"); + + return exp.has_value() ? detail::invoke(std::forward(f)) + : Ret(unexpect, std::forward(exp).error()); +} +#else +template struct TC; +template (), + *std::declval())), + detail::enable_if_t>::value> * = nullptr> +auto and_then_impl(Exp &&exp, F &&f) -> Ret { + static_assert(detail::is_expected::value, "F must return an expected"); + + return exp.has_value() + ? detail::invoke(std::forward(f), *std::forward(exp)) + : Ret(unexpect, std::forward(exp).error()); +} + +template ())), + detail::enable_if_t>::value> * = nullptr> +constexpr auto and_then_impl(Exp &&exp, F &&f) -> Ret { + static_assert(detail::is_expected::value, "F must return an expected"); + + return exp.has_value() ? detail::invoke(std::forward(f)) + : Ret(unexpect, std::forward(exp).error()); +} +#endif + +#ifdef TL_EXPECTED_CXX14 +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval())), + detail::enable_if_t::value> * = nullptr> +constexpr auto expected_map_impl(Exp &&exp, F &&f) { + using result = ret_t>; + return exp.has_value() ? 
result(detail::invoke(std::forward(f), + *std::forward(exp))) + : result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval())), + detail::enable_if_t::value> * = nullptr> +auto expected_map_impl(Exp &&exp, F &&f) { + using result = expected>; + if (exp.has_value()) { + detail::invoke(std::forward(f), *std::forward(exp)); + return result(); + } + + return result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval())), + detail::enable_if_t::value> * = nullptr> +constexpr auto expected_map_impl(Exp &&exp, F &&f) { + using result = ret_t>; + return exp.has_value() ? result(detail::invoke(std::forward(f))) + : result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval())), + detail::enable_if_t::value> * = nullptr> +auto expected_map_impl(Exp &&exp, F &&f) { + using result = expected>; + if (exp.has_value()) { + detail::invoke(std::forward(f)); + return result(); + } + + return result(unexpect, std::forward(exp).error()); +} +#else +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval())), + detail::enable_if_t::value> * = nullptr> + +constexpr auto expected_map_impl(Exp &&exp, F &&f) + -> ret_t> { + using result = ret_t>; + + return exp.has_value() ? 
result(detail::invoke(std::forward(f), + *std::forward(exp))) + : result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + *std::declval())), + detail::enable_if_t::value> * = nullptr> + +auto expected_map_impl(Exp &&exp, F &&f) -> expected> { + if (exp.has_value()) { + detail::invoke(std::forward(f), *std::forward(exp)); + return {}; + } + + return unexpected>(std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval())), + detail::enable_if_t::value> * = nullptr> + +constexpr auto expected_map_impl(Exp &&exp, F &&f) + -> ret_t> { + using result = ret_t>; + + return exp.has_value() ? result(detail::invoke(std::forward(f))) + : result(unexpect, std::forward(exp).error()); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval())), + detail::enable_if_t::value> * = nullptr> + +auto expected_map_impl(Exp &&exp, F &&f) -> expected> { + if (exp.has_value()) { + detail::invoke(std::forward(f)); + return {}; + } + + return unexpected>(std::forward(exp).error()); +} +#endif + +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto map_error_impl(Exp &&exp, F &&f) { + using result = expected, detail::decay_t>; + return exp.has_value() + ? 
result(*std::forward(exp)) + : result(unexpect, detail::invoke(std::forward(f), + std::forward(exp).error())); +} +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto map_error_impl(Exp &&exp, F &&f) { + using result = expected, monostate>; + if (exp.has_value()) { + return result(*std::forward(exp)); + } + + detail::invoke(std::forward(f), std::forward(exp).error()); + return result(unexpect, monostate{}); +} +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto map_error_impl(Exp &&exp, F &&f) { + using result = expected, detail::decay_t>; + return exp.has_value() + ? result() + : result(unexpect, detail::invoke(std::forward(f), + std::forward(exp).error())); +} +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto map_error_impl(Exp &&exp, F &&f) { + using result = expected, monostate>; + if (exp.has_value()) { + return result(); + } + + detail::invoke(std::forward(f), std::forward(exp).error()); + return result(unexpect, monostate{}); +} +#else +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto map_error_impl(Exp &&exp, F &&f) + -> expected, detail::decay_t> { + using result = expected, detail::decay_t>; + + return exp.has_value() + ? 
result(*std::forward(exp)) + : result(unexpect, detail::invoke(std::forward(f), + std::forward(exp).error())); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto map_error_impl(Exp &&exp, F &&f) -> expected, monostate> { + using result = expected, monostate>; + if (exp.has_value()) { + return result(*std::forward(exp)); + } + + detail::invoke(std::forward(f), std::forward(exp).error()); + return result(unexpect, monostate{}); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto map_error_impl(Exp &&exp, F &&f) + -> expected, detail::decay_t> { + using result = expected, detail::decay_t>; + + return exp.has_value() + ? result() + : result(unexpect, detail::invoke(std::forward(f), + std::forward(exp).error())); +} + +template >::value> * = nullptr, + class Ret = decltype(detail::invoke(std::declval(), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto map_error_impl(Exp &&exp, F &&f) -> expected, monostate> { + using result = expected, monostate>; + if (exp.has_value()) { + return result(); + } + + detail::invoke(std::forward(f), std::forward(exp).error()); + return result(unexpect, monostate{}); +} +#endif + +#ifdef TL_EXPECTED_CXX14 +template (), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +constexpr auto or_else_impl(Exp &&exp, F &&f) { + static_assert(detail::is_expected::value, "F must return an expected"); + return exp.has_value() ? std::forward(exp) + : detail::invoke(std::forward(f), + std::forward(exp).error()); +} + +template (), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +detail::decay_t or_else_impl(Exp &&exp, F &&f) { + return exp.has_value() ? 
std::forward(exp) + : (detail::invoke(std::forward(f), + std::forward(exp).error()), + std::forward(exp)); +} +#else +template (), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +auto or_else_impl(Exp &&exp, F &&f) -> Ret { + static_assert(detail::is_expected::value, "F must return an expected"); + return exp.has_value() ? std::forward(exp) + : detail::invoke(std::forward(f), + std::forward(exp).error()); +} + +template (), + std::declval().error())), + detail::enable_if_t::value> * = nullptr> +detail::decay_t or_else_impl(Exp &&exp, F &&f) { + return exp.has_value() ? std::forward(exp) + : (detail::invoke(std::forward(f), + std::forward(exp).error()), + std::forward(exp)); +} +#endif +} // namespace detail + +template +constexpr bool operator==(const expected &lhs, + const expected &rhs) { + return (lhs.has_value() != rhs.has_value()) + ? false + : (!lhs.has_value() ? lhs.error() == rhs.error() : *lhs == *rhs); +} +template +constexpr bool operator!=(const expected &lhs, + const expected &rhs) { + return (lhs.has_value() != rhs.has_value()) + ? true + : (!lhs.has_value() ? lhs.error() != rhs.error() : *lhs != *rhs); +} +template +constexpr bool operator==(const expected &lhs, + const expected &rhs) { + return (lhs.has_value() != rhs.has_value()) + ? false + : (!lhs.has_value() ? lhs.error() == rhs.error() : true); +} +template +constexpr bool operator!=(const expected &lhs, + const expected &rhs) { + return (lhs.has_value() != rhs.has_value()) + ? true + : (!lhs.has_value() ? lhs.error() == rhs.error() : false); +} + +template +constexpr bool operator==(const expected &x, const U &v) { + return x.has_value() ? *x == v : false; +} +template +constexpr bool operator==(const U &v, const expected &x) { + return x.has_value() ? *x == v : false; +} +template +constexpr bool operator!=(const expected &x, const U &v) { + return x.has_value() ? 
*x != v : true; +} +template +constexpr bool operator!=(const U &v, const expected &x) { + return x.has_value() ? *x != v : true; +} + +template +constexpr bool operator==(const expected &x, const unexpected &e) { + return x.has_value() ? false : x.error() == e.value(); +} +template +constexpr bool operator==(const unexpected &e, const expected &x) { + return x.has_value() ? false : x.error() == e.value(); +} +template +constexpr bool operator!=(const expected &x, const unexpected &e) { + return x.has_value() ? true : x.error() != e.value(); +} +template +constexpr bool operator!=(const unexpected &e, const expected &x) { + return x.has_value() ? true : x.error() != e.value(); +} + +template ::value || + std::is_move_constructible::value) && + detail::is_swappable::value && + std::is_move_constructible::value && + detail::is_swappable::value> * = nullptr> +void swap(expected &lhs, + expected &rhs) noexcept(noexcept(lhs.swap(rhs))) { + lhs.swap(rhs); +} +} // namespace tl + +#endif +/* end file include/ada/expected.h */ +#include +#include + +/** + * @namespace ada::parser + * @brief Includes the definitions for supported parsers + */ +namespace ada::parser { + + /** + * Parses a url. + */ + url parse_url(std::string_view user_input, + const ada::url* base_url = nullptr, + ada::encoding_type encoding = ada::encoding_type::UTF8); + +} // namespace ada + +#endif // ADA_PARSER_H +/* end file include/ada/parser.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/scheme-inl.h +/* begin file include/ada/scheme-inl.h */ +/** + * @file scheme-inl.h + * @brief Definitions for the URL scheme. + */ +#ifndef ADA_SCHEME_INL_H +#define ADA_SCHEME_INL_H + + +namespace ada::scheme { + + /** + * @namespace ada::scheme::details + * @brief Includes the definitions for scheme specific entities + */ + namespace details { + // for use with is_special and get_special_port + // Spaces, if present, are removed from URL. 
+ constexpr std::string_view is_special_list[] = {"http", " ", "https", + "ws", "ftp", "wss", "file", " "}; + // for use with get_special_port + constexpr uint16_t special_ports[] = {80, 0, 443, 80, 21, 443, 0, 0}; + } + + ada_really_inline constexpr bool is_special(std::string_view scheme) { + if(scheme.empty()) { return false; } + int hash_value = (2*scheme.size() + (unsigned)(scheme[0])) & 7; // slot in the 8-entry table, derived from the length and first character + const std::string_view target = details::is_special_list[hash_value]; + return (target[0] == scheme[0]) && (target.substr(1) == scheme.substr(1)); + } + constexpr uint16_t get_special_port(std::string_view scheme) noexcept { + if(scheme.empty()) { return 0; } + int hash_value = (2*scheme.size() + (unsigned)(scheme[0])) & 7; + const std::string_view target = details::is_special_list[hash_value]; + if ((target[0] == scheme[0]) && (target.substr(1) == scheme.substr(1))) { + return details::special_ports[hash_value]; + } else { return 0; } + } + constexpr uint16_t get_special_port(ada::scheme::type type) noexcept { + return details::special_ports[int(type)]; + } + constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept { + if(scheme.empty()) { return ada::scheme::NOT_SPECIAL; } + int hash_value = (2*scheme.size() + (unsigned)(scheme[0])) & 7; + const std::string_view target = details::is_special_list[hash_value]; + if ((target[0] == scheme[0]) && (target.substr(1) == scheme.substr(1))) { + return ada::scheme::type(hash_value); + } else { return ada::scheme::NOT_SPECIAL; } + } + +} // namespace ada::scheme + +#endif // ADA_SCHEME_INL_H +/* end file include/ada/scheme-inl.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/url-inl.h +/* begin file include/ada/url-inl.h */ +/** + * @file url-inl.h + * @brief Definitions for the URL + */ +#ifndef ADA_URL_INL_H +#define ADA_URL_INL_H + + +namespace ada { + [[nodiscard]] ada_really_inline bool url::includes_credentials() const noexcept { + return !username.empty() ||
!password.empty(); + } + [[nodiscard]] ada_really_inline bool url::is_special() const noexcept { + return type != ada::scheme::NOT_SPECIAL; + } + [[nodiscard]] inline uint16_t url::get_special_port() const { + return ada::scheme::get_special_port(type); + } + [[nodiscard]] ada_really_inline ada::scheme::type url::get_scheme_type() const noexcept { + return type; + } + [[nodiscard]] ada_really_inline uint16_t url::scheme_default_port() const noexcept { + return scheme::get_special_port(type); + } + [[nodiscard]] inline bool url::cannot_have_credentials_or_port() const { + return !host.has_value() || host.value().empty() || type == ada::scheme::type::FILE; + } + ada_really_inline size_t url::parse_port(std::string_view view, bool check_trailing_content) noexcept { + ada_log("parse_port('", view, "') ", view.size()); + uint16_t parsed_port{}; + auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port); + if(r.ec == std::errc::result_out_of_range) { + ada_log("parse_port: std::errc::result_out_of_range"); + is_valid = false; + return 0; + } + ada_log("parse_port: ", parsed_port); + const size_t consumed = size_t(r.ptr - view.data()); + ada_log("parse_port: consumed ", consumed); + if(check_trailing_content) { + is_valid &= (consumed == view.size() || view[consumed] == '/' || view[consumed] == '?' || (is_special() && view[consumed] == '\\')); + } + ada_log("parse_port: is_valid = ", is_valid); + if(is_valid) { + port = (r.ec == std::errc() && scheme_default_port() != parsed_port) ? + std::optional(parsed_port) : std::nullopt; + } + return consumed; + } + [[nodiscard]] inline std::string_view url::get_scheme() const noexcept { + if(is_special()) { return ada::scheme::details::is_special_list[type]; } + // We only move the 'scheme' if it is non-special. 
+ return non_special_scheme; + } + inline void url::set_scheme(std::string&& new_scheme) noexcept { + type = ada::scheme::get_scheme_type(new_scheme); + // We only move the 'scheme' if it is non-special. + if(!is_special()) { + non_special_scheme = std::move(new_scheme); + } + } + inline void url::copy_scheme(ada::url&& u) noexcept { + non_special_scheme = u.non_special_scheme; + type = u.type; + } + inline void url::copy_scheme(const ada::url& u) { + non_special_scheme = u.non_special_scheme; + type = u.type; + } + + inline std::ostream& operator<<(std::ostream& out, const ada::url& u) { + return out << u.to_string(); + } +} // namespace ada + +#endif // ADA_URL_INL_H +/* end file include/ada/url-inl.h */ + +// Public API +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/ada_version.h +/* begin file include/ada/ada_version.h */ +/** + * @file ada_version.h + * @brief Definitions for Ada's version number. + */ +#ifndef ADA_ADA_VERSION_H +#define ADA_ADA_VERSION_H + +#define ADA_VERSION "1.0.4" + +namespace ada { + + enum { + ADA_VERSION_MAJOR = 1, + ADA_VERSION_MINOR = 0, + ADA_VERSION_REVISION = 4, + }; + +} // namespace ada + +#endif // ADA_ADA_VERSION_H +/* end file include/ada/ada_version.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/implementation.h +/* begin file include/ada/implementation.h */ +/** + * @file implementation.h + * @brief Definitions for user facing functions for parsing URL and its components. + */ +#ifndef ADA_IMPLEMENTATION_H +#define ADA_IMPLEMENTATION_H + +#include +#include + + +namespace ada { + enum class errors { + generic_error + }; + + using result = tl::expected; + + /** + * The URL parser takes a scalar value string input, with an optional null or base URL base (default null) + * and an optional encoding encoding (default UTF-8). + * + * @param input the string input to analyze. + * @param base_url the optional string input to use as a base url.
+ * @param encoding encoding (default to UTF-8) + * @return a parsed URL. + */ + ada_warn_unused ada::result parse(std::string_view input, + const ada::url* base_url = nullptr, + ada::encoding_type encoding = ada::encoding_type::UTF8); + /** + * Computes a href string from a file path. + * @return a href string (starts with file://) + */ + std::string href_from_file(std::string_view path); +} + +#endif // ADA_IMPLEMENTATION_H +/* end file include/ada/implementation.h */ + +#endif // ADA_H +/* end file include/ada.h */ diff --git a/lib/internal/url.js b/lib/internal/url.js index 7c24dffb6df266..7b2e0b4ffa6cc7 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -18,11 +18,9 @@ const { ObjectGetPrototypeOf, ObjectKeys, ObjectPrototypeHasOwnProperty, - ReflectApply, ReflectGetOwnPropertyDescriptor, ReflectOwnKeys, RegExpPrototypeSymbolReplace, - String, StringPrototypeCharAt, StringPrototypeCharCodeAt, StringPrototypeCodePointAt, @@ -81,7 +79,6 @@ const path = require('path'); const { validateFunction, - validateObject, } = require('internal/validators'); const querystring = require('querystring'); @@ -92,24 +89,8 @@ const isWindows = platform === 'win32'; const { domainToASCII: _domainToASCII, domainToUnicode: _domainToUnicode, - encodeAuth, parse, - URL_FLAGS_CANNOT_BE_BASE, - URL_FLAGS_HAS_FRAGMENT, - URL_FLAGS_HAS_HOST, - URL_FLAGS_HAS_PASSWORD, - URL_FLAGS_HAS_PATH, - URL_FLAGS_HAS_QUERY, - URL_FLAGS_HAS_USERNAME, - URL_FLAGS_IS_DEFAULT_SCHEME_PORT, - URL_FLAGS_SPECIAL, - kFragment, - kHost, - kHostname, - kPathStart, - kPort, - kQuery, - kSchemeStart, + updateUrl, } = internalBinding('url'); const { @@ -120,13 +101,21 @@ const { const FORWARD_SLASH = /\//g; const context = Symbol('context'); -const cannotBeBase = Symbol('cannot-be-base'); -const cannotHaveUsernamePasswordPort = - Symbol('cannot-have-username-password-port'); -const special = Symbol('special'); const searchParams = Symbol('query'); const kFormat = Symbol('format'); +const updateActions
= { + kProtocol: 0, + kHost: 1, + kHostname: 2, + kPort: 3, + kUsername: 4, + kPassword: 5, + kPathname: 6, + kSearch: 7, + kHash: 8, + kHref: 9, +}; let blob; let cryptoRandom; @@ -145,14 +134,6 @@ function lazyCryptoRandom() { return cryptoRandom; } -// Refs: https://html.spec.whatwg.org/multipage/browsers.html#concept-origin-opaque -const kOpaqueOrigin = 'null'; - -// Refs: https://html.spec.whatwg.org/multipage/browsers.html#ascii-serialisation-of-an-origin -function serializeTupleOrigin(scheme, host, port) { - return `${scheme}//${host}${port === null ? '' : `:${port}`}`; -} - // This class provides the internal state of a URL object. An instance of this // class is stored in every URL object and is accessed internally by setters // and getters. It roughly corresponds to the concept of a URL record in the @@ -160,17 +141,16 @@ function serializeTupleOrigin(scheme, host, port) { // the C++ binding. // Refs: https://url.spec.whatwg.org/#concept-url class URLContext { - constructor() { - this.flags = 0; - this.scheme = ':'; - this.username = ''; - this.password = ''; - this.host = null; - this.port = null; - this.path = []; - this.query = null; - this.fragment = null; - } + href = ''; + origin = ''; + protocol = ''; + hostname = ''; + pathname = ''; + search = ''; + username = ''; + password = ''; + port = ''; + hash = ''; } function isURLSearchParams(self) { @@ -247,7 +227,6 @@ class URLSearchParams { } else { // USVString init = toUSVString(init); - if (init[0] === '?') init = init.slice(1); initSearchParams(this, init); } @@ -308,7 +287,9 @@ class URLSearchParams { name = toUSVString(name); value = toUSVString(value); ArrayPrototypePush(this[searchParams], name, value); - update(this[context], this); + if (this[context]) { + this[context].search = this.toString(); + } } delete(name) { @@ -329,7 +310,9 @@ class URLSearchParams { i += 2; } } - update(this[context], this); + if (this[context]) { + this[context].search = this.toString(); + } } get(name) { @@ 
-424,7 +407,9 @@ class URLSearchParams { ArrayPrototypePush(list, name, value); } - update(this[context], this); + if (this[context]) { + this[context].search = this.toString(); + } } sort() { @@ -468,7 +453,9 @@ class URLSearchParams { } } - update(this[context], this); + if (this[context]) { + this[context].search = this.toString(); + } } // https://heycam.github.io/webidl/#es-iterators @@ -550,125 +537,28 @@ ObjectDefineProperties(URLSearchParams.prototype, { }, }); -function onParseComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - const ctx = this[context]; - ctx.flags = flags; - ctx.scheme = protocol; - ctx.username = (flags & URL_FLAGS_HAS_USERNAME) !== 0 ? username : ''; - ctx.password = (flags & URL_FLAGS_HAS_PASSWORD) !== 0 ? password : ''; - ctx.port = port; - ctx.path = (flags & URL_FLAGS_HAS_PATH) !== 0 ? path : []; - ctx.query = query; - ctx.fragment = fragment; - ctx.host = host; - if (!this[searchParams]) { // Invoked from URL constructor - this[searchParams] = new URLSearchParams(); - this[searchParams][context] = this; - } - initSearchParams(this[searchParams], query); -} - function isURLThis(self) { return self != null && ObjectPrototypeHasOwnProperty(self, context); } -function onParseError(input, flags) { - throw new ERR_INVALID_URL(input); -} - -function onParseProtocolComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - const ctx = this[context]; - if ((flags & URL_FLAGS_SPECIAL) !== 0) { - ctx.flags |= URL_FLAGS_SPECIAL; - } else { - ctx.flags &= ~URL_FLAGS_SPECIAL; - } - ctx.scheme = protocol; - ctx.port = port; -} - -function onParseHostnameComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - const ctx = this[context]; - if ((flags & URL_FLAGS_HAS_HOST) !== 0) { - ctx.host = host; - ctx.flags |= URL_FLAGS_HAS_HOST; - } else { - ctx.host = null; - ctx.flags &= ~URL_FLAGS_HAS_HOST; - } -} - -function onParsePortComplete(flags, protocol, 
username, password, - host, port, path, query, fragment) { - this[context].port = port; -} - -function onParseHostComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - ReflectApply(onParseHostnameComplete, this, arguments); - if (port !== null || ((flags & URL_FLAGS_IS_DEFAULT_SCHEME_PORT) !== 0)) - ReflectApply(onParsePortComplete, this, arguments); -} - -function onParsePathComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - const ctx = this[context]; - if ((flags & URL_FLAGS_HAS_PATH) !== 0) { - ctx.path = path; - ctx.flags |= URL_FLAGS_HAS_PATH; - } else { - ctx.path = []; - ctx.flags &= ~URL_FLAGS_HAS_PATH; - } - - // The C++ binding may set host to empty string. - if ((flags & URL_FLAGS_HAS_HOST) !== 0) { - ctx.host = host; - ctx.flags |= URL_FLAGS_HAS_HOST; - } -} - -function onParseSearchComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - this[context].query = query; -} - -function onParseHashComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - this[context].fragment = fragment; -} - class URL { constructor(input, base = undefined) { // toUSVString is not needed. 
input = `${input}`; - let base_context; - if (base !== undefined) { - base_context = new URL(base)[context]; - } this[context] = new URLContext(); - parse(input, -1, base_context, undefined, - FunctionPrototypeBind(onParseComplete, this), - FunctionPrototypeBind(onParseError, this, input)); - } + this.#onParseComplete = FunctionPrototypeBind(this.#onParseComplete, this); - get [special]() { - return (this[context].flags & URL_FLAGS_SPECIAL) !== 0; - } + if (base !== undefined) { + base = `${base}`; + } - get [cannotBeBase]() { - return (this[context].flags & URL_FLAGS_CANNOT_BE_BASE) !== 0; - } + const isValid = parse(input, + base, + this.#onParseComplete); - // https://url.spec.whatwg.org/#cannot-have-a-username-password-port - get [cannotHaveUsernamePasswordPort]() { - const { host, scheme } = this[context]; - return ((host == null || host === '') || - this[cannotBeBase] || - scheme === 'file:'); + if (!isValid) { + throw new ERR_INVALID_URL(input); + } } [inspect.custom](depth, opts) { @@ -697,126 +587,68 @@ class URL { obj.hash = this.hash; if (opts.showHidden) { - obj.cannotBeBase = this[cannotBeBase]; - obj.special = this[special]; obj[context] = this[context]; } return `${constructor.name} ${inspect(obj, opts)}`; } - [kFormat](options) { - if (options) - validateObject(options, 'options'); - - options = { - fragment: true, - unicode: false, - search: true, - auth: true, - ...options, - }; + #onParseComplete = (href, origin, protocol, hostname, pathname, + search, username, password, port, hash) => { const ctx = this[context]; - // https://url.spec.whatwg.org/#url-serializing - let ret = ctx.scheme; - if (ctx.host !== null) { - ret += '//'; - const has_username = ctx.username !== ''; - const has_password = ctx.password !== ''; - if (options.auth && (has_username || has_password)) { - if (has_username) - ret += ctx.username; - if (has_password) - ret += `:${ctx.password}`; - ret += '@'; - } - ret += options.unicode ? 
- domainToUnicode(ctx.host) : ctx.host; - if (ctx.port !== null) - ret += `:${ctx.port}`; + ctx.href = href; + ctx.origin = origin; + ctx.protocol = protocol; + ctx.hostname = hostname; + ctx.pathname = pathname; + ctx.search = search; + ctx.username = username; + ctx.password = password; + ctx.port = port; + ctx.hash = hash; + if (!this[searchParams]) { // Invoked from URL constructor + this[searchParams] = new URLSearchParams(); + this[searchParams][context] = this; } - if (this[cannotBeBase]) { - ret += ctx.path[0]; - } else { - if (ctx.host === null && ctx.path.length > 1 && ctx.path[0] === '') { - ret += '/.'; - } - if (ctx.path.length) { - ret += '/' + ArrayPrototypeJoin(ctx.path, '/'); - } - } - if (options.search && ctx.query !== null) - ret += `?${ctx.query}`; - if (options.fragment && ctx.fragment !== null) - ret += `#${ctx.fragment}`; - return ret; - } + initSearchParams(this[searchParams], ctx.search); + }; - // https://heycam.github.io/webidl/#es-stringifier toString() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[kFormat](); + return this[context].href; } get href() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[kFormat](); + return this[context].href; } - set href(input) { + set href(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. 
- input = `${input}`; - parse(input, -1, undefined, undefined, - FunctionPrototypeBind(onParseComplete, this), - FunctionPrototypeBind(onParseError, this, input)); + const valid = updateUrl(this[context].href, updateActions.kHref, `${value}`, this.#onParseComplete); + if (!valid) { throw ERR_INVALID_URL(`${value}`); } } // readonly get origin() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // Refs: https://url.spec.whatwg.org/#concept-url-origin - const ctx = this[context]; - switch (ctx.scheme) { - case 'blob:': - if (ctx.path.length > 0) { - try { - return (new URL(ctx.path[0])).origin; - } catch { - // Fall through... do nothing - } - } - return kOpaqueOrigin; - case 'ftp:': - case 'http:': - case 'https:': - case 'ws:': - case 'wss:': - return serializeTupleOrigin(ctx.scheme, ctx.host, ctx.port); - } - return kOpaqueOrigin; + return this[context].origin; } get protocol() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].scheme; + return this[context].protocol; } - set protocol(scheme) { + set protocol(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. - scheme = `${scheme}`; - if (scheme.length === 0) - return; - const ctx = this[context]; - parse(scheme, kSchemeStart, null, ctx, - FunctionPrototypeBind(onParseProtocolComplete, this)); + updateUrl(this[context].href, updateActions.kProtocol, `${value}`, this.#onParseComplete); } get username() { @@ -825,21 +657,10 @@ class URL { return this[context].username; } - set username(username) { + set username(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. 
- username = `${username}`; - if (this[cannotHaveUsernamePasswordPort]) - return; - const ctx = this[context]; - if (username === '') { - ctx.username = ''; - ctx.flags &= ~URL_FLAGS_HAS_USERNAME; - return; - } - ctx.username = encodeAuth(username); - ctx.flags |= URL_FLAGS_HAS_USERNAME; + updateUrl(this[context].href, updateActions.kUsername, `${value}`, this.#onParseComplete); } get password() { @@ -848,138 +669,74 @@ class URL { return this[context].password; } - set password(password) { + set password(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. - password = `${password}`; - if (this[cannotHaveUsernamePasswordPort]) - return; - const ctx = this[context]; - if (password === '') { - ctx.password = ''; - ctx.flags &= ~URL_FLAGS_HAS_PASSWORD; - return; - } - ctx.password = encodeAuth(password); - ctx.flags |= URL_FLAGS_HAS_PASSWORD; + updateUrl(this[context].href, updateActions.kPassword, `${value}`, this.#onParseComplete); } get host() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - let ret = ctx.host || ''; - if (ctx.port !== null) - ret += `:${ctx.port}`; - return ret; + const port = this[context].port; + const suffix = port.length > 0 ? `:${port}` : ''; + return this[context].hostname + suffix; } - set host(host) { + set host(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - // toUSVString is not needed. 
- host = `${host}`; - if (this[cannotBeBase]) { - // Cannot set the host if cannot-be-base is set - return; - } - parse(host, kHost, null, ctx, - FunctionPrototypeBind(onParseHostComplete, this)); + updateUrl(this[context].href, updateActions.kHost, `${value}`, this.#onParseComplete); } get hostname() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].host || ''; + return this[context].hostname; } - set hostname(host) { + set hostname(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - // toUSVString is not needed. - host = `${host}`; - if (this[cannotBeBase]) { - // Cannot set the host if cannot-be-base is set - return; - } - parse(host, kHostname, null, ctx, onParseHostnameComplete.bind(this)); + updateUrl(this[context].href, updateActions.kHostname, `${value}`, this.#onParseComplete); } get port() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const port = this[context].port; - return port === null ? '' : String(port); + return this[context].port; } - set port(port) { + set port(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. - port = `${port}`; - if (this[cannotHaveUsernamePasswordPort]) - return; - const ctx = this[context]; - if (port === '') { - ctx.port = null; - return; - } - parse(port, kPort, null, ctx, - FunctionPrototypeBind(onParsePortComplete, this)); + updateUrl(this[context].href, updateActions.kPort, `${value}`, this.#onParseComplete); } get pathname() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - if (this[cannotBeBase]) - return ctx.path[0]; - if (ctx.path.length === 0) - return ''; - return `/${ArrayPrototypeJoin(ctx.path, '/')}`; + return this[context].pathname; } - set pathname(path) { + set pathname(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. 
- path = `${path}`; - if (this[cannotBeBase]) - return; - parse(path, kPathStart, null, this[context], - onParsePathComplete.bind(this)); + updateUrl(this[context].href, updateActions.kPathname, `${value}`, this.#onParseComplete); } get search() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const { query } = this[context]; - if (query === null || query === '') - return ''; - return `?${query}`; + return this[context].search; } set search(search) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; search = toUSVString(search); - if (search === '') { - ctx.query = null; - ctx.flags &= ~URL_FLAGS_HAS_QUERY; - } else { - if (search[0] === '?') search = StringPrototypeSlice(search, 1); - ctx.query = ''; - ctx.flags |= URL_FLAGS_HAS_QUERY; - if (search) { - parse(search, kQuery, null, ctx, - FunctionPrototypeBind(onParseSearchComplete, this)); - } - } - initSearchParams(this[searchParams], search); + updateUrl(this[context].href, updateActions.kSearch, search, this.#onParseComplete); + initSearchParams(this[searchParams], this[context].search); } // readonly @@ -992,34 +749,19 @@ class URL { get hash() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const { fragment } = this[context]; - if (fragment === null || fragment === '') - return ''; - return `#${fragment}`; + return this[context].hash; } - set hash(hash) { + set hash(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - // toUSVString is not needed. 
- hash = `${hash}`; - if (!hash) { - ctx.fragment = null; - ctx.flags &= ~URL_FLAGS_HAS_FRAGMENT; - return; - } - if (hash[0] === '#') hash = StringPrototypeSlice(hash, 1); - ctx.fragment = ''; - ctx.flags |= URL_FLAGS_HAS_FRAGMENT; - parse(hash, kFragment, null, ctx, - FunctionPrototypeBind(onParseHashComplete, this)); + updateUrl(this[context].href, updateActions.kHash, `${value}`, this.#onParseComplete); } toJSON() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[kFormat](); + return this[context].href; } static createObjectURL(obj) { @@ -1027,9 +769,6 @@ class URL { if (cryptoRandom === undefined) throw new ERR_NO_CRYPTO(); - // Yes, lazy loading is annoying but because of circular - // references between the url, internal/blob, and buffer - // modules, lazy loading here makes sure that things work. const blob = lazyBlob(); if (!blob.isBlob(obj)) throw new ERR_INVALID_ARG_TYPE('obj', 'Blob', obj); @@ -1044,6 +783,7 @@ class URL { static revokeObjectURL(url) { url = `${url}`; try { + // TODO(@anonrig): Remove this try/catch by calling `parse` directly. 
const parsed = new URL(url); const split = StringPrototypeSplit(parsed.pathname, ':'); if (split.length === 2) @@ -1078,21 +818,6 @@ ObjectDefineProperties(URL, { revokeObjectURL: kEnumerableProperty, }); -function update(url, params) { - if (!url) - return; - - const ctx = url[context]; - const serializedParams = params.toString(); - if (serializedParams) { - ctx.query = serializedParams; - ctx.flags |= URL_FLAGS_HAS_QUERY; - } else { - ctx.query = null; - ctx.flags &= ~URL_FLAGS_HAS_QUERY; - } -} - function initSearchParams(url, init) { if (!init) { url[searchParams] = []; @@ -1105,14 +830,14 @@ function initSearchParams(url, init) { // Ref: https://url.spec.whatwg.org/#concept-urlencoded-parser function parseParams(qs) { const out = []; - let pairStart = 0; - let lastPos = 0; let seenSep = false; let buf = ''; let encoded = false; let encodeCheck = 0; - let i; - for (i = 0; i < qs.length; ++i) { + let i = qs[0] === '?' ? 1 : 0; + let pairStart = i; + let lastPos = i; + for (; i < qs.length; ++i) { const code = StringPrototypeCharCodeAt(qs, i); // Try matching key/value pair separator @@ -1591,7 +1316,6 @@ module.exports = { domainToASCII, domainToUnicode, urlToHttpOptions, - formatSymbol: kFormat, searchParamsSymbol: searchParams, encodeStr, }; diff --git a/lib/url.js b/lib/url.js index 4d7374a8e3f358..019a6dced9db55 100644 --- a/lib/url.js +++ b/lib/url.js @@ -22,6 +22,7 @@ 'use strict'; const { + Boolean, Int8Array, ObjectCreate, ObjectKeys, @@ -38,7 +39,10 @@ const { ERR_INVALID_ARG_TYPE, ERR_INVALID_URL, } = require('internal/errors').codes; -const { validateString } = require('internal/validators'); +const { + validateString, + validateObject, +} = require('internal/validators'); // This ensures setURLConstructor() is called before the native // URL::ToObject() method is used. 
@@ -51,11 +55,14 @@ const { domainToASCII, domainToUnicode, fileURLToPath, - formatSymbol, pathToFileURL, urlToHttpOptions, } = require('internal/url'); +const { + formatUrl, +} = internalBinding('url'); + // Original url.parse() API function Url() { @@ -579,13 +586,36 @@ function urlFormat(urlObject, options) { } else if (typeof urlObject !== 'object' || urlObject === null) { throw new ERR_INVALID_ARG_TYPE('urlObject', ['Object', 'string'], urlObject); - } else if (!(urlObject instanceof Url)) { - const format = urlObject[formatSymbol]; - return format ? - format.call(urlObject, options) : - Url.prototype.format.call(urlObject); + } else if (urlObject instanceof URL) { + let fragment = true; + let unicode = false; + let search = true; + let auth = true; + + if (options) { + validateObject(options, 'options'); + + if (options.fragment != null) { + fragment = Boolean(options.fragment); + } + + if (options.unicode != null) { + unicode = Boolean(options.unicode); + } + + if (options.search != null) { + search = Boolean(options.search); + } + + if (options.auth != null) { + auth = Boolean(options.auth); + } + } + + return formatUrl(urlObject.href, fragment, unicode, search, auth); } - return urlObject.format(); + + return Url.prototype.format.call(urlObject); } // These characters do not need escaping: diff --git a/node.gyp b/node.gyp index e7b0d968e9d4bf..cf52281bb4479b 100644 --- a/node.gyp +++ b/node.gyp @@ -468,6 +468,7 @@ 'deps/histogram/histogram.gyp:histogram', 'deps/uvwasi/uvwasi.gyp:uvwasi', 'deps/simdutf/simdutf.gyp:simdutf', + 'deps/ada/ada.gyp:ada', ], 'sources': [ @@ -544,7 +545,6 @@ 'src/node_trace_events.cc', 'src/node_types.cc', 'src/node_url.cc', - 'src/node_url_tables.cc', 'src/node_util.cc', 'src/node_v8.cc', 'src/node_wasi.cc', @@ -1219,6 +1219,7 @@ 'node_dtrace_ustack', 'node_dtrace_provider', 'deps/simdutf/simdutf.gyp:simdutf', + 'deps/ada/ada.gyp:ada', ], 'includes': [ @@ -1259,7 +1260,6 @@ 'test/cctest/test_sockaddr.cc', 
'test/cctest/test_traced_value.cc', 'test/cctest/test_util.cc', - 'test/cctest/test_url.cc', ], 'conditions': [ @@ -1318,6 +1318,7 @@ 'node_dtrace_header', 'node_dtrace_ustack', 'node_dtrace_provider', + 'deps/ada/ada.gyp:ada', ], 'includes': [ @@ -1387,6 +1388,7 @@ '<(node_lib_target_name)', 'deps/histogram/histogram.gyp:histogram', 'deps/uvwasi/uvwasi.gyp:uvwasi', + 'deps/ada/ada.gyp:ada', ], 'includes': [ diff --git a/src/crypto/crypto_common.cc b/src/crypto/crypto_common.cc index 59acdd82096fc8..41e607e9298314 100644 --- a/src/crypto/crypto_common.cc +++ b/src/crypto/crypto_common.cc @@ -1,13 +1,12 @@ +#include "crypto/crypto_common.h" #include "base_object-inl.h" #include "env-inl.h" +#include "memory_tracker-inl.h" +#include "node.h" #include "node_buffer.h" #include "node_crypto.h" -#include "crypto/crypto_common.h" -#include "node.h" #include "node_internals.h" -#include "node_url.h" #include "string_bytes.h" -#include "memory_tracker-inl.h" #include "v8.h" #include diff --git a/src/inspector_agent.cc b/src/inspector_agent.cc index 90ab0c2c321d37..214f992c9d0155 100644 --- a/src/inspector_agent.cc +++ b/src/inspector_agent.cc @@ -647,8 +647,9 @@ class NodeInspectorClient : public V8InspectorClient { protocol::StringUtil::StringViewToUtf8(resource_name_view); if (!IsFilePath(resource_name)) return nullptr; - node::url::URL url = node::url::URL::FromFilePath(resource_name); - return Utf8ToStringView(url.href()); + + std::string url = node::url::FromFilePath(resource_name); + return Utf8ToStringView(url); } node::Environment* env_; diff --git a/src/json_utils.cc b/src/json_utils.cc index aa667ccc90471b..f387985bccd761 100644 --- a/src/json_utils.cc +++ b/src/json_utils.cc @@ -2,8 +2,8 @@ namespace node { -std::string EscapeJsonChars(const std::string& str) { - const std::string control_symbols[0x20] = { +std::string EscapeJsonChars(std::string_view str) { + static const std::string_view control_symbols[0x20] = { "\\u0000", "\\u0001", "\\u0002", "\\u0003", 
"\\u0004", "\\u0005", "\\u0006", "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", "\\u000e", "\\u000f", "\\u0010", "\\u0011", diff --git a/src/json_utils.h b/src/json_utils.h index 06033aa0a9b16d..06d4a7ac0905f9 100644 --- a/src/json_utils.h +++ b/src/json_utils.h @@ -4,13 +4,21 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS #include -#include #include +#include #include +#include namespace node { -std::string EscapeJsonChars(const std::string& str); +constexpr bool NeedsJsonEscape(std::string_view str) { + for (const char c : str) { + if (c == '\\' || c == '"' || c < 0x20) return true; + } + return false; +} + +std::string EscapeJsonChars(std::string_view str); std::string Reindent(const std::string& str, int indentation); // JSON compiler definitions. @@ -135,17 +143,20 @@ class JSONWriter { } inline void write_value(Null null) { out_ << "null"; } - inline void write_value(const char* str) { write_string(str); } - inline void write_value(const std::string& str) { write_string(str); } + inline void write_value(std::string_view str) { write_string(str); } inline void write_value(const ForeignJSON& json) { out_ << Reindent(json.as_string, indent_); } - inline void write_string(const std::string& str) { - out_ << '"' << EscapeJsonChars(str) << '"'; + inline void write_string(std::string_view str) { + out_ << '"'; + if (NeedsJsonEscape(str)) // only create temporary std::string if necessary + out_ << EscapeJsonChars(str); + else + out_ << str; + out_ << '"'; } - inline void write_string(const char* str) { write_string(std::string(str)); } enum JSONState { kObjectStart, kAfterValue }; std::ostream& out_; diff --git a/src/module_wrap.cc b/src/module_wrap.cc index 73ce4aa42035a1..0645b3ddf506df 100644 --- a/src/module_wrap.cc +++ b/src/module_wrap.cc @@ -6,7 +6,6 @@ #include "node_errors.h" #include "node_internals.h" #include "node_process-inl.h" -#include "node_url.h" #include "node_watchdog.h" #include "util-inl.h" @@ -20,8 +19,6 @@ namespace 
loader { using errors::TryCatchScope; using node::contextify::ContextifyContext; -using node::url::URL; -using node::url::URL_FLAGS_FAILED; using v8::Array; using v8::ArrayBufferView; using v8::Context; diff --git a/src/node_api.cc b/src/node_api.cc index 49234a23dce800..4d95d518286b87 100644 --- a/src/node_api.cc +++ b/src/node_api.cc @@ -657,7 +657,7 @@ void napi_module_register_by_symbol(v8::Local exports, // a file system path. // TODO(gabrielschulhof): Pass the `filename` through unchanged if/when we // receive it as a URL already. - module_filename = node::url::URL::FromFilePath(filename.ToString()).href(); + module_filename = node::url::FromFilePath(filename.ToStringView()); } // Create a new napi_env for this specific module. diff --git a/src/node_errors.cc b/src/node_errors.cc index dab622f6dce455..2dc9e085269222 100644 --- a/src/node_errors.cc +++ b/src/node_errors.cc @@ -450,8 +450,10 @@ static void ReportFatalException(Environment* env, // Not an error object. Just print as-is. node::Utf8Value message(env->isolate(), error); - FPrintF(stderr, "%s\n", - *message ? message.ToString() : ""); + FPrintF( + stderr, + "%s\n", + *message ? 
message.ToStringView() : ""); } else { node::Utf8Value name_string(env->isolate(), name.ToLocalChecked()); node::Utf8Value message_string(env->isolate(), message.ToLocalChecked()); diff --git a/src/node_metadata.cc b/src/node_metadata.cc index ed28871c385532..6fe09f843e26b7 100644 --- a/src/node_metadata.cc +++ b/src/node_metadata.cc @@ -1,5 +1,6 @@ #include "node_metadata.h" #include "acorn_version.h" +#include "ada.h" #include "ares.h" #include "brotli/encode.h" #include "llhttp.h" @@ -115,6 +116,7 @@ Metadata::Versions::Versions() { #endif simdutf = SIMDUTF_VERSION; + ada = ADA_VERSION; } Metadata::Release::Release() : name(NODE_RELEASE) { diff --git a/src/node_metadata.h b/src/node_metadata.h index 2a924141d6edba..1831bfd0baaac7 100644 --- a/src/node_metadata.h +++ b/src/node_metadata.h @@ -47,6 +47,7 @@ namespace node { V(uvwasi) \ V(acorn) \ V(simdutf) \ + V(ada) \ NODE_VERSIONS_KEY_UNDICI(V) #if HAVE_OPENSSL diff --git a/src/node_report.cc b/src/node_report.cc index 2998bd09b049fd..f6439623de01e4 100644 --- a/src/node_report.cc +++ b/src/node_report.cc @@ -400,11 +400,10 @@ static void PrintJavaScriptErrorProperties(JSONWriter* writer, !value->ToString(context).ToLocal(&value_string)) { continue; } - String::Utf8Value k(isolate, key); + node::Utf8Value k(isolate, key); if (!strcmp(*k, "stack") || !strcmp(*k, "message")) continue; - String::Utf8Value v(isolate, value_string); - writer->json_keyvalue(std::string(*k, k.length()), - std::string(*v, v.length())); + node::Utf8Value v(isolate, value_string); + writer->json_keyvalue(k.ToStringView(), v.ToStringView()); } } writer->json_objectend(); // the end of 'errorProperties' @@ -631,27 +630,26 @@ static void PrintResourceUsage(JSONWriter* writer) { uint64_t free_memory = uv_get_free_memory(); uint64_t total_memory = uv_get_total_memory(); - writer->json_keyvalue("free_memory", std::to_string(free_memory)); - writer->json_keyvalue("total_memory", std::to_string(total_memory)); + 
writer->json_keyvalue("free_memory", free_memory); + writer->json_keyvalue("total_memory", total_memory); size_t rss; int err = uv_resident_set_memory(&rss); if (!err) { - writer->json_keyvalue("rss", std::to_string(rss)); + writer->json_keyvalue("rss", rss); } uint64_t constrained_memory = uv_get_constrained_memory(); if (constrained_memory) { - writer->json_keyvalue("constrained_memory", - std::to_string(constrained_memory)); + writer->json_keyvalue("constrained_memory", constrained_memory); } // See GuessMemoryAvailableToTheProcess if (!err && constrained_memory && constrained_memory >= rss) { uint64_t available_memory = constrained_memory - rss; - writer->json_keyvalue("available_memory", std::to_string(available_memory)); + writer->json_keyvalue("available_memory", available_memory); } else { - writer->json_keyvalue("available_memory", std::to_string(free_memory)); + writer->json_keyvalue("available_memory", free_memory); } if (uv_getrusage(&rusage) == 0) { @@ -668,7 +666,7 @@ static void PrintResourceUsage(JSONWriter* writer) { writer->json_keyvalue("cpuConsumptionPercent", cpu_percentage); writer->json_keyvalue("userCpuConsumptionPercent", user_cpu_percentage); writer->json_keyvalue("kernelCpuConsumptionPercent", kernel_cpu_percentage); - writer->json_keyvalue("maxRss", std::to_string(rusage.ru_maxrss * 1024)); + writer->json_keyvalue("maxRss", rusage.ru_maxrss * 1024); writer->json_objectstart("pageFaults"); writer->json_keyvalue("IORequired", rusage.ru_majflt); writer->json_keyvalue("IONotRequired", rusage.ru_minflt); @@ -795,13 +793,15 @@ static void PrintComponentVersions(JSONWriter* writer) { writer->json_objectstart("componentVersions"); #define V(key) +1 - std::pair versions_array[NODE_VERSIONS_KEYS(V)]; + std::pair + versions_array[NODE_VERSIONS_KEYS(V)]; #undef V auto* slot = &versions_array[0]; #define V(key) \ do { \ - *slot++ = std::make_pair(#key, per_process::metadata.versions.key); \ + *slot++ = std::pair( \ + #key, 
per_process::metadata.versions.key); \ } while (0); NODE_VERSIONS_KEYS(V) #undef V diff --git a/src/node_report_utils.cc b/src/node_report_utils.cc index b8f32beb203f6d..516eac22dc63a2 100644 --- a/src/node_report_utils.cc +++ b/src/node_report_utils.cc @@ -83,36 +83,33 @@ static void ReportEndpoints(uv_handle_t* h, JSONWriter* writer) { // Utility function to format libuv pipe information. static void ReportPipeEndpoints(uv_handle_t* h, JSONWriter* writer) { uv_any_handle* handle = reinterpret_cast(h); - MallocedBuffer buffer(0); - size_t buffer_size = 0; + MaybeStackBuffer buffer; + size_t buffer_size = buffer.capacity(); int rc = -1; // First call to get required buffer size. - rc = uv_pipe_getsockname(&handle->pipe, buffer.data, &buffer_size); + rc = uv_pipe_getsockname(&handle->pipe, buffer.out(), &buffer_size); if (rc == UV_ENOBUFS) { - buffer = MallocedBuffer(buffer_size); - if (buffer.data != nullptr) { - rc = uv_pipe_getsockname(&handle->pipe, buffer.data, &buffer_size); - } else { - buffer_size = 0; - } + buffer.AllocateSufficientStorage(buffer_size); + rc = uv_pipe_getsockname(&handle->pipe, buffer.out(), &buffer_size); } - if (rc == 0 && buffer_size != 0 && buffer.data != nullptr) { - writer->json_keyvalue("localEndpoint", buffer.data); + if (rc == 0 && buffer_size != 0) { + buffer.SetLength(buffer_size); + writer->json_keyvalue("localEndpoint", buffer.ToStringView()); } else { writer->json_keyvalue("localEndpoint", null); } // First call to get required buffer size. 
- rc = uv_pipe_getpeername(&handle->pipe, buffer.data, &buffer_size); + buffer_size = buffer.capacity(); + rc = uv_pipe_getpeername(&handle->pipe, buffer.out(), &buffer_size); if (rc == UV_ENOBUFS) { - buffer = MallocedBuffer(buffer_size); - if (buffer.data != nullptr) { - rc = uv_pipe_getpeername(&handle->pipe, buffer.data, &buffer_size); - } + buffer.AllocateSufficientStorage(buffer_size); + rc = uv_pipe_getpeername(&handle->pipe, buffer.out(), &buffer_size); } - if (rc == 0 && buffer_size != 0 && buffer.data != nullptr) { - writer->json_keyvalue("remoteEndpoint", buffer.data); + if (rc == 0 && buffer_size != 0) { + buffer.SetLength(buffer_size); + writer->json_keyvalue("remoteEndpoint", buffer.ToStringView()); } else { writer->json_keyvalue("remoteEndpoint", null); } @@ -120,42 +117,41 @@ static void ReportPipeEndpoints(uv_handle_t* h, JSONWriter* writer) { // Utility function to format libuv path information. static void ReportPath(uv_handle_t* h, JSONWriter* writer) { - MallocedBuffer buffer(0); + MaybeStackBuffer buffer; int rc = -1; - size_t size = 0; + size_t size = buffer.capacity(); uv_any_handle* handle = reinterpret_cast(h); - bool wrote_filename = false; // First call to get required buffer size. 
switch (h->type) { case UV_FS_EVENT: - rc = uv_fs_event_getpath(&(handle->fs_event), buffer.data, &size); + rc = uv_fs_event_getpath(&(handle->fs_event), buffer.out(), &size); break; case UV_FS_POLL: - rc = uv_fs_poll_getpath(&(handle->fs_poll), buffer.data, &size); + rc = uv_fs_poll_getpath(&(handle->fs_poll), buffer.out(), &size); break; default: break; } if (rc == UV_ENOBUFS) { - buffer = MallocedBuffer(size + 1); + buffer.AllocateSufficientStorage(size); switch (h->type) { case UV_FS_EVENT: - rc = uv_fs_event_getpath(&(handle->fs_event), buffer.data, &size); + rc = uv_fs_event_getpath(&(handle->fs_event), buffer.out(), &size); break; case UV_FS_POLL: - rc = uv_fs_poll_getpath(&(handle->fs_poll), buffer.data, &size); + rc = uv_fs_poll_getpath(&(handle->fs_poll), buffer.out(), &size); break; default: break; } - if (rc == 0) { - // buffer is not null terminated. - buffer.data[size] = '\0'; - writer->json_keyvalue("filename", buffer.data); - wrote_filename = true; - } } - if (!wrote_filename) writer->json_keyvalue("filename", null); + + if (rc == 0 && size > 0) { + buffer.SetLength(size); + writer->json_keyvalue("filename", buffer.ToStringView()); + } else { + writer->json_keyvalue("filename", null); + } } // Utility function to walk libuv handles. 
diff --git a/src/node_url.cc b/src/node_url.cc index 5d710c345d7357..81f3ecbc0698c9 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -1,37 +1,23 @@ #include "node_url.h" +#include "ada.h" #include "base_object-inl.h" #include "node_errors.h" #include "node_external_reference.h" #include "node_i18n.h" #include "util-inl.h" -#include #include #include -#include -#include namespace node { -using url::table_data::hex; -using url::table_data::C0_CONTROL_ENCODE_SET; -using url::table_data::FRAGMENT_ENCODE_SET; -using url::table_data::PATH_ENCODE_SET; -using url::table_data::USERINFO_ENCODE_SET; -using url::table_data::QUERY_ENCODE_SET_NONSPECIAL; -using url::table_data::QUERY_ENCODE_SET_SPECIAL; - -using v8::Array; using v8::Context; using v8::Function; using v8::FunctionCallbackInfo; using v8::HandleScope; -using v8::Int32; -using v8::Integer; using v8::Isolate; using v8::Local; using v8::NewStringType; -using v8::Null; using v8::Object; using v8::String; using v8::Undefined; @@ -47,1778 +33,278 @@ Local Utf8String(Isolate* isolate, const std::string& str) { namespace url { namespace { -// https://url.spec.whatwg.org/#eof-code-point -constexpr char kEOL = -1; - -// https://url.spec.whatwg.org/#concept-host -class URLHost { - public: - ~URLHost(); - - void ParseIPv4Host(const char* input, size_t length); - void ParseIPv6Host(const char* input, size_t length); - void ParseOpaqueHost(const char* input, size_t length); - void ParseHost(const char* input, - size_t length, - bool is_special, - bool unicode = false); - - bool ParsingFailed() const { return type_ == HostType::H_FAILED; } - std::string ToString() const; - // Like ToString(), but avoids a copy in exchange for invalidating `*this`. 
- std::string ToStringMove(); - - private: - enum class HostType { - H_FAILED, - H_DOMAIN, - H_IPV4, - H_IPV6, - H_OPAQUE, - }; - - union Value { - std::string domain_or_opaque; - uint32_t ipv4; - uint16_t ipv6[8]; - - ~Value() {} - Value() : ipv4(0) {} - }; - - Value value_; - HostType type_ = HostType::H_FAILED; - - void Reset() { - using string = std::string; - switch (type_) { - case HostType::H_DOMAIN: - case HostType::H_OPAQUE: - value_.domain_or_opaque.~string(); - break; - default: - break; - } - type_ = HostType::H_FAILED; - } - - // Setting the string members of the union with = is brittle because - // it relies on them being initialized to a state that requires no - // destruction of old data. - // For a long time, that worked well enough because ParseIPv6Host() happens - // to zero-fill `value_`, but that really is relying on standard library - // internals too much. - // These helpers are the easiest solution but we might want to consider - // just not forcing strings into an union. - void SetOpaque(std::string&& string) { - Reset(); - type_ = HostType::H_OPAQUE; - new(&value_.domain_or_opaque) std::string(std::move(string)); - } - - void SetDomain(std::string&& string) { - Reset(); - type_ = HostType::H_DOMAIN; - new(&value_.domain_or_opaque) std::string(std::move(string)); - } +enum url_update_action { + kProtocol = 0, + kHost = 1, + kHostname = 2, + kPort = 3, + kUsername = 4, + kPassword = 5, + kPathname = 6, + kSearch = 7, + kHash = 8, + kHref = 9, }; -URLHost::~URLHost() { - Reset(); -} - -#define ARGS(XX) \ - XX(ARG_FLAGS) \ - XX(ARG_PROTOCOL) \ - XX(ARG_USERNAME) \ - XX(ARG_PASSWORD) \ - XX(ARG_HOST) \ - XX(ARG_PORT) \ - XX(ARG_PATH) \ - XX(ARG_QUERY) \ - XX(ARG_FRAGMENT) \ - XX(ARG_COUNT) // This one has to be last. 
- -enum url_cb_args { -#define XX(name) name, - ARGS(XX) -#undef XX -}; - -#define TWO_CHAR_STRING_TEST(bits, name, expr) \ - template \ - bool name(const T ch1, const T ch2) { \ - static_assert(sizeof(ch1) >= (bits) / 8, \ - "Character must be wider than " #bits " bits"); \ - return (expr); \ - } \ - template \ - bool name(const std::basic_string& str) { \ - static_assert(sizeof(str[0]) >= (bits) / 8, \ - "Character must be wider than " #bits " bits"); \ - return str.length() >= 2 && name(str[0], str[1]); \ - } - -// https://infra.spec.whatwg.org/#ascii-tab-or-newline -CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) - -// https://infra.spec.whatwg.org/#c0-control -CHAR_TEST(8, IsC0Control, (ch >= '\0' && ch <= '\x1f')) - -// https://infra.spec.whatwg.org/#c0-control-or-space -CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' ')) - -// https://infra.spec.whatwg.org/#ascii-digit -CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) - -CHAR_TEST(8, IsASCIIOcDigit, (ch >= '0' && ch <= '7')) - -// https://infra.spec.whatwg.org/#ascii-hex-digit -CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) || - (ch >= 'A' && ch <= 'F') || - (ch >= 'a' && ch <= 'f'))) - -// https://infra.spec.whatwg.org/#ascii-alpha -CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') || - (ch >= 'a' && ch <= 'z'))) - -// https://infra.spec.whatwg.org/#ascii-alphanumeric -CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) - -// https://infra.spec.whatwg.org/#ascii-lowercase -template -T ASCIILowercase(T ch) { - return IsASCIIAlpha(ch) ? (ch | 0x20) : ch; -} - -// https://url.spec.whatwg.org/#forbidden-host-code-point -CHAR_TEST(8, - IsForbiddenHostCodePoint, - ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' || - ch == '#' || ch == '/' || ch == ':' || ch == '?' 
|| ch == '@' || - ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' || - ch == '^' || ch == '|') - -// https://url.spec.whatwg.org/#forbidden-domain-code-point -CHAR_TEST(8, - IsForbiddenDomainCodePoint, - IsForbiddenHostCodePoint(ch) || IsC0Control(ch) || ch == '%' || - ch == '\x7f') - -// https://url.spec.whatwg.org/#windows-drive-letter -TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter, - (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|'))) - -// https://url.spec.whatwg.org/#normalized-windows-drive-letter -TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter, - (IsASCIIAlpha(ch1) && ch2 == ':')) - -#undef TWO_CHAR_STRING_TEST - -bool BitAt(const uint8_t a[], const uint8_t i) { - return !!(a[i >> 3] & (1 << (i & 7))); +void SetArgs(Environment* env, Local argv[10], const ada::result& url) { + Isolate* isolate = env->isolate(); + argv[0] = Utf8String(isolate, url->get_href()); + argv[1] = Utf8String(isolate, url->get_origin()); + argv[2] = Utf8String(isolate, url->get_protocol()); + argv[3] = Utf8String(isolate, url->get_hostname()); + argv[4] = Utf8String(isolate, url->get_pathname()); + argv[5] = Utf8String(isolate, url->get_search()); + argv[6] = Utf8String(isolate, url->get_username()); + argv[7] = Utf8String(isolate, url->get_password()); + argv[8] = Utf8String(isolate, url->get_port()); + argv[9] = Utf8String(isolate, url->get_hash()); } -// Appends ch to str. If ch position in encode_set is set, the ch will -// be percent-encoded then appended. 
-void AppendOrEscape(std::string* str, - const unsigned char ch, - const uint8_t encode_set[]) { - if (BitAt(encode_set, ch)) - *str += hex + ch * 4; // "%XX\0" has a length of 4 - else - *str += ch; -} +void Parse(const FunctionCallbackInfo& args) { + CHECK_GE(args.Length(), 3); + CHECK(args[0]->IsString()); // input + // args[1] // base url + CHECK(args[2]->IsFunction()); // complete callback -unsigned hex2bin(const char ch) { - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'A' && ch <= 'F') - return 10 + (ch - 'A'); - if (ch >= 'a' && ch <= 'f') - return 10 + (ch - 'a'); - UNREACHABLE(); -} + Local success_callback_ = args[2].As(); -std::string PercentDecode(const char* input, size_t len) { - std::string dest; - if (len == 0) - return dest; - dest.reserve(len); - const char* pointer = input; - const char* end = input + len; + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(env->isolate()); + Context::Scope context_scope(env->context()); - while (pointer < end) { - const char ch = pointer[0]; - size_t remaining = end - pointer - 1; - if (ch != '%' || remaining < 2 || - (ch == '%' && - (!IsASCIIHexDigit(pointer[1]) || - !IsASCIIHexDigit(pointer[2])))) { - dest += ch; - pointer++; - continue; - } else { - unsigned a = hex2bin(pointer[1]); - unsigned b = hex2bin(pointer[2]); - char c = static_cast(a * 16 + b); - dest += c; - pointer += 3; + Utf8Value input(env->isolate(), args[0]); + ada::result base; + ada::url* base_pointer = nullptr; + if (args[1]->IsString()) { + base = ada::parse(Utf8Value(env->isolate(), args[1]).ToString()); + if (!base) { + return args.GetReturnValue().Set(false); } + base_pointer = &base.value(); } - return dest; -} - -#define SPECIALS(XX) \ - XX(ftp, 21, "ftp:") \ - XX(file, -1, "file:") \ - XX(http, 80, "http:") \ - XX(https, 443, "https:") \ - XX(ws, 80, "ws:") \ - XX(wss, 443, "wss:") - -bool IsSpecial(const std::string& scheme) { -#define V(_, __, name) if 
(scheme == name) return true; - SPECIALS(V); -#undef V - return false; -} - -Local GetSpecial(Environment* env, const std::string& scheme) { -#define V(key, _, name) if (scheme == name) \ - return env->url_special_##key##_string(); - SPECIALS(V) -#undef V - UNREACHABLE(); -} - -int NormalizePort(const std::string& scheme, int p) { -#define V(_, port, name) if (scheme == name && p == port) return -1; - SPECIALS(V); -#undef V - return p; -} - -// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter -bool StartsWithWindowsDriveLetter(const char* p, const char* end) { - size_t length = end - p; - return length >= 2 && - IsWindowsDriveLetter(p[0], p[1]) && - (length == 2 || - p[2] == '/' || - p[2] == '\\' || - p[2] == '?' || - p[2] == '#'); -} - -#if defined(NODE_HAVE_I18N_SUPPORT) -bool ToUnicode(const std::string& input, std::string* output) { - MaybeStackBuffer buf; - if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) - return false; - output->assign(*buf, buf.length()); - return true; -} - -bool ToASCII(const std::string& input, std::string* output) { - MaybeStackBuffer buf; - if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) - return false; - if (buf.length() == 0) - return false; - output->assign(*buf, buf.length()); - return true; -} -#else // !defined(NODE_HAVE_I18N_SUPPORT) -// Intentional non-ops if ICU is not present. 
-bool ToUnicode(const std::string& input, std::string* output) { - *output = input; - return true; -} - -bool ToASCII(const std::string& input, std::string* output) { - *output = input; - return true; -} -#endif // !defined(NODE_HAVE_I18N_SUPPORT) - -#define NS_IN6ADDRSZ 16 + ada::result out = ada::parse(input.ToStringView(), base_pointer); -void URLHost::ParseIPv6Host(const char* input, size_t length) { - CHECK_EQ(type_, HostType::H_FAILED); - - unsigned char buf[sizeof(struct in6_addr)]; - MaybeStackBuffer ipv6(length + 1); - *(*ipv6 + length) = 0; - memset(buf, 0, sizeof(buf)); - memcpy(*ipv6, input, sizeof(const char) * length); - - int ret = uv_inet_pton(AF_INET6, *ipv6, buf); - - if (ret != 0) { - return; - } - - // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119 - for (int i = 0; i < NS_IN6ADDRSZ; i += 2) { - value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1]; - } - - type_ = HostType::H_IPV6; -} - -// https://url.spec.whatwg.org/#ipv4-number-parser -int64_t ParseIPv4Number(const char* start, const char* end) { - if (end - start == 0) return -1; - - unsigned R = 10; - if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { - start += 2; - R = 16; - } else if (end - start >= 2 && start[0] == '0') { - start++; - R = 8; + if (!out) { + return args.GetReturnValue().Set(false); } - if (end - start == 0) return 0; - - const char* p = start; - - while (p < end) { - const char ch = p[0]; - switch (R) { - case 8: - if (ch < '0' || ch > '7') - return -1; - break; - case 10: - if (!IsASCIIDigit(ch)) - return -1; - break; - case 16: - if (!IsASCIIHexDigit(ch)) - return -1; - break; - } - p++; - } - return strtoll(start, nullptr, R); + const Local undef = Undefined(isolate); + Local argv[] = { + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + SetArgs(env, argv, out); + USE(success_callback_->Call( + env->context(), args.This(), 
arraysize(argv), argv)); + args.GetReturnValue().Set(true); } -// https://url.spec.whatwg.org/#ipv4-number-parser -bool IsIPv4NumberValid(const std::string_view input) { - if (input.empty()) { - return false; - } - - // If a number starts with '0' it might be a number with base 8 or base - // 16. If not, checking if all characters are digits proves that it is a - // base 10 number. - if (input.size() >= 2 && input[0] == '0') { - if (input[1] == 'X' || input[1] == 'x') { - if (input.size() == 2) { - return true; - } - - return std::all_of(input.begin() + 2, input.end(), [](const char& c) { - return IsASCIIHexDigit(c); - }); - } - - return std::all_of(input.begin() + 1, input.end(), [](const char& c) { - return IsASCIIOcDigit(c); - }); - } - - return std::all_of(input.begin(), input.end(), [](const char& c) { - return IsASCIIDigit(c); - }); -} +void DomainToASCII(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); -// https://url.spec.whatwg.org/#ends-in-a-number-checker -inline bool EndsInANumber(const std::string_view input) { + std::string input = Utf8Value(env->isolate(), args[0]).ToString(); if (input.empty()) { - return false; + return args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); } - char delimiter = '.'; - auto last_index = input.size() - 1; - if (input.back() == delimiter) { - --last_index; - } - - std::string_view last{}; - auto pos = input.find_last_of(delimiter, last_index); - if (pos == std::string_view::npos) { - last = input.substr(0, last_index); - } else { - last = input.substr(pos + 1, last_index - pos); - } - - if (last.empty()) { - return false; - } - - if (std::all_of(last.begin(), last.end(), [](const char& c) { - return IsASCIIDigit(c); - })) { - return true; - } - - return IsIPv4NumberValid(last); +#if defined(NODE_HAVE_I18N_SUPPORT) + // It is important to have an initial value that contains a special scheme. 
+ // Since it will change the implementation of `set_hostname` according to URL + // spec. + ada::result out = ada::parse("ws://x"); + DCHECK(out); + if (!out->set_hostname(input)) { + return args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + } + std::string host = out->get_hostname(); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), host.c_str()).ToLocalChecked()); +#else + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), input.c_str()).ToLocalChecked()); +#endif } -void URLHost::ParseIPv4Host(const char* input, size_t length) { - CHECK_EQ(type_, HostType::H_FAILED); - const char* pointer = input; - const char* mark = input; - const char* end = pointer + length; - unsigned int parts = 0; - uint32_t val = 0; - uint64_t numbers[4]; - unsigned int tooBigNumbers = 0; - if (length == 0) - return; - - while (pointer <= end) { - const char ch = pointer < end ? pointer[0] : kEOL; - int64_t remaining = end - pointer - 1; - if (ch == '.' || ch == kEOL) { - if (++parts > arraysize(numbers)) return; - if (pointer == mark) - return; - int64_t n = ParseIPv4Number(mark, pointer); - if (n < 0) - return; - - if (n > 255) { - tooBigNumbers++; - } - numbers[parts - 1] = n; - mark = pointer + 1; - if (ch == '.' && remaining == 0) - break; - } - pointer++; - } - CHECK_GT(parts, 0); - - // If any but the last item in numbers is greater than 255, return failure. - // If the last item in numbers is greater than or equal to - // 256^(5 - the number of items in numbers), return failure. 
- if (tooBigNumbers > 1 || (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || - numbers[parts - 1] >= UINT64_C(1) << (8 * (5 - parts))) { - return; - } +void DomainToUnicode(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); - type_ = HostType::H_IPV4; - val = static_cast(numbers[parts - 1]); - for (unsigned int n = 0; n < parts - 1; n++) { - val += static_cast(numbers[n]) << (8 * (3 - n)); + std::string input = Utf8Value(env->isolate(), args[0]).ToString(); +#if defined(NODE_HAVE_I18N_SUPPORT) + // It is important to have an initial value that contains a special scheme. + // Since it will change the implementation of `set_hostname` according to URL + // spec. + ada::result out = ada::parse("ws://x"); + DCHECK(out); + if (!out->set_hostname(input)) { + return args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), "").ToLocalChecked()); } + std::string host = out->get_hostname(); - value_.ipv4 = val; -} + MaybeStackBuffer buf; + int32_t len = i18n::ToUnicode(&buf, host.data(), host.length()); -void URLHost::ParseOpaqueHost(const char* input, size_t length) { - CHECK_EQ(type_, HostType::H_FAILED); - std::string output; - output.reserve(length); - for (size_t i = 0; i < length; i++) { - const char ch = input[i]; - if (IsForbiddenHostCodePoint(ch)) { - return; - } else { - AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET); - } + if (len < 0) { + return args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), "").ToLocalChecked()); } - SetOpaque(std::move(output)); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), *buf, NewStringType::kNormal, len) + .ToLocalChecked()); +#else // !defined(NODE_HAVE_I18N_SUPPORT) + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), input.c_str()).ToLocalChecked()); +#endif } -void URLHost::ParseHost(const char* input, - size_t length, - bool is_special, - bool unicode) { - CHECK_EQ(type_, 
HostType::H_FAILED); - const char* pointer = input; - - if (length == 0) - return; +void UpdateUrl(const FunctionCallbackInfo& args) { + CHECK(args[0]->IsString()); // href + CHECK(args[1]->IsNumber()); // action type + CHECK(args[2]->IsString()); // new value + CHECK(args[3]->IsFunction()); // success callback - if (pointer[0] == '[') { - if (pointer[length - 1] != ']') - return; - return ParseIPv6Host(++pointer, length - 2); - } - - if (!is_special) - return ParseOpaqueHost(input, length); - - // First, we have to percent decode - std::string decoded = PercentDecode(input, length); - - // Then we have to punycode toASCII - if (!ToASCII(decoded, &decoded)) - return; - - // If any of the following characters are still present, we have to fail - for (size_t n = 0; n < decoded.size(); n++) { - const char ch = decoded[n]; - if (IsForbiddenDomainCodePoint(ch)) { - return; - } - } - - // If domain ends in a number, then return the result of IPv4 parsing domain - if (EndsInANumber(decoded)) { - return ParseIPv4Host(decoded.c_str(), decoded.length()); - } - - // If the unicode flag is set, run the result through punycode ToUnicode - if (unicode && !ToUnicode(decoded, &decoded)) - return; - - // It's not an IPv4 or IPv6 address, it must be a domain - SetDomain(std::move(decoded)); -} + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); -// Locates the longest sequence of 0 segments in an IPv6 address -// in order to use the :: compression when serializing -template -T* FindLongestZeroSequence(T* values, size_t len) { - T* start = values; - T* end = start + len; - T* result = nullptr; + enum url_update_action action = static_cast( + args[1]->Uint32Value(env->context()).FromJust()); + Utf8Value input(isolate, args[0].As()); + Utf8Value new_value(isolate, args[2].As()); + Local success_callback_ = args[3].As(); - T* current = nullptr; - unsigned counter = 0, longest = 1; + std::string_view new_value_view = new_value.ToStringView(); + 
ada::result out = ada::parse(input.ToStringView()); + CHECK(out); - while (start < end) { - if (*start == 0) { - if (current == nullptr) - current = start; - counter++; - } else { - if (counter > longest) { - longest = counter; - result = current; - } - counter = 0; - current = nullptr; - } - start++; - } - if (counter > longest) - result = current; - return result; -} + bool result{true}; -std::string URLHost::ToStringMove() { - std::string return_value; - switch (type_) { - case HostType::H_DOMAIN: - case HostType::H_OPAQUE: - return_value = std::move(value_.domain_or_opaque); - break; - default: - return_value = ToString(); - break; - } - Reset(); - return return_value; -} - -std::string URLHost::ToString() const { - std::string dest; - switch (type_) { - case HostType::H_DOMAIN: - case HostType::H_OPAQUE: - return value_.domain_or_opaque; - case HostType::H_IPV4: { - dest.reserve(15); - uint32_t value = value_.ipv4; - for (int n = 0; n < 4; n++) { - dest.insert(0, std::to_string(value % 256)); - if (n < 3) - dest.insert(0, 1, '.'); - value /= 256; - } + switch (action) { + case kPathname: { + result = out->set_pathname(new_value_view); break; } - case HostType::H_IPV6: { - dest.reserve(41); - dest += '['; - const uint16_t* start = &value_.ipv6[0]; - const uint16_t* compress_pointer = - FindLongestZeroSequence(start, 8); - bool ignore0 = false; - for (int n = 0; n <= 7; n++) { - const uint16_t* piece = &value_.ipv6[n]; - if (ignore0 && *piece == 0) - continue; - else if (ignore0) - ignore0 = false; - if (compress_pointer == piece) { - dest += n == 0 ? 
"::" : ":"; - ignore0 = true; - continue; - } - char buf[5]; - snprintf(buf, sizeof(buf), "%x", *piece); - dest += buf; - if (n < 7) - dest += ':'; - } - dest += ']'; + case kHash: { + out->set_hash(new_value_view); break; } - case HostType::H_FAILED: + case kHost: { + result = out->set_host(new_value_view); break; - } - return dest; -} - -bool ParseHost(const std::string& input, - std::string* output, - bool is_special, - bool unicode = false) { - if (input.empty()) { - output->clear(); - return true; - } - URLHost host; - host.ParseHost(input.c_str(), input.length(), is_special, unicode); - if (host.ParsingFailed()) - return false; - *output = host.ToStringMove(); - return true; -} - -std::vector FromJSStringArray(Environment* env, - Local array) { - std::vector vec; - if (array->Length() > 0) - vec.reserve(array->Length()); - for (size_t n = 0; n < array->Length(); n++) { - Local val = array->Get(env->context(), n).ToLocalChecked(); - if (val->IsString()) { - Utf8Value value(env->isolate(), val.As()); - vec.emplace_back(*value, value.length()); - } - } - return vec; -} - -url_data HarvestBase(Environment* env, Local base_obj) { - url_data base; - Local context = env->context(); - - Local flags = - base_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); - if (flags->IsInt32()) - base.flags = flags->Int32Value(context).FromJust(); - - Local port = - base_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) - base.port = port->Int32Value(context).FromJust(); - - Local scheme = - base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); - base.scheme = Utf8Value(env->isolate(), scheme).out(); - - auto GetStr = [&](std::string url_data::*member, - int flag, - Local name, - bool empty_as_present) { - Local value = base_obj->Get(env->context(), name).ToLocalChecked(); - if (value->IsString()) { - Utf8Value utf8value(env->isolate(), value.As()); - (base.*member).assign(*utf8value, utf8value.length()); - if 
(empty_as_present || value.As()->Length() != 0) { - base.flags |= flag; - } } - }; - GetStr(&url_data::username, - URL_FLAGS_HAS_USERNAME, - env->username_string(), - false); - GetStr(&url_data::password, - URL_FLAGS_HAS_PASSWORD, - env->password_string(), - false); - GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true); - GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true); - GetStr(&url_data::fragment, - URL_FLAGS_HAS_FRAGMENT, - env->fragment_string(), - true); - - Local - path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked(); - if (path->IsArray()) { - base.flags |= URL_FLAGS_HAS_PATH; - base.path = FromJSStringArray(env, path.As()); - } - return base; -} - -url_data HarvestContext(Environment* env, Local context_obj) { - url_data context; - Local flags = - context_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); - if (flags->IsInt32()) { - static constexpr int32_t kCopyFlagsMask = - URL_FLAGS_SPECIAL | - URL_FLAGS_CANNOT_BE_BASE | - URL_FLAGS_HAS_USERNAME | - URL_FLAGS_HAS_PASSWORD | - URL_FLAGS_HAS_HOST; - context.flags |= flags.As()->Value() & kCopyFlagsMask; - } - Local scheme = - context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); - if (scheme->IsString()) { - Utf8Value value(env->isolate(), scheme); - context.scheme.assign(*value, value.length()); - } - Local port = - context_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) - context.port = port.As()->Value(); - if (context.flags & URL_FLAGS_HAS_USERNAME) { - Local username = - context_obj->Get(env->context(), - env->username_string()).ToLocalChecked(); - CHECK(username->IsString()); - Utf8Value value(env->isolate(), username); - context.username.assign(*value, value.length()); - } - if (context.flags & URL_FLAGS_HAS_PASSWORD) { - Local password = - context_obj->Get(env->context(), - env->password_string()).ToLocalChecked(); - CHECK(password->IsString()); - Utf8Value 
value(env->isolate(), password); - context.password.assign(*value, value.length()); - } - Local host = - context_obj->Get(env->context(), - env->host_string()).ToLocalChecked(); - if (host->IsString()) { - Utf8Value value(env->isolate(), host); - context.host.assign(*value, value.length()); - } - return context; -} - -// Single dot segment can be ".", "%2e", or "%2E" -bool IsSingleDotSegment(const std::string& str) { - switch (str.size()) { - case 1: - return str == "."; - case 3: - return str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e'; - default: - return false; - } -} - -// Double dot segment can be: -// "..", ".%2e", ".%2E", "%2e.", "%2E.", -// "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" -bool IsDoubleDotSegment(const std::string& str) { - switch (str.size()) { - case 2: - return str == ".."; - case 4: - if (str[0] != '.' && str[0] != '%') - return false; - return ((str[0] == '.' && - str[1] == '%' && - str[2] == '2' && - ASCIILowercase(str[3]) == 'e') || - (str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e' && - str[3] == '.')); - case 6: - return (str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e' && - str[3] == '%' && - str[4] == '2' && - ASCIILowercase(str[5]) == 'e'); - default: - return false; - } -} - -void ShortenUrlPath(struct url_data* url) { - if (url->path.empty()) return; - if (url->path.size() == 1 && url->scheme == "file:" && - IsNormalizedWindowsDriveLetter(url->path[0])) return; - url->path.pop_back(); -} - -} // anonymous namespace - -void URL::Parse(const char* input, - size_t len, - enum url_parse_state state_override, - struct url_data* url, - bool has_url, - const struct url_data* base, - bool has_base) { - const char* p = input; - const char* end = input + len; - - if (!has_url) { - for (const char* ptr = p; ptr < end; ptr++) { - if (IsC0ControlOrSpace(*ptr)) - p++; - else - break; - } - for (const char* ptr = end - 1; ptr >= p; ptr--) { - if (IsC0ControlOrSpace(*ptr)) - end--; - else - 
break; + case kHostname: { + result = out->set_hostname(new_value_view); + break; } - input = p; - len = end - p; - } - - // The spec says we should strip out any ASCII tabs or newlines. - // In those cases, we create another std::string instance with the filtered - // contents, but in the general case we avoid the overhead. - std::string whitespace_stripped; - for (const char* ptr = p; ptr < end; ptr++) { - if (!IsASCIITabOrNewline(*ptr)) - continue; - // Hit tab or newline. Allocate storage, copy what we have until now, - // and then iterate and filter all similar characters out. - whitespace_stripped.reserve(len - 1); - whitespace_stripped.assign(p, ptr - p); - // 'ptr + 1' skips the current char, which we know to be tab or newline. - for (ptr = ptr + 1; ptr < end; ptr++) { - if (!IsASCIITabOrNewline(*ptr)) - whitespace_stripped += *ptr; + case kHref: { + result = out->set_href(new_value_view); + break; } - - // Update variables like they should have looked like if the string - // had been stripped of whitespace to begin with. - input = whitespace_stripped.c_str(); - len = whitespace_stripped.size(); - p = input; - end = input + len; - break; - } - - bool atflag = false; // Set when @ has been seen. - bool square_bracket_flag = false; // Set inside of [...] - bool password_token_seen_flag = false; // Set after a : after an username. - - std::string buffer; - - // Set the initial parse state. - const bool has_state_override = state_override != kUnknownState; - enum url_parse_state state = has_state_override ? state_override : - kSchemeStart; - - if (state < kSchemeStart || state > kFragment) { - url->flags |= URL_FLAGS_INVALID_PARSE_STATE; - return; - } - - while (p <= end) { - const char ch = p < end ? 
p[0] : kEOL; - bool special = (url->flags & URL_FLAGS_SPECIAL); - bool cannot_be_base; - bool special_back_slash = (special && ch == '\\'); - - switch (state) { - case kSchemeStart: - if (IsASCIIAlpha(ch)) { - buffer += ASCIILowercase(ch); - state = kScheme; - } else if (!has_state_override) { - state = kNoScheme; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; - return; - } - break; - case kScheme: - if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') { - buffer += ASCIILowercase(ch); - } else if (ch == ':' || (has_state_override && ch == kEOL)) { - if (has_state_override && buffer.size() == 0) { - url->flags |= URL_FLAGS_TERMINATED; - return; - } - buffer += ':'; - - bool new_is_special = IsSpecial(buffer); - - if (has_state_override) { - if ((special != new_is_special) || - ((buffer == "file:") && - ((url->flags & URL_FLAGS_HAS_USERNAME) || - (url->flags & URL_FLAGS_HAS_PASSWORD) || - (url->port != -1))) || - (url->scheme == "file:" && url->host.empty())) { - url->flags |= URL_FLAGS_TERMINATED; - return; - } - } - - url->scheme = std::move(buffer); - url->port = NormalizePort(url->scheme, url->port); - if (new_is_special) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - special = false; - } - // `special_back_slash` equals to `(special && ch == '\\')` and `ch` - // here always not equals to `\\`. So `special_back_slash` here always - // equals to `false`. 
- special_back_slash = false; - buffer.clear(); - if (has_state_override) - return; - if (url->scheme == "file:") { - state = kFile; - } else if (special && - has_base && - url->scheme == base->scheme) { - state = kSpecialRelativeOrAuthority; - } else if (special) { - state = kSpecialAuthoritySlashes; - } else if (p + 1 < end && p[1] == '/') { - state = kPathOrAuthority; - p++; - } else { - url->flags |= URL_FLAGS_CANNOT_BE_BASE; - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(""); - state = kCannotBeBase; - } - } else if (!has_state_override) { - buffer.clear(); - state = kNoScheme; - p = input; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; - return; - } - break; - case kNoScheme: - cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE); - if (!has_base || (cannot_be_base && ch != '#')) { - url->flags |= URL_FLAGS_FAILED; - return; - } else if (cannot_be_base && ch == '#') { - url->scheme = base->scheme; - if (IsSpecial(url->scheme)) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - special = false; - } - special_back_slash = (special && ch == '\\'); - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_FRAGMENT) { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = base->fragment; - } - url->flags |= URL_FLAGS_CANNOT_BE_BASE; - state = kFragment; - } else if (has_base && - base->scheme != "file:") { - state = kRelative; - continue; - } else { - url->scheme = "file:"; - url->flags |= URL_FLAGS_SPECIAL; - special = true; - state = kFile; - special_back_slash = (special && ch == '\\'); - continue; - } - break; - case kSpecialRelativeOrAuthority: - if (ch == '/' && p + 1 < end && p[1] == '/') { - state = kSpecialAuthorityIgnoreSlashes; - p++; - } else { - state = kRelative; 
- continue; - } - break; - case kPathOrAuthority: - if (ch == '/') { - state = kAuthority; - } else { - state = kPath; - continue; - } - break; - case kRelative: - url->scheme = base->scheme; - if (IsSpecial(url->scheme)) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - special = false; - } - special_back_slash = (special && ch == '\\'); - switch (ch) { - case kEOL: - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - break; - case '/': - state = kRelativeSlash; - break; - case '?': - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - state = kQuery; - break; - case '#': - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & 
URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - state = kFragment; - break; - default: - if (special_back_slash) { - state = kRelativeSlash; - } else { - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - ShortenUrlPath(url); - } - url->port = base->port; - state = kPath; - continue; - } - } - break; - case kRelativeSlash: - if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) { - state = kSpecialAuthorityIgnoreSlashes; - } else if (ch == '/') { - state = kAuthority; - } else { - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - url->port = base->port; - state = kPath; - continue; - } - break; - case kSpecialAuthoritySlashes: - state = kSpecialAuthorityIgnoreSlashes; - if (ch == '/' && p + 1 < end && p[1] == '/') { - p++; - } else { - continue; - } - break; - case kSpecialAuthorityIgnoreSlashes: - if (ch != '/' && ch != '\\') { - state = kAuthority; - continue; - } - break; - case kAuthority: - if (ch == '@') { - if (atflag) { - buffer.reserve(buffer.size() + 3); - buffer.insert(0, "%40"); - } - atflag = true; - size_t blen = buffer.size(); - if (blen > 0 && buffer[0] != ':') { - 
url->flags |= URL_FLAGS_HAS_USERNAME; - } - for (size_t n = 0; n < blen; n++) { - const char bch = buffer[n]; - if (bch == ':') { - url->flags |= URL_FLAGS_HAS_PASSWORD; - if (!password_token_seen_flag) { - password_token_seen_flag = true; - continue; - } - } - if (password_token_seen_flag) { - AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET); - } else { - AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET); - } - } - buffer.clear(); - } else if (ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { - if (atflag && buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; - } - p -= buffer.size() + 1; - buffer.clear(); - state = kHost; - } else { - buffer += ch; - } - break; - case kHost: - case kHostname: - if (has_state_override && url->scheme == "file:") { - state = kFileHost; - continue; - } else if (ch == ':' && !square_bracket_flag) { - if (buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; - } - if (state_override == kHostname) { - return; - } - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(buffer, &url->host, special)) { - url->flags |= URL_FLAGS_FAILED; - return; - } - buffer.clear(); - state = kPort; - } else if (ch == kEOL || - ch == '/' || - ch == '?' 
|| - ch == '#' || - special_back_slash) { - p--; - if (special && buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; - } - if (has_state_override && - buffer.size() == 0 && - ((url->username.size() > 0 || url->password.size() > 0) || - url->port != -1)) { - url->flags |= URL_FLAGS_TERMINATED; - return; - } - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(buffer, &url->host, special)) { - url->flags |= URL_FLAGS_FAILED; - return; - } - buffer.clear(); - state = kPathStart; - if (has_state_override) { - return; - } - } else { - if (ch == '[') - square_bracket_flag = true; - if (ch == ']') - square_bracket_flag = false; - buffer += ch; - } - break; - case kPort: - if (IsASCIIDigit(ch)) { - buffer += ch; - } else if (has_state_override || - ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { - if (buffer.size() > 0) { - unsigned port = 0; - // the condition port <= 0xffff prevents integer overflow - for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++) - port = port * 10 + buffer[i] - '0'; - if (port > 0xffff) { - // TODO(TimothyGu): This hack is currently needed for the host - // setter since it needs access to hostname if it is valid, and - // if the FAILED flag is set the entire response to JS layer - // will be empty. - if (state_override == kHost) - url->port = -1; - else - url->flags |= URL_FLAGS_FAILED; - return; - } - // the port is valid - url->port = NormalizePort(url->scheme, static_cast(port)); - if (url->port == -1) - url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT; - buffer.clear(); - } else if (has_state_override) { - // TODO(TimothyGu): Similar case as above. 
- if (state_override == kHost) - url->port = -1; - else - url->flags |= URL_FLAGS_TERMINATED; - return; - } - state = kPathStart; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; - return; - } - break; - case kFile: - url->scheme = "file:"; - url->host.clear(); - url->flags |= URL_FLAGS_HAS_HOST; - if (ch == '/' || ch == '\\') { - state = kFileSlash; - } else if (has_base && base->scheme == "file:") { - switch (ch) { - case kEOL: - if (base->flags & URL_FLAGS_HAS_HOST) { - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - break; - case '?': - if (base->flags & URL_FLAGS_HAS_HOST) { - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->flags |= URL_FLAGS_HAS_QUERY; - url->query.clear(); - state = kQuery; - break; - case '#': - if (base->flags & URL_FLAGS_HAS_HOST) { - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment.clear(); - state = kFragment; - break; - default: - url->query.clear(); - if (base->flags & URL_FLAGS_HAS_HOST) { - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (!StartsWithWindowsDriveLetter(p, end)) { - ShortenUrlPath(url); - } else { - url->path.clear(); - } - state = kPath; - continue; - } - } else { - state = kPath; - continue; - } - break; - case kFileSlash: - if (ch == '/' || ch == '\\') { - state = kFileHost; - } else { - if (has_base && base->scheme == "file:") { - url->flags |= URL_FLAGS_HAS_HOST; - 
url->host = base->host; - if (!StartsWithWindowsDriveLetter(p, end) && - IsNormalizedWindowsDriveLetter(base->path[0])) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(base->path[0]); - } - } - state = kPath; - continue; - } - break; - case kFileHost: - if (ch == kEOL || - ch == '/' || - ch == '\\' || - ch == '?' || - ch == '#') { - if (!has_state_override && - buffer.size() == 2 && - IsWindowsDriveLetter(buffer)) { - state = kPath; - } else if (buffer.size() == 0) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host.clear(); - if (has_state_override) - return; - state = kPathStart; - } else { - std::string host; - if (!ParseHost(buffer, &host, special)) { - url->flags |= URL_FLAGS_FAILED; - return; - } - if (host == "localhost") - host.clear(); - url->flags |= URL_FLAGS_HAS_HOST; - url->host = host; - if (has_state_override) - return; - buffer.clear(); - state = kPathStart; - } - continue; - } else { - buffer += ch; - } - break; - case kPathStart: - if (IsSpecial(url->scheme)) { - state = kPath; - if (ch != '/' && ch != '\\') { - continue; - } - } else if (!has_state_override && ch == '?') { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query.clear(); - state = kQuery; - } else if (!has_state_override && ch == '#') { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment.clear(); - state = kFragment; - } else if (ch != kEOL) { - state = kPath; - if (ch != '/') { - continue; - } - } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(""); - } - break; - case kPath: - if (ch == kEOL || - ch == '/' || - special_back_slash || - (!has_state_override && (ch == '?' 
|| ch == '#'))) { - if (IsDoubleDotSegment(buffer)) { - ShortenUrlPath(url); - if (ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(""); - } - } else if (IsSingleDotSegment(buffer) && - ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(""); - } else if (!IsSingleDotSegment(buffer)) { - if (url->scheme == "file:" && - url->path.empty() && - buffer.size() == 2 && - IsWindowsDriveLetter(buffer)) { - buffer[1] = ':'; - } - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(std::move(buffer)); - } - buffer.clear(); - if (ch == '?') { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query.clear(); - state = kQuery; - } else if (ch == '#') { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment.clear(); - state = kFragment; - } - } else { - AppendOrEscape(&buffer, ch, PATH_ENCODE_SET); - } - break; - case kCannotBeBase: - switch (ch) { - case '?': - state = kQuery; - break; - case '#': - state = kFragment; - break; - default: - if (url->path.empty()) - url->path.emplace_back(""); - else if (ch != kEOL) - AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET); - } - break; - case kQuery: - if (ch == kEOL || (!has_state_override && ch == '#')) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = std::move(buffer); - buffer.clear(); - if (ch == '#') - state = kFragment; - } else { - AppendOrEscape(&buffer, ch, special ? 
QUERY_ENCODE_SET_SPECIAL : - QUERY_ENCODE_SET_NONSPECIAL); - } - break; - case kFragment: - switch (ch) { - case kEOL: - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = std::move(buffer); - break; - default: - AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET); - } - break; - default: - url->flags |= URL_FLAGS_INVALID_PARSE_STATE; - return; + case kPassword: { + result = out->set_password(new_value_view); + break; } - - p++; - } -} // NOLINT(readability/fn_size) - -// https://url.spec.whatwg.org/#url-serializing -std::string URL::SerializeURL(const url_data& url, - bool exclude = false) { - std::string output; - output.reserve( - 10 + // We generally insert < 10 separator characters between URL parts - url.scheme.size() + - url.username.size() + - url.password.size() + - url.host.size() + - url.query.size() + - url.fragment.size() + - url.href.size() + - std::accumulate( - url.path.begin(), - url.path.end(), - 0, - [](size_t sum, const auto& str) { return sum + str.size(); })); - - output += url.scheme; - if (url.flags & URL_FLAGS_HAS_HOST) { - output += "//"; - if (url.flags & URL_FLAGS_HAS_USERNAME || - url.flags & URL_FLAGS_HAS_PASSWORD) { - if (url.flags & URL_FLAGS_HAS_USERNAME) { - output += url.username; - } - if (url.flags & URL_FLAGS_HAS_PASSWORD) { - output += ":" + url.password; - } - output += "@"; + case kPort: { + result = out->set_port(new_value_view); + break; } - output += url.host; - if (url.port != -1) { - output += ":" + std::to_string(url.port); + case kProtocol: { + result = out->set_protocol(new_value_view); + break; } - } - if (url.flags & URL_FLAGS_CANNOT_BE_BASE) { - output += url.path[0]; - } else { - if (!(url.flags & URL_FLAGS_HAS_HOST) && - url.path.size() > 1 && - url.path[0].empty()) { - output += "/."; + case kSearch: { + out->set_search(new_value_view); + break; } - for (size_t i = 1; i < url.path.size(); i++) { - output += "/" + url.path[i]; + case kUsername: { + result = out->set_username(new_value_view); + break; } } - if 
(url.flags & URL_FLAGS_HAS_QUERY) { - output += "?" + url.query; - } - if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) { - output += "#" + url.fragment; - } - output.shrink_to_fit(); - return output; -} - -namespace { -void SetArgs(Environment* env, - Local argv[ARG_COUNT], - const struct url_data& url) { - Isolate* isolate = env->isolate(); - argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - argv[ARG_PROTOCOL] = - url.flags & URL_FLAGS_SPECIAL ? - GetSpecial(env, url.scheme) : - OneByteString(isolate, url.scheme.c_str()); - if (url.flags & URL_FLAGS_HAS_USERNAME) - argv[ARG_USERNAME] = Utf8String(isolate, url.username); - if (url.flags & URL_FLAGS_HAS_PASSWORD) - argv[ARG_PASSWORD] = Utf8String(isolate, url.password); - if (url.flags & URL_FLAGS_HAS_HOST) - argv[ARG_HOST] = Utf8String(isolate, url.host); - if (url.flags & URL_FLAGS_HAS_QUERY) - argv[ARG_QUERY] = Utf8String(isolate, url.query); - if (url.flags & URL_FLAGS_HAS_FRAGMENT) - argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment); - if (url.port > -1) - argv[ARG_PORT] = Integer::New(isolate, url.port); - if (url.flags & URL_FLAGS_HAS_PATH) - argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked(); -} - -void Parse(Environment* env, - Local recv, - const char* input, - size_t len, - enum url_parse_state state_override, - Local base_obj, - Local context_obj, - Local cb, - Local error_cb) { - Isolate* isolate = env->isolate(); - Local context = env->context(); - HandleScope handle_scope(isolate); - Context::Scope context_scope(context); - - const bool has_context = context_obj->IsObject(); - const bool has_base = base_obj->IsObject(); - url_data base; - url_data url; - if (has_context) - url = HarvestContext(env, context_obj.As()); - if (has_base) - base = HarvestBase(env, base_obj.As()); - - URL::Parse(input, len, state_override, &url, has_context, &base, has_base); - if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || - ((state_override != kUnknownState) && - (url.flags 
& URL_FLAGS_TERMINATED))) - return; - - // Define the return value placeholders const Local undef = Undefined(isolate); - const Local null = Null(isolate); - if (!(url.flags & URL_FLAGS_FAILED)) { - Local argv[] = { + Local argv[] = { undef, undef, undef, undef, - null, // host defaults to null - null, // port defaults to null undef, - null, // query defaults to null - null, // fragment defaults to null - }; - SetArgs(env, argv, url); - USE(cb->Call(context, recv, arraysize(argv), argv)); - } else if (error_cb->IsFunction()) { - Local flags = Integer::NewFromUnsigned(isolate, url.flags); - USE(error_cb.As()->Call(context, recv, 1, &flags)); - } + undef, + undef, + undef, + undef, + undef, + }; + SetArgs(env, argv, out); + USE(success_callback_->Call( + env->context(), args.This(), arraysize(argv), argv)); + args.GetReturnValue().Set(result); } -void Parse(const FunctionCallbackInfo& args) { +void FormatUrl(const FunctionCallbackInfo& args) { + CHECK_GT(args.Length(), 4); + CHECK(args[0]->IsString()); // url href + Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 5); - CHECK(args[0]->IsString()); // input - CHECK(args[2]->IsUndefined() || // base context - args[2]->IsNull() || - args[2]->IsObject()); - CHECK(args[3]->IsUndefined() || // context - args[3]->IsNull() || - args[3]->IsObject()); - CHECK(args[4]->IsFunction()); // complete callback - CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback + Isolate* isolate = env->isolate(); - Utf8Value input(env->isolate(), args[0]); - enum url_parse_state state_override = kUnknownState; - if (args[1]->IsNumber()) { - state_override = static_cast( - args[1]->Uint32Value(env->context()).FromJust()); - } + Utf8Value href(isolate, args[0].As()); + const bool fragment = args[1]->IsTrue(); + const bool unicode = args[2]->IsTrue(); + const bool search = args[3]->IsTrue(); + const bool auth = args[4]->IsTrue(); - Parse(env, args.This(), - *input, input.length(), - state_override, - 
args[2], - args[3], - args[4].As(), - args[5]); -} + ada::result out = ada::parse(href.ToStringView()); + CHECK(out); -void EncodeAuthSet(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - std::string output; - size_t len = value.length(); - output.reserve(len); - for (size_t n = 0; n < len; n++) { - const char ch = (*value)[n]; - AppendOrEscape(&output, ch, USERINFO_ENCODE_SET); + if (!fragment) { + out->fragment = std::nullopt; } - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked()); -} -void DomainToASCII(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); + if (unicode) { +#if defined(NODE_HAVE_I18N_SUPPORT) + std::string hostname = out->get_hostname(); + MaybeStackBuffer buf; + int32_t len = i18n::ToUnicode(&buf, hostname.data(), hostname.length()); - URLHost host; - // Assuming the host is used for a special scheme. - host.ParseHost(*value, value.length(), true); - if (host.ParsingFailed()) { - args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); - return; + if (len < 0) { + out->host = ""; + } else { + out->host = buf.ToString(); + } +#else + out->host = ""; +#endif } - std::string out = host.ToStringMove(); - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked()); -} -void DomainToUnicode(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); + if (!search) { + out->query = std::nullopt; + } - URLHost host; - // Assuming the host is used for a special scheme. 
- host.ParseHost(*value, value.length(), true, true); - if (host.ParsingFailed()) { - args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); - return; + if (!auth) { + out->username = ""; + out->password = ""; } - std::string out = host.ToStringMove(); - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked()); + + std::string result = out->get_href(); + args.GetReturnValue().Set(String::NewFromUtf8(env->isolate(), + result.data(), + NewStringType::kNormal, + result.length()) + .ToLocalChecked()); } void Initialize(Local target, @@ -1826,38 +312,31 @@ void Initialize(Local target, Local context, void* priv) { SetMethod(context, target, "parse", Parse); - SetMethodNoSideEffect(context, target, "encodeAuth", EncodeAuthSet); + SetMethod(context, target, "updateUrl", UpdateUrl); + SetMethod(context, target, "formatUrl", FormatUrl); + SetMethodNoSideEffect(context, target, "domainToASCII", DomainToASCII); SetMethodNoSideEffect(context, target, "domainToUnicode", DomainToUnicode); - -#define XX(name, _) NODE_DEFINE_CONSTANT(target, name); - FLAGS(XX) -#undef XX - -#define XX(name) NODE_DEFINE_CONSTANT(target, name); - PARSESTATES(XX) -#undef XX } } // namespace void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(Parse); - registry->Register(EncodeAuthSet); + registry->Register(UpdateUrl); + registry->Register(FormatUrl); + registry->Register(DomainToASCII); registry->Register(DomainToUnicode); } -URL URL::FromFilePath(const std::string& file_path) { - URL url("file://"); +std::string FromFilePath(const std::string_view file_path) { std::string escaped_file_path; for (size_t i = 0; i < file_path.length(); ++i) { escaped_file_path += file_path[i]; - if (file_path[i] == '%') - escaped_file_path += "25"; + if (file_path[i] == '%') escaped_file_path += "25"; } - URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart, - &url.context_, true, nullptr, false); - 
return url; + + return ada::href_from_file(escaped_file_path); } } // namespace url diff --git a/src/node_url.h b/src/node_url.h index bec281661e6f5e..c3d895d2f6092f 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -3,196 +3,18 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS +#include "ada.h" #include "node.h" +#include "util.h" #include namespace node { namespace url { -#define PARSESTATES(XX) \ - XX(kSchemeStart) \ - XX(kScheme) \ - XX(kNoScheme) \ - XX(kSpecialRelativeOrAuthority) \ - XX(kPathOrAuthority) \ - XX(kRelative) \ - XX(kRelativeSlash) \ - XX(kSpecialAuthoritySlashes) \ - XX(kSpecialAuthorityIgnoreSlashes) \ - XX(kAuthority) \ - XX(kHost) \ - XX(kHostname) \ - XX(kPort) \ - XX(kFile) \ - XX(kFileSlash) \ - XX(kFileHost) \ - XX(kPathStart) \ - XX(kPath) \ - XX(kCannotBeBase) \ - XX(kQuery) \ - XX(kFragment) - -#define FLAGS(XX) \ - XX(URL_FLAGS_NONE, 0) \ - XX(URL_FLAGS_FAILED, 0x01) \ - XX(URL_FLAGS_CANNOT_BE_BASE, 0x02) \ - XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ - XX(URL_FLAGS_TERMINATED, 0x08) \ - XX(URL_FLAGS_SPECIAL, 0x10) \ - XX(URL_FLAGS_HAS_USERNAME, 0x20) \ - XX(URL_FLAGS_HAS_PASSWORD, 0x40) \ - XX(URL_FLAGS_HAS_HOST, 0x80) \ - XX(URL_FLAGS_HAS_PATH, 0x100) \ - XX(URL_FLAGS_HAS_QUERY, 0x200) \ - XX(URL_FLAGS_HAS_FRAGMENT, 0x400) \ - XX(URL_FLAGS_IS_DEFAULT_SCHEME_PORT, 0x800) \ - -enum url_parse_state { - kUnknownState = -1, -#define XX(name) name, - PARSESTATES(XX) -#undef XX -}; - -enum url_flags { -#define XX(name, val) name = val, - FLAGS(XX) -#undef XX -}; - -struct url_data { - int32_t flags = URL_FLAGS_NONE; - int port = -1; - std::string scheme; - std::string username; - std::string password; - std::string host; - std::string query; - std::string fragment; - std::vector path; - std::string href; -}; - -namespace table_data { -extern const char hex[1024]; -extern const uint8_t C0_CONTROL_ENCODE_SET[32]; -extern const uint8_t FRAGMENT_ENCODE_SET[32]; -extern const uint8_t PATH_ENCODE_SET[32]; -extern const uint8_t 
USERINFO_ENCODE_SET[32]; -extern const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32]; -extern const uint8_t QUERY_ENCODE_SET_SPECIAL[32]; -} - -class URL { - public: - static void Parse(const char* input, - size_t len, - enum url_parse_state state_override, - struct url_data* url, - bool has_url, - const struct url_data* base, - bool has_base); - - static std::string SerializeURL(const url_data& url, bool exclude); - - URL(const char* input, const size_t len) { - Parse(input, len, kUnknownState, &context_, false, nullptr, false); - } - - URL(const char* input, const size_t len, const URL* base) { - if (base != nullptr) - Parse(input, len, kUnknownState, - &context_, false, - &(base->context_), true); - else - Parse(input, len, kUnknownState, &context_, false, nullptr, false); - } - - URL(const char* input, const size_t len, - const char* base, const size_t baselen) { - if (base != nullptr && baselen > 0) { - URL _base(base, baselen); - Parse(input, len, kUnknownState, - &context_, false, - &(_base.context_), true); - } else { - Parse(input, len, kUnknownState, &context_, false, nullptr, false); - } - } - - explicit URL(const std::string& input) : - URL(input.c_str(), input.length()) {} - - URL(const std::string& input, const URL* base) : - URL(input.c_str(), input.length(), base) {} - - URL(const std::string& input, const URL& base) : - URL(input.c_str(), input.length(), &base) {} - - URL(const std::string& input, const std::string& base) : - URL(input.c_str(), input.length(), base.c_str(), base.length()) {} - - int32_t flags() const { - return context_.flags; - } - - int port() const { - return context_.port; - } - - const std::string& protocol() const { - return context_.scheme; - } - - const std::string& username() const { - return context_.username; - } - - const std::string& password() const { - return context_.password; - } - - const std::string& host() const { - return context_.host; - } - - const std::string& query() const { - return context_.query; - } - - const 
std::string& fragment() const { - return context_.fragment; - } - - std::string path() const { - std::string ret; - for (const std::string& element : context_.path) { - ret += '/' + element; - } - return ret; - } - - std::string href() const { - return SerializeURL(context_, false); - } - - // Get the file URL from native file system path. - static URL FromFilePath(const std::string& file_path); - - URL(const URL&) = default; - URL& operator=(const URL&) = default; - URL(URL&&) = default; - URL& operator=(URL&&) = default; - - URL() : URL("") {} - - private: - url_data context_; -}; +std::string FromFilePath(const std::string_view file_path); } // namespace url - } // namespace node #endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS diff --git a/src/node_url_tables.cc b/src/node_url_tables.cc deleted file mode 100644 index 801badf838dc83..00000000000000 --- a/src/node_url_tables.cc +++ /dev/null @@ -1,448 +0,0 @@ -#include -#include "node_url.h" - -namespace node { -namespace url { -namespace table_data { - -const char hex[1024] = - "%00\0%01\0%02\0%03\0%04\0%05\0%06\0%07\0" - "%08\0%09\0%0A\0%0B\0%0C\0%0D\0%0E\0%0F\0" - "%10\0%11\0%12\0%13\0%14\0%15\0%16\0%17\0" - "%18\0%19\0%1A\0%1B\0%1C\0%1D\0%1E\0%1F\0" - "%20\0%21\0%22\0%23\0%24\0%25\0%26\0%27\0" - "%28\0%29\0%2A\0%2B\0%2C\0%2D\0%2E\0%2F\0" - "%30\0%31\0%32\0%33\0%34\0%35\0%36\0%37\0" - "%38\0%39\0%3A\0%3B\0%3C\0%3D\0%3E\0%3F\0" - "%40\0%41\0%42\0%43\0%44\0%45\0%46\0%47\0" - "%48\0%49\0%4A\0%4B\0%4C\0%4D\0%4E\0%4F\0" - "%50\0%51\0%52\0%53\0%54\0%55\0%56\0%57\0" - "%58\0%59\0%5A\0%5B\0%5C\0%5D\0%5E\0%5F\0" - "%60\0%61\0%62\0%63\0%64\0%65\0%66\0%67\0" - "%68\0%69\0%6A\0%6B\0%6C\0%6D\0%6E\0%6F\0" - "%70\0%71\0%72\0%73\0%74\0%75\0%76\0%77\0" - "%78\0%79\0%7A\0%7B\0%7C\0%7D\0%7E\0%7F\0" - "%80\0%81\0%82\0%83\0%84\0%85\0%86\0%87\0" - "%88\0%89\0%8A\0%8B\0%8C\0%8D\0%8E\0%8F\0" - "%90\0%91\0%92\0%93\0%94\0%95\0%96\0%97\0" - "%98\0%99\0%9A\0%9B\0%9C\0%9D\0%9E\0%9F\0" - 
"%A0\0%A1\0%A2\0%A3\0%A4\0%A5\0%A6\0%A7\0" - "%A8\0%A9\0%AA\0%AB\0%AC\0%AD\0%AE\0%AF\0" - "%B0\0%B1\0%B2\0%B3\0%B4\0%B5\0%B6\0%B7\0" - "%B8\0%B9\0%BA\0%BB\0%BC\0%BD\0%BE\0%BF\0" - "%C0\0%C1\0%C2\0%C3\0%C4\0%C5\0%C6\0%C7\0" - "%C8\0%C9\0%CA\0%CB\0%CC\0%CD\0%CE\0%CF\0" - "%D0\0%D1\0%D2\0%D3\0%D4\0%D5\0%D6\0%D7\0" - "%D8\0%D9\0%DA\0%DB\0%DC\0%DD\0%DE\0%DF\0" - "%E0\0%E1\0%E2\0%E3\0%E4\0%E5\0%E6\0%E7\0" - "%E8\0%E9\0%EA\0%EB\0%EC\0%ED\0%EE\0%EF\0" - "%F0\0%F1\0%F2\0%F3\0%F4\0%F5\0%F6\0%F7\0" - "%F8\0%F9\0%FA\0%FB\0%FC\0%FD\0%FE\0%FF"; - -const uint8_t C0_CONTROL_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 
0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -const uint8_t FRAGMENT_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 
3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - - -const uint8_t PATH_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 
A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -const uint8_t USERINFO_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 40 41 42 43 44 45 46 47 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x08 | 0x10 
| 0x20 | 0x40 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 
- 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 
| 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded. -const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x80, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 
| 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -} // namespace table_data -} // namespace url -} // namespace node diff --git a/src/util.h b/src/util.h index 9bdd00bb834a2b..b1750f7b718f66 100644 --- a/src/util.h +++ b/src/util.h @@ -37,10 +37,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include @@ -473,6 +473,11 @@ class MaybeStackBuffer { free(buf_); } + inline std::basic_string ToString() const { return {out(), length()}; } + inline std::basic_string_view ToStringView() const { + return {out(), length()}; + } + private: size_t length_; // capacity of 
the malloc'ed buf_ @@ -520,8 +525,6 @@ class Utf8Value : public MaybeStackBuffer { public: explicit Utf8Value(v8::Isolate* isolate, v8::Local value); - inline std::string ToString() const { return std::string(out(), length()); } - inline bool operator==(const char* a) const { return strcmp(out(), a) == 0; } @@ -596,7 +599,7 @@ struct MallocedBuffer { } void Truncate(size_t new_size) { - CHECK(new_size <= size); + CHECK_LE(new_size, size); size = new_size; } @@ -605,7 +608,7 @@ struct MallocedBuffer { data = UncheckedRealloc(data, new_size); } - inline bool is_empty() const { return data == nullptr; } + bool is_empty() const { return data == nullptr; } MallocedBuffer() : data(nullptr), size(0) {} explicit MallocedBuffer(size_t size) : data(Malloc(size)), size(size) {} diff --git a/test/benchmark/test-benchmark-url.js b/test/benchmark/test-benchmark-url.js index 664e7c4d8dc827..f4eb4efa234599 100644 --- a/test/benchmark/test-benchmark-url.js +++ b/test/benchmark/test-benchmark-url.js @@ -1,6 +1,18 @@ 'use strict'; -require('../common'); +const common = require('../common'); + +// TODO(@anonrig): Remove this check when Ada removes ICU requirement. +if (!common.hasIntl) { + // A handful of the benchmarks fail when ICU is not included. + // ICU is responsible for ignoring certain inputs from the hostname + // and without it, it is not possible to validate the correctness of the input. + // DomainToASCII method in Unicode specification states which characters are + // ignored and/or remapped. Doing this outside of the scope of DomainToASCII, + // would be a violation of the WHATWG URL specification. 
+ // Please look into: https://unicode.org/reports/tr46/#ProcessingStepMap + common.skip('missing Intl'); +} const runBenchmark = require('../common/benchmark'); diff --git a/test/cctest/test_url.cc b/test/cctest/test_url.cc deleted file mode 100644 index 080129b3ddd1ab..00000000000000 --- a/test/cctest/test_url.cc +++ /dev/null @@ -1,188 +0,0 @@ -#include "node_url.h" -#include "node_i18n.h" -#include "util-inl.h" - -#include "gtest/gtest.h" - -using node::url::URL; -using node::url::URL_FLAGS_FAILED; - -class URLTest : public ::testing::Test { - protected: - void SetUp() override { -#if defined(NODE_HAVE_I18N_SUPPORT) - std::string icu_data_dir; - node::i18n::InitializeICUDirectory(icu_data_dir); -#endif - } - - void TearDown() override {} -}; - -TEST_F(URLTest, Simple) { - URL simple("https://example.org:81/a/b/c?query#fragment"); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "https:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.port(), 81); - EXPECT_EQ(simple.path(), "/a/b/c"); - EXPECT_EQ(simple.query(), "query"); - EXPECT_EQ(simple.fragment(), "fragment"); -} - -TEST_F(URLTest, Simple2) { - const char* input = "https://example.org:81/a/b/c?query#fragment"; - URL simple(input, strlen(input)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "https:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.port(), 81); - EXPECT_EQ(simple.path(), "/a/b/c"); - EXPECT_EQ(simple.query(), "query"); - EXPECT_EQ(simple.fragment(), "fragment"); -} - -TEST_F(URLTest, ForbiddenHostCodePoint) { - URL error("https://exa|mple.org:81/a/b/c?query#fragment"); - EXPECT_TRUE(error.flags() & URL_FLAGS_FAILED); -} - -TEST_F(URLTest, NoBase1) { - URL error("123noscheme"); - EXPECT_TRUE(error.flags() & URL_FLAGS_FAILED); -} - -TEST_F(URLTest, Base1) { - URL base("http://example.org/foo/bar"); - ASSERT_FALSE(base.flags() & URL_FLAGS_FAILED); - - URL simple("../baz", &base); - 
EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/baz"); -} - -TEST_F(URLTest, Base2) { - URL simple("../baz", "http://example.org/foo/bar"); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/baz"); -} - -TEST_F(URLTest, Base3) { - const char* input = "../baz"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/baz"); -} - -TEST_F(URLTest, Base4) { - const char* input = "\\x"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/x"); -} - -TEST_F(URLTest, Base5) { - const char* input = "/x"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/x"); -} - -TEST_F(URLTest, Base6) { - const char* input = "\\\\x"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "x"); -} - -TEST_F(URLTest, Base7) { - const char* input = "//x"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - 
EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "x"); -} - -TEST_F(URLTest, TruncatedAfterProtocol) { - char input[2] = { 'q', ':' }; - URL simple(input, sizeof(input)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "q:"); - EXPECT_EQ(simple.host(), ""); - EXPECT_EQ(simple.path(), "/"); -} - -TEST_F(URLTest, TruncatedAfterProtocol2) { - char input[6] = { 'h', 't', 't', 'p', ':', '/' }; - URL simple(input, sizeof(input)); - - EXPECT_TRUE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), ""); - EXPECT_EQ(simple.path(), ""); -} - -TEST_F(URLTest, FromFilePath) { - URL file_url; -#ifdef _WIN32 - file_url = URL::FromFilePath("C:\\Program Files\\"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//C:/Program%20Files/", file_url.path()); - EXPECT_EQ("file:///C:/Program%20Files/", file_url.href()); - - file_url = URL::FromFilePath("C:\\a\\b\\c"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//C:/a/b/c", file_url.path()); - EXPECT_EQ("file:///C:/a/b/c", file_url.href()); - - file_url = URL::FromFilePath("b:\\a\\%%.js"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//b:/a/%25%25.js", file_url.path()); - EXPECT_EQ("file:///b:/a/%25%25.js", file_url.href()); -#else - file_url = URL::FromFilePath("/"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//", file_url.path()); - EXPECT_EQ("file:///", file_url.href()); - - file_url = URL::FromFilePath("/a/b/c"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//a/b/c", file_url.path()); - EXPECT_EQ("file:///a/b/c", file_url.href()); - - file_url = URL::FromFilePath("/a/%%.js"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//a/%25%25.js", file_url.path()); - EXPECT_EQ("file:///a/%25%25.js", file_url.href()); -#endif -} diff --git a/test/fixtures/wpt/README.md b/test/fixtures/wpt/README.md index a42fc09e975e87..e203bb9170793c 100644 --- a/test/fixtures/wpt/README.md +++ 
b/test/fixtures/wpt/README.md @@ -27,7 +27,7 @@ Last update: - performance-timeline: https://github.com/web-platform-tests/wpt/tree/17ebc3aea0/performance-timeline - resources: https://github.com/web-platform-tests/wpt/tree/fbf1e7d247/resources - streams: https://github.com/web-platform-tests/wpt/tree/9e5ef42bd3/streams -- url: https://github.com/web-platform-tests/wpt/tree/0a187bc169/url +- url: https://github.com/web-platform-tests/wpt/tree/f1ade799d0/url - user-timing: https://github.com/web-platform-tests/wpt/tree/df24fb604e/user-timing - wasm/jsapi: https://github.com/web-platform-tests/wpt/tree/d8dbe6990b/wasm/jsapi - wasm/webapi: https://github.com/web-platform-tests/wpt/tree/fd1b23eeaa/wasm/webapi diff --git a/test/fixtures/wpt/url/IdnaTestV2.window.js b/test/fixtures/wpt/url/IdnaTestV2.window.js new file mode 100644 index 00000000000000..8873886bdab8a0 --- /dev/null +++ b/test/fixtures/wpt/url/IdnaTestV2.window.js @@ -0,0 +1,41 @@ +promise_test(() => fetch("resources/IdnaTestV2.json").then(res => res.json()).then(runTests), "Loading data…"); + +// Performance impact of this seems negligible (performance.now() diff in WebKit went from 48 to 52) +// and there was a preference to let more non-ASCII hit the parser. +function encodeHostEndingCodePoints(input) { + let output = ""; + for (const codePoint of input) { + if ([":", "/", "?", "#", "\\"].includes(codePoint)) { + output += encodeURIComponent(codePoint); + } else { + output += codePoint; + } + } + return output; +} + +function runTests(idnaTests) { + for (const idnaTest of idnaTests) { + if (typeof idnaTest === "string") { + continue // skip comments + } + if (idnaTest.input === "") { + continue // cannot test empty string input through new URL() + } + // Percent-encode the input such that ? and equivalent code points do not end up counting as + // part of the URL, but are parsed through the host parser instead. 
+ const encodedInput = encodeHostEndingCodePoints(idnaTest.input); + + test(() => { + if (idnaTest.output === null) { + assert_throws_js(TypeError, () => new URL(`https://${encodedInput}/x`)); + } else { + const url = new URL(`https://${encodedInput}/x`); + assert_equals(url.host, idnaTest.output); + assert_equals(url.hostname, idnaTest.output); + assert_equals(url.pathname, "/x"); + assert_equals(url.href, `https://${idnaTest.output}/x`); + } + }, `ToASCII("${idnaTest.input}")${idnaTest.comment ? " " + idnaTest.comment : ""}`); + } +} diff --git a/test/fixtures/wpt/url/a-element-xhtml.xhtml b/test/fixtures/wpt/url/a-element-xhtml.xhtml index c6c67cf3ce619b..05bec4ce4b2f1e 100644 --- a/test/fixtures/wpt/url/a-element-xhtml.xhtml +++ b/test/fixtures/wpt/url/a-element-xhtml.xhtml @@ -3,8 +3,13 @@ URL Test + + + + + diff --git a/test/fixtures/wpt/url/a-element.html b/test/fixtures/wpt/url/a-element.html index 05c37f30b71e12..3429e07ec3f3f7 100644 --- a/test/fixtures/wpt/url/a-element.html +++ b/test/fixtures/wpt/url/a-element.html @@ -1,7 +1,12 @@ + + + + +
diff --git a/test/fixtures/wpt/url/historical.any.js b/test/fixtures/wpt/url/historical.any.js index c3067dfd730123..cbeb36a63f25c2 100644 --- a/test/fixtures/wpt/url/historical.any.js +++ b/test/fixtures/wpt/url/historical.any.js @@ -28,4 +28,12 @@ test(function() { assert_equals(URL.domainToUnicode, undefined); }, "URL.domainToUnicode should be undefined"); +test(() => { + assert_throws_dom("DataCloneError", () => self.structuredClone(new URL("about:blank"))); +}, "URL: no structured serialize/deserialize support"); + +test(() => { + assert_throws_dom("DataCloneError", () => self.structuredClone(new URLSearchParams())); +}, "URLSearchParams: no structured serialize/deserialize support"); + done(); diff --git a/test/fixtures/wpt/url/resources/IdnaTestV2.json b/test/fixtures/wpt/url/resources/IdnaTestV2.json new file mode 100644 index 00000000000000..669d4b0938204a --- /dev/null +++ b/test/fixtures/wpt/url/resources/IdnaTestV2.json @@ -0,0 +1,9754 @@ +[ + "THIS IS A GENERATED FILE. PLEASE DO NOT MODIFY DIRECTLY. 
See ../tools/IdnaTestV2-parser.py instead.", + "--exclude-ipv4-like: True; --exclude-std3: True; --exclude_bidi: True", + { + "input": "fass.de", + "output": "fass.de" + }, + { + "input": "fa\u00df.de", + "output": "xn--fa-hia.de" + }, + { + "input": "Fa\u00df.de", + "output": "xn--fa-hia.de" + }, + { + "input": "xn--fa-hia.de", + "output": "xn--fa-hia.de" + }, + { + "input": "\u00e0.\u05d0\u0308", + "output": "xn--0ca.xn--ssa73l" + }, + { + "input": "a\u0300.\u05d0\u0308", + "output": "xn--0ca.xn--ssa73l" + }, + { + "input": "A\u0300.\u05d0\u0308", + "output": "xn--0ca.xn--ssa73l" + }, + { + "input": "\u00c0.\u05d0\u0308", + "output": "xn--0ca.xn--ssa73l" + }, + { + "input": "xn--0ca.xn--ssa73l", + "output": "xn--0ca.xn--ssa73l" + }, + { + "input": "\u00e0\u0308.\u05d0", + "output": "xn--0ca81i.xn--4db" + }, + { + "input": "a\u0300\u0308.\u05d0", + "output": "xn--0ca81i.xn--4db" + }, + { + "input": "A\u0300\u0308.\u05d0", + "output": "xn--0ca81i.xn--4db" + }, + { + "input": "\u00c0\u0308.\u05d0", + "output": "xn--0ca81i.xn--4db" + }, + { + "input": "xn--0ca81i.xn--4db", + "output": "xn--0ca81i.xn--4db" + }, + { + "comment": "C1", + "input": "a\u200cb", + "output": null + }, + { + "comment": "C1", + "input": "A\u200cB", + "output": null + }, + { + "comment": "C1", + "input": "A\u200cb", + "output": null + }, + { + "input": "ab", + "output": "ab" + }, + { + "comment": "C1", + "input": "xn--ab-j1t", + "output": null + }, + { + "input": "a\u094d\u200cb", + "output": "xn--ab-fsf604u" + }, + { + "input": "A\u094d\u200cB", + "output": "xn--ab-fsf604u" + }, + { + "input": "A\u094d\u200cb", + "output": "xn--ab-fsf604u" + }, + { + "input": "xn--ab-fsf", + "output": "xn--ab-fsf" + }, + { + "input": "a\u094db", + "output": "xn--ab-fsf" + }, + { + "input": "A\u094dB", + "output": "xn--ab-fsf" + }, + { + "input": "A\u094db", + "output": "xn--ab-fsf" + }, + { + "input": "xn--ab-fsf604u", + "output": "xn--ab-fsf604u" + }, + { + "comment": "C2", + "input": "a\u200db", + "output": 
null + }, + { + "comment": "C2", + "input": "A\u200dB", + "output": null + }, + { + "comment": "C2", + "input": "A\u200db", + "output": null + }, + { + "comment": "C2", + "input": "xn--ab-m1t", + "output": null + }, + { + "input": "a\u094d\u200db", + "output": "xn--ab-fsf014u" + }, + { + "input": "A\u094d\u200dB", + "output": "xn--ab-fsf014u" + }, + { + "input": "A\u094d\u200db", + "output": "xn--ab-fsf014u" + }, + { + "input": "xn--ab-fsf014u", + "output": "xn--ab-fsf014u" + }, + { + "input": "\u00a1", + "output": "xn--7a" + }, + { + "input": "xn--7a", + "output": "xn--7a" + }, + { + "input": "\u19da", + "output": "xn--pkf" + }, + { + "input": "xn--pkf", + "output": "xn--pkf" + }, + { + "comment": "A4_2 (ignored)", + "input": "\u3002", + "output": "." + }, + { + "comment": "A4_2 (ignored)", + "input": ".", + "output": "." + }, + { + "input": "\uab60", + "output": "xn--3y9a" + }, + { + "input": "xn--3y9a", + "output": "xn--3y9a" + }, + { + "comment": "A4_2 (ignored)", + "input": "1234567890\u00e41234567890123456789012345678901234567890123456", + "output": "xn--12345678901234567890123456789012345678901234567890123456-fxe" + }, + { + "comment": "A4_2 (ignored)", + "input": "1234567890a\u03081234567890123456789012345678901234567890123456", + "output": "xn--12345678901234567890123456789012345678901234567890123456-fxe" + }, + { + "comment": "A4_2 (ignored)", + "input": "1234567890A\u03081234567890123456789012345678901234567890123456", + "output": "xn--12345678901234567890123456789012345678901234567890123456-fxe" + }, + { + "comment": "A4_2 (ignored)", + "input": "1234567890\u00c41234567890123456789012345678901234567890123456", + "output": "xn--12345678901234567890123456789012345678901234567890123456-fxe" + }, + { + "comment": "A4_2 (ignored)", + "input": "xn--12345678901234567890123456789012345678901234567890123456-fxe", + "output": "xn--12345678901234567890123456789012345678901234567890123456-fxe" + }, + { + "input": "www.eXample.cOm", + "output": "www.example.com" + 
}, + { + "input": "B\u00fccher.de", + "output": "xn--bcher-kva.de" + }, + { + "input": "Bu\u0308cher.de", + "output": "xn--bcher-kva.de" + }, + { + "input": "bu\u0308cher.de", + "output": "xn--bcher-kva.de" + }, + { + "input": "b\u00fccher.de", + "output": "xn--bcher-kva.de" + }, + { + "input": "B\u00dcCHER.DE", + "output": "xn--bcher-kva.de" + }, + { + "input": "BU\u0308CHER.DE", + "output": "xn--bcher-kva.de" + }, + { + "input": "xn--bcher-kva.de", + "output": "xn--bcher-kva.de" + }, + { + "input": "\u00d6BB", + "output": "xn--bb-eka" + }, + { + "input": "O\u0308BB", + "output": "xn--bb-eka" + }, + { + "input": "o\u0308bb", + "output": "xn--bb-eka" + }, + { + "input": "\u00f6bb", + "output": "xn--bb-eka" + }, + { + "input": "\u00d6bb", + "output": "xn--bb-eka" + }, + { + "input": "O\u0308bb", + "output": "xn--bb-eka" + }, + { + "input": "xn--bb-eka", + "output": "xn--bb-eka" + }, + { + "input": "\u03b2\u03cc\u03bb\u03bf\u03c2.com", + "output": "xn--nxasmm1c.com" + }, + { + "input": "\u03b2\u03bf\u0301\u03bb\u03bf\u03c2.com", + "output": "xn--nxasmm1c.com" + }, + { + "input": "\u0392\u039f\u0301\u039b\u039f\u03a3.COM", + "output": "xn--nxasmq6b.com" + }, + { + "input": "\u0392\u038c\u039b\u039f\u03a3.COM", + "output": "xn--nxasmq6b.com" + }, + { + "input": "\u03b2\u03cc\u03bb\u03bf\u03c3.com", + "output": "xn--nxasmq6b.com" + }, + { + "input": "\u03b2\u03bf\u0301\u03bb\u03bf\u03c3.com", + "output": "xn--nxasmq6b.com" + }, + { + "input": "\u0392\u03bf\u0301\u03bb\u03bf\u03c3.com", + "output": "xn--nxasmq6b.com" + }, + { + "input": "\u0392\u03cc\u03bb\u03bf\u03c3.com", + "output": "xn--nxasmq6b.com" + }, + { + "input": "xn--nxasmq6b.com", + "output": "xn--nxasmq6b.com" + }, + { + "input": "\u0392\u03bf\u0301\u03bb\u03bf\u03c2.com", + "output": "xn--nxasmm1c.com" + }, + { + "input": "\u0392\u03cc\u03bb\u03bf\u03c2.com", + "output": "xn--nxasmm1c.com" + }, + { + "input": "xn--nxasmm1c.com", + "output": "xn--nxasmm1c.com" + }, + { + "input": "xn--nxasmm1c", + "output": 
"xn--nxasmm1c" + }, + { + "input": "\u03b2\u03cc\u03bb\u03bf\u03c2", + "output": "xn--nxasmm1c" + }, + { + "input": "\u03b2\u03bf\u0301\u03bb\u03bf\u03c2", + "output": "xn--nxasmm1c" + }, + { + "input": "\u0392\u039f\u0301\u039b\u039f\u03a3", + "output": "xn--nxasmq6b" + }, + { + "input": "\u0392\u038c\u039b\u039f\u03a3", + "output": "xn--nxasmq6b" + }, + { + "input": "\u03b2\u03cc\u03bb\u03bf\u03c3", + "output": "xn--nxasmq6b" + }, + { + "input": "\u03b2\u03bf\u0301\u03bb\u03bf\u03c3", + "output": "xn--nxasmq6b" + }, + { + "input": "\u0392\u03bf\u0301\u03bb\u03bf\u03c3", + "output": "xn--nxasmq6b" + }, + { + "input": "\u0392\u03cc\u03bb\u03bf\u03c3", + "output": "xn--nxasmq6b" + }, + { + "input": "xn--nxasmq6b", + "output": "xn--nxasmq6b" + }, + { + "input": "\u0392\u03cc\u03bb\u03bf\u03c2", + "output": "xn--nxasmm1c" + }, + { + "input": "\u0392\u03bf\u0301\u03bb\u03bf\u03c2", + "output": "xn--nxasmm1c" + }, + { + "input": "www.\u0dc1\u0dca\u200d\u0dbb\u0dd3.com", + "output": "www.xn--10cl1a0b660p.com" + }, + { + "input": "WWW.\u0dc1\u0dca\u200d\u0dbb\u0dd3.COM", + "output": "www.xn--10cl1a0b660p.com" + }, + { + "input": "Www.\u0dc1\u0dca\u200d\u0dbb\u0dd3.com", + "output": "www.xn--10cl1a0b660p.com" + }, + { + "input": "www.xn--10cl1a0b.com", + "output": "www.xn--10cl1a0b.com" + }, + { + "input": "www.\u0dc1\u0dca\u0dbb\u0dd3.com", + "output": "www.xn--10cl1a0b.com" + }, + { + "input": "WWW.\u0dc1\u0dca\u0dbb\u0dd3.COM", + "output": "www.xn--10cl1a0b.com" + }, + { + "input": "Www.\u0dc1\u0dca\u0dbb\u0dd3.com", + "output": "www.xn--10cl1a0b.com" + }, + { + "input": "www.xn--10cl1a0b660p.com", + "output": "www.xn--10cl1a0b660p.com" + }, + { + "input": "\u0646\u0627\u0645\u0647\u200c\u0627\u06cc", + "output": "xn--mgba3gch31f060k" + }, + { + "input": "xn--mgba3gch31f", + "output": "xn--mgba3gch31f" + }, + { + "input": "\u0646\u0627\u0645\u0647\u0627\u06cc", + "output": "xn--mgba3gch31f" + }, + { + "input": "xn--mgba3gch31f060k", + "output": "xn--mgba3gch31f060k" + 
}, + { + "input": "xn--mgba3gch31f060k.com", + "output": "xn--mgba3gch31f060k.com" + }, + { + "input": "\u0646\u0627\u0645\u0647\u200c\u0627\u06cc.com", + "output": "xn--mgba3gch31f060k.com" + }, + { + "input": "\u0646\u0627\u0645\u0647\u200c\u0627\u06cc.COM", + "output": "xn--mgba3gch31f060k.com" + }, + { + "input": "xn--mgba3gch31f.com", + "output": "xn--mgba3gch31f.com" + }, + { + "input": "\u0646\u0627\u0645\u0647\u0627\u06cc.com", + "output": "xn--mgba3gch31f.com" + }, + { + "input": "\u0646\u0627\u0645\u0647\u0627\u06cc.COM", + "output": "xn--mgba3gch31f.com" + }, + { + "input": "a.b\uff0ec\u3002d\uff61", + "output": "a.b.c.d." + }, + { + "input": "a.b.c\u3002d\u3002", + "output": "a.b.c.d." + }, + { + "input": "A.B.C\u3002D\u3002", + "output": "a.b.c.d." + }, + { + "input": "A.b.c\u3002D\u3002", + "output": "a.b.c.d." + }, + { + "input": "a.b.c.d.", + "output": "a.b.c.d." + }, + { + "input": "A.B\uff0eC\u3002D\uff61", + "output": "a.b.c.d." + }, + { + "input": "A.b\uff0ec\u3002D\uff61", + "output": "a.b.c.d." 
+ }, + { + "input": "U\u0308.xn--tda", + "output": "xn--tda.xn--tda" + }, + { + "input": "\u00dc.xn--tda", + "output": "xn--tda.xn--tda" + }, + { + "input": "\u00fc.xn--tda", + "output": "xn--tda.xn--tda" + }, + { + "input": "u\u0308.xn--tda", + "output": "xn--tda.xn--tda" + }, + { + "input": "U\u0308.XN--TDA", + "output": "xn--tda.xn--tda" + }, + { + "input": "\u00dc.XN--TDA", + "output": "xn--tda.xn--tda" + }, + { + "input": "\u00dc.xn--Tda", + "output": "xn--tda.xn--tda" + }, + { + "input": "U\u0308.xn--Tda", + "output": "xn--tda.xn--tda" + }, + { + "input": "xn--tda.xn--tda", + "output": "xn--tda.xn--tda" + }, + { + "input": "\u00fc.\u00fc", + "output": "xn--tda.xn--tda" + }, + { + "input": "u\u0308.u\u0308", + "output": "xn--tda.xn--tda" + }, + { + "input": "U\u0308.U\u0308", + "output": "xn--tda.xn--tda" + }, + { + "input": "\u00dc.\u00dc", + "output": "xn--tda.xn--tda" + }, + { + "input": "\u00dc.\u00fc", + "output": "xn--tda.xn--tda" + }, + { + "input": "U\u0308.u\u0308", + "output": "xn--tda.xn--tda" + }, + { + "comment": "V1", + "input": "xn--u-ccb", + "output": null + }, + { + "comment": "P1; V6", + "input": "a\u2488com", + "output": null + }, + { + "input": "a1.com", + "output": "a1.com" + }, + { + "comment": "P1; V6", + "input": "A\u2488COM", + "output": null + }, + { + "comment": "P1; V6", + "input": "A\u2488Com", + "output": null + }, + { + "comment": "V6", + "input": "xn--acom-0w1b", + "output": null + }, + { + "comment": "V6", + "input": "xn--a-ecp.ru", + "output": null + }, + { + "comment": "P4", + "input": "xn--0.pt", + "output": null + }, + { + "comment": "V6", + "input": "xn--a.pt", + "output": null + }, + { + "comment": "P4", + "input": "xn--a-\u00c4.pt", + "output": null + }, + { + "comment": "P4", + "input": "xn--a-A\u0308.pt", + "output": null + }, + { + "comment": "P4", + "input": "xn--a-a\u0308.pt", + "output": null + }, + { + "comment": "P4", + "input": "xn--a-\u00e4.pt", + "output": null + }, + { + "comment": "P4", + "input": 
"XN--A-\u00c4.PT", + "output": null + }, + { + "comment": "P4", + "input": "XN--A-A\u0308.PT", + "output": null + }, + { + "comment": "P4", + "input": "Xn--A-A\u0308.pt", + "output": null + }, + { + "comment": "P4", + "input": "Xn--A-\u00c4.pt", + "output": null + }, + { + "comment": "V2 (ignored)", + "input": "xn--xn--a--gua.pt", + "output": "xn--xn--a--gua.pt" + }, + { + "input": "\u65e5\u672c\u8a9e\u3002\uff2a\uff30", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u65e5\u672c\u8a9e\u3002JP", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u65e5\u672c\u8a9e\u3002jp", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u65e5\u672c\u8a9e\u3002Jp", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "xn--wgv71a119e.jp", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u65e5\u672c\u8a9e.jp", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u65e5\u672c\u8a9e.JP", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u65e5\u672c\u8a9e.Jp", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u65e5\u672c\u8a9e\u3002\uff4a\uff50", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u65e5\u672c\u8a9e\u3002\uff2a\uff50", + "output": "xn--wgv71a119e.jp" + }, + { + "input": "\u2615", + "output": "xn--53h" + }, + { + "input": "xn--53h", + "output": "xn--53h" + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.a\u00df\u200c\u200db\u200c\u200dc\u00df\u00df\u00df\u00dfd\u03c2\u03c3\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00dfe\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00dfx\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00dfy\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u0302\u00dfz", + "output": null + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.ASS\u200c\u200dB\u200c\u200dCSSSSSSSSD\u03a3\u03a3SSSSSSSSSSSSSSSSESSSSSSSSSSSSSSSSSSSSXSSSSSSSSSSSSSSSSSSSSYSSSSSSSSSSSSSSSS\u0302SSZ", + "output": null + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": 
"1.ASS\u200c\u200dB\u200c\u200dCSSSSSSSSD\u03a3\u03a3SSSSSSSSSSSSSSSSESSSSSSSSSSSSSSSSSSSSXSSSSSSSSSSSSSSSSSSSSYSSSSSSSSSSSSSSS\u015cSSZ", + "output": null + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.ass\u200c\u200db\u200c\u200dcssssssssd\u03c3\u03c3ssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssss\u015dssz", + "output": null + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.ass\u200c\u200db\u200c\u200dcssssssssd\u03c3\u03c3ssssssssssssssssessssssssssssssssssssxssssssssssssssssssssyssssssssssssssss\u0302ssz", + "output": null + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.Ass\u200c\u200db\u200c\u200dcssssssssd\u03c3\u03c3ssssssssssssssssessssssssssssssssssssxssssssssssssssssssssyssssssssssssssss\u0302ssz", + "output": null + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.Ass\u200c\u200db\u200c\u200dcssssssssd\u03c3\u03c3ssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssss\u015dssz", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa", + "output": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa" + }, + { + "comment": "A4_2 (ignored)", + "input": "1.assbcssssssssd\u03c3\u03c3ssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssss\u015dssz", + "output": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa" + }, + { + "comment": "A4_2 (ignored)", + "input": "1.assbcssssssssd\u03c3\u03c3ssssssssssssssssessssssssssssssssssssxssssssssssssssssssssyssssssssssssssss\u0302ssz", + "output": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa" + }, + { + "comment": "A4_2 (ignored)", + "input": 
"1.ASSBCSSSSSSSSD\u03a3\u03a3SSSSSSSSSSSSSSSSESSSSSSSSSSSSSSSSSSSSXSSSSSSSSSSSSSSSSSSSSYSSSSSSSSSSSSSSSS\u0302SSZ", + "output": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa" + }, + { + "comment": "A4_2 (ignored)", + "input": "1.ASSBCSSSSSSSSD\u03a3\u03a3SSSSSSSSSSSSSSSSESSSSSSSSSSSSSSSSSSSSXSSSSSSSSSSSSSSSSSSSSYSSSSSSSSSSSSSSS\u015cSSZ", + "output": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa" + }, + { + "comment": "A4_2 (ignored)", + "input": "1.Assbcssssssssd\u03c3\u03c3ssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssss\u015dssz", + "output": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa" + }, + { + "comment": "A4_2 (ignored)", + "input": "1.Assbcssssssssd\u03c3\u03c3ssssssssssssssssessssssssssssssssssssxssssssssssssssssssssyssssssssssssssss\u0302ssz", + "output": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa" + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.xn--assbcssssssssdssssssssssssssssessssssssssssssssssssxssssssssssssssssssssysssssssssssssssssz-pxq1419aa69989dba9gc", + "output": null + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.A\u00df\u200c\u200db\u200c\u200dc\u00df\u00df\u00df\u00dfd\u03c2\u03c3\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00dfe\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00dfx\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00dfy\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u00df\u0302\u00dfz", + "output": null + }, + { + "comment": "C1; C2; A4_2 (ignored)", + "input": "1.xn--abcdexyz-qyacaaabaaaaaaabaaaaaaaaabaaaaaaaaabaaaaaaaa010ze2isb1140zba8cc", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200cx\u200dn\u200c-\u200d-b\u00df", + "output": null + }, + { + "comment": "C1; C2", + "input": 
"\u200cX\u200dN\u200c-\u200d-BSS", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200cx\u200dn\u200c-\u200d-bss", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200cX\u200dn\u200c-\u200d-Bss", + "output": null + }, + { + "input": "xn--bss", + "output": "xn--bss" + }, + { + "input": "\u5919", + "output": "xn--bss" + }, + { + "comment": "C1; C2", + "input": "xn--xn--bss-7z6ccid", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200cX\u200dn\u200c-\u200d-B\u00df", + "output": null + }, + { + "comment": "C1; C2", + "input": "xn--xn--b-pqa5796ccahd", + "output": null + }, + { + "input": "\u02e3\u034f\u2115\u200b\ufe63\u00ad\uff0d\u180c\u212c\ufe00\u017f\u2064\ud835\udd30\udb40\uddef\ufb04", + "output": "xn--bssffl" + }, + { + "input": "x\u034fN\u200b-\u00ad-\u180cB\ufe00s\u2064s\udb40\uddefffl", + "output": "xn--bssffl" + }, + { + "input": "x\u034fn\u200b-\u00ad-\u180cb\ufe00s\u2064s\udb40\uddefffl", + "output": "xn--bssffl" + }, + { + "input": "X\u034fN\u200b-\u00ad-\u180cB\ufe00S\u2064S\udb40\uddefFFL", + "output": "xn--bssffl" + }, + { + "input": "X\u034fn\u200b-\u00ad-\u180cB\ufe00s\u2064s\udb40\uddefffl", + "output": "xn--bssffl" + }, + { + "input": "xn--bssffl", + "output": "xn--bssffl" + }, + { + "input": "\u5921\u591e\u591c\u5919", + "output": "xn--bssffl" + }, + { + "input": "\u02e3\u034f\u2115\u200b\ufe63\u00ad\uff0d\u180c\u212c\ufe00S\u2064\ud835\udd30\udb40\uddefFFL", + "output": "xn--bssffl" + }, + { + "input": "x\u034fN\u200b-\u00ad-\u180cB\ufe00S\u2064s\udb40\uddefFFL", + "output": "xn--bssffl" + }, + { + "input": "\u02e3\u034f\u2115\u200b\ufe63\u00ad\uff0d\u180c\u212c\ufe00s\u2064\ud835\udd30\udb40\uddefffl", + "output": "xn--bssffl" + }, + { + "input": "\u00e41234567890123456789012345678901234567890123456789012345", + "output": "xn--1234567890123456789012345678901234567890123456789012345-9te" + }, + { + "input": "a\u03081234567890123456789012345678901234567890123456789012345", + "output": 
"xn--1234567890123456789012345678901234567890123456789012345-9te" + }, + { + "input": "A\u03081234567890123456789012345678901234567890123456789012345", + "output": "xn--1234567890123456789012345678901234567890123456789012345-9te" + }, + { + "input": "\u00c41234567890123456789012345678901234567890123456789012345", + "output": "xn--1234567890123456789012345678901234567890123456789012345-9te" + }, + { + "input": "xn--1234567890123456789012345678901234567890123456789012345-9te", + "output": "xn--1234567890123456789012345678901234567890123456789012345-9te" + }, + { + "comment": "V2 (ignored); V3 (ignored); A4_2 (ignored)", + "input": "a.b..-q--a-.e", + "output": "a.b..-q--a-.e" + }, + { + "comment": "V2 (ignored); V3 (ignored); A4_2 (ignored)", + "input": "a.b..-q--\u00e4-.e", + "output": "a.b..xn---q----jra.e" + }, + { + "comment": "V2 (ignored); V3 (ignored); A4_2 (ignored)", + "input": "a.b..-q--a\u0308-.e", + "output": "a.b..xn---q----jra.e" + }, + { + "comment": "V2 (ignored); V3 (ignored); A4_2 (ignored)", + "input": "A.B..-Q--A\u0308-.E", + "output": "a.b..xn---q----jra.e" + }, + { + "comment": "V2 (ignored); V3 (ignored); A4_2 (ignored)", + "input": "A.B..-Q--\u00c4-.E", + "output": "a.b..xn---q----jra.e" + }, + { + "comment": "V2 (ignored); V3 (ignored); A4_2 (ignored)", + "input": "A.b..-Q--\u00c4-.E", + "output": "a.b..xn---q----jra.e" + }, + { + "comment": "V2 (ignored); V3 (ignored); A4_2 (ignored)", + "input": "A.b..-Q--A\u0308-.E", + "output": "a.b..xn---q----jra.e" + }, + { + "comment": "V2 (ignored); V3 (ignored); A4_2 (ignored)", + "input": "a.b..xn---q----jra.e", + "output": "a.b..xn---q----jra.e" + }, + { + "comment": "A4_2 (ignored)", + "input": "a..c", + "output": "a..c" + }, + { + "comment": "V3 (ignored)", + "input": "a.-b.", + "output": "a.-b." 
+ }, + { + "comment": "V3 (ignored)", + "input": "a.b-.c", + "output": "a.b-.c" + }, + { + "comment": "V3 (ignored)", + "input": "a.-.c", + "output": "a.-.c" + }, + { + "comment": "V2 (ignored)", + "input": "a.bc--de.f", + "output": "a.bc--de.f" + }, + { + "comment": "A4_2 (ignored)", + "input": "\u00e4.\u00ad.c", + "output": "xn--4ca..c" + }, + { + "comment": "A4_2 (ignored)", + "input": "a\u0308.\u00ad.c", + "output": "xn--4ca..c" + }, + { + "comment": "A4_2 (ignored)", + "input": "A\u0308.\u00ad.C", + "output": "xn--4ca..c" + }, + { + "comment": "A4_2 (ignored)", + "input": "\u00c4.\u00ad.C", + "output": "xn--4ca..c" + }, + { + "comment": "A4_2 (ignored)", + "input": "xn--4ca..c", + "output": "xn--4ca..c" + }, + { + "comment": "V3 (ignored)", + "input": "\u00e4.-b.", + "output": "xn--4ca.-b." + }, + { + "comment": "V3 (ignored)", + "input": "a\u0308.-b.", + "output": "xn--4ca.-b." + }, + { + "comment": "V3 (ignored)", + "input": "A\u0308.-B.", + "output": "xn--4ca.-b." + }, + { + "comment": "V3 (ignored)", + "input": "\u00c4.-B.", + "output": "xn--4ca.-b." + }, + { + "comment": "V3 (ignored)", + "input": "xn--4ca.-b.", + "output": "xn--4ca.-b." 
+ }, + { + "comment": "V3 (ignored)", + "input": "\u00e4.b-.c", + "output": "xn--4ca.b-.c" + }, + { + "comment": "V3 (ignored)", + "input": "a\u0308.b-.c", + "output": "xn--4ca.b-.c" + }, + { + "comment": "V3 (ignored)", + "input": "A\u0308.B-.C", + "output": "xn--4ca.b-.c" + }, + { + "comment": "V3 (ignored)", + "input": "\u00c4.B-.C", + "output": "xn--4ca.b-.c" + }, + { + "comment": "V3 (ignored)", + "input": "\u00c4.b-.C", + "output": "xn--4ca.b-.c" + }, + { + "comment": "V3 (ignored)", + "input": "A\u0308.b-.C", + "output": "xn--4ca.b-.c" + }, + { + "comment": "V3 (ignored)", + "input": "xn--4ca.b-.c", + "output": "xn--4ca.b-.c" + }, + { + "comment": "V3 (ignored)", + "input": "\u00e4.-.c", + "output": "xn--4ca.-.c" + }, + { + "comment": "V3 (ignored)", + "input": "a\u0308.-.c", + "output": "xn--4ca.-.c" + }, + { + "comment": "V3 (ignored)", + "input": "A\u0308.-.C", + "output": "xn--4ca.-.c" + }, + { + "comment": "V3 (ignored)", + "input": "\u00c4.-.C", + "output": "xn--4ca.-.c" + }, + { + "comment": "V3 (ignored)", + "input": "xn--4ca.-.c", + "output": "xn--4ca.-.c" + }, + { + "comment": "V2 (ignored)", + "input": "\u00e4.bc--de.f", + "output": "xn--4ca.bc--de.f" + }, + { + "comment": "V2 (ignored)", + "input": "a\u0308.bc--de.f", + "output": "xn--4ca.bc--de.f" + }, + { + "comment": "V2 (ignored)", + "input": "A\u0308.BC--DE.F", + "output": "xn--4ca.bc--de.f" + }, + { + "comment": "V2 (ignored)", + "input": "\u00c4.BC--DE.F", + "output": "xn--4ca.bc--de.f" + }, + { + "comment": "V2 (ignored)", + "input": "\u00c4.bc--De.f", + "output": "xn--4ca.bc--de.f" + }, + { + "comment": "V2 (ignored)", + "input": "A\u0308.bc--De.f", + "output": "xn--4ca.bc--de.f" + }, + { + "comment": "V2 (ignored)", + "input": "xn--4ca.bc--de.f", + "output": "xn--4ca.bc--de.f" + }, + { + "comment": "V5", + "input": "a.b.\u0308c.d", + "output": null + }, + { + "comment": "V5", + "input": "A.B.\u0308C.D", + "output": null + }, + { + "comment": "V5", + "input": "A.b.\u0308c.d", + "output": 
null + }, + { + "comment": "V5", + "input": "a.b.xn--c-bcb.d", + "output": null + }, + { + "input": "A0", + "output": "a0" + }, + { + "input": "0A", + "output": "0a" + }, + { + "input": "\u05d0\u05c7", + "output": "xn--vdbr" + }, + { + "input": "xn--vdbr", + "output": "xn--vdbr" + }, + { + "input": "\u05d09\u05c7", + "output": "xn--9-ihcz" + }, + { + "input": "xn--9-ihcz", + "output": "xn--9-ihcz" + }, + { + "input": "\u05d0\u05ea", + "output": "xn--4db6c" + }, + { + "input": "xn--4db6c", + "output": "xn--4db6c" + }, + { + "input": "\u05d0\u05f3\u05ea", + "output": "xn--4db6c0a" + }, + { + "input": "xn--4db6c0a", + "output": "xn--4db6c0a" + }, + { + "input": "\u05d07\u05ea", + "output": "xn--7-zhc3f" + }, + { + "input": "xn--7-zhc3f", + "output": "xn--7-zhc3f" + }, + { + "input": "\u05d0\u0667\u05ea", + "output": "xn--4db6c6t" + }, + { + "input": "xn--4db6c6t", + "output": "xn--4db6c6t" + }, + { + "input": "\u0bb9\u0bcd\u200d", + "output": "xn--dmc4b194h" + }, + { + "input": "xn--dmc4b", + "output": "xn--dmc4b" + }, + { + "input": "\u0bb9\u0bcd", + "output": "xn--dmc4b" + }, + { + "input": "xn--dmc4b194h", + "output": "xn--dmc4b194h" + }, + { + "comment": "C2", + "input": "\u0bb9\u200d", + "output": null + }, + { + "input": "xn--dmc", + "output": "xn--dmc" + }, + { + "input": "\u0bb9", + "output": "xn--dmc" + }, + { + "comment": "C2", + "input": "xn--dmc225h", + "output": null + }, + { + "comment": "C2", + "input": "\u200d", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": "", + "output": "" + }, + { + "comment": "C2", + "input": "xn--1ug", + "output": null + }, + { + "input": "\u0bb9\u0bcd\u200c", + "output": "xn--dmc4by94h" + }, + { + "input": "xn--dmc4by94h", + "output": "xn--dmc4by94h" + }, + { + "comment": "C1", + "input": "\u0bb9\u200c", + "output": null + }, + { + "comment": "C1", + "input": "xn--dmc025h", + "output": null + }, + { + "comment": "C1", + "input": "\u200c", + "output": null + }, + { + "comment": "C1", + "input": "xn--0ug", + 
"output": null + }, + { + "input": "\u0644\u0670\u200c\u06ed\u06ef", + "output": "xn--ghb2gxqia7523a" + }, + { + "input": "xn--ghb2gxqia", + "output": "xn--ghb2gxqia" + }, + { + "input": "\u0644\u0670\u06ed\u06ef", + "output": "xn--ghb2gxqia" + }, + { + "input": "xn--ghb2gxqia7523a", + "output": "xn--ghb2gxqia7523a" + }, + { + "input": "\u0644\u0670\u200c\u06ef", + "output": "xn--ghb2g3qq34f" + }, + { + "input": "xn--ghb2g3q", + "output": "xn--ghb2g3q" + }, + { + "input": "\u0644\u0670\u06ef", + "output": "xn--ghb2g3q" + }, + { + "input": "xn--ghb2g3qq34f", + "output": "xn--ghb2g3qq34f" + }, + { + "input": "\u0644\u200c\u06ed\u06ef", + "output": "xn--ghb25aga828w" + }, + { + "input": "xn--ghb25aga", + "output": "xn--ghb25aga" + }, + { + "input": "\u0644\u06ed\u06ef", + "output": "xn--ghb25aga" + }, + { + "input": "xn--ghb25aga828w", + "output": "xn--ghb25aga828w" + }, + { + "input": "\u0644\u200c\u06ef", + "output": "xn--ghb65a953d" + }, + { + "input": "xn--ghb65a", + "output": "xn--ghb65a" + }, + { + "input": "\u0644\u06ef", + "output": "xn--ghb65a" + }, + { + "input": "xn--ghb65a953d", + "output": "xn--ghb65a953d" + }, + { + "input": "xn--ghb2gxq", + "output": "xn--ghb2gxq" + }, + { + "input": "\u0644\u0670\u06ed", + "output": "xn--ghb2gxq" + }, + { + "comment": "C1", + "input": "\u06ef\u200c\u06ef", + "output": null + }, + { + "input": "xn--cmba", + "output": "xn--cmba" + }, + { + "input": "\u06ef\u06ef", + "output": "xn--cmba" + }, + { + "comment": "C1", + "input": "xn--cmba004q", + "output": null + }, + { + "input": "xn--ghb", + "output": "xn--ghb" + }, + { + "input": "\u0644", + "output": "xn--ghb" + }, + { + "comment": "A4_2 (ignored)", + "input": "a\u3002\u3002b", + "output": "a..b" + }, + { + "comment": "A4_2 (ignored)", + "input": "A\u3002\u3002B", + "output": "a..b" + }, + { + "comment": "A4_2 (ignored)", + "input": "a..b", + "output": "a..b" + }, + { + "comment": "A4_2 (ignored)", + "input": "..xn--skb", + "output": "..xn--skb" + }, + { + "comment": 
"P1; V6; V3 (ignored)", + "input": "\u2495\u221d\u065f\uda0e\udd26\uff0e-\udb40\udd2f", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "14.\u221d\u065f\uda0e\udd26.-\udb40\udd2f", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "14.xn--7hb713l3v90n.-", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--7hb713lfwbi1311b.-", + "output": null + }, + { + "input": "\ua863.\u07cf", + "output": "xn--8c9a.xn--qsb" + }, + { + "input": "xn--8c9a.xn--qsb", + "output": "xn--8c9a.xn--qsb" + }, + { + "comment": "P1; V6", + "input": "\ud97d\udf9c\uff0e\ud803\udfc7\u0fa2\u077d\u0600", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud97d\udf9c\uff0e\ud803\udfc7\u0fa1\u0fb7\u077d\u0600", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud97d\udf9c.\ud803\udfc7\u0fa1\u0fb7\u077d\u0600", + "output": null + }, + { + "comment": "V6", + "input": "xn--gw68a.xn--ifb57ev2psc6027m", + "output": null + }, + { + "comment": "V5", + "input": "\ud84f\udcd4\u0303.\ud805\udcc2", + "output": null + }, + { + "comment": "V5", + "input": "xn--nsa95820a.xn--wz1d", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\ud9d4\udfad.\u10b2\ud804\uddc0", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\ud9d4\udfad.\u2d12\ud804\uddc0", + "output": null + }, + { + "comment": "V6", + "input": "xn--bn95b.xn--9kj2034e", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug15083f.xn--9kj2034e", + "output": null + }, + { + "comment": "V6", + "input": "xn--bn95b.xn--qnd6272k", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug15083f.xn--qnd6272k", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u7e71\ud805\uddbf\u200d.\uff18\ufe12", + "output": null + }, + { + "comment": "V6", + "input": "xn--gl0as212a.xn--8-o89h", + "output": null + }, + { + "comment": "V6", + "input": "xn--1ug6928ac48e.xn--8-o89h", + "output": null + }, + { + 
"comment": "V5; A4_2 (ignored)", + "input": "\udb40\uddbe\uff0e\ud838\udc08", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": "\udb40\uddbe.\ud838\udc08", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": ".xn--ph4h", + "output": null + }, + { + "comment": "C2", + "input": "\u00df\u06eb\u3002\u200d", + "output": null + }, + { + "comment": "C2", + "input": "SS\u06eb\u3002\u200d", + "output": null + }, + { + "comment": "C2", + "input": "ss\u06eb\u3002\u200d", + "output": null + }, + { + "comment": "C2", + "input": "Ss\u06eb\u3002\u200d", + "output": null + }, + { + "input": "xn--ss-59d.", + "output": "xn--ss-59d." + }, + { + "input": "ss\u06eb.", + "output": "xn--ss-59d." + }, + { + "input": "SS\u06eb.", + "output": "xn--ss-59d." + }, + { + "input": "Ss\u06eb.", + "output": "xn--ss-59d." + }, + { + "comment": "C2", + "input": "xn--ss-59d.xn--1ug", + "output": null + }, + { + "comment": "C2", + "input": "xn--zca012a.xn--1ug", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\udb41\udc35\u200c\u2488\uff0e\udb40\udf87", + "output": null + }, + { + "comment": "C1; P1; V6; A4_2 (ignored)", + "input": "\udb41\udc35\u200c1..\udb40\udf87", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": "xn--1-bs31m..xn--tv36e", + "output": null + }, + { + "comment": "C1; V6; A4_2 (ignored)", + "input": "xn--1-rgn37671n..xn--tv36e", + "output": null + }, + { + "comment": "V6", + "input": "xn--tshz2001k.xn--tv36e", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug88o47900b.xn--tv36e", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb3c\ude23\u065f\uaab2\u00df\u3002\udaf1\udce7", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb3c\ude23\u065f\uaab2SS\u3002\udaf1\udce7", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb3c\ude23\u065f\uaab2ss\u3002\udaf1\udce7", + "output": null + }, + { + "comment": "P1; V6", + "input": 
"\udb3c\ude23\u065f\uaab2Ss\u3002\udaf1\udce7", + "output": null + }, + { + "comment": "V6", + "input": "xn--ss-3xd2839nncy1m.xn--bb79d", + "output": null + }, + { + "comment": "V6", + "input": "xn--zca92z0t7n5w96j.xn--bb79d", + "output": null + }, + { + "comment": "C1; C2; P1; V6", + "input": "\u0774\u200c\ud83a\udd3f\u3002\ud8b5\ude10\u425c\u200d\ud9be\udd3c", + "output": null + }, + { + "comment": "C1; C2; P1; V6", + "input": "\u0774\u200c\ud83a\udd1d\u3002\ud8b5\ude10\u425c\u200d\ud9be\udd3c", + "output": null + }, + { + "comment": "V6", + "input": "xn--4pb2977v.xn--z0nt555ukbnv", + "output": null + }, + { + "comment": "C1; C2; V6", + "input": "xn--4pb607jjt73a.xn--1ug236ke314donv1a", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u3164\u094d\u10a0\u17d0.\u180b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1160\u094d\u10a0\u17d0.\u180b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1160\u094d\u2d00\u17d0.\u180b", + "output": null + }, + { + "comment": "V6", + "input": "xn--n3b742bkqf4ty.", + "output": null + }, + { + "comment": "V6", + "input": "xn--n3b468aoqa89r.", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u3164\u094d\u2d00\u17d0.\u180b", + "output": null + }, + { + "comment": "V6", + "input": "xn--n3b445e53po6d.", + "output": null + }, + { + "comment": "V6", + "input": "xn--n3b468azngju2a.", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u2763\u200d\uff0e\u09cd\ud807\udc3d\u0612\ua929", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u2763\u200d.\u09cd\ud807\udc3d\u0612\ua929", + "output": null + }, + { + "comment": "V5", + "input": "xn--pei.xn--0fb32q3w7q2g4d", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--1ugy10a.xn--0fb32q3w7q2g4d", + "output": null + }, + { + "comment": "V5", + "input": "\u0349\u3002\ud85e\udc6b", + "output": null + }, + { + "comment": "V5", + "input": "xn--nua.xn--bc6k", + "output": null + }, + { + "comment": "P1; V5; 
V6", + "input": "\ud807\udc3f\udb40\udd66\uff0e\u1160", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud807\udc3f\udb40\udd66.\u1160", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--ok3d.xn--psd", + "output": null + }, + { + "comment": "V5", + "input": "\u850f\uff61\ud807\udc3a", + "output": null + }, + { + "comment": "V5", + "input": "\u850f\u3002\ud807\udc3a", + "output": null + }, + { + "comment": "V5", + "input": "xn--uy1a.xn--jk3d", + "output": null + }, + { + "comment": "V6", + "input": "xn--8g1d12120a.xn--5l6h", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud804\udee7\ua9c02\uff61\u39c9\uda09\udd84", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud804\udee7\ua9c02\u3002\u39c9\uda09\udd84", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--2-5z4eu89y.xn--97l02706d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2938\u03c2\ud8ab\udc40\uff61\uffa0", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2938\u03c2\ud8ab\udc40\u3002\u1160", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2938\u03a3\ud8ab\udc40\u3002\u1160", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2938\u03c3\ud8ab\udc40\u3002\u1160", + "output": null + }, + { + "comment": "V6", + "input": "xn--4xa192qmp03d.xn--psd", + "output": null + }, + { + "comment": "V6", + "input": "xn--3xa392qmp03d.xn--psd", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2938\u03a3\ud8ab\udc40\uff61\uffa0", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2938\u03c3\ud8ab\udc40\uff61\uffa0", + "output": null + }, + { + "comment": "V6", + "input": "xn--4xa192qmp03d.xn--cl7c", + "output": null + }, + { + "comment": "V6", + "input": "xn--3xa392qmp03d.xn--cl7c", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u200d\udb7d\udc56\udb40\udc50\uff0e\u05bd\ud826\udfb0\ua85d\ud800\udee1", + "output": null + }, + { + "comment": 
"C2; P1; V5; V6", + "input": "\u200d\udb7d\udc56\udb40\udc50.\u05bd\ud826\udfb0\ua85d\ud800\udee1", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--b726ey18m.xn--ldb8734fg0qcyzzg", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--1ug66101lt8me.xn--ldb8734fg0qcyzzg", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u3002\udbcc\ude35\u03c2\ud8c2\udc07\u3002\ud802\udf88", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u3002\udbcc\ude35\u03a3\ud8c2\udc07\u3002\ud802\udf88", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u3002\udbcc\ude35\u03c3\ud8c2\udc07\u3002\ud802\udf88", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--4xa68573c7n64d.xn--f29c", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--3xa88573c7n64d.xn--f29c", + "output": null + }, + { + "input": "\ud83a\udd37.\ud802\udf90\ud83a\udc81\ud803\ude60\u0624", + "output": "xn--ve6h.xn--jgb1694kz0b2176a" + }, + { + "input": "\ud83a\udd37.\ud802\udf90\ud83a\udc81\ud803\ude60\u0648\u0654", + "output": "xn--ve6h.xn--jgb1694kz0b2176a" + }, + { + "input": "\ud83a\udd15.\ud802\udf90\ud83a\udc81\ud803\ude60\u0648\u0654", + "output": "xn--ve6h.xn--jgb1694kz0b2176a" + }, + { + "input": "\ud83a\udd15.\ud802\udf90\ud83a\udc81\ud803\ude60\u0624", + "output": "xn--ve6h.xn--jgb1694kz0b2176a" + }, + { + "input": "xn--ve6h.xn--jgb1694kz0b2176a", + "output": "xn--ve6h.xn--jgb1694kz0b2176a" + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "-\udb40\ude56\ua867\uff0e\udb40\ude82\ud8dc\udd83\ud83c\udd09", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----hg4ei0361g.xn--207ht163h7m94c", + "output": null + }, + { + "comment": "C1; V5", + "input": "\u200c\uff61\u0354", + "output": null + }, + { + "comment": "C1; V5", + "input": "\u200c\u3002\u0354", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": 
".xn--yua", + "output": null + }, + { + "comment": "C1; V5", + "input": "xn--0ug.xn--yua", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud83a\udd25\udb40\udd6e\uff0e\u1844\u10ae", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud83a\udd25\udb40\udd6e.\u1844\u10ae", + "output": null + }, + { + "input": "\ud83a\udd25\udb40\udd6e.\u1844\u2d0e", + "output": "xn--de6h.xn--37e857h" + }, + { + "comment": "P1; V6", + "input": "\ud83a\udd03\udb40\udd6e.\u1844\u10ae", + "output": null + }, + { + "input": "\ud83a\udd03\udb40\udd6e.\u1844\u2d0e", + "output": "xn--de6h.xn--37e857h" + }, + { + "input": "xn--de6h.xn--37e857h", + "output": "xn--de6h.xn--37e857h" + }, + { + "input": "\ud83a\udd25.\u1844\u2d0e", + "output": "xn--de6h.xn--37e857h" + }, + { + "comment": "P1; V6", + "input": "\ud83a\udd03.\u1844\u10ae", + "output": null + }, + { + "input": "\ud83a\udd03.\u1844\u2d0e", + "output": "xn--de6h.xn--37e857h" + }, + { + "comment": "V6", + "input": "xn--de6h.xn--mnd799a", + "output": null + }, + { + "input": "\ud83a\udd25\udb40\udd6e\uff0e\u1844\u2d0e", + "output": "xn--de6h.xn--37e857h" + }, + { + "comment": "P1; V6", + "input": "\ud83a\udd03\udb40\udd6e\uff0e\u1844\u10ae", + "output": null + }, + { + "input": "\ud83a\udd03\udb40\udd6e\uff0e\u1844\u2d0e", + "output": "xn--de6h.xn--37e857h" + }, + { + "comment": "P1; V6", + "input": "\ud83a\udd25.\u1844\u10ae", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0fa4\ud986\udd2f\uff0e\ud835\udfed\u10bb", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0fa4\ud986\udd2f.1\u10bb", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0fa4\ud986\udd2f.1\u2d1b", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--0fd40533g.xn--1-tws", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--0fd40533g.xn--1-q1g", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0fa4\ud986\udd2f\uff0e\ud835\udfed\u2d1b", + "output": 
null + }, + { + "comment": "P1; V6", + "input": "\u03c2\ud9d5\udf0c\uff18.\ud83a\udf64", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u03c2\ud9d5\udf0c8.\ud83a\udf64", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u03a3\ud9d5\udf0c8.\ud83a\udf64", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u03c3\ud9d5\udf0c8.\ud83a\udf64", + "output": null + }, + { + "comment": "V6", + "input": "xn--8-zmb14974n.xn--su6h", + "output": null + }, + { + "comment": "V6", + "input": "xn--8-xmb44974n.xn--su6h", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u03a3\ud9d5\udf0c\uff18.\ud83a\udf64", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u03c3\ud9d5\udf0c\uff18.\ud83a\udf64", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u200c\uae03.\u69b6-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u200c\u1100\u1173\u11b2.\u69b6-", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "xn--ej0b.xn----d87b", + "output": "xn--ej0b.xn----d87b" + }, + { + "comment": "C1; V3 (ignored)", + "input": "xn--0ug3307c.xn----d87b", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ub253\u6cd3\ud833\udd7d.\u09cd\u200d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1102\u1170\u11be\u6cd3\ud833\udd7d.\u09cd\u200d", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--lwwp69lqs7m.xn--b7b", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--lwwp69lqs7m.xn--b7b605i", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1bf3\u10b1\u115f\uff0e\ud804\udd34\u2132", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1bf3\u10b1\u115f.\ud804\udd34\u2132", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1bf3\u2d11\u115f.\ud804\udd34\u214e", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1bf3\u10b1\u115f.\ud804\udd34\u214e", + "output": null + }, + 
{ + "comment": "V5; V6", + "input": "xn--pnd26a55x.xn--73g3065g", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--osd925cvyn.xn--73g3065g", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--pnd26a55x.xn--f3g7465g", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1bf3\u2d11\u115f\uff0e\ud804\udd34\u214e", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1bf3\u10b1\u115f\uff0e\ud804\udd34\u214e", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u00c5\ub444-\uff0e\u200c", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "A\u030a\u1103\u116d\u11b7-\uff0e\u200c", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u00c5\ub444-.\u200c", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "A\u030a\u1103\u116d\u11b7-.\u200c", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "a\u030a\u1103\u116d\u11b7-.\u200c", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u00e5\ub444-.\u200c", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "xn----1fa1788k.", + "output": "xn----1fa1788k." 
+ }, + { + "comment": "C1; V3 (ignored)", + "input": "xn----1fa1788k.xn--0ug", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "a\u030a\u1103\u116d\u11b7-\uff0e\u200c", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u00e5\ub444-\uff0e\u200c", + "output": null + }, + { + "comment": "C1; C2; P1; V5; V6", + "input": "\ub8f1\u200d\ud880\udf68\u200c\u3002\ud836\ude16\ufe12", + "output": null + }, + { + "comment": "C1; C2; P1; V5; V6", + "input": "\u1105\u116e\u11b0\u200d\ud880\udf68\u200c\u3002\ud836\ude16\ufe12", + "output": null + }, + { + "comment": "C1; C2; V5", + "input": "\ub8f1\u200d\ud880\udf68\u200c\u3002\ud836\ude16\u3002", + "output": null + }, + { + "comment": "C1; C2; V5", + "input": "\u1105\u116e\u11b0\u200d\ud880\udf68\u200c\u3002\ud836\ude16\u3002", + "output": null + }, + { + "comment": "V5", + "input": "xn--ct2b0738h.xn--772h.", + "output": null + }, + { + "comment": "C1; C2; V5", + "input": "xn--0ugb3358ili2v.xn--772h.", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--ct2b0738h.xn--y86cl899a", + "output": null + }, + { + "comment": "C1; C2; V5; V6", + "input": "xn--0ugb3358ili2v.xn--y86cl899a", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud83c\udd04\uff0e\u1cdc\u2488\u00df", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud83c\udd04\uff0e\u1cdc\u2488SS", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud83c\udd04\uff0e\u1cdc\u2488ss", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud83c\udd04\uff0e\u1cdc\u2488Ss", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--x07h.xn--ss-k1r094b", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--x07h.xn--zca344lmif", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\u1bf3.-\u900b\ud98e\uddad\udb25\ude6e", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn--1zf.xn----483d46987byr50b", + 
"output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u0756\u3002\u3164\u200d\u03c2", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u0756\u3002\u1160\u200d\u03c2", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u0756\u3002\u1160\u200d\u03a3", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u0756\u3002\u1160\u200d\u03c3", + "output": null + }, + { + "comment": "V6", + "input": "xn--9ob.xn--4xa380e", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--9ob.xn--4xa380ebol", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--9ob.xn--3xa580ebol", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u0756\u3002\u3164\u200d\u03a3", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u0756\u3002\u3164\u200d\u03c3", + "output": null + }, + { + "comment": "V6", + "input": "xn--9ob.xn--4xa574u", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--9ob.xn--4xa795lq2l", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--9ob.xn--3xa995lq2l", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u1846\u10a3\uff61\udb3a\udca7\u0315\u200d\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u1846\u10a3\u3002\udb3a\udca7\u0315\u200d\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u1846\u2d03\u3002\udb3a\udca7\u0315\u200d\u200d", + "output": null + }, + { + "comment": "V6", + "input": "xn--57e237h.xn--5sa98523p", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--57e237h.xn--5sa649la993427a", + "output": null + }, + { + "comment": "V6", + "input": "xn--bnd320b.xn--5sa98523p", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--bnd320b.xn--5sa649la993427a", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u1846\u2d03\uff61\udb3a\udca7\u0315\u200d\u200d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": 
"\ud838\udc28\uff61\u1b44\uda45\udee8\ud838\udf87", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud838\udc28\u3002\u1b44\uda45\udee8\ud838\udf87", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--mi4h.xn--1uf6843smg20c", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u189b\udb60\udd5f\u00df.\u1327", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u189b\udb60\udd5fSS.\u1327", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u189b\udb60\udd5fss.\u1327", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u189b\udb60\udd5fSs.\u1327", + "output": null + }, + { + "comment": "V6", + "input": "xn--ss-7dp66033t.xn--p5d", + "output": null + }, + { + "comment": "V6", + "input": "xn--zca562jc642x.xn--p5d", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u2b92\u200c.\ud909\ude97\u200c", + "output": null + }, + { + "comment": "V6", + "input": "xn--b9i.xn--5p9y", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ugx66b.xn--0ugz2871c", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u00df\uff61\ud800\udef3\u10ac\u0fb8", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u00df\u3002\ud800\udef3\u10ac\u0fb8", + "output": null + }, + { + "input": "\u00df\u3002\ud800\udef3\u2d0c\u0fb8", + "output": "xn--zca.xn--lgd921mvv0m" + }, + { + "comment": "P1; V6", + "input": "SS\u3002\ud800\udef3\u10ac\u0fb8", + "output": null + }, + { + "input": "ss\u3002\ud800\udef3\u2d0c\u0fb8", + "output": "ss.xn--lgd921mvv0m" + }, + { + "comment": "P1; V6", + "input": "Ss\u3002\ud800\udef3\u10ac\u0fb8", + "output": null + }, + { + "comment": "V6", + "input": "ss.xn--lgd10cu829c", + "output": null + }, + { + "input": "ss.xn--lgd921mvv0m", + "output": "ss.xn--lgd921mvv0m" + }, + { + "input": "ss.\ud800\udef3\u2d0c\u0fb8", + "output": "ss.xn--lgd921mvv0m" + }, + { + "comment": "P1; V6", + "input": "SS.\ud800\udef3\u10ac\u0fb8", + "output": null + }, + { + 
"comment": "P1; V6", + "input": "Ss.\ud800\udef3\u10ac\u0fb8", + "output": null + }, + { + "input": "xn--zca.xn--lgd921mvv0m", + "output": "xn--zca.xn--lgd921mvv0m" + }, + { + "input": "\u00df.\ud800\udef3\u2d0c\u0fb8", + "output": "xn--zca.xn--lgd921mvv0m" + }, + { + "comment": "V6", + "input": "xn--zca.xn--lgd10cu829c", + "output": null + }, + { + "input": "\u00df\uff61\ud800\udef3\u2d0c\u0fb8", + "output": "xn--zca.xn--lgd921mvv0m" + }, + { + "comment": "P1; V6", + "input": "SS\uff61\ud800\udef3\u10ac\u0fb8", + "output": null + }, + { + "input": "ss\uff61\ud800\udef3\u2d0c\u0fb8", + "output": "ss.xn--lgd921mvv0m" + }, + { + "comment": "P1; V6", + "input": "Ss\uff61\ud800\udef3\u10ac\u0fb8", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1a5a\ud82e\udd9d\u0c4d\u3002\ud829\udf6c\ud835\udff5", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1a5a\ud82e\udd9d\u0c4d\u3002\ud829\udf6c9", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--lqc703ebm93a.xn--9-000p", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\u1856\uff61\u031f\ud91d\udee8\u0b82-", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\u1856\u3002\u031f\ud91d\udee8\u0b82-", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn--m8e.xn----mdb555dkk71m", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud83c\udd07\u4f10\ufe12.\ud831\ude5a\ua8c4", + "output": null + }, + { + "comment": "V6", + "input": "xn--woqs083bel0g.xn--0f9ao925c", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\udb40\udda0\uff0e\ud99d\udc34\udaf1\udfc8", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\udb40\udda0.\ud99d\udc34\udaf1\udfc8", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--rx21bhv12i", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "-.\u1886\udb47\udca3-", 
+ "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "-.xn----pbkx6497q", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\udafd\udcb0\uff0e-\ud835\udffb\u00df", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\udafd\udcb0.-5\u00df", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\udafd\udcb0.-5SS", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\udafd\udcb0.-5ss", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--t960e.-5ss", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--t960e.xn---5-hia", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\udafd\udcb0\uff0e-\ud835\udffbSS", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\udafd\udcb0\uff0e-\ud835\udffbss", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\udafd\udcb0\uff0e-\ud835\udffbSs", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\udafd\udcb0.-5Ss", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\ud802\ude3f.\ud83e\udd12\u10c5\uda06\udfb6", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\ud802\ude3f.\ud83e\udd12\u2d25\uda06\udfb6", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--0s9c.xn--tljz038l0gz4b", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1ug9533g.xn--tljz038l0gz4b", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--0s9c.xn--9nd3211w0gz4b", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1ug9533g.xn--9nd3211w0gz4b", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud894\udec5\u3002\u00df\ud873\udd69\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud894\udec5\u3002SS\ud873\udd69\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": 
"\ud894\udec5\u3002ss\ud873\udd69\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud894\udec5\u3002Ss\ud873\udd69\u200d", + "output": null + }, + { + "comment": "V6", + "input": "xn--ey1p.xn--ss-eq36b", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--ey1p.xn--ss-n1tx0508a", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--ey1p.xn--zca870nz438b", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u248b\uff61\u2488\u200d\uda8f\udd22", + "output": null + }, + { + "comment": "C2; P1; V6; A4_2 (ignored)", + "input": "4.\u30021.\u200d\uda8f\udd22", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": "4..1.xn--sf51d", + "output": null + }, + { + "comment": "C2; V6; A4_2 (ignored)", + "input": "4..1.xn--1ug64613i", + "output": null + }, + { + "comment": "V6", + "input": "xn--wsh.xn--tsh07994h", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--wsh.xn--1ug58o74922a", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10b3\ud805\udf2b\u200d\uda1e\udf53\uff0e\u06a7\ud807\udc36", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10b3\ud805\udf2b\u200d\uda1e\udf53.\u06a7\ud807\udc36", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d13\ud805\udf2b\u200d\uda1e\udf53.\u06a7\ud807\udc36", + "output": null + }, + { + "comment": "V6", + "input": "xn--blj6306ey091d.xn--9jb4223l", + "output": null + }, + { + "comment": "V6", + "input": "xn--1ugy52cym7p7xu5e.xn--9jb4223l", + "output": null + }, + { + "comment": "V6", + "input": "xn--rnd8945ky009c.xn--9jb4223l", + "output": null + }, + { + "comment": "V6", + "input": "xn--rnd479ep20q7x12e.xn--9jb4223l", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d13\ud805\udf2b\u200d\uda1e\udf53\uff0e\u06a7\ud807\udc36", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud802\ude3f.\ud83c\udd06\u2014", + "output": null + }, + { + "comment": "V5; V6", + "input": 
"xn--0s9c.xn--8ug8324p", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\uda10\udeb1\ud8c6\uddae\u06f8\u3002\udb43\udfad-", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--lmb18944c0g2z.xn----2k81m", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud83d\udf85\udb43\udce1\udb30\udf59.\ud989\uddb7", + "output": null + }, + { + "comment": "V6", + "input": "xn--ie9hi1349bqdlb.xn--oj69a", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\u20e7\ud97e\udc4e-\uda6e\udcdd.4\u10a4\u200c", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\u20e7\ud97e\udc4e-\uda6e\udcdd.4\u2d04\u200c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn----9snu5320fi76w.xn--4-ivs", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn----9snu5320fi76w.xn--4-sgn589c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn----9snu5320fi76w.xn--4-f0g", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn----9snu5320fi76w.xn--4-f0g649i", + "output": null + }, + { + "input": "\u16ad\uff61\ud834\udf20\u00df\ud81a\udef1", + "output": "xn--hwe.xn--zca4946pblnc" + }, + { + "input": "\u16ad\u3002\ud834\udf20\u00df\ud81a\udef1", + "output": "xn--hwe.xn--zca4946pblnc" + }, + { + "input": "\u16ad\u3002\ud834\udf20SS\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "input": "\u16ad\u3002\ud834\udf20ss\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "input": "\u16ad\u3002\ud834\udf20Ss\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "input": "xn--hwe.xn--ss-ci1ub261a", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "input": "\u16ad.\ud834\udf20ss\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "input": "\u16ad.\ud834\udf20SS\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "input": "\u16ad.\ud834\udf20Ss\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + 
"input": "xn--hwe.xn--zca4946pblnc", + "output": "xn--hwe.xn--zca4946pblnc" + }, + { + "input": "\u16ad.\ud834\udf20\u00df\ud81a\udef1", + "output": "xn--hwe.xn--zca4946pblnc" + }, + { + "input": "\u16ad\uff61\ud834\udf20SS\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "input": "\u16ad\uff61\ud834\udf20ss\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "input": "\u16ad\uff61\ud834\udf20Ss\ud81a\udef1", + "output": "xn--hwe.xn--ss-ci1ub261a" + }, + { + "comment": "P1; V5; V6", + "input": "\ud8fc\udc47\u1734\uff0e\ud802\ude3a\u00c9\u2b13\ud804\udd34", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud8fc\udc47\u1734\uff0e\ud802\ude3aE\u0301\u2b13\ud804\udd34", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud8fc\udc47\u1734.\ud802\ude3a\u00c9\u2b13\ud804\udd34", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud8fc\udc47\u1734.\ud802\ude3aE\u0301\u2b13\ud804\udd34", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud8fc\udc47\u1734.\ud802\ude3ae\u0301\u2b13\ud804\udd34", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud8fc\udc47\u1734.\ud802\ude3a\u00e9\u2b13\ud804\udd34", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--c0e34564d.xn--9ca207st53lg3f", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud8fc\udc47\u1734\uff0e\ud802\ude3ae\u0301\u2b13\ud804\udd34", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud8fc\udc47\u1734\uff0e\ud802\ude3a\u00e9\u2b13\ud804\udd34", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": "xn--09e4694e..xn--ye6h", + "output": "xn--09e4694e..xn--ye6h" + }, + { + "comment": "P1; V5; V6", + "input": "\u10c3\uff0e\u0653\u18a4", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u10c3.\u0653\u18a4", + "output": null + }, + { + "comment": "V5", + "input": "\u2d23.\u0653\u18a4", + "output": null + }, + { + "comment": "V5", + "input": 
"xn--rlj.xn--vhb294g", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--7nd.xn--vhb294g", + "output": null + }, + { + "comment": "V5", + "input": "\u2d23\uff0e\u0653\u18a4", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb40\udd08\u0813\uff0e\uc2c9\ud9d0\uddbb\u10c4\ud9ca\udc50", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb40\udd08\u0813\uff0e\u1109\u1174\u11b0\ud9d0\uddbb\u10c4\ud9ca\udc50", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb40\udd08\u0813.\uc2c9\ud9d0\uddbb\u10c4\ud9ca\udc50", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb40\udd08\u0813.\u1109\u1174\u11b0\ud9d0\uddbb\u10c4\ud9ca\udc50", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb40\udd08\u0813.\u1109\u1174\u11b0\ud9d0\uddbb\u2d24\ud9ca\udc50", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb40\udd08\u0813.\uc2c9\ud9d0\uddbb\u2d24\ud9ca\udc50", + "output": null + }, + { + "comment": "V6", + "input": "xn--oub.xn--sljz109bpe25dviva", + "output": null + }, + { + "comment": "V6", + "input": "xn--oub.xn--8nd9522gpe69cviva", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb40\udd08\u0813\uff0e\u1109\u1174\u11b0\ud9d0\uddbb\u2d24\ud9ca\udc50", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb40\udd08\u0813\uff0e\uc2c9\ud9d0\uddbb\u2d24\ud9ca\udc50", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\ud804\udc45\u3002-", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn--210d.-", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "\ua866\u1851\u200d\u2488\u3002\ud800\udee3-", + "output": null + }, + { + "comment": "C2; V3 (ignored); A4_2 (ignored)", + "input": "\ua866\u1851\u200d1.\u3002\ud800\udee3-", + "output": null + }, + { + "comment": "V3 (ignored); A4_2 (ignored)", + "input": "xn--1-o7j0610f..xn----381i", + "output": "xn--1-o7j0610f..xn----381i" + }, + { + "comment": "C2; 
V3 (ignored); A4_2 (ignored)", + "input": "xn--1-o7j663bdl7m..xn----381i", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--h8e863drj7h.xn----381i", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn--h8e470bl0d838o.xn----381i", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "\u2488\u4c39\u200d-\u3002\uc6c8", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "\u2488\u4c39\u200d-\u3002\u110b\u116e\u11bf", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "1.\u4c39\u200d-\u3002\uc6c8", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "1.\u4c39\u200d-\u3002\u110b\u116e\u11bf", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "1.xn----zw5a.xn--kp5b", + "output": "1.xn----zw5a.xn--kp5b" + }, + { + "comment": "C2; V3 (ignored)", + "input": "1.xn----tgnz80r.xn--kp5b", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----dcp160o.xn--kp5b", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn----tgnx5rjr6c.xn--kp5b", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u3066\u3002\u200c\udb43\udcfd\u07f3", + "output": null + }, + { + "comment": "V6", + "input": "xn--m9j.xn--rtb10784p", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--m9j.xn--rtb154j9l73w", + "output": null + }, + { + "comment": "V5", + "input": "\u03c2\uff61\ua9c0\u06e7", + "output": null + }, + { + "comment": "V5", + "input": "\u03c2\u3002\ua9c0\u06e7", + "output": null + }, + { + "comment": "V5", + "input": "\u03a3\u3002\ua9c0\u06e7", + "output": null + }, + { + "comment": "V5", + "input": "\u03c3\u3002\ua9c0\u06e7", + "output": null + }, + { + "comment": "V5", + "input": "xn--4xa.xn--3lb1944f", + "output": null + }, + { + "comment": "V5", + "input": "xn--3xa.xn--3lb1944f", + "output": null + }, + { + "comment": "V5", + "input": "\u03a3\uff61\ua9c0\u06e7", + 
"output": null + }, + { + "comment": "V5", + "input": "\u03c3\uff61\ua9c0\u06e7", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0bcd\udb56\udec5\ud9f0\ude51.\u10a2\u10b5", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0bcd\udb56\udec5\ud9f0\ude51.\u2d02\u2d15", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0bcd\udb56\udec5\ud9f0\ude51.\u10a2\u2d15", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--xmc83135idcxza.xn--9md086l", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--xmc83135idcxza.xn--tkjwb", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--xmc83135idcxza.xn--9md2b", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u1c32\ud83c\udd08\u2f9b\u05a6\uff0e\u200d\uda7e\udd64\u07fd", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--xcb756i493fwi5o.xn--1tb13454l", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--xcb756i493fwi5o.xn--1tb334j1197q", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1897\uff61\u04c0\ud934\udd3b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1897\u3002\u04c0\ud934\udd3b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1897\u3002\u04cf\ud934\udd3b", + "output": null + }, + { + "comment": "V6", + "input": "xn--hbf.xn--s5a83117e", + "output": null + }, + { + "comment": "V6", + "input": "xn--hbf.xn--d5a86117e", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1897\uff61\u04cf\ud934\udd3b", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "-\ud800\udef7\ud81b\udf91\u3002\udb40\uddac", + "output": "xn----991iq40y." + }, + { + "comment": "V3 (ignored)", + "input": "xn----991iq40y.", + "output": "xn----991iq40y." 
+ }, + { + "comment": "P1; V5; V6", + "input": "\ud807\udc98\udb40\udd12\ud80d\udc61\uff61\ud835\udfea\u10bc", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud807\udc98\udb40\udd12\ud80d\udc61\u30028\u10bc", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud807\udc98\udb40\udd12\ud80d\udc61\u30028\u2d1c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--7m3d291b.xn--8-vws", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--7m3d291b.xn--8-s1g", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud807\udc98\udb40\udd12\ud80d\udc61\uff61\ud835\udfea\u2d1c", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1bab\uff61\ud83c\udc89\udb40\udc70", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1bab\u3002\ud83c\udc89\udb40\udc70", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--zxf.xn--fx7ho0250c", + "output": null + }, + { + "comment": "C1; P1; V6; V3 (ignored)", + "input": "\udb71\udeb6\udba0\uded6\uda1a\ude70-\u3002\u200c", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----7i12hu122k9ire.", + "output": null + }, + { + "comment": "C1; V6; V3 (ignored)", + "input": "xn----7i12hu122k9ire.xn--0ug", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ufe12\uff0e\ufe2f\ud805\udc42", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ufe12\uff0e\ud805\udc42\ufe2f", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": "\u3002.\ud805\udc42\ufe2f", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": "..xn--s96cu30b", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--y86c.xn--s96cu30b", + "output": null + }, + { + "comment": "C2; V5", + "input": "\ua92c\u3002\u200d", + "output": null + }, + { + "comment": "V5", + "input": "xn--zi9a.", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--zi9a.xn--1ug", + "output": null + }, + { 
+ "comment": "P1; V6; V3 (ignored)", + "input": "\udb58\ude04\u3002-", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--xm38e.-", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0330\uff0e\udb81\udf31\u8680", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0330.\udb81\udf31\u8680", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--xta.xn--e91aw9417e", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud83e\udc9f\ud83c\udd08\u200d\ua84e\uff61\u0f84", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--nc9aq743ds0e.xn--3ed", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--1ug4874cfd0kbmg.xn--3ed", + "output": null + }, + { + "comment": "V5", + "input": "\ua854\u3002\u1039\u1887", + "output": null + }, + { + "comment": "V5", + "input": "xn--tc9a.xn--9jd663b", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\ud880\udd67\ud94e\ude60-\uff0e\uabed-\u609c", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\ud880\udd67\ud94e\ude60-.\uabed-\u609c", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----7m53aj640l.xn----8f4br83t", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "\u1849\ud899\udce7\u2b1e\u189c.-\u200d\ud83a\udcd1\u202e", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--87e0ol04cdl39e.xn----qinu247r", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn--87e0ol04cdl39e.xn----ugn5e3763s", + "output": null + }, + { + "input": "\ud83a\udd53\uff0e\u0718", + "output": "xn--of6h.xn--inb" + }, + { + "input": "\ud83a\udd53.\u0718", + "output": "xn--of6h.xn--inb" + }, + { + "input": "xn--of6h.xn--inb", + "output": "xn--of6h.xn--inb" + }, + { + "comment": "V3 (ignored)", + "input": "\udb40\udd3d-\uff0e-\u0dca", + "output": "-.xn----ptf" + }, + { + "comment": "V3 (ignored)", + 
"input": "\udb40\udd3d-.-\u0dca", + "output": "-.xn----ptf" + }, + { + "comment": "V3 (ignored)", + "input": "-.xn----ptf", + "output": "-.xn----ptf" + }, + { + "comment": "P1; V6", + "input": "\u10ba\ud800\udef8\udb40\udd04\u3002\ud835\udfdd\ud7f6\u103a", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10ba\ud800\udef8\udb40\udd04\u30025\ud7f6\u103a", + "output": null + }, + { + "input": "\u2d1a\ud800\udef8\udb40\udd04\u30025\ud7f6\u103a", + "output": "xn--ilj2659d.xn--5-dug9054m" + }, + { + "input": "xn--ilj2659d.xn--5-dug9054m", + "output": "xn--ilj2659d.xn--5-dug9054m" + }, + { + "input": "\u2d1a\ud800\udef8.5\ud7f6\u103a", + "output": "xn--ilj2659d.xn--5-dug9054m" + }, + { + "comment": "P1; V6", + "input": "\u10ba\ud800\udef8.5\ud7f6\u103a", + "output": null + }, + { + "comment": "V6", + "input": "xn--ynd2415j.xn--5-dug9054m", + "output": null + }, + { + "input": "\u2d1a\ud800\udef8\udb40\udd04\u3002\ud835\udfdd\ud7f6\u103a", + "output": "xn--ilj2659d.xn--5-dug9054m" + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u200d-\u1839\ufe6a.\u1de1\u1922", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u200d-\u1839%.\u1de1\u1922", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "xn---%-u4o.xn--gff52t", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "xn---%-u4oy48b.xn--gff52t", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----c6jx047j.xn--gff52t", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn----c6j614b1z4v.xn--gff52t", + "output": null + }, + { + "input": "\u0723\u05a3\uff61\u332a", + "output": "xn--ucb18e.xn--eck4c5a" + }, + { + "input": "\u0723\u05a3\u3002\u30cf\u30a4\u30c4", + "output": "xn--ucb18e.xn--eck4c5a" + }, + { + "input": "xn--ucb18e.xn--eck4c5a", + "output": "xn--ucb18e.xn--eck4c5a" + }, + { + "input": "\u0723\u05a3.\u30cf\u30a4\u30c4", + "output": "xn--ucb18e.xn--eck4c5a" + }, + { + "comment": "P1; V6", + 
"input": "\ud84e\ude6b\uff0e\ud9f1\udc72", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud84e\ude6b.\ud9f1\udc72", + "output": null + }, + { + "comment": "V6", + "input": "xn--td3j.xn--4628b", + "output": null + }, + { + "input": "xn--skb", + "output": "xn--skb" + }, + { + "input": "\u06b9", + "output": "xn--skb" + }, + { + "comment": "V5; V3 (ignored)", + "input": "\u0c4d\ud836\ude3e\u05a9\ud835\udfed\u3002-\ud805\udf28", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\u0c4d\ud836\ude3e\u05a91\u3002-\ud805\udf28", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn--1-rfc312cdp45c.xn----nq0j", + "output": null + }, + { + "comment": "P1; V6", + "input": "\uda4f\udfc8\u3002\ub64f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\uda4f\udfc8\u3002\u1104\u116b\u11ae", + "output": null + }, + { + "comment": "V6", + "input": "xn--ph26c.xn--281b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud916\ude1a\udb40\udd0c\udb07\udf40\u1840.\u08b6", + "output": null + }, + { + "comment": "V6", + "input": "xn--z7e98100evc01b.xn--czb", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\uff61\ud8d4\udc5b", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\u3002\ud8d4\udc5b", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--6x4u", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1ug.xn--6x4u", + "output": null + }, + { + "comment": "C1; V5", + "input": "\ud805\uddbf\ud836\ude14.\u185f\ud805\uddbf\u1b42\u200c", + "output": null + }, + { + "comment": "V5", + "input": "xn--461dw464a.xn--v8e29loy65a", + "output": null + }, + { + "comment": "C1; V5", + "input": "xn--461dw464a.xn--v8e29ldzfo952a", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\ud02c-?\ud99b\udcd2.\u200c\u0ac5\udb67\ude24\u06f4", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": 
"\u110f\u1170\u11bb-?\ud99b\udcd2.\u200c\u0ac5\udb67\ude24\u06f4", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "xn---?-6g4k75207c.xn--hmb76q74166b", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "xn---?-6g4k75207c.xn--hmb76q48y18505a", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud02c-?\ud99b\udcd2.xn--hmb76q74166b", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u110f\u1170\u11bb-?\ud99b\udcd2.xn--hmb76q74166b", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u110f\u1170\u11bb-?\ud99b\udcd2.XN--HMB76Q74166B", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud02c-?\ud99b\udcd2.XN--HMB76Q74166B", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud02c-?\ud99b\udcd2.Xn--Hmb76q74166b", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u110f\u1170\u11bb-?\ud99b\udcd2.Xn--Hmb76q74166b", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\ud02c-?\ud99b\udcd2.xn--hmb76q48y18505a", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u110f\u1170\u11bb-?\ud99b\udcd2.xn--hmb76q48y18505a", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u110f\u1170\u11bb-?\ud99b\udcd2.XN--HMB76Q48Y18505A", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\ud02c-?\ud99b\udcd2.XN--HMB76Q48Y18505A", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\ud02c-?\ud99b\udcd2.Xn--Hmb76q48y18505a", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u110f\u1170\u11bb-?\ud99b\udcd2.Xn--Hmb76q48y18505a", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u252e\udb40\uddd0\uff0e\u0c00\u0c4d\u1734\u200d", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u252e\udb40\uddd0.\u0c00\u0c4d\u1734\u200d", + "output": null + }, + { + "comment": "V5", + "input": "xn--kxh.xn--eoc8m432a", + "output": null + }, + { + "comment": "C2; V5", + "input": 
"xn--1ug04r.xn--eoc8m432a40i", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udaa5\udeaa\uff61\ud83c\udd02", + "output": null + }, + { + "comment": "V6", + "input": "xn--n433d.xn--v07h", + "output": null + }, + { + "comment": "V5", + "input": "\ud804\udf68\u520d.\ud83d\udee6", + "output": null + }, + { + "comment": "V5", + "input": "xn--rbry728b.xn--y88h", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\udb40\udf0f3\uff61\u1bf1\ud835\udfd2", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\udb40\udf0f3\u3002\u1bf14", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--3-ib31m.xn--4-pql", + "output": null + }, + { + "comment": "V5", + "input": "\u034a\uff0e\ud802\ude0e", + "output": null + }, + { + "comment": "V5", + "input": "\u034a.\ud802\ude0e", + "output": null + }, + { + "comment": "V5", + "input": "xn--oua.xn--mr9c", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\ua846\u3002\u2183\u0fb5\ub1ae-", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\ua846\u3002\u2183\u0fb5\u1102\u116a\u11c1-", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "\ua846\u3002\u2184\u0fb5\u1102\u116a\u11c1-", + "output": "xn--fc9a.xn----qmg097k469k" + }, + { + "comment": "V3 (ignored)", + "input": "\ua846\u3002\u2184\u0fb5\ub1ae-", + "output": "xn--fc9a.xn----qmg097k469k" + }, + { + "comment": "V3 (ignored)", + "input": "xn--fc9a.xn----qmg097k469k", + "output": "xn--fc9a.xn----qmg097k469k" + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--fc9a.xn----qmg787k869k", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud805\udc42\uff61\u200d\udb55\udf80\ud83d\udf95\uda54\udc54", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud805\udc42\u3002\u200d\udb55\udf80\ud83d\udf95\uda54\udc54", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--8v1d.xn--ye9h41035a2qqs", + "output": null + }, + { + 
"comment": "C2; V5; V6", + "input": "xn--8v1d.xn--1ug1386plvx1cd8vya", + "output": null + }, + { + "input": "\u00df\u09c1\u1ded\u3002\u06208\u2085", + "output": "xn--zca266bwrr.xn--85-psd" + }, + { + "input": "\u00df\u09c1\u1ded\u3002\u062085", + "output": "xn--zca266bwrr.xn--85-psd" + }, + { + "input": "SS\u09c1\u1ded\u3002\u062085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "ss\u09c1\u1ded\u3002\u062085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "Ss\u09c1\u1ded\u3002\u062085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "xn--ss-e2f077r.xn--85-psd", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "ss\u09c1\u1ded.\u062085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "SS\u09c1\u1ded.\u062085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "Ss\u09c1\u1ded.\u062085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "xn--zca266bwrr.xn--85-psd", + "output": "xn--zca266bwrr.xn--85-psd" + }, + { + "input": "\u00df\u09c1\u1ded.\u062085", + "output": "xn--zca266bwrr.xn--85-psd" + }, + { + "input": "SS\u09c1\u1ded\u3002\u06208\u2085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "ss\u09c1\u1ded\u3002\u06208\u2085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "Ss\u09c1\u1ded\u3002\u06208\u2085", + "output": "xn--ss-e2f077r.xn--85-psd" + }, + { + "input": "\ufe0d\u0a9b\u3002\u5d68", + "output": "xn--6dc.xn--tot" + }, + { + "input": "xn--6dc.xn--tot", + "output": "xn--6dc.xn--tot" + }, + { + "input": "\u0a9b.\u5d68", + "output": "xn--6dc.xn--tot" + }, + { + "comment": "C1; P1; V5; V6; V3 (ignored)", + "input": "-\u200c\u2499\ud802\udee5\uff61\ud836\ude35", + "output": null + }, + { + "comment": "C1; V5; V3 (ignored)", + "input": "-\u200c18.\ud802\udee5\u3002\ud836\ude35", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "-18.xn--rx9c.xn--382h", + "output": null + }, + { + "comment": "C1; V5; V3 (ignored)", + "input": 
"xn---18-9m0a.xn--rx9c.xn--382h", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----ddps939g.xn--382h", + "output": null + }, + { + "comment": "C1; V5; V6; V3 (ignored)", + "input": "xn----sgn18r3191a.xn--382h", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ufe05\ufe12\u3002\ud858\udc3e\u1ce0", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": "\ufe05\u3002\u3002\ud858\udc3e\u1ce0", + "output": "..xn--t6f5138v" + }, + { + "comment": "A4_2 (ignored)", + "input": "..xn--t6f5138v", + "output": "..xn--t6f5138v" + }, + { + "comment": "V6", + "input": "xn--y86c.xn--t6f5138v", + "output": null + }, + { + "input": "xn--t6f5138v", + "output": "xn--t6f5138v" + }, + { + "input": "\ud858\udc3e\u1ce0", + "output": "xn--t6f5138v" + }, + { + "comment": "P1; V6", + "input": "\uda7b\udd5b\u0613.\u10b5", + "output": null + }, + { + "comment": "P1; V6", + "input": "\uda7b\udd5b\u0613.\u2d15", + "output": null + }, + { + "comment": "V6", + "input": "xn--1fb94204l.xn--dlj", + "output": null + }, + { + "comment": "V6", + "input": "xn--1fb94204l.xn--tnd", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\udb40\udd37\uff61\uda09\udc41", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\udb40\udd37\u3002\uda09\udc41", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--w720c", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug.xn--w720c", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u2488\u0dd6\u7105.\udb1e\udc59\u200d\ua85f", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "1.\u0dd6\u7105.\udb1e\udc59\u200d\ua85f", + "output": null + }, + { + "comment": "V5; V6", + "input": "1.xn--t1c6981c.xn--4c9a21133d", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "1.xn--t1c6981c.xn--1ugz184c9lw7i", + "output": null + }, + { + "comment": "V6", + "input": "xn--t1c337io97c.xn--4c9a21133d", + 
"output": null + }, + { + "comment": "C2; V6", + "input": "xn--t1c337io97c.xn--1ugz184c9lw7i", + "output": null + }, + { + "comment": "V5", + "input": "\ud804\uddc0\u258d.\u205e\u1830", + "output": null + }, + { + "comment": "V5", + "input": "xn--9zh3057f.xn--j7e103b", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "-3.\u200d\u30cc\u1895", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "-3.xn--fbf115j", + "output": "-3.xn--fbf115j" + }, + { + "comment": "C2; V3 (ignored)", + "input": "-3.xn--fbf739aq5o", + "output": null + }, + { + "comment": "V5", + "input": "\ud802\ude3f\udb40\udd8c\u9e2e\ud805\udeb6.\u03c2", + "output": null + }, + { + "comment": "V5", + "input": "\ud802\ude3f\udb40\udd8c\u9e2e\ud805\udeb6.\u03a3", + "output": null + }, + { + "comment": "V5", + "input": "\ud802\ude3f\udb40\udd8c\u9e2e\ud805\udeb6.\u03c3", + "output": null + }, + { + "comment": "V5", + "input": "xn--l76a726rt2h.xn--4xa", + "output": null + }, + { + "comment": "V5", + "input": "xn--l76a726rt2h.xn--3xa", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u03c2-\u3002\u200c\ud835\udfed-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u03c2-\u3002\u200c1-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u03a3-\u3002\u200c1-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u03c3-\u3002\u200c1-", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "xn----zmb.1-", + "output": "xn----zmb.1-" + }, + { + "comment": "C1; V3 (ignored)", + "input": "xn----zmb.xn--1--i1t", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "xn----xmb.xn--1--i1t", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u03a3-\u3002\u200c\ud835\udfed-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u03c3-\u3002\u200c\ud835\udfed-", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": 
"\u1734-\u0ce2\uff0e\udb40\udd29\u10a4", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1734-\u0ce2.\udb40\udd29\u10a4", + "output": null + }, + { + "comment": "V5", + "input": "\u1734-\u0ce2.\udb40\udd29\u2d04", + "output": null + }, + { + "comment": "V5", + "input": "xn----ggf830f.xn--vkj", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn----ggf830f.xn--cnd", + "output": null + }, + { + "comment": "V5", + "input": "\u1734-\u0ce2\uff0e\udb40\udd29\u2d04", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u200d\u3002\ud838\udc18\u2488\ua84d\u64c9", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u3002\ud838\udc181.\ua84d\u64c9", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": ".xn--1-1p4r.xn--s7uv61m", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--1ug.xn--1-1p4r.xn--s7uv61m", + "output": null + }, + { + "comment": "V5; V6; A4_2 (ignored)", + "input": ".xn--tsh026uql4bew9p", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--1ug.xn--tsh026uql4bew9p", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2ad0\uff61\u10c0-\udacd\udc22", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2ad0\u3002\u10c0-\udacd\udc22", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2ad0\u3002\u2d20-\udacd\udc22", + "output": null + }, + { + "comment": "V6", + "input": "xn--r3i.xn----2wst7439i", + "output": null + }, + { + "comment": "V6", + "input": "xn--r3i.xn----z1g58579u", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2ad0\uff61\u2d20-\udacd\udc22", + "output": null + }, + { + "comment": "V5", + "input": "\ud805\udc42\u25ca\uff0e\u299f\u2220", + "output": null + }, + { + "comment": "V5", + "input": "\ud805\udc42\u25ca.\u299f\u2220", + "output": null + }, + { + "comment": "V5", + "input": "xn--01h3338f.xn--79g270a", + "output": null + }, + { + "comment": "P1; V6", + "input": 
"\ud5c1\udb21\udd99\u0e3a\udb28\udf5a\u3002\u06ba\ud835\udfdc", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1112\u1164\u11bc\udb21\udd99\u0e3a\udb28\udf5a\u3002\u06ba\ud835\udfdc", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud5c1\udb21\udd99\u0e3a\udb28\udf5a\u3002\u06ba4", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1112\u1164\u11bc\udb21\udd99\u0e3a\udb28\udf5a\u3002\u06ba4", + "output": null + }, + { + "comment": "V6", + "input": "xn--o4c1723h8g85gt4ya.xn--4-dvc", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ua953.\u033d\ud804\udcbd\u998b", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--3j9a.xn--bua0708eqzrd", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\udae2\udedd\uda69\udef8\u200d\uff61\u4716", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\udae2\udedd\uda69\udef8\u200d\u3002\u4716", + "output": null + }, + { + "comment": "V6", + "input": "xn--g138cxw05a.xn--k0o", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1ug30527h9mxi.xn--k0o", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u186f\u2689\u59f6\ud83c\udd09\uff0e\u06f7\u200d\ud83c\udfaa\u200d", + "output": null + }, + { + "comment": "V6", + "input": "xn--c9e433epi4b3j20a.xn--kmb6733w", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--c9e433epi4b3j20a.xn--kmb859ja94998b", + "output": null + }, + { + "comment": "C1; P1; V5; V6; V3 (ignored)", + "input": "\u135f\u1848\u200c\uff0e\ufe12-\ud81b\udf90-", + "output": null + }, + { + "comment": "C1; V5; V3 (ignored); A4_2 (ignored)", + "input": "\u135f\u1848\u200c.\u3002-\ud81b\udf90-", + "output": null + }, + { + "comment": "V5; V3 (ignored); A4_2 (ignored)", + "input": "xn--b7d82w..xn-----pe4u", + "output": null + }, + { + "comment": "C1; V5; V3 (ignored); A4_2 (ignored)", + "input": "xn--b7d82wo4h..xn-----pe4u", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + 
"input": "xn--b7d82w.xn-----c82nz547a", + "output": null + }, + { + "comment": "C1; V5; V6; V3 (ignored)", + "input": "xn--b7d82wo4h.xn-----c82nz547a", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\ud836\ude5c\u3002-\u0b4d\u10ab", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\ud836\ude5c\u3002-\u0b4d\u2d0b", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn--792h.xn----bse820x", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn--792h.xn----bse632b", + "output": null + }, + { + "comment": "C1", + "input": "\ud835\udff5\u9681\u2bee\uff0e\u180d\u200c", + "output": null + }, + { + "comment": "C1", + "input": "9\u9681\u2bee.\u180d\u200c", + "output": null + }, + { + "input": "xn--9-mfs8024b.", + "output": "xn--9-mfs8024b." + }, + { + "input": "9\u9681\u2bee.", + "output": "xn--9-mfs8024b." + }, + { + "comment": "C1", + "input": "xn--9-mfs8024b.xn--0ug", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\u1bac\u10ac\u200c\u0325\u3002\ud835\udff8", + "output": null + }, + { + "comment": "C1; V5", + "input": "\u1bac\u2d0c\u200c\u0325\u3002\ud835\udff8", + "output": null + }, + { + "input": "xn--2ib43l.xn--te6h", + "output": "xn--2ib43l.xn--te6h" + }, + { + "input": "\u067d\u0943.\ud83a\udd35", + "output": "xn--2ib43l.xn--te6h" + }, + { + "input": "\u067d\u0943.\ud83a\udd13", + "output": "xn--2ib43l.xn--te6h" + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\u3002\uffa0\u0f84\u0f96", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\u3002\u1160\u0f84\u0f96", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--3ed0b20h", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug.xn--3ed0b20h", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--3ed0by082k", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug.xn--3ed0by082k", + "output": null 
+ }, + { + "comment": "P1; V6", + "input": "\ua9d0\u04c0\u1baa\u08f6\uff0e\ub235", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ua9d0\u04c0\u1baa\u08f6\uff0e\u1102\u116f\u11bc", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ua9d0\u04c0\u1baa\u08f6.\ub235", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ua9d0\u04c0\u1baa\u08f6.\u1102\u116f\u11bc", + "output": null + }, + { + "input": "\ua9d0\u04cf\u1baa\u08f6.\u1102\u116f\u11bc", + "output": "xn--s5a04sn4u297k.xn--2e1b" + }, + { + "input": "\ua9d0\u04cf\u1baa\u08f6.\ub235", + "output": "xn--s5a04sn4u297k.xn--2e1b" + }, + { + "input": "xn--s5a04sn4u297k.xn--2e1b", + "output": "xn--s5a04sn4u297k.xn--2e1b" + }, + { + "comment": "V6", + "input": "xn--d5a07sn4u297k.xn--2e1b", + "output": null + }, + { + "input": "\ua9d0\u04cf\u1baa\u08f6\uff0e\u1102\u116f\u11bc", + "output": "xn--s5a04sn4u297k.xn--2e1b" + }, + { + "input": "\ua9d0\u04cf\u1baa\u08f6\uff0e\ub235", + "output": "xn--s5a04sn4u297k.xn--2e1b" + }, + { + "comment": "P1; V5; V6", + "input": "\ua8ea\uff61\ud818\udd3f\ud804\uddbe\udb40\uddd7", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ua8ea\u3002\ud818\udd3f\ud804\uddbe\udb40\uddd7", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--3g9a.xn--ud1dz07k", + "output": null + }, + { + "input": "xn--9hb7344k.", + "output": "xn--9hb7344k." + }, + { + "input": "\ud802\udec7\u0661.", + "output": "xn--9hb7344k." 
+ }, + { + "comment": "P1; V5; V6", + "input": "\u10c5.\ud804\udd33\u32b8", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u10c5.\ud804\udd3343", + "output": null + }, + { + "comment": "V5", + "input": "\u2d25.\ud804\udd3343", + "output": null + }, + { + "comment": "V5", + "input": "xn--tlj.xn--43-274o", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--9nd.xn--43-274o", + "output": null + }, + { + "comment": "V5", + "input": "\u2d25.\ud804\udd33\u32b8", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91e\udea8\udb40\udd09\uffa0\u0fb7.\ud9a1\udfb0\ua953", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91e\udea8\udb40\udd09\u1160\u0fb7.\ud9a1\udfb0\ua953", + "output": null + }, + { + "comment": "V6", + "input": "xn--kgd36f9z57y.xn--3j9au7544a", + "output": null + }, + { + "comment": "V6", + "input": "xn--kgd7493jee34a.xn--3j9au7544a", + "output": null + }, + { + "comment": "C1; V5", + "input": "\u0618.\u06f3\u200c\ua953", + "output": null + }, + { + "comment": "V5", + "input": "xn--6fb.xn--gmb0524f", + "output": null + }, + { + "comment": "C1; V5", + "input": "xn--6fb.xn--gmb469jjf1h", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u184c\uff0e\ufe12\u1891", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": "\u184c.\u3002\u1891", + "output": "xn--c8e..xn--bbf" + }, + { + "comment": "A4_2 (ignored)", + "input": "xn--c8e..xn--bbf", + "output": "xn--c8e..xn--bbf" + }, + { + "comment": "V6", + "input": "xn--c8e.xn--bbf9168i", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud83b\uddcf\u3002\u1822\uda0d\ude06", + "output": null + }, + { + "comment": "V6", + "input": "xn--hd7h.xn--46e66060j", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u40b9\udbb9\udd85\ud800\udee6\uff0e\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u40b9\udbb9\udd85\ud800\udee6.\u200d", + "output": null + }, + { + "comment": "V6", + "input": 
"xn--0on3543c5981i.", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--0on3543c5981i.xn--1ug", + "output": null + }, + { + "input": "\u07e5.\u06b5", + "output": "xn--dtb.xn--okb" + }, + { + "input": "xn--dtb.xn--okb", + "output": "xn--dtb.xn--okb" + }, + { + "comment": "A4_2 (ignored)", + "input": ".xn--3e6h", + "output": ".xn--3e6h" + }, + { + "input": "xn--3e6h", + "output": "xn--3e6h" + }, + { + "input": "\ud83a\udd3f", + "output": "xn--3e6h" + }, + { + "input": "\ud83a\udd1d", + "output": "xn--3e6h" + }, + { + "comment": "C1; V5; V3 (ignored)", + "input": "\u103a\u200d\u200c\u3002-\u200c", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn--bkd.-", + "output": null + }, + { + "comment": "C1; V5; V3 (ignored)", + "input": "xn--bkd412fca.xn----sgn", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ufe12\uff61\u1b44\u1849", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": "\u3002\u3002\u1b44\u1849", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": "..xn--87e93m", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--y86c.xn--87e93m", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "-\u1bab\ufe12\u200d.\ud90b\udd88\ud957\ude53", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "-\u1bab\u3002\u200d.\ud90b\udd88\ud957\ude53", + "output": null + }, + { + "comment": "V6; V3 (ignored); A4_2 (ignored)", + "input": "xn----qml..xn--x50zy803a", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn----qml.xn--1ug.xn--x50zy803a", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----qml1407i.xn--x50zy803a", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn----qmlv7tw180a.xn--x50zy803a", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u06b9\uff0e\u1873\u115f", + "output": null + }, + { + "comment": "P1; V6", + "input": 
"\u06b9.\u1873\u115f", + "output": null + }, + { + "comment": "V6", + "input": "xn--skb.xn--osd737a", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u3a1b\ud823\udc4e.\ufe12\ud835\udfd5\u0d01", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": "\u3a1b\ud823\udc4e.\u30027\u0d01", + "output": "xn--mbm8237g..xn--7-7hf" + }, + { + "comment": "A4_2 (ignored)", + "input": "xn--mbm8237g..xn--7-7hf", + "output": "xn--mbm8237g..xn--7-7hf" + }, + { + "comment": "V6", + "input": "xn--mbm8237g.xn--7-7hf1526p", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u00df\u200c\uaaf6\u18a5\uff0e\u22b6\u10c1\u10b6", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u00df\u200c\uaaf6\u18a5.\u22b6\u10c1\u10b6", + "output": null + }, + { + "comment": "C1", + "input": "\u00df\u200c\uaaf6\u18a5.\u22b6\u2d21\u2d16", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "SS\u200c\uaaf6\u18a5.\u22b6\u10c1\u10b6", + "output": null + }, + { + "comment": "C1", + "input": "ss\u200c\uaaf6\u18a5.\u22b6\u2d21\u2d16", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "Ss\u200c\uaaf6\u18a5.\u22b6\u10c1\u2d16", + "output": null + }, + { + "comment": "V6", + "input": "xn--ss-4epx629f.xn--5nd703gyrh", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--ss-4ep585bkm5p.xn--5nd703gyrh", + "output": null + }, + { + "input": "xn--ss-4epx629f.xn--ifh802b6a", + "output": "xn--ss-4epx629f.xn--ifh802b6a" + }, + { + "input": "ss\uaaf6\u18a5.\u22b6\u2d21\u2d16", + "output": "xn--ss-4epx629f.xn--ifh802b6a" + }, + { + "comment": "P1; V6", + "input": "SS\uaaf6\u18a5.\u22b6\u10c1\u10b6", + "output": null + }, + { + "comment": "P1; V6", + "input": "Ss\uaaf6\u18a5.\u22b6\u10c1\u2d16", + "output": null + }, + { + "comment": "V6", + "input": "xn--ss-4epx629f.xn--undv409k", + "output": null + }, + { + "comment": "C1", + "input": "xn--ss-4ep585bkm5p.xn--ifh802b6a", + "output": null + }, + { + "comment": "C1; V6", + "input": 
"xn--ss-4ep585bkm5p.xn--undv409k", + "output": null + }, + { + "comment": "C1", + "input": "xn--zca682johfi89m.xn--ifh802b6a", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--zca682johfi89m.xn--undv409k", + "output": null + }, + { + "comment": "C1", + "input": "\u00df\u200c\uaaf6\u18a5\uff0e\u22b6\u2d21\u2d16", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "SS\u200c\uaaf6\u18a5\uff0e\u22b6\u10c1\u10b6", + "output": null + }, + { + "comment": "C1", + "input": "ss\u200c\uaaf6\u18a5\uff0e\u22b6\u2d21\u2d16", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "Ss\u200c\uaaf6\u18a5\uff0e\u22b6\u10c1\u2d16", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\u3002\u03c2\udb40\udc49", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\u3002\u03a3\udb40\udc49", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\u3002\u03c3\udb40\udc49", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--4xa24344p", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1ug.xn--4xa24344p", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1ug.xn--3xa44344p", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\u2492\uda61\ude19\uda8f\udce0\ud805\udcc0.-\udb3a\udc4a", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "11.\uda61\ude19\uda8f\udce0\ud805\udcc0.-\udb3a\udc4a", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "11.xn--uz1d59632bxujd.xn----x310m", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--3shy698frsu9dt1me.xn----x310m", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "-\uff61\u200d", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "-\u3002\u200d", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "-.", + "output": "-." 
+ }, + { + "comment": "C2; V3 (ignored)", + "input": "-.xn--1ug", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u126c\uda12\udc3c\ud8c5\uddf6\uff61\ud802\ude2c\ud835\udfe0", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u126c\uda12\udc3c\ud8c5\uddf6\u3002\ud802\ude2c8", + "output": null + }, + { + "comment": "V6", + "input": "xn--d0d41273c887z.xn--8-ob5i", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "\u03c2\u200d-.\u10c3\ud859\udfd9", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "\u03c2\u200d-.\u2d23\ud859\udfd9", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "\u03a3\u200d-.\u10c3\ud859\udfd9", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "\u03c3\u200d-.\u2d23\ud859\udfd9", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "xn----zmb.xn--rlj2573p", + "output": "xn----zmb.xn--rlj2573p" + }, + { + "comment": "C2; V3 (ignored)", + "input": "xn----zmb048s.xn--rlj2573p", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----zmb.xn--7nd64871a", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn----zmb048s.xn--7nd64871a", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "xn----xmb348s.xn--rlj2573p", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn----xmb348s.xn--7nd64871a", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udad6\udf3d.\u8814", + "output": null + }, + { + "comment": "V6", + "input": "xn--g747d.xn--xl2a", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u08e6\u200d\uff0e\ubf3d", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u08e6\u200d\uff0e\u1108\u1168\u11c0", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u08e6\u200d.\ubf3d", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u08e6\u200d.\u1108\u1168\u11c0", + "output": null + }, 
+ { + "comment": "V5", + "input": "xn--p0b.xn--e43b", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--p0b869i.xn--e43b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud8f6\ude3d\uff0e\ud8ef\ude15", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud8f6\ude3d.\ud8ef\ude15", + "output": null + }, + { + "comment": "V6", + "input": "xn--pr3x.xn--rv7w", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud802\udfc0\ud803\ude09\ud83a\uddcf\u3002\ud949\udea7\u2084\u10ab\ud8cb\ude6b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud802\udfc0\ud803\ude09\ud83a\uddcf\u3002\ud949\udea74\u10ab\ud8cb\ude6b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud802\udfc0\ud803\ude09\ud83a\uddcf\u3002\ud949\udea74\u2d0b\ud8cb\ude6b", + "output": null + }, + { + "comment": "V6", + "input": "xn--039c42bq865a.xn--4-wvs27840bnrzm", + "output": null + }, + { + "comment": "V6", + "input": "xn--039c42bq865a.xn--4-t0g49302fnrzm", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud802\udfc0\ud803\ude09\ud83a\uddcf\u3002\ud949\udea7\u2084\u2d0b\ud8cb\ude6b", + "output": null + }, + { + "comment": "V5", + "input": "\ud835\udfd3\u3002\u06d7", + "output": null + }, + { + "comment": "V5", + "input": "5\u3002\u06d7", + "output": null + }, + { + "comment": "V5", + "input": "5.xn--nlb", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\udaab\ude29.\u2f95", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\udaab\ude29.\u8c37", + "output": null + }, + { + "comment": "V6", + "input": "xn--i183d.xn--6g3a", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug26167i.xn--6g3a", + "output": null + }, + { + "comment": "C1; C2; P1; V6; V3 (ignored)", + "input": "\ufe12\udafb\udc07\u200d.-\u073c\u200c", + "output": null + }, + { + "comment": "C1; C2; P1; V6; V3 (ignored); A4_2 (ignored)", + "input": "\u3002\udafb\udc07\u200d.-\u073c\u200c", + "output": 
null + }, + { + "comment": "V6; V3 (ignored); A4_2 (ignored)", + "input": ".xn--hh50e.xn----t2c", + "output": null + }, + { + "comment": "C1; C2; V6; V3 (ignored); A4_2 (ignored)", + "input": ".xn--1ug05310k.xn----t2c071q", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--y86c71305c.xn----t2c", + "output": null + }, + { + "comment": "C1; C2; V6; V3 (ignored)", + "input": "xn--1ug1658ftw26f.xn----t2c071q", + "output": null + }, + { + "comment": "C2", + "input": "\u200d\uff0e\ud835\udfd7", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u10ad\ud8be\udccd\ua868\u05ae\u3002\u10be\u200c\u200c", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u2d0d\ud8be\udccd\ua868\u05ae\u3002\u2d1e\u200c\u200c", + "output": null + }, + { + "comment": "V6", + "input": "xn--5cb172r175fug38a.xn--mlj", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--5cb172r175fug38a.xn--0uga051h", + "output": null + }, + { + "comment": "V6", + "input": "xn--5cb347co96jug15a.xn--2nd", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--5cb347co96jug15a.xn--2nd059ea", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud800\udef0\u3002\udb05\udcf1", + "output": null + }, + { + "comment": "V6", + "input": "xn--k97c.xn--q031e", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u08df\u10ab\ud89b\udff8\uade4\uff0e\uda40\udd7c\ud835\udfe2\ud72a\u0ae3", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u08df\u10ab\ud89b\udff8\u1100\u1172\u11af\uff0e\uda40\udd7c\ud835\udfe2\u1112\u1171\u11b9\u0ae3", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u08df\u10ab\ud89b\udff8\uade4.\uda40\udd7c0\ud72a\u0ae3", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u08df\u10ab\ud89b\udff8\u1100\u1172\u11af.\uda40\udd7c0\u1112\u1171\u11b9\u0ae3", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": 
"\u08df\u2d0b\ud89b\udff8\u1100\u1172\u11af.\uda40\udd7c0\u1112\u1171\u11b9\u0ae3", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u08df\u2d0b\ud89b\udff8\uade4.\uda40\udd7c0\ud72a\u0ae3", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--i0b436pkl2g2h42a.xn--0-8le8997mulr5f", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--i0b601b6r7l2hs0a.xn--0-8le8997mulr5f", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u08df\u2d0b\ud89b\udff8\u1100\u1172\u11af\uff0e\uda40\udd7c\ud835\udfe2\u1112\u1171\u11b9\u0ae3", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u08df\u2d0b\ud89b\udff8\uade4\uff0e\uda40\udd7c\ud835\udfe2\ud72a\u0ae3", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u0784\uff0e\ud83a\udc5d\u0601", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u0784.\ud83a\udc5d\u0601", + "output": null + }, + { + "comment": "V6", + "input": "xn--lqb.xn--jfb1808v", + "output": null + }, + { + "comment": "V5", + "input": "\u0acd\u2083.8\ua8c4\u200d\ud83c\udce4", + "output": null + }, + { + "comment": "V5", + "input": "\u0acd3.8\ua8c4\u200d\ud83c\udce4", + "output": null + }, + { + "comment": "V5", + "input": "xn--3-yke.xn--8-sl4et308f", + "output": null + }, + { + "comment": "V5", + "input": "xn--3-yke.xn--8-ugnv982dbkwm", + "output": null + }, + { + "comment": "C1", + "input": "\u9c4a\u3002\u200c", + "output": null + }, + { + "input": "xn--rt6a.", + "output": "xn--rt6a." + }, + { + "input": "\u9c4a.", + "output": "xn--rt6a." + }, + { + "comment": "C1", + "input": "xn--rt6a.xn--0ug", + "output": null + }, + { + "input": "xn--4-0bd15808a.", + "output": "xn--4-0bd15808a." + }, + { + "input": "\ud83a\udd3a\u07cc4.", + "output": "xn--4-0bd15808a." + }, + { + "input": "\ud83a\udd18\u07cc4.", + "output": "xn--4-0bd15808a." 
+ }, + { + "comment": "V3 (ignored)", + "input": "-\uff61\u43db", + "output": "-.xn--xco" + }, + { + "comment": "V3 (ignored)", + "input": "-\u3002\u43db", + "output": "-.xn--xco" + }, + { + "comment": "V3 (ignored)", + "input": "-.xn--xco", + "output": "-.xn--xco" + }, + { + "comment": "C1; C2; P1; V6", + "input": "\u200c\ud908\udce0\uff0e\u200d", + "output": null + }, + { + "comment": "C1; C2; P1; V6", + "input": "\u200c\ud908\udce0.\u200d", + "output": null + }, + { + "comment": "V6", + "input": "xn--dj8y.", + "output": null + }, + { + "comment": "C1; C2; V6", + "input": "xn--0ugz7551c.xn--1ug", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud804\uddc0.\udb42\ude31", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--wd1d.xn--k946e", + "output": null + }, + { + "input": "\ud83a\udd2a.\u03c2", + "output": "xn--ie6h.xn--3xa" + }, + { + "input": "\ud83a\udd08.\u03a3", + "output": "xn--ie6h.xn--4xa" + }, + { + "input": "\ud83a\udd2a.\u03c3", + "output": "xn--ie6h.xn--4xa" + }, + { + "input": "\ud83a\udd08.\u03c3", + "output": "xn--ie6h.xn--4xa" + }, + { + "input": "xn--ie6h.xn--4xa", + "output": "xn--ie6h.xn--4xa" + }, + { + "input": "\ud83a\udd08.\u03c2", + "output": "xn--ie6h.xn--3xa" + }, + { + "input": "xn--ie6h.xn--3xa", + "output": "xn--ie6h.xn--3xa" + }, + { + "input": "\ud83a\udd2a.\u03a3", + "output": "xn--ie6h.xn--4xa" + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\u10ba\uff61\u03c2", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\u10ba\u3002\u03c2", + "output": null + }, + { + "comment": "C1", + "input": "\u200c\u2d1a\u3002\u03c2", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\u10ba\u3002\u03a3", + "output": null + }, + { + "comment": "C1", + "input": "\u200c\u2d1a\u3002\u03c3", + "output": null + }, + { + "input": "xn--ilj.xn--4xa", + "output": "xn--ilj.xn--4xa" + }, + { + "input": "\u2d1a.\u03c3", + "output": "xn--ilj.xn--4xa" + }, + { + "comment": "P1; V6", + 
"input": "\u10ba.\u03a3", + "output": null + }, + { + "input": "\u2d1a.\u03c2", + "output": "xn--ilj.xn--3xa" + }, + { + "comment": "P1; V6", + "input": "\u10ba.\u03c2", + "output": null + }, + { + "comment": "V6", + "input": "xn--ynd.xn--4xa", + "output": null + }, + { + "comment": "V6", + "input": "xn--ynd.xn--3xa", + "output": null + }, + { + "input": "xn--ilj.xn--3xa", + "output": "xn--ilj.xn--3xa" + }, + { + "comment": "P1; V6", + "input": "\u10ba.\u03c3", + "output": null + }, + { + "comment": "C1", + "input": "xn--0ug262c.xn--4xa", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--ynd759e.xn--4xa", + "output": null + }, + { + "comment": "C1", + "input": "xn--0ug262c.xn--3xa", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--ynd759e.xn--3xa", + "output": null + }, + { + "comment": "C1", + "input": "\u200c\u2d1a\uff61\u03c2", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\u10ba\uff61\u03a3", + "output": null + }, + { + "comment": "C1", + "input": "\u200c\u2d1a\uff61\u03c3", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u2f95\u3002\u200c\u0310\ua953\ua84e", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u2f95\u3002\u200c\ua953\u0310\ua84e", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u8c37\u3002\u200c\ua953\u0310\ua84e", + "output": null + }, + { + "comment": "V5", + "input": "xn--6g3a.xn--0sa8175flwa", + "output": null + }, + { + "comment": "C1; C2", + "input": "xn--1ug0273b.xn--0sa359l6n7g13a", + "output": null + }, + { + "input": "\u6dfd\u3002\u183e", + "output": "xn--34w.xn--x7e" + }, + { + "input": "xn--34w.xn--x7e", + "output": "xn--34w.xn--x7e" + }, + { + "input": "\u6dfd.\u183e", + "output": "xn--34w.xn--x7e" + }, + { + "comment": "P1; V5; V6", + "input": "\uda72\ude29\u10b3\u2753\uff61\ud804\udd28", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\uda72\ude29\u10b3\u2753\u3002\ud804\udd28", + "output": null + }, + 
{ + "comment": "P1; V5; V6", + "input": "\uda72\ude29\u2d13\u2753\u3002\ud804\udd28", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--8di78qvw32y.xn--k80d", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--rnd896i0j14q.xn--k80d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\uda72\ude29\u2d13\u2753\uff61\ud804\udd28", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u17ff\uff61\ud83a\udf33", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u17ff\u3002\ud83a\udf33", + "output": null + }, + { + "comment": "V6", + "input": "xn--45e.xn--et6h", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u0652\u200d\uff61\u0ccd\ud805\udeb3", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u0652\u200d\u3002\u0ccd\ud805\udeb3", + "output": null + }, + { + "comment": "V5", + "input": "xn--uhb.xn--8tc4527k", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--uhb882k.xn--8tc4527k", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u00df\ud880\udc3b\ud8da\udf17\uff61\ud836\ude68\ud83d\udd6e\u00df", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u00df\ud880\udc3b\ud8da\udf17\u3002\ud836\ude68\ud83d\udd6e\u00df", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "SS\ud880\udc3b\ud8da\udf17\u3002\ud836\ude68\ud83d\udd6eSS", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "ss\ud880\udc3b\ud8da\udf17\u3002\ud836\ude68\ud83d\udd6ess", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "Ss\ud880\udc3b\ud8da\udf17\u3002\ud836\ude68\ud83d\udd6eSs", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--ss-jl59biy67d.xn--ss-4d11aw87d", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--zca20040bgrkh.xn--zca3653v86qa", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "SS\ud880\udc3b\ud8da\udf17\uff61\ud836\ude68\ud83d\udd6eSS", + "output": null + }, + { + "comment": 
"P1; V5; V6", + "input": "ss\ud880\udc3b\ud8da\udf17\uff61\ud836\ude68\ud83d\udd6ess", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "Ss\ud880\udc3b\ud8da\udf17\uff61\ud836\ude68\ud83d\udd6eSs", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u3002\u200c", + "output": null + }, + { + "comment": "C1; C2", + "input": "xn--1ug.xn--0ug", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb41\udc58\uff0e\udb40\udd2e", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb41\udc58.\udb40\udd2e", + "output": null + }, + { + "comment": "V6", + "input": "xn--s136e.", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ua9b7\udb37\udd59\uba79\u3002\u249b\udb42\ude07", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ua9b7\udb37\udd59\u1106\u1167\u11b0\u3002\u249b\udb42\ude07", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ua9b7\udb37\udd59\uba79\u300220.\udb42\ude07", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ua9b7\udb37\udd59\u1106\u1167\u11b0\u300220.\udb42\ude07", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--ym9av13acp85w.20.xn--d846e", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--ym9av13acp85w.xn--dth22121k", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\uff61\ufe12", + "output": null + }, + { + "comment": "C1; A4_2 (ignored)", + "input": "\u200c\u3002\u3002", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": "..", + "output": ".." 
+ }, + { + "comment": "C1; A4_2 (ignored)", + "input": "xn--0ug..", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--y86c", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug.xn--y86c", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u1872-\ud835\udff9.\u00df-\u200c-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u1872-3.\u00df-\u200c-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u1872-3.SS-\u200c-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u1872-3.ss-\u200c-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u1872-3.Ss-\u200c-", + "output": null + }, + { + "comment": "V2 (ignored); V3 (ignored)", + "input": "xn---3-p9o.ss--", + "output": "xn---3-p9o.ss--" + }, + { + "comment": "C1; V3 (ignored)", + "input": "xn---3-p9o.xn--ss---276a", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "xn---3-p9o.xn-----fia9303a", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u1872-\ud835\udff9.SS-\u200c-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u1872-\ud835\udff9.ss-\u200c-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u1872-\ud835\udff9.Ss-\u200c-", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\udb27\udd9c\u1898\u3002\u1a7f\u2ea2", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--ibf35138o.xn--fpfz94g", + "output": null + }, + { + "comment": "P1; V6", + "input": "\uda1c\udda7\ud835\udfef\u3002\u2488\u1a76\ud835\udfda\uda41\ude0c", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\uda1c\udda73\u30021.\u1a762\uda41\ude0c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--3-rj42h.1.xn--2-13k96240l", + "output": null + }, + { + "comment": "V6", + "input": "xn--3-rj42h.xn--2-13k746cq465x", + "output": null + }, + { + "input": 
"\ua860\uff0e\u06f2", + "output": "xn--5c9a.xn--fmb" + }, + { + "input": "\ua860.\u06f2", + "output": "xn--5c9a.xn--fmb" + }, + { + "input": "xn--5c9a.xn--fmb", + "output": "xn--5c9a.xn--fmb" + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ua67d\u200c\ud87e\uddf5\ud83c\udd06\uff61\u200c\ud804\udc42\u1b01", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ua67d\u200c\u9723\ud83c\udd06\uff61\u200c\ud804\udc42\u1b01", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--2q5a751a653w.xn--4sf0725i", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn--0ug4208b2vjuk63a.xn--4sf36u6u4w", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u514e\uff61\u183c\udb43\udd1c\ud805\udeb6\ud807\udc3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u514e\u3002\u183c\udb43\udd1c\ud805\udeb6\ud807\udc3f", + "output": null + }, + { + "comment": "V6", + "input": "xn--b5q.xn--v7e6041kqqd4m251b", + "output": null + }, + { + "comment": "C2", + "input": "\ud835\udfd9\uff61\u200d\ud835\udff8\u200d\u2077", + "output": null + }, + { + "comment": "C2", + "input": "1\u3002\u200d2\u200d7", + "output": null + }, + { + "comment": "C2", + "input": "1.xn--27-l1tb", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\u1868-\uff61\udb43\udecb\ud835\udff7", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\u1868-\u3002\udb43\udecb1", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----z8j.xn--1-5671m", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u10bc\ud9e3\udded\u0f80\u2f87\u3002\u10af\u2640\u200c\u200c", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u10bc\ud9e3\udded\u0f80\u821b\u3002\u10af\u2640\u200c\u200c", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u2d1c\ud9e3\udded\u0f80\u821b\u3002\u2d0f\u2640\u200c\u200c", + "output": null + }, + { + "comment": "V6", + "input": 
"xn--zed372mdj2do3v4h.xn--e5h11w", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--zed372mdj2do3v4h.xn--0uga678bgyh", + "output": null + }, + { + "comment": "V6", + "input": "xn--zed54dz10wo343g.xn--nnd651i", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--zed54dz10wo343g.xn--nnd089ea464d", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u2d1c\ud9e3\udded\u0f80\u2f87\u3002\u2d0f\u2640\u200c\u200c", + "output": null + }, + { + "comment": "C2; V5", + "input": "\ud804\udc46\ud835\udff0.\u200d", + "output": null + }, + { + "comment": "C2; V5", + "input": "\ud804\udc464.\u200d", + "output": null + }, + { + "comment": "V5", + "input": "xn--4-xu7i.", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--4-xu7i.xn--1ug", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud97b\udd18\u10be\u7640\uff61\ud805\ude3f\u200d\u200c\ubdbc", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud97b\udd18\u10be\u7640\uff61\ud805\ude3f\u200d\u200c\u1107\u1170\u11ab", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud97b\udd18\u10be\u7640\u3002\ud805\ude3f\u200d\u200c\ubdbc", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud97b\udd18\u10be\u7640\u3002\ud805\ude3f\u200d\u200c\u1107\u1170\u11ab", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud97b\udd18\u2d1e\u7640\u3002\ud805\ude3f\u200d\u200c\u1107\u1170\u11ab", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud97b\udd18\u2d1e\u7640\u3002\ud805\ude3f\u200d\u200c\ubdbc", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--mlju35u7qx2f.xn--et3bn23n", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn--mlju35u7qx2f.xn--0ugb6122js83c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--2nd6803c7q37d.xn--et3bn23n", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": 
"xn--2nd6803c7q37d.xn--0ugb6122js83c", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud97b\udd18\u2d1e\u7640\uff61\ud805\ude3f\u200d\u200c\u1107\u1170\u11ab", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud97b\udd18\u2d1e\u7640\uff61\ud805\ude3f\u200d\u200c\ubdbc", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "-\ud804\ude36\u248f\uff0e\u248e\ud881\udee2\udb40\udfad", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored); A4_2 (ignored)", + "input": "-\ud804\ude368..7.\ud881\udee2\udb40\udfad", + "output": null + }, + { + "comment": "V6; V3 (ignored); A4_2 (ignored)", + "input": "xn---8-bv5o..7.xn--c35nf1622b", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----scp6252h.xn--zshy411yzpx2d", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u0ecb\u200d\uff0e\u9381\udb43\udc11", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u0ecb\u200d.\u9381\udb43\udc11", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--t8c.xn--iz4a43209d", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--t8c059f.xn--iz4a43209d", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\ud9e5\udef4.-\u1862\u0592\ud836\ude20", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn--ep37b.xn----hec165lho83b", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b\uff0e\u1baa\u03c2\u10a6\u200d", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b.\u1baa\u03c2\u10a6\u200d", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b.\u1baa\u03c2\u2d06\u200d", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b.\u1baa\u03a3\u10a6\u200d", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b.\u1baa\u03c3\u2d06\u200d", + "output": null + }, + { + 
"comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b.\u1baa\u03a3\u2d06\u200d", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--nu4s.xn--4xa153j7im", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--nu4s.xn--4xa153jk8cs1q", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--nu4s.xn--4xa217dxri", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--nu4s.xn--4xa217dxriome", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--nu4s.xn--3xa353jk8cs1q", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--nu4s.xn--3xa417dxriome", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b\uff0e\u1baa\u03c2\u2d06\u200d", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b\uff0e\u1baa\u03a3\u10a6\u200d", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b\uff0e\u1baa\u03c3\u2d06\u200d", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ud8bc\udc2b\uff0e\u1baa\u03a3\u2d06\u200d", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\u2488\u200c\uaaec\ufe12\uff0e\u0acd", + "output": null + }, + { + "comment": "C1; V5; A4_2 (ignored)", + "input": "1.\u200c\uaaec\u3002.\u0acd", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": "1.xn--sv9a..xn--mfc", + "output": null + }, + { + "comment": "C1; V5; A4_2 (ignored)", + "input": "1.xn--0ug7185c..xn--mfc", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--tsh0720cse8b.xn--mfc", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn--0ug78o720myr1c.xn--mfc", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u00df\u200d.\u1bf2\ud8d3\udfbc", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "SS\u200d.\u1bf2\ud8d3\udfbc", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "ss\u200d.\u1bf2\ud8d3\udfbc", + "output": null 
+ }, + { + "comment": "C2; P1; V5; V6", + "input": "Ss\u200d.\u1bf2\ud8d3\udfbc", + "output": null + }, + { + "comment": "V5; V6", + "input": "ss.xn--0zf22107b", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--ss-n1t.xn--0zf22107b", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn--zca870n.xn--0zf22107b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud83d\udd7c\uff0e\uffa0", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud83d\udd7c.\u1160", + "output": null + }, + { + "comment": "V6", + "input": "xn--my8h.xn--psd", + "output": null + }, + { + "comment": "V6", + "input": "xn--my8h.xn--cl7c", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u7215\uda8d\ude51\uff0e\ud835\udff0\u6c17", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u7215\uda8d\ude51.4\u6c17", + "output": null + }, + { + "comment": "V6", + "input": "xn--1zxq3199c.xn--4-678b", + "output": null + }, + { + "comment": "P1; V6; V2 (ignored); V3 (ignored)", + "input": "\udb39\udf43\u3002\uda04\udd83\ud8e6\udc97--", + "output": null + }, + { + "comment": "V6; V2 (ignored); V3 (ignored)", + "input": "xn--2y75e.xn-----1l15eer88n", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u8530\u3002\udb40\udc79\u08dd-\ud804\ude35", + "output": null + }, + { + "comment": "V6", + "input": "xn--sz1a.xn----mrd9984r3dl0i", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u03c2\u10c5\u3002\u075a", + "output": null + }, + { + "input": "\u03c2\u2d25\u3002\u075a", + "output": "xn--3xa403s.xn--epb" + }, + { + "comment": "P1; V6", + "input": "\u03a3\u10c5\u3002\u075a", + "output": null + }, + { + "input": "\u03c3\u2d25\u3002\u075a", + "output": "xn--4xa203s.xn--epb" + }, + { + "input": "\u03a3\u2d25\u3002\u075a", + "output": "xn--4xa203s.xn--epb" + }, + { + "input": "xn--4xa203s.xn--epb", + "output": "xn--4xa203s.xn--epb" + }, + { + "input": "\u03c3\u2d25.\u075a", + "output": "xn--4xa203s.xn--epb" + }, + { 
+ "comment": "P1; V6", + "input": "\u03a3\u10c5.\u075a", + "output": null + }, + { + "input": "\u03a3\u2d25.\u075a", + "output": "xn--4xa203s.xn--epb" + }, + { + "comment": "V6", + "input": "xn--4xa477d.xn--epb", + "output": null + }, + { + "input": "xn--3xa403s.xn--epb", + "output": "xn--3xa403s.xn--epb" + }, + { + "input": "\u03c2\u2d25.\u075a", + "output": "xn--3xa403s.xn--epb" + }, + { + "comment": "V6", + "input": "xn--3xa677d.xn--epb", + "output": null + }, + { + "input": "xn--vkb.xn--08e172a", + "output": "xn--vkb.xn--08e172a" + }, + { + "input": "\u06bc.\u1e8f\u1864", + "output": "xn--vkb.xn--08e172a" + }, + { + "input": "\u06bc.y\u0307\u1864", + "output": "xn--vkb.xn--08e172a" + }, + { + "input": "\u06bc.Y\u0307\u1864", + "output": "xn--vkb.xn--08e172a" + }, + { + "input": "\u06bc.\u1e8e\u1864", + "output": "xn--vkb.xn--08e172a" + }, + { + "comment": "V6", + "input": "xn--pt9c.xn--hnd666l", + "output": null + }, + { + "input": "xn--pt9c.xn--0kjya", + "output": "xn--pt9c.xn--0kjya" + }, + { + "input": "\ud802\ude57.\u2d09\u2d15", + "output": "xn--pt9c.xn--0kjya" + }, + { + "comment": "P1; V6", + "input": "\ud802\ude57.\u10a9\u10b5", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud802\ude57.\u10a9\u2d15", + "output": null + }, + { + "comment": "V6", + "input": "xn--pt9c.xn--hndy", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\u200c\u200c\u3124\uff0e\u032e\udb16\ude11\u09c2", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\u200c\u200c\u3124.\u032e\udb16\ude11\u09c2", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--1fk.xn--vta284a9o563a", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn--0uga242k.xn--vta284a9o563a", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10b4\ud836\ude28\u2083\udb40\udc66\uff0e\ud835\udff3\ud804\udcb9\u0b82", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10b4\ud836\ude283\udb40\udc66.7\ud804\udcb9\u0b82", + 
"output": null + }, + { + "comment": "P1; V6", + "input": "\u2d14\ud836\ude283\udb40\udc66.7\ud804\udcb9\u0b82", + "output": null + }, + { + "comment": "V6", + "input": "xn--3-ews6985n35s3g.xn--7-cve6271r", + "output": null + }, + { + "comment": "V6", + "input": "xn--3-b1g83426a35t0g.xn--7-cve6271r", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d14\ud836\ude28\u2083\udb40\udc66\uff0e\ud835\udff3\ud804\udcb9\u0b82", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u43c8\u200c\u3002\u200c\u2488\ud986\udc95", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u43c8\u200c\u3002\u200c1.\ud986\udc95", + "output": null + }, + { + "comment": "V6", + "input": "xn--eco.1.xn--ms39a", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug491l.xn--1-rgn.xn--ms39a", + "output": null + }, + { + "comment": "V6", + "input": "xn--eco.xn--tsh21126d", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug491l.xn--0ug88oot66q", + "output": null + }, + { + "comment": "V5", + "input": "\uff11\uaaf6\u00df\ud807\udca5\uff61\u1dd8", + "output": null + }, + { + "comment": "V5", + "input": "1\uaaf6\u00df\ud807\udca5\u3002\u1dd8", + "output": null + }, + { + "comment": "V5", + "input": "1\uaaf6SS\ud807\udca5\u3002\u1dd8", + "output": null + }, + { + "comment": "V5", + "input": "1\uaaf6ss\ud807\udca5\u3002\u1dd8", + "output": null + }, + { + "comment": "V5", + "input": "xn--1ss-ir6ln166b.xn--weg", + "output": null + }, + { + "comment": "V5", + "input": "xn--1-qfa2471kdb0d.xn--weg", + "output": null + }, + { + "comment": "V5", + "input": "\uff11\uaaf6SS\ud807\udca5\uff61\u1dd8", + "output": null + }, + { + "comment": "V5", + "input": "\uff11\uaaf6ss\ud807\udca5\uff61\u1dd8", + "output": null + }, + { + "comment": "V5", + "input": "1\uaaf6Ss\ud807\udca5\u3002\u1dd8", + "output": null + }, + { + "comment": "V5", + "input": "\uff11\uaaf6Ss\ud807\udca5\uff61\u1dd8", + "output": null + }, + { + "comment": "V6", + "input": 
"xn--3j78f.xn--mkb20b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud88a\udd31\u249b\u2fb3\uff0e\ua866\u2488", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud88a\udd3120.\u97f3.\ua8661.", + "output": null + }, + { + "comment": "V6", + "input": "xn--20-9802c.xn--0w5a.xn--1-eg4e.", + "output": null + }, + { + "comment": "V6", + "input": "xn--dth6033bzbvx.xn--tsh9439b", + "output": null + }, + { + "input": "xn--ge6h.xn--oc9a", + "output": "xn--ge6h.xn--oc9a" + }, + { + "input": "\ud83a\udd28.\ua84f", + "output": "xn--ge6h.xn--oc9a" + }, + { + "input": "\ud83a\udd06.\ua84f", + "output": "xn--ge6h.xn--oc9a" + }, + { + "comment": "C1; P1; V6; V3 (ignored)", + "input": "\u200c.\u00df\u10a9-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u200c.\u00df\u2d09-", + "output": null + }, + { + "comment": "C1; P1; V6; V3 (ignored)", + "input": "\u200c.SS\u10a9-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u200c.ss\u2d09-", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "\u200c.Ss\u2d09-", + "output": null + }, + { + "comment": "V3 (ignored); A4_2 (ignored)", + "input": ".xn--ss--bi1b", + "output": ".xn--ss--bi1b" + }, + { + "comment": "C1; V3 (ignored)", + "input": "xn--0ug.xn--ss--bi1b", + "output": null + }, + { + "comment": "V6; V3 (ignored); A4_2 (ignored)", + "input": ".xn--ss--4rn", + "output": null + }, + { + "comment": "C1; V6; V3 (ignored)", + "input": "xn--0ug.xn--ss--4rn", + "output": null + }, + { + "comment": "C1; V3 (ignored)", + "input": "xn--0ug.xn----pfa2305a", + "output": null + }, + { + "comment": "C1; V6; V3 (ignored)", + "input": "xn--0ug.xn----pfa042j", + "output": null + }, + { + "input": "\u9f59--\ud835\udff0.\u00df", + "output": "xn----4-p16k.xn--zca" + }, + { + "input": "\u9f59--4.\u00df", + "output": "xn----4-p16k.xn--zca" + }, + { + "input": "\u9f59--4.SS", + "output": "xn----4-p16k.ss" + }, + { + "input": "\u9f59--4.ss", + "output": 
"xn----4-p16k.ss" + }, + { + "input": "\u9f59--4.Ss", + "output": "xn----4-p16k.ss" + }, + { + "input": "xn----4-p16k.ss", + "output": "xn----4-p16k.ss" + }, + { + "input": "xn----4-p16k.xn--zca", + "output": "xn----4-p16k.xn--zca" + }, + { + "input": "\u9f59--\ud835\udff0.SS", + "output": "xn----4-p16k.ss" + }, + { + "input": "\u9f59--\ud835\udff0.ss", + "output": "xn----4-p16k.ss" + }, + { + "input": "\u9f59--\ud835\udff0.Ss", + "output": "xn----4-p16k.ss" + }, + { + "comment": "C2; P1; V6", + "input": "\udacf\udc99\udb40\uded8\uff61?-\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\udacf\udc99\udb40\uded8\u3002?-\u200d", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "xn--ct86d8w51a.?-", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "xn--ct86d8w51a.xn--?--n1t", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "xn--ct86d8w51a.?-\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "XN--CT86D8W51A.?-\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "Xn--Ct86d8w51a.?-\u200d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud836\ude9e\u10b0\uff61\ucaa1", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud836\ude9e\u10b0\uff61\u110d\u1168\u11a8", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud836\ude9e\u10b0\u3002\ucaa1", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud836\ude9e\u10b0\u3002\u110d\u1168\u11a8", + "output": null + }, + { + "comment": "V5", + "input": "\ud836\ude9e\u2d10\u3002\u110d\u1168\u11a8", + "output": null + }, + { + "comment": "V5", + "input": "\ud836\ude9e\u2d10\u3002\ucaa1", + "output": null + }, + { + "comment": "V5", + "input": "xn--7kj1858k.xn--pi6b", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--ond3755u.xn--pi6b", + "output": null + }, + { + "comment": "V5", + "input": "\ud836\ude9e\u2d10\uff61\u110d\u1168\u11a8", + 
"output": null + }, + { + "comment": "V5", + "input": "\ud836\ude9e\u2d10\uff61\ucaa1", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u1845\uff10\u200c\uff61\u23a2\udb52\ude04", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u18450\u200c\u3002\u23a2\udb52\ude04", + "output": null + }, + { + "comment": "V6", + "input": "xn--0-z6j.xn--8lh28773l", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0-z6jy93b.xn--8lh28773l", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud88a\udf9a\uff19\ua369\u17d3\uff0e\u200d\u00df", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud88a\udf9a9\ua369\u17d3.\u200d\u00df", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud88a\udf9a9\ua369\u17d3.\u200dSS", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud88a\udf9a9\ua369\u17d3.\u200dss", + "output": null + }, + { + "comment": "V6", + "input": "xn--9-i0j5967eg3qz.ss", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--9-i0j5967eg3qz.xn--ss-l1t", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--9-i0j5967eg3qz.xn--zca770n", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud88a\udf9a\uff19\ua369\u17d3\uff0e\u200dSS", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud88a\udf9a\uff19\ua369\u17d3\uff0e\u200dss", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud88a\udf9a9\ua369\u17d3.\u200dSs", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\ud88a\udf9a\uff19\ua369\u17d3\uff0e\u200dSs", + "output": null + }, + { + "input": "\ua5f7\ud804\udd80.\u075d\ud802\ude52", + "output": "xn--ju8a625r.xn--hpb0073k" + }, + { + "input": "xn--ju8a625r.xn--hpb0073k", + "output": "xn--ju8a625r.xn--hpb0073k" + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "\u10af\udb40\udd4b-\uff0e\u200d\u10a9", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": 
"\u10af\udb40\udd4b-.\u200d\u10a9", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "\u2d0f\udb40\udd4b-.\u200d\u2d09", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "xn----3vs.xn--0kj", + "output": "xn----3vs.xn--0kj" + }, + { + "comment": "C2; V3 (ignored)", + "input": "xn----3vs.xn--1ug532c", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----00g.xn--hnd", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn----00g.xn--hnd399e", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "\u2d0f\udb40\udd4b-\uff0e\u200d\u2d09", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\u1714\u3002\udb40\udda3-\ud804\udeea", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn--fze.xn----ly8i", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\uabe8-\uff0e\uda60\udfdc\u05bd\u00df", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\uabe8-.\uda60\udfdc\u05bd\u00df", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\uabe8-.\uda60\udfdc\u05bdSS", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\uabe8-.\uda60\udfdc\u05bdss", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\uabe8-.\uda60\udfdc\u05bdSs", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----pw5e.xn--ss-7jd10716y", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----pw5e.xn--zca50wfv060a", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\uabe8-\uff0e\uda60\udfdc\u05bdSS", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\uabe8-\uff0e\uda60\udfdc\u05bdss", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\uabe8-\uff0e\uda60\udfdc\u05bdSs", + "output": null + }, + { + "comment": 
"V5", + "input": "\ud835\udfe5\u266e\ud805\udf2b\u08ed\uff0e\u17d2\ud805\udf2b8\udb40\udd8f", + "output": null + }, + { + "comment": "V5", + "input": "3\u266e\ud805\udf2b\u08ed.\u17d2\ud805\udf2b8\udb40\udd8f", + "output": null + }, + { + "comment": "V5", + "input": "xn--3-ksd277tlo7s.xn--8-f0jx021l", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "-\uff61\uda14\udf00\u200d\u2761", + "output": null + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "-\u3002\uda14\udf00\u200d\u2761", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "-.xn--nei54421f", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "-.xn--1ug800aq795s", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ud835\udfd3\u2631\ud835\udfd0\uda57\udc35\uff61\ud836\udeae\ud902\udc73", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "5\u26312\uda57\udc35\u3002\ud836\udeae\ud902\udc73", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--52-dwx47758j.xn--kd3hk431k", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "-.-\u251c\uda1a\udda3", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "-.xn----ukp70432h", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u03c2\uff0e\ufdc1\ud83d\udf9b\u2488", + "output": null + }, + { + "input": "\u03c2.\u0641\u0645\u064a\ud83d\udf9b1.", + "output": "xn--3xa.xn--1-gocmu97674d." + }, + { + "input": "\u03a3.\u0641\u0645\u064a\ud83d\udf9b1.", + "output": "xn--4xa.xn--1-gocmu97674d." + }, + { + "input": "\u03c3.\u0641\u0645\u064a\ud83d\udf9b1.", + "output": "xn--4xa.xn--1-gocmu97674d." + }, + { + "input": "xn--4xa.xn--1-gocmu97674d.", + "output": "xn--4xa.xn--1-gocmu97674d." + }, + { + "input": "xn--3xa.xn--1-gocmu97674d.", + "output": "xn--3xa.xn--1-gocmu97674d." 
+ }, + { + "comment": "P1; V6", + "input": "\u03a3\uff0e\ufdc1\ud83d\udf9b\u2488", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u03c3\uff0e\ufdc1\ud83d\udf9b\u2488", + "output": null + }, + { + "comment": "V6", + "input": "xn--4xa.xn--dhbip2802atb20c", + "output": null + }, + { + "comment": "V6", + "input": "xn--3xa.xn--dhbip2802atb20c", + "output": null + }, + { + "comment": "P1; V6", + "input": "9\udb40\udde5\uff0e\udb6b\udd34\u1893", + "output": null + }, + { + "comment": "P1; V6", + "input": "9\udb40\udde5.\udb6b\udd34\u1893", + "output": null + }, + { + "comment": "V6", + "input": "9.xn--dbf91222q", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\ufe12\u10b6\u0366\uff0e\u200c", + "output": null + }, + { + "comment": "C1; P1; V6; A4_2 (ignored)", + "input": "\u3002\u10b6\u0366.\u200c", + "output": null + }, + { + "comment": "C1; A4_2 (ignored)", + "input": "\u3002\u2d16\u0366.\u200c", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": ".xn--hva754s.", + "output": ".xn--hva754s." + }, + { + "comment": "C1; A4_2 (ignored)", + "input": ".xn--hva754s.xn--0ug", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--hva929d.", + "output": null + }, + { + "comment": "C1; V6; A4_2 (ignored)", + "input": ".xn--hva929d.xn--0ug", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\ufe12\u2d16\u0366\uff0e\u200c", + "output": null + }, + { + "comment": "V6", + "input": "xn--hva754sy94k.", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--hva754sy94k.xn--0ug", + "output": null + }, + { + "comment": "V6", + "input": "xn--hva929dl29p.", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--hva929dl29p.xn--0ug", + "output": null + }, + { + "input": "xn--hva754s.", + "output": "xn--hva754s." + }, + { + "input": "\u2d16\u0366.", + "output": "xn--hva754s." 
+ }, + { + "comment": "P1; V6", + "input": "\u10b6\u0366.", + "output": null + }, + { + "comment": "V6", + "input": "xn--hva929d.", + "output": null + }, + { + "input": "xn--hzb.xn--ukj4430l", + "output": "xn--hzb.xn--ukj4430l" + }, + { + "input": "\u08bb.\u2d03\ud838\udc12", + "output": "xn--hzb.xn--ukj4430l" + }, + { + "comment": "P1; V6", + "input": "\u08bb.\u10a3\ud838\udc12", + "output": null + }, + { + "comment": "V6", + "input": "xn--hzb.xn--bnd2938u", + "output": null + }, + { + "comment": "C1; C2; P1; V6", + "input": "\u200d\u200c\u3002\uff12\u4af7\udb42\uddf7", + "output": null + }, + { + "comment": "C1; C2; P1; V6", + "input": "\u200d\u200c\u30022\u4af7\udb42\uddf7", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--2-me5ay1273i", + "output": null + }, + { + "comment": "C1; C2; V6", + "input": "xn--0ugb.xn--2-me5ay1273i", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "-\ud838\udc24\udb32\udc10\u3002\ud9e2\udf16", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----rq4re4997d.xn--l707b", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\udb8d\udec2\ufe12\u200c\u37c0\uff0e\u0624\u2488", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\udb8d\udec2\ufe12\u200c\u37c0\uff0e\u0648\u0654\u2488", + "output": null + }, + { + "comment": "V6", + "input": "xn--z272f.xn--etl.xn--1-smc.", + "output": null + }, + { + "comment": "V6", + "input": "xn--etlt457ccrq7h.xn--jgb476m", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug754gxl4ldlt0k.xn--jgb476m", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u07fc\ud803\ude06.\ud80d\udd8f\ufe12\ud8ea\ude29\u10b0", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u07fc\ud803\ude06.\ud80d\udd8f\u3002\ud8ea\ude29\u10b0", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u07fc\ud803\ude06.\ud80d\udd8f\u3002\ud8ea\ude29\u2d10", + "output": null + }, + { + 
"comment": "V6", + "input": "xn--0tb8725k.xn--tu8d.xn--7kj73887a", + "output": null + }, + { + "comment": "V6", + "input": "xn--0tb8725k.xn--tu8d.xn--ond97931d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u07fc\ud803\ude06.\ud80d\udd8f\ufe12\ud8ea\ude29\u2d10", + "output": null + }, + { + "comment": "V6", + "input": "xn--0tb8725k.xn--7kj9008dt18a7py9c", + "output": null + }, + { + "comment": "V6", + "input": "xn--0tb8725k.xn--ond3562jt18a7py9c", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u10c5\u26ad\udb41\uddab\u22c3\uff61\ud804\udf3c", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u10c5\u26ad\udb41\uddab\u22c3\u3002\ud804\udf3c", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u2d25\u26ad\udb41\uddab\u22c3\u3002\ud804\udf3c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--vfh16m67gx1162b.xn--ro1d", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--9nd623g4zc5z060c.xn--ro1d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u2d25\u26ad\udb41\uddab\u22c3\uff61\ud804\udf3c", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "\udb40\udd93\u26cf-\u3002\ua852", + "output": "xn----o9p.xn--rc9a" + }, + { + "comment": "V3 (ignored)", + "input": "xn----o9p.xn--rc9a", + "output": "xn----o9p.xn--rc9a" + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\u650c\uabed\u3002\u1896-\u10b8", + "output": null + }, + { + "comment": "C2", + "input": "\u200d\u650c\uabed\u3002\u1896-\u2d18", + "output": null + }, + { + "input": "xn--p9ut19m.xn----mck373i", + "output": "xn--p9ut19m.xn----mck373i" + }, + { + "input": "\u650c\uabed.\u1896-\u2d18", + "output": "xn--p9ut19m.xn----mck373i" + }, + { + "comment": "P1; V6", + "input": "\u650c\uabed.\u1896-\u10b8", + "output": null + }, + { + "comment": "V6", + "input": "xn--p9ut19m.xn----k1g451d", + "output": null + }, + { + "comment": "C2", + "input": "xn--1ug592ykp6b.xn----mck373i", + "output": 
null + }, + { + "comment": "C2; V6", + "input": "xn--1ug592ykp6b.xn----k1g451d", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\ua5a8\uff0e\u2497\uff13\ud212\u06f3", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\ua5a8\uff0e\u2497\uff13\u1110\u116d\u11a9\u06f3", + "output": null + }, + { + "comment": "C1", + "input": "\u200c\ua5a8.16.3\ud212\u06f3", + "output": null + }, + { + "comment": "C1", + "input": "\u200c\ua5a8.16.3\u1110\u116d\u11a9\u06f3", + "output": null + }, + { + "input": "xn--9r8a.16.xn--3-nyc0117m", + "output": "xn--9r8a.16.xn--3-nyc0117m" + }, + { + "input": "\ua5a8.16.3\ud212\u06f3", + "output": "xn--9r8a.16.xn--3-nyc0117m" + }, + { + "input": "\ua5a8.16.3\u1110\u116d\u11a9\u06f3", + "output": "xn--9r8a.16.xn--3-nyc0117m" + }, + { + "comment": "C1", + "input": "xn--0ug2473c.16.xn--3-nyc0117m", + "output": null + }, + { + "comment": "V6", + "input": "xn--9r8a.xn--3-nyc678tu07m", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug2473c.xn--3-nyc678tu07m", + "output": null + }, + { + "comment": "C2", + "input": "\ud835\udfcf\ud836\ude19\u2e16.\u200d", + "output": null + }, + { + "comment": "C2", + "input": "1\ud836\ude19\u2e16.\u200d", + "output": null + }, + { + "input": "xn--1-5bt6845n.", + "output": "xn--1-5bt6845n." + }, + { + "input": "1\ud836\ude19\u2e16.", + "output": "xn--1-5bt6845n." 
+ }, + { + "comment": "C2", + "input": "xn--1-5bt6845n.xn--1ug", + "output": null + }, + { + "comment": "P1; V6", + "input": "F\udb40\udd5f\uff61\ud9fd\uddc5\u265a", + "output": null + }, + { + "comment": "P1; V6", + "input": "F\udb40\udd5f\u3002\ud9fd\uddc5\u265a", + "output": null + }, + { + "comment": "P1; V6", + "input": "f\udb40\udd5f\u3002\ud9fd\uddc5\u265a", + "output": null + }, + { + "comment": "V6", + "input": "f.xn--45hz6953f", + "output": null + }, + { + "comment": "P1; V6", + "input": "f\udb40\udd5f\uff61\ud9fd\uddc5\u265a", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0b4d\ud804\udd34\u1de9\u3002\ud835\udfee\u10b8\ud838\udc28\ud8ce\udd47", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0b4d\ud804\udd34\u1de9\u30022\u10b8\ud838\udc28\ud8ce\udd47", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0b4d\ud804\udd34\u1de9\u30022\u2d18\ud838\udc28\ud8ce\udd47", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--9ic246gs21p.xn--2-nws2918ndrjr", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--9ic246gs21p.xn--2-k1g43076adrwq", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u0b4d\ud804\udd34\u1de9\u3002\ud835\udfee\u2d18\ud838\udc28\ud8ce\udd47", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\uda0e\udc2d\u200c\u200c\u2488\u3002\u52c9\ud804\udc45", + "output": null + }, + { + "comment": "C1; P1; V6; A4_2 (ignored)", + "input": "\uda0e\udc2d\u200c\u200c1.\u3002\u52c9\ud804\udc45", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": "xn--1-yi00h..xn--4grs325b", + "output": null + }, + { + "comment": "C1; V6; A4_2 (ignored)", + "input": "xn--1-rgna61159u..xn--4grs325b", + "output": null + }, + { + "comment": "V6", + "input": "xn--tsh11906f.xn--4grs325b", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0uga855aez302a.xn--4grs325b", + "output": null + }, + { + "comment": "P1; V6", + "input": 
"\u1843.\u73bf\ud96c\ude1c\udb15\udf90", + "output": null + }, + { + "comment": "V6", + "input": "xn--27e.xn--7cy81125a0yq4a", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\u20da\uff0e\ud805\ude3f-", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\u20da.\ud805\ude3f-", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn--w0g.xn----bd0j", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u1082-\u200d\ua8ea\uff0e\ua84a\u200d\ud9b3\ude33", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\u1082-\u200d\ua8ea.\ua84a\u200d\ud9b3\ude33", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn----gyg3618i.xn--jc9ao4185a", + "output": null + }, + { + "comment": "C2; V5; V6", + "input": "xn----gyg250jio7k.xn--1ug8774cri56d", + "output": null + }, + { + "comment": "V5", + "input": "\ud804\ude35\u5eca.\ud802\udc0d", + "output": null + }, + { + "comment": "V5", + "input": "xn--xytw701b.xn--yc9c", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10be\ud899\udec0\ud82d\uddfb\uff0e\u1897\ub9ab", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10be\ud899\udec0\ud82d\uddfb\uff0e\u1897\u1105\u1174\u11c2", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10be\ud899\udec0\ud82d\uddfb.\u1897\ub9ab", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10be\ud899\udec0\ud82d\uddfb.\u1897\u1105\u1174\u11c2", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d1e\ud899\udec0\ud82d\uddfb.\u1897\u1105\u1174\u11c2", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d1e\ud899\udec0\ud82d\uddfb.\u1897\ub9ab", + "output": null + }, + { + "comment": "V6", + "input": "xn--mlj0486jgl2j.xn--hbf6853f", + "output": null + }, + { + "comment": "V6", + "input": "xn--2nd8876sgl2j.xn--hbf6853f", + "output": null + }, + { + "comment": "P1; V6", + "input": 
"\u2d1e\ud899\udec0\ud82d\uddfb\uff0e\u1897\u1105\u1174\u11c2", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d1e\ud899\udec0\ud82d\uddfb\uff0e\u1897\ub9ab", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u00df\u200d\u103a\uff61\u2488", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "SS\u200d\u103a\uff61\u2488", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "ss\u200d\u103a\uff61\u2488", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "Ss\u200d\u103a\uff61\u2488", + "output": null + }, + { + "comment": "V6", + "input": "xn--ss-f4j.xn--tsh", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--ss-f4j585j.xn--tsh", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--zca679eh2l.xn--tsh", + "output": null + }, + { + "input": "\u06cc\ud802\ude3f\uff0e\u00df\u0f84\ud804\udf6c", + "output": "xn--clb2593k.xn--zca216edt0r" + }, + { + "input": "\u06cc\ud802\ude3f.\u00df\u0f84\ud804\udf6c", + "output": "xn--clb2593k.xn--zca216edt0r" + }, + { + "input": "\u06cc\ud802\ude3f.SS\u0f84\ud804\udf6c", + "output": "xn--clb2593k.xn--ss-toj6092t" + }, + { + "input": "\u06cc\ud802\ude3f.ss\u0f84\ud804\udf6c", + "output": "xn--clb2593k.xn--ss-toj6092t" + }, + { + "input": "xn--clb2593k.xn--ss-toj6092t", + "output": "xn--clb2593k.xn--ss-toj6092t" + }, + { + "input": "xn--clb2593k.xn--zca216edt0r", + "output": "xn--clb2593k.xn--zca216edt0r" + }, + { + "input": "\u06cc\ud802\ude3f\uff0eSS\u0f84\ud804\udf6c", + "output": "xn--clb2593k.xn--ss-toj6092t" + }, + { + "input": "\u06cc\ud802\ude3f\uff0ess\u0f84\ud804\udf6c", + "output": "xn--clb2593k.xn--ss-toj6092t" + }, + { + "input": "\u06cc\ud802\ude3f.Ss\u0f84\ud804\udf6c", + "output": "xn--clb2593k.xn--ss-toj6092t" + }, + { + "input": "\u06cc\ud802\ude3f\uff0eSs\u0f84\ud804\udf6c", + "output": "xn--clb2593k.xn--ss-toj6092t" + }, + { + "comment": "V5; V3 (ignored)", + "input": "\u0f9f\uff0e-\u082a", + "output": null + }, + { + 
"comment": "V5; V3 (ignored)", + "input": "\u0f9f.-\u082a", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn--vfd.xn----fhd", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1d6c\udb40\udda0\uff0e\ud552\u2492\u2488\udbe0\udd26", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1d6c\udb40\udda0\uff0e\u1111\u1175\u11bd\u2492\u2488\udbe0\udd26", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1d6c\udb40\udda0.\ud55211.1.\udbe0\udd26", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1d6c\udb40\udda0.\u1111\u1175\u11bd11.1.\udbe0\udd26", + "output": null + }, + { + "comment": "V6", + "input": "xn--tbg.xn--11-5o7k.1.xn--k469f", + "output": null + }, + { + "comment": "V6", + "input": "xn--tbg.xn--tsht7586kyts9l", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2488\u270c\uda3e\udf1f\uff0e\ud835\udfe1\ud943\udc63", + "output": null + }, + { + "comment": "P1; V6", + "input": "1.\u270c\uda3e\udf1f.9\ud943\udc63", + "output": null + }, + { + "comment": "V6", + "input": "1.xn--7bi44996f.xn--9-o706d", + "output": null + }, + { + "comment": "V6", + "input": "xn--tsh24g49550b.xn--9-o706d", + "output": null + }, + { + "comment": "V5", + "input": "\u03c2\uff0e\ua9c0\ua8c4", + "output": null + }, + { + "comment": "V5", + "input": "\u03c2.\ua9c0\ua8c4", + "output": null + }, + { + "comment": "V5", + "input": "\u03a3.\ua9c0\ua8c4", + "output": null + }, + { + "comment": "V5", + "input": "\u03c3.\ua9c0\ua8c4", + "output": null + }, + { + "comment": "V5", + "input": "xn--4xa.xn--0f9ars", + "output": null + }, + { + "comment": "V5", + "input": "xn--3xa.xn--0f9ars", + "output": null + }, + { + "comment": "V5", + "input": "\u03a3\uff0e\ua9c0\ua8c4", + "output": null + }, + { + "comment": "V5", + "input": "\u03c3\uff0e\ua9c0\ua8c4", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2786\ud99e\uddd5\u1ed7\u2488\uff0e\uda06\udf12\ud945\ude2e\u085b\ud835\udfeb", + "output": null + }, 
+ { + "comment": "P1; V6", + "input": "\u2786\ud99e\uddd5o\u0302\u0303\u2488\uff0e\uda06\udf12\ud945\ude2e\u085b\ud835\udfeb", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u2786\ud99e\uddd5\u1ed71..\uda06\udf12\ud945\ude2e\u085b9", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u2786\ud99e\uddd5o\u0302\u03031..\uda06\udf12\ud945\ude2e\u085b9", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u2786\ud99e\uddd5O\u0302\u03031..\uda06\udf12\ud945\ude2e\u085b9", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u2786\ud99e\uddd5\u1ed61..\uda06\udf12\ud945\ude2e\u085b9", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": "xn--1-3xm292b6044r..xn--9-6jd87310jtcqs", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2786\ud99e\uddd5O\u0302\u0303\u2488\uff0e\uda06\udf12\ud945\ude2e\u085b\ud835\udfeb", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2786\ud99e\uddd5\u1ed6\u2488\uff0e\uda06\udf12\ud945\ude2e\u085b\ud835\udfeb", + "output": null + }, + { + "comment": "V6", + "input": "xn--6lg26tvvc6v99z.xn--9-6jd87310jtcqs", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": ".xn--ye6h", + "output": ".xn--ye6h" + }, + { + "input": "xn--ye6h", + "output": "xn--ye6h" + }, + { + "input": "\ud83a\udd3a", + "output": "xn--ye6h" + }, + { + "input": "\ud83a\udd18", + "output": "xn--ye6h" + }, + { + "comment": "C1; P1; V5; V6; V3 (ignored)", + "input": "\u073c\u200c-\u3002\ud80d\udc3e\u00df", + "output": null + }, + { + "comment": "C1; P1; V5; V6; V3 (ignored)", + "input": "\u073c\u200c-\u3002\ud80d\udc3eSS", + "output": null + }, + { + "comment": "C1; P1; V5; V6; V3 (ignored)", + "input": "\u073c\u200c-\u3002\ud80d\udc3ess", + "output": null + }, + { + "comment": "C1; P1; V5; V6; V3 (ignored)", + "input": "\u073c\u200c-\u3002\ud80d\udc3eSs", + "output": null + }, + { + "comment": "V5; V6; V3 
(ignored)", + "input": "xn----s2c.xn--ss-066q", + "output": null + }, + { + "comment": "C1; V5; V6; V3 (ignored)", + "input": "xn----s2c071q.xn--ss-066q", + "output": null + }, + { + "comment": "C1; V5; V6; V3 (ignored)", + "input": "xn----s2c071q.xn--zca7848m", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "-\uda9d\udf6c\u135e\ud805\udf27.\u1deb-\ufe12", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "-\uda9d\udf6c\u135e\ud805\udf27.\u1deb-\u3002", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----b5h1837n2ok9f.xn----mkm.", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----b5h1837n2ok9f.xn----mkmw278h", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ufe12.\uda2a\udc21\u1a59", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u3002.\uda2a\udc21\u1a59", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": "..xn--cof61594i", + "output": null + }, + { + "comment": "V6", + "input": "xn--y86c.xn--cof61594i", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\ud807\udc3a.-\uda05\udfcf", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn--jk3d.xn----iz68g", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb43\udee9\uff0e\u8d4f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb43\udee9.\u8d4f", + "output": null + }, + { + "comment": "V6", + "input": "xn--2856e.xn--6o3a", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u10ad\uff0e\ud8f4\udde6\u200c", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u10ad.\ud8f4\udde6\u200c", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u2d0d.\ud8f4\udde6\u200c", + "output": null + }, + { + "comment": "V6", + "input": "xn--4kj.xn--p01x", + "output": null + }, + { + "comment": "C1; V6", + "input": 
"xn--4kj.xn--0ug56448b", + "output": null + }, + { + "comment": "V6", + "input": "xn--lnd.xn--p01x", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--lnd.xn--0ug56448b", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u2d0d\uff0e\ud8f4\udde6\u200c", + "output": null + }, + { + "input": "\ud835\udfdb\uff0e\uf9f8", + "output": "3.xn--6vz" + }, + { + "input": "\ud835\udfdb\uff0e\u7b20", + "output": "3.xn--6vz" + }, + { + "input": "3.\u7b20", + "output": "3.xn--6vz" + }, + { + "input": "3.xn--6vz", + "output": "3.xn--6vz" + }, + { + "comment": "C2; P1; V6; V3 (ignored)", + "input": "-\u200d.\u10be\ud800\udef7", + "output": null + }, + { + "comment": "C2; V3 (ignored)", + "input": "-\u200d.\u2d1e\ud800\udef7", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "-.xn--mlj8559d", + "output": "-.xn--mlj8559d" + }, + { + "comment": "C2; V3 (ignored)", + "input": "xn----ugn.xn--mlj8559d", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "-.xn--2nd2315j", + "output": null + }, + { + "comment": "C2; V6; V3 (ignored)", + "input": "xn----ugn.xn--2nd2315j", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03c2\u00df\u0731\uff0e\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03c2\u00df\u0731.\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03a3SS\u0731.\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03c3ss\u0731.\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03a3ss\u0731.\u0bcd", + "output": null + }, + { + "comment": "V5", + "input": "xn--ss-ubc826a.xn--xmc", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--ss-ubc826ab34b.xn--xmc", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03a3\u00df\u0731.\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03c3\u00df\u0731.\u0bcd", + "output": null + }, + { + "comment": 
"C2; V5", + "input": "xn--zca39lk1di19a.xn--xmc", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--zca19ln1di19a.xn--xmc", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03a3SS\u0731\uff0e\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03c3ss\u0731\uff0e\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03a3ss\u0731\uff0e\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03a3\u00df\u0731\uff0e\u0bcd", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u200d\u03c3\u00df\u0731\uff0e\u0bcd", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03c2\uff0e\u03c2\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03c2.\u03c2\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03a3.\u03a3\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03c3.\u03c2\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03c3.\u03c3\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03a3.\u03c3\ud802\ude3f", + "output": null + }, + { + "comment": "V6", + "input": "xn--4xa502av8297a.xn--4xa6055k", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03a3.\u03c2\ud802\ude3f", + "output": null + }, + { + "comment": "V6", + "input": "xn--4xa502av8297a.xn--3xa8055k", + "output": null + }, + { + "comment": "V6", + "input": "xn--3xa702av8297a.xn--3xa8055k", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03a3\uff0e\u03a3\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03c3\uff0e\u03c2\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03c3\uff0e\u03c3\ud802\ude3f", + "output": 
null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03a3\uff0e\u03c3\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb5c\udef5\u09cd\u03a3\uff0e\u03c2\ud802\ude3f", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud94e\udd12\uff61\ub967", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud94e\udd12\uff61\u1105\u1172\u11b6", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud94e\udd12\u3002\ub967", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud94e\udd12\u3002\u1105\u1172\u11b6", + "output": null + }, + { + "comment": "V6", + "input": "xn--s264a.xn--pw2b", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1846\ud805\udcdd\uff0e\ud83b\udd46", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u1846\ud805\udcdd.\ud83b\udd46", + "output": null + }, + { + "comment": "V6", + "input": "xn--57e0440k.xn--k86h", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udbef\udfe6\uff61\u183d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udbef\udfe6\u3002\u183d", + "output": null + }, + { + "comment": "V6", + "input": "xn--j890g.xn--w7e", + "output": null + }, + { + "comment": "C2", + "input": "\u5b03\ud834\udf4c\uff0e\u200d\u0b44", + "output": null + }, + { + "comment": "C2", + "input": "\u5b03\ud834\udf4c.\u200d\u0b44", + "output": null + }, + { + "comment": "V5", + "input": "xn--b6s0078f.xn--0ic", + "output": null + }, + { + "comment": "C2", + "input": "xn--b6s0078f.xn--0ic557h", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c.\ud93d\udee4", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--q823a", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug.xn--q823a", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udaa9\uded5\u10a3\u4805\uff0e\ud803\ude11", + "output": null + }, + { + "comment": "P1; V6", + "input": 
"\udaa9\uded5\u10a3\u4805.\ud803\ude11", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udaa9\uded5\u2d03\u4805.\ud803\ude11", + "output": null + }, + { + "comment": "V6", + "input": "xn--ukju77frl47r.xn--yl0d", + "output": null + }, + { + "comment": "V6", + "input": "xn--bnd074zr557n.xn--yl0d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udaa9\uded5\u2d03\u4805\uff0e\ud803\ude11", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "-\uff61\ufe12", + "output": null + }, + { + "comment": "V3 (ignored); A4_2 (ignored)", + "input": "-\u3002\u3002", + "output": "-.." + }, + { + "comment": "V3 (ignored); A4_2 (ignored)", + "input": "-..", + "output": "-.." + }, + { + "comment": "V6; V3 (ignored)", + "input": "-.xn--y86c", + "output": null + }, + { + "comment": "C2", + "input": "\u200d.F", + "output": null + }, + { + "comment": "C2", + "input": "\u200d.f", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": ".f", + "output": ".f" + }, + { + "comment": "C2", + "input": "xn--1ug.f", + "output": null + }, + { + "input": "f", + "output": "f" + }, + { + "comment": "C2", + "input": "\u200d\u3a32\uff61\u00df", + "output": null + }, + { + "comment": "C2", + "input": "\u200d\u3a32\u3002\u00df", + "output": null + }, + { + "comment": "C2", + "input": "\u200d\u3a32\u3002SS", + "output": null + }, + { + "comment": "C2", + "input": "\u200d\u3a32\u3002ss", + "output": null + }, + { + "comment": "C2", + "input": "\u200d\u3a32\u3002Ss", + "output": null + }, + { + "input": "xn--9bm.ss", + "output": "xn--9bm.ss" + }, + { + "input": "\u3a32.ss", + "output": "xn--9bm.ss" + }, + { + "input": "\u3a32.SS", + "output": "xn--9bm.ss" + }, + { + "input": "\u3a32.Ss", + "output": "xn--9bm.ss" + }, + { + "comment": "C2", + "input": "xn--1ug914h.ss", + "output": null + }, + { + "comment": "C2", + "input": "xn--1ug914h.xn--zca", + "output": null + }, + { + "comment": "C2", + "input": "\u200d\u3a32\uff61SS", + "output": null + }, 
+ { + "comment": "C2", + "input": "\u200d\u3a32\uff61ss", + "output": null + }, + { + "comment": "C2", + "input": "\u200d\u3a32\uff61Ss", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\uff0e\udbc3\ude28", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d.\udbc3\ude28", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--h327f", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1ug.xn--h327f", + "output": null + }, + { + "comment": "V6", + "input": "xn--98e.xn--om9c", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\uaaf6\u188f\u0e3a\uff12.\ud800\udee2\u0745\u0f9f\ufe12", + "output": null + }, + { + "comment": "V5", + "input": "\uaaf6\u188f\u0e3a2.\ud800\udee2\u0745\u0f9f\u3002", + "output": null + }, + { + "comment": "V5", + "input": "xn--2-2zf840fk16m.xn--sob093b2m7s.", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--2-2zf840fk16m.xn--sob093bj62sz9d", + "output": null + }, + { + "input": "\ud835\udfce\u3002\u752f", + "output": "0.xn--qny" + }, + { + "input": "0\u3002\u752f", + "output": "0.xn--qny" + }, + { + "input": "0.xn--qny", + "output": "0.xn--qny" + }, + { + "input": "0.\u752f", + "output": "0.xn--qny" + }, + { + "comment": "V5; V3 (ignored)", + "input": "-\u2f86\uff0e\uaaf6", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "-\u820c.\uaaf6", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn----ef8c.xn--2v9a", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "-\uff61\u1898", + "output": "-.xn--ibf" + }, + { + "comment": "V3 (ignored)", + "input": "-\u3002\u1898", + "output": "-.xn--ibf" + }, + { + "comment": "V3 (ignored)", + "input": "-.xn--ibf", + "output": "-.xn--ibf" + }, + { + "comment": "C1", + "input": "\u74bc\ud836\ude2d\uff61\u200c\udb40\udddf", + "output": null + }, + { + "comment": "C1", + "input": "\u74bc\ud836\ude2d\u3002\u200c\udb40\udddf", + "output": null + }, 
+ { + "input": "xn--gky8837e.", + "output": "xn--gky8837e." + }, + { + "input": "\u74bc\ud836\ude2d.", + "output": "xn--gky8837e." + }, + { + "comment": "C1", + "input": "xn--gky8837e.xn--0ug", + "output": null + }, + { + "comment": "C1", + "input": "\u200c.\u200c", + "output": null + }, + { + "comment": "C1", + "input": "xn--0ug.xn--0ug", + "output": null + }, + { + "input": "xn--157b.xn--gnb", + "output": "xn--157b.xn--gnb" + }, + { + "input": "\ud29b.\u0716", + "output": "xn--157b.xn--gnb" + }, + { + "input": "\u1110\u1171\u11c2.\u0716", + "output": "xn--157b.xn--gnb" + }, + { + "comment": "P1; V5; V6", + "input": "\u10b7\uff0e\u05c2\ud804\udd34\ua9b7\ud920\udce8", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u10b7\uff0e\ud804\udd34\u05c2\ua9b7\ud920\udce8", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u10b7.\ud804\udd34\u05c2\ua9b7\ud920\udce8", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u2d17.\ud804\udd34\u05c2\ua9b7\ud920\udce8", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--flj.xn--qdb0605f14ycrms3c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--vnd.xn--qdb0605f14ycrms3c", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u2d17\uff0e\ud804\udd34\u05c2\ua9b7\ud920\udce8", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u2d17\uff0e\u05c2\ud804\udd34\ua9b7\ud920\udce8", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u2488\u916b\ufe12\u3002\u08d6", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": "1.\u916b\u3002\u3002\u08d6", + "output": null + }, + { + "comment": "V5; A4_2 (ignored)", + "input": "1.xn--8j4a..xn--8zb", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--tsh4490bfe8c.xn--8zb", + "output": null + }, + { + "comment": "V6", + "input": "xn--co6h.xn--1-h1g429s", + "output": null + }, + { + "comment": "V6", + "input": "xn--co6h.xn--1-kwssa", + "output": null + }, 
+ { + "comment": "V6", + "input": "xn--co6h.xn--1-h1gs", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ua806\u3002\ud8ad\ude8f\u0fb0\u2495", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\ua806\u3002\ud8ad\ude8f\u0fb014.", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--l98a.xn--14-jsj57880f.", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--l98a.xn--dgd218hhp28d", + "output": null + }, + { + "comment": "C2", + "input": "\ud835\udfe04\udb40\uddd7\ud834\ude3b\uff0e\u200d\ud800\udef5\u26e7\u200d", + "output": null + }, + { + "comment": "C2", + "input": "84\udb40\uddd7\ud834\ude3b.\u200d\ud800\udef5\u26e7\u200d", + "output": null + }, + { + "input": "xn--84-s850a.xn--59h6326e", + "output": "xn--84-s850a.xn--59h6326e" + }, + { + "input": "84\ud834\ude3b.\ud800\udef5\u26e7", + "output": "xn--84-s850a.xn--59h6326e" + }, + { + "comment": "C2", + "input": "xn--84-s850a.xn--1uga573cfq1w", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\ud975\udf0e\u2488\uff61\u200c\ud835\udfe4", + "output": null + }, + { + "comment": "C1; P1; V6; A4_2 (ignored)", + "input": "\ud975\udf0e1.\u3002\u200c2", + "output": null + }, + { + "comment": "C1; V6; A4_2 (ignored)", + "input": "xn--1-ex54e..xn--2-rgn", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--tsh94183d.xn--2-rgn", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u200c\udb40\uddaa\uff61\u00df\ud805\udcc3", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u200c\udb40\uddaa\u3002\u00df\ud805\udcc3", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u200c\udb40\uddaa\u3002SS\ud805\udcc3", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u200c\udb40\uddaa\u3002ss\ud805\udcc3", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u200c\udb40\uddaa\u3002Ss\ud805\udcc3", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": 
".xn--ss-bh7o", + "output": ".xn--ss-bh7o" + }, + { + "comment": "C1; C2", + "input": "xn--0ugb.xn--ss-bh7o", + "output": null + }, + { + "comment": "C1; C2", + "input": "xn--0ugb.xn--zca0732l", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u200c\udb40\uddaa\uff61SS\ud805\udcc3", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u200c\udb40\uddaa\uff61ss\ud805\udcc3", + "output": null + }, + { + "comment": "C1; C2", + "input": "\u200d\u200c\udb40\uddaa\uff61Ss\ud805\udcc3", + "output": null + }, + { + "input": "xn--ss-bh7o", + "output": "xn--ss-bh7o" + }, + { + "input": "ss\ud805\udcc3", + "output": "xn--ss-bh7o" + }, + { + "input": "SS\ud805\udcc3", + "output": "xn--ss-bh7o" + }, + { + "input": "Ss\ud805\udcc3", + "output": "xn--ss-bh7o" + }, + { + "comment": "C1; P1; V6", + "input": "\ufe12\u200c\u30f6\u44a9.\ua86a", + "output": null + }, + { + "comment": "C1; A4_2 (ignored)", + "input": "\u3002\u200c\u30f6\u44a9.\ua86a", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": ".xn--qekw60d.xn--gd9a", + "output": ".xn--qekw60d.xn--gd9a" + }, + { + "comment": "C1; A4_2 (ignored)", + "input": ".xn--0ug287dj0o.xn--gd9a", + "output": null + }, + { + "comment": "V6", + "input": "xn--qekw60dns9k.xn--gd9a", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug287dj0or48o.xn--gd9a", + "output": null + }, + { + "input": "xn--qekw60d.xn--gd9a", + "output": "xn--qekw60d.xn--gd9a" + }, + { + "input": "\u30f6\u44a9.\ua86a", + "output": "xn--qekw60d.xn--gd9a" + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\u2488\ud852\udf8d.\udb49\udccb\u1a60", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c1.\ud852\udf8d.\udb49\udccb\u1a60", + "output": null + }, + { + "comment": "V6", + "input": "1.xn--4x6j.xn--jof45148n", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--1-rgn.xn--4x6j.xn--jof45148n", + "output": null + }, + { + "comment": "V6", + "input": 
"xn--tshw462r.xn--jof45148n", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ug88o7471d.xn--jof45148n", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud834\udd75\uff61\ud835\udfeb\ud838\udc08\u4b3a\u2488", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud834\udd75\u30029\ud838\udc08\u4b3a1.", + "output": null + }, + { + "comment": "V6", + "input": "xn--3f1h.xn--91-030c1650n.", + "output": null + }, + { + "comment": "V6", + "input": "xn--3f1h.xn--9-ecp936non25a", + "output": null + }, + { + "input": "xn--8c1a.xn--2ib8jn539l", + "output": "xn--8c1a.xn--2ib8jn539l" + }, + { + "input": "\u821b.\u067d\ud83a\udd34\u06bb", + "output": "xn--8c1a.xn--2ib8jn539l" + }, + { + "input": "\u821b.\u067d\ud83a\udd12\u06bb", + "output": "xn--8c1a.xn--2ib8jn539l" + }, + { + "comment": "V5; V3 (ignored)", + "input": "-\udb40\udd710\uff61\u17cf\u1dfd\ud187\uc2ed", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "-\udb40\udd710\uff61\u17cf\u1dfd\u1110\u1168\u11aa\u1109\u1175\u11b8", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "-\udb40\udd710\u3002\u17cf\u1dfd\ud187\uc2ed", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "-\udb40\udd710\u3002\u17cf\u1dfd\u1110\u1168\u11aa\u1109\u1175\u11b8", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "-0.xn--r4e872ah77nghm", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u115f\u10bf\u10b5\u10e0\uff61\u0b4d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u115f\u10bf\u10b5\u10e0\u3002\u0b4d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u115f\u2d1f\u2d15\u10e0\u3002\u0b4d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u115f\u10bf\u10b5\u1ca0\u3002\u0b4d", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--tndt4hvw.xn--9ic", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--1od7wz74eeb.xn--9ic", + 
"output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u115f\u2d1f\u2d15\u10e0\uff61\u0b4d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u115f\u10bf\u10b5\u1ca0\uff61\u0b4d", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u115f\u10bf\u2d15\u10e0\u3002\u0b4d", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--3nd0etsm92g.xn--9ic", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u115f\u10bf\u2d15\u10e0\uff61\u0b4d", + "output": null + }, + { + "comment": "V6", + "input": "xn--l96h.xn--03e93aq365d", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\ud835\udfdb\ud834\uddaa\ua8c4\uff61\ua8ea-", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\ud835\udfdb\ua8c4\ud834\uddaa\uff61\ua8ea-", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "3\ua8c4\ud834\uddaa\u3002\ua8ea-", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn--3-sl4eu679e.xn----xn4e", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1139\uff61\u0eca\uda42\udfe4\udb40\udd1e", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1139\u3002\u0eca\uda42\udfe4\udb40\udd1e", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--lrd.xn--s8c05302k", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10a6\udaae\udca9\uff0e\udb40\udda1\ufe09\ud83a\udd0d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10a6\udaae\udca9.\udb40\udda1\ufe09\ud83a\udd0d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d06\udaae\udca9.\udb40\udda1\ufe09\ud83a\udd2f", + "output": null + }, + { + "comment": "V6", + "input": "xn--xkjw3965g.xn--ne6h", + "output": null + }, + { + "comment": "V6", + "input": "xn--end82983m.xn--ne6h", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d06\udaae\udca9\uff0e\udb40\udda1\ufe09\ud83a\udd2f", + "output": null + }, + { + "comment": "P1; 
V6", + "input": "\u2d06\udaae\udca9.\udb40\udda1\ufe09\ud83a\udd0d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u2d06\udaae\udca9\uff0e\udb40\udda1\ufe09\ud83a\udd0d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91d\udee8.\ud9d5\udfe2\ud835\udfe8\ua8c4", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91d\udee8.\ud9d5\udfe26\ua8c4", + "output": null + }, + { + "comment": "V6", + "input": "xn--mi60a.xn--6-sl4es8023c", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud800\udef8\udb79\ude0b\u10c2.\u10a1", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud800\udef8\udb79\ude0b\u2d22.\u2d01", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud800\udef8\udb79\ude0b\u10c2.\u2d01", + "output": null + }, + { + "comment": "V6", + "input": "xn--6nd5215jr2u0h.xn--skj", + "output": null + }, + { + "comment": "V6", + "input": "xn--qlj1559dr224h.xn--skj", + "output": null + }, + { + "comment": "V6", + "input": "xn--6nd5215jr2u0h.xn--8md", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91d\udc7f\ua806\u2084\uda65\udf86\uff61\ud88a\ude67\udb41\udcb9\u03c2", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91d\udc7f\ua8064\uda65\udf86\u3002\ud88a\ude67\udb41\udcb9\u03c2", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91d\udc7f\ua8064\uda65\udf86\u3002\ud88a\ude67\udb41\udcb9\u03a3", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91d\udc7f\ua8064\uda65\udf86\u3002\ud88a\ude67\udb41\udcb9\u03c3", + "output": null + }, + { + "comment": "V6", + "input": "xn--4-w93ej7463a9io5a.xn--4xa31142bk3f0d", + "output": null + }, + { + "comment": "V6", + "input": "xn--4-w93ej7463a9io5a.xn--3xa51142bk3f0d", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud91d\udc7f\ua806\u2084\uda65\udf86\uff61\ud88a\ude67\udb41\udcb9\u03a3", + "output": null + }, + { + "comment": "P1; V6", + "input": 
"\ud91d\udc7f\ua806\u2084\uda65\udf86\uff61\ud88a\ude67\udb41\udcb9\u03c3", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud8ba\udcac\u3002\u0729\u3002\ucbd95", + "output": null + }, + { + "comment": "P1; V6", + "input": "\ud8ba\udcac\u3002\u0729\u3002\u110d\u1173\u11ac5", + "output": null + }, + { + "comment": "V6", + "input": "xn--t92s.xn--znb.xn--5-y88f", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u17ca.\u200d\ud835\udfee\ud804\udc3f", + "output": null + }, + { + "comment": "C2; V5", + "input": "\u17ca.\u200d2\ud804\udc3f", + "output": null + }, + { + "comment": "V5", + "input": "xn--m4e.xn--2-ku7i", + "output": null + }, + { + "comment": "C2; V5", + "input": "xn--m4e.xn--2-tgnv469h", + "output": null + }, + { + "comment": "V5", + "input": "\uaaf6\u3002\u5b36\u00df\u847d", + "output": null + }, + { + "comment": "V5", + "input": "\uaaf6\u3002\u5b36SS\u847d", + "output": null + }, + { + "comment": "V5", + "input": "\uaaf6\u3002\u5b36ss\u847d", + "output": null + }, + { + "comment": "V5", + "input": "\uaaf6\u3002\u5b36Ss\u847d", + "output": null + }, + { + "comment": "V5", + "input": "xn--2v9a.xn--ss-q40dp97m", + "output": null + }, + { + "comment": "V5", + "input": "xn--2v9a.xn--zca7637b14za", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u03c2\ud805\udc3d\ud896\udc88\ud805\udf2b\uff61\ud83a\udf29\u200c\ud802\udec4", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u03c2\ud805\udc3d\ud896\udc88\ud805\udf2b\u3002\ud83a\udf29\u200c\ud802\udec4", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u03a3\ud805\udc3d\ud896\udc88\ud805\udf2b\u3002\ud83a\udf29\u200c\ud802\udec4", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u03c3\ud805\udc3d\ud896\udc88\ud805\udf2b\u3002\ud83a\udf29\u200c\ud802\udec4", + "output": null + }, + { + "comment": "V6", + "input": "xn--4xa2260lk3b8z15g.xn--tw9ct349a", + "output": null + }, + { + "comment": "C1; V6", + "input": 
"xn--4xa2260lk3b8z15g.xn--0ug4653g2xzf", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--3xa4260lk3b8z15g.xn--0ug4653g2xzf", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u03a3\ud805\udc3d\ud896\udc88\ud805\udf2b\uff61\ud83a\udf29\u200c\ud802\udec4", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u03c3\ud805\udc3d\ud896\udc88\ud805\udf2b\uff61\ud83a\udf29\u200c\ud802\udec4", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u2ea2\ud9df\ude85\ud835\udfe4\uff61\u200d\ud83d\udeb7", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u2ea2\ud9df\ude852\u3002\u200d\ud83d\udeb7", + "output": null + }, + { + "comment": "V6", + "input": "xn--2-4jtr4282f.xn--m78h", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--2-4jtr4282f.xn--1ugz946p", + "output": null + }, + { + "comment": "V5", + "input": "\ud836\ude25\u3002\u2adf\ud804\ude3e", + "output": null + }, + { + "comment": "V5", + "input": "xn--n82h.xn--63iw010f", + "output": null + }, + { + "comment": "C1; P1; V5; V6; V3 (ignored)", + "input": "-\u1897\u200c\ud83c\udd04.\ud805\udf22", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----pck1820x.xn--9h2d", + "output": null + }, + { + "comment": "C1; V5; V6; V3 (ignored)", + "input": "xn----pck312bx563c.xn--9h2d", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\u17b4.\ucb87-", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "\u17b4.\u110d\u1170\u11ae-", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn--z3e.xn----938f", + "output": null + }, + { + "comment": "C1; P1; V6", + "input": "\u200c\ud805\udcc2\u3002\u2488-\udbc2\ude9b", + "output": null + }, + { + "comment": "C1; P1; V6; V3 (ignored)", + "input": "\u200c\ud805\udcc2\u30021.-\udbc2\ude9b", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn--wz1d.1.xn----rg03o", + "output": 
null + }, + { + "comment": "C1; V6; V3 (ignored)", + "input": "xn--0ugy057g.1.xn----rg03o", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--wz1d.xn----dcp29674o", + "output": null + }, + { + "comment": "C1; V6", + "input": "xn--0ugy057g.xn----dcp29674o", + "output": null + }, + { + "comment": "A4_2 (ignored)", + "input": ".xn--hcb32bni", + "output": ".xn--hcb32bni" + }, + { + "input": "xn--hcb32bni", + "output": "xn--hcb32bni" + }, + { + "input": "\u06bd\u0663\u0596", + "output": "xn--hcb32bni" + }, + { + "comment": "V5; V3 (ignored)", + "input": "\u0f94\ua84b-\uff0e-\ud81a\udf34", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "\u0f94\ua84b-.-\ud81a\udf34", + "output": null + }, + { + "comment": "V5; V3 (ignored)", + "input": "xn----ukg9938i.xn----4u5m", + "output": null + }, + { + "comment": "C1; P1; V6; V3 (ignored)", + "input": "\ud9bd\udcb3-\u22e2\u200c\uff0e\u6807-", + "output": null + }, + { + "comment": "C1; P1; V6; V3 (ignored)", + "input": "\ud9bd\udcb3-\u2291\u0338\u200c\uff0e\u6807-", + "output": null + }, + { + "comment": "C1; P1; V6; V3 (ignored)", + "input": "\ud9bd\udcb3-\u22e2\u200c.\u6807-", + "output": null + }, + { + "comment": "C1; P1; V6; V3 (ignored)", + "input": "\ud9bd\udcb3-\u2291\u0338\u200c.\u6807-", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----9mo67451g.xn----qj7b", + "output": null + }, + { + "comment": "C1; V6; V3 (ignored)", + "input": "xn----sgn90kn5663a.xn----qj7b", + "output": null + }, + { + "comment": "P1; V5; V6; V3 (ignored)", + "input": "-\ud914\ude74.\u06e0\u189a-", + "output": null + }, + { + "comment": "V5; V6; V3 (ignored)", + "input": "xn----qi38c.xn----jxc827k", + "output": null + }, + { + "comment": "P1; V6; A4_2 (ignored)", + "input": "\u3002\u0635\u0649\u0e37\u0644\u0627\u3002\u5c93\u1bf2\udb43\udf83\u1842", + "output": null + }, + { + "comment": "V6; A4_2 (ignored)", + "input": ".xn--mgb1a7bt462h.xn--17e10qe61f9r71s", + "output": null + }, + { + 
"comment": "V3 (ignored)", + "input": "\u188c\uff0e-\u085a", + "output": "xn--59e.xn----5jd" + }, + { + "comment": "V3 (ignored)", + "input": "\u188c.-\u085a", + "output": "xn--59e.xn----5jd" + }, + { + "comment": "V3 (ignored)", + "input": "xn--59e.xn----5jd", + "output": "xn--59e.xn----5jd" + }, + { + "comment": "P1; V5; V6", + "input": "\u1039-\ud82a\udfad\ud83d\udfa2\uff0e\u00df", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1039-\ud82a\udfad\ud83d\udfa2.\u00df", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1039-\ud82a\udfad\ud83d\udfa2.SS", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1039-\ud82a\udfad\ud83d\udfa2.ss", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1039-\ud82a\udfad\ud83d\udfa2.Ss", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn----9tg11172akr8b.ss", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn----9tg11172akr8b.xn--zca", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1039-\ud82a\udfad\ud83d\udfa2\uff0eSS", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1039-\ud82a\udfad\ud83d\udfa2\uff0ess", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u1039-\ud82a\udfad\ud83d\udfa2\uff0eSs", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\u9523\u3002\u0a4d\udb41\ude3b\udb41\ude86", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--gc5a.xn--ybc83044ppga", + "output": null + }, + { + "input": "xn--8gb2338k.xn--lhb0154f", + "output": "xn--8gb2338k.xn--lhb0154f" + }, + { + "input": "\u063d\ud804\ude3e.\u0649\ua92b", + "output": "xn--8gb2338k.xn--lhb0154f" + }, + { + "comment": "P1; V6", + "input": "\u10c1\u10b16\u0318\u3002\u00df\u1b03", + "output": null + }, + { + "input": "\u2d21\u2d116\u0318\u3002\u00df\u1b03", + "output": "xn--6-8cb7433a2ba.xn--zca894k" + }, + { + "comment": "P1; V6", + "input": "\u10c1\u10b16\u0318\u3002SS\u1b03", + "output": 
null + }, + { + "input": "\u2d21\u2d116\u0318\u3002ss\u1b03", + "output": "xn--6-8cb7433a2ba.xn--ss-2vq" + }, + { + "comment": "P1; V6", + "input": "\u10c1\u2d116\u0318\u3002Ss\u1b03", + "output": null + }, + { + "comment": "V6", + "input": "xn--6-8cb306hms1a.xn--ss-2vq", + "output": null + }, + { + "input": "xn--6-8cb7433a2ba.xn--ss-2vq", + "output": "xn--6-8cb7433a2ba.xn--ss-2vq" + }, + { + "input": "\u2d21\u2d116\u0318.ss\u1b03", + "output": "xn--6-8cb7433a2ba.xn--ss-2vq" + }, + { + "comment": "P1; V6", + "input": "\u10c1\u10b16\u0318.SS\u1b03", + "output": null + }, + { + "comment": "P1; V6", + "input": "\u10c1\u2d116\u0318.Ss\u1b03", + "output": null + }, + { + "comment": "V6", + "input": "xn--6-8cb555h2b.xn--ss-2vq", + "output": null + }, + { + "input": "xn--6-8cb7433a2ba.xn--zca894k", + "output": "xn--6-8cb7433a2ba.xn--zca894k" + }, + { + "input": "\u2d21\u2d116\u0318.\u00df\u1b03", + "output": "xn--6-8cb7433a2ba.xn--zca894k" + }, + { + "comment": "V6", + "input": "xn--6-8cb555h2b.xn--zca894k", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\udb40\udd0f\ud81a\udf34\udb43\udcbd\uff61\uffa0", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\udb40\udd0f\ud81a\udf34\udb43\udcbd\u3002\u1160", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--619ep9154c.xn--psd", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--619ep9154c.xn--cl7c", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb42\udf54.\ud800\udef1\u2082", + "output": null + }, + { + "comment": "P1; V6", + "input": "\udb42\udf54.\ud800\udef12", + "output": null + }, + { + "comment": "V6", + "input": "xn--vi56e.xn--2-w91i", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u2dbf.\u00df\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u2dbf.SS\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u2dbf.ss\u200d", + "output": null + }, + { + "comment": "C2; P1; V6", + 
"input": "\u2dbf.Ss\u200d", + "output": null + }, + { + "comment": "V6", + "input": "xn--7pj.ss", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--7pj.xn--ss-n1t", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--7pj.xn--zca870n", + "output": null + }, + { + "comment": "C1", + "input": "\u6889\u3002\u200c", + "output": null + }, + { + "input": "xn--7zv.", + "output": "xn--7zv." + }, + { + "input": "\u6889.", + "output": "xn--7zv." + }, + { + "comment": "C1", + "input": "xn--7zv.xn--0ug", + "output": null + }, + { + "input": "xn--iwb.ss", + "output": "xn--iwb.ss" + }, + { + "input": "\u0853.ss", + "output": "xn--iwb.ss" + }, + { + "input": "\u0853.SS", + "output": "xn--iwb.ss" + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\u40da\u87e5-\u3002-\ud9b5\udc98\u2488", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\u40da\u87e5-\u3002-\ud9b5\udc981.", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----n50a258u.xn---1-up07j.", + "output": null + }, + { + "comment": "V6; V3 (ignored)", + "input": "xn----n50a258u.xn----ecp33805f", + "output": null + }, + { + "comment": "V3 (ignored)", + "input": "-\uff61\u2e90", + "output": "-.xn--6vj" + }, + { + "comment": "V3 (ignored)", + "input": "-\u3002\u2e90", + "output": "-.xn--6vj" + }, + { + "comment": "V3 (ignored)", + "input": "-.xn--6vj", + "output": "-.xn--6vj" + }, + { + "comment": "P1; V5; V6", + "input": "\udb43\udc29\ud807\udcac\uff0e\u065c", + "output": null + }, + { + "comment": "P1; V5; V6", + "input": "\udb43\udc29\ud807\udcac.\u065c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--sn3d59267c.xn--4hb", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud800\udf7a.\ud928\uddc3\u200c", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--ie8c.xn--2g51a", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn--ie8c.xn--0ug03366c", + "output": null + }, + { + 
"comment": "C2; P1; V6", + "input": "\u200d\u200d\u8954\u3002\u10bc5\ua86e\ud995\udf4f", + "output": null + }, + { + "comment": "C2; P1; V6", + "input": "\u200d\u200d\u8954\u3002\u2d1c5\ua86e\ud995\udf4f", + "output": null + }, + { + "comment": "V6", + "input": "xn--2u2a.xn--5-uws5848bpf44e", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1uga7691f.xn--5-uws5848bpf44e", + "output": null + }, + { + "comment": "V6", + "input": "xn--2u2a.xn--5-r1g7167ipfw8d", + "output": null + }, + { + "comment": "C2; V6", + "input": "xn--1uga7691f.xn--5-r1g7167ipfw8d", + "output": null + }, + { + "input": "xn--ix9c26l.xn--q0s", + "output": "xn--ix9c26l.xn--q0s" + }, + { + "input": "\ud802\udedc\ud804\udf3c.\u5a40", + "output": "xn--ix9c26l.xn--q0s" + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\ud835\udfd6\u00df\uff0e\udb40\udd10-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8\u00df.\udb40\udd10-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8\u00df.\udb40\udd10-?\u2d0f", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8SS.\udb40\udd10-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8ss.\udb40\udd10-?\u2d0f", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8ss.\udb40\udd10-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8ss.xn---?-gfk", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8ss.xn---?-261a", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "xn--8-qfa.xn---?-261a", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "xn--8-qfa.xn---?-gfk", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\ud835\udfd6\u00df\uff0e\udb40\udd10-?\u2d0f", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": 
"\ud835\udfd6SS\uff0e\udb40\udd10-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\ud835\udfd6ss\uff0e\udb40\udd10-?\u2d0f", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\ud835\udfd6ss\uff0e\udb40\udd10-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8ss.-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8ss.-?\u2d0f", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8SS.-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "xn--8-qfa.-?\u2d0f", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "XN--8-QFA.-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "Xn--8-Qfa.-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "xn--8-qfa.-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "\ud835\udfd6Ss\uff0e\udb40\udd10-?\u10af", + "output": null + }, + { + "comment": "P1; V6; V3 (ignored)", + "input": "8Ss.\udb40\udd10-?\u10af", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ua9b9\u200d\ud077\ud8af\udda1\uff61\u2082", + "output": null + }, + { + "comment": "C2; P1; V5; V6", + "input": "\ua9b9\u200d\u110f\u1173\u11b2\ud8af\udda1\uff61\u2082", + "output": null + }, + { + "input": "\ud802\udec0\uff0e\u0689\ud804\udf00", + "output": "xn--pw9c.xn--fjb8658k" + }, + { + "input": "\ud802\udec0.\u0689\ud804\udf00", + "output": "xn--pw9c.xn--fjb8658k" + }, + { + "input": "xn--pw9c.xn--fjb8658k", + "output": "xn--pw9c.xn--fjb8658k" + }, + { + "comment": "C2", + "input": "\ud800\udef7\u3002\u200d", + "output": null + }, + { + "input": "xn--r97c.", + "output": "xn--r97c." + }, + { + "input": "\ud800\udef7.", + "output": "xn--r97c." 
+ }, + { + "comment": "C2", + "input": "xn--r97c.xn--1ug", + "output": null + }, + { + "comment": "V5", + "input": "\ud807\udc33\ud804\ude2f\u3002\u296a", + "output": null + }, + { + "comment": "V5", + "input": "xn--2g1d14o.xn--jti", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud804\udd80\u4074\ud952\udde3\uff0e\u10b5\ud835\udfdc\u200c\u0348", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud804\udd80\u4074\ud952\udde3.\u10b54\u200c\u0348", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud804\udd80\u4074\ud952\udde3.\u2d154\u200c\u0348", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--1mnx647cg3x1b.xn--4-zfb5123a", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn--1mnx647cg3x1b.xn--4-zfb502tlsl", + "output": null + }, + { + "comment": "V5; V6", + "input": "xn--1mnx647cg3x1b.xn--4-zfb324h", + "output": null + }, + { + "comment": "C1; V5; V6", + "input": "xn--1mnx647cg3x1b.xn--4-zfb324h32o", + "output": null + }, + { + "comment": "C1; P1; V5; V6", + "input": "\ud804\udd80\u4074\ud952\udde3\uff0e\u2d15\ud835\udfdc\u200c\u0348", + "output": null + } +] diff --git a/test/fixtures/wpt/url/resources/a-element.js b/test/fixtures/wpt/url/resources/a-element.js index 553855a870c559..65c7e85281360f 100644 --- a/test/fixtures/wpt/url/resources/a-element.js +++ b/test/fixtures/wpt/url/resources/a-element.js @@ -19,7 +19,17 @@ function runURLTests(urltests) { // skip without base because you cannot unset the baseURL of a document if (expected.base === null) continue; - test(function() { + function getKey(expected) { + if (expected.protocol) { + return expected.protocol.replace(":", ""); + } + if (expected.failure) { + return expected.input.split(":")[0]; + } + return "other"; + } + + subsetTestByKey(getKey(expected), test, function() { var url = bURL(expected.input, expected.base) if(expected.failure) { if(url.protocol !== ':') { diff --git 
a/test/fixtures/wpt/url/resources/setters_tests.json b/test/fixtures/wpt/url/resources/setters_tests.json index b709ef5234a4fd..9c4eedc60893ed 100644 --- a/test/fixtures/wpt/url/resources/setters_tests.json +++ b/test/fixtures/wpt/url/resources/setters_tests.json @@ -269,6 +269,57 @@ "protocol": "https:", "port": "" } + }, + { + "comment": "Tab and newline are stripped", + "href": "http://test/", + "new_value": "h\u000D\u000Att\u0009ps", + "expected": { + "href": "https://test/", + "protocol": "https:", + "port": "" + } + }, + { + "href": "http://test/", + "new_value": "https\u000D", + "expected": { + "href": "https://test/", + "protocol": "https:" + } + }, + { + "comment": "Non-tab/newline C0 controls result in no-op", + "href": "http://test/", + "new_value": "https\u0000", + "expected": { + "href": "http://test/", + "protocol": "http:" + } + }, + { + "href": "http://test/", + "new_value": "https\u000C", + "expected": { + "href": "http://test/", + "protocol": "http:" + } + }, + { + "href": "http://test/", + "new_value": "https\u000E", + "expected": { + "href": "http://test/", + "protocol": "http:" + } + }, + { + "href": "http://test/", + "new_value": "https\u0020", + "expected": { + "href": "http://test/", + "protocol": "http:" + } } ], "username": [ @@ -1603,7 +1654,7 @@ ], "pathname": [ { - "comment": "Cannot-be-a-base don’t have a path", + "comment": "Opaque paths cannot be set", "href": "mailto:me@example.net", "new_value": "/foo", "expected": { @@ -1611,6 +1662,22 @@ "pathname": "me@example.net" } }, + { + "href": "data:original", + "new_value": "new value", + "expected": { + "href": "data:original", + "pathname": "original" + } + }, + { + "href": "sc:original", + "new_value": "new value", + "expected": { + "href": "sc:original", + "pathname": "original" + } + }, { "comment": "Special URLs cannot have their paths erased", "href": "file:///some/path", @@ -1830,6 +1897,23 @@ "href": "non-spec:/p", "pathname": "/p" } + }, + { + "comment": "Non-special URLs with 
non-opaque paths percent-encode U+0020", + "href": "data:/nospace", + "new_value": "space ", + "expected": { + "href": "data:/space%20", + "pathname": "/space%20" + } + }, + { + "href": "sc:/nospace", + "new_value": "space ", + "expected": { + "href": "sc:/space%20", + "pathname": "/space%20" + } } ], "search": [ @@ -1914,6 +1998,42 @@ "href": "http://example.net/?%c3%89t%C3%A9", "search": "?%c3%89t%C3%A9" } + }, + { + "comment": "Drop trailing spaces from trailing opaque paths", + "href": "data:space ?query", + "new_value": "", + "expected": { + "href": "data:space", + "pathname": "space", + "search": "" + } + }, + { + "href": "sc:space ?query", + "new_value": "", + "expected": { + "href": "sc:space", + "pathname": "space", + "search": "" + } + }, + { + "comment": "Do not drop trailing spaces from non-trailing opaque paths", + "href": "data:space ?query#fragment", + "new_value": "", + "expected": { + "href": "data:space #fragment", + "search": "" + } + }, + { + "href": "sc:space ?query#fragment", + "new_value": "", + "expected": { + "href": "sc:space #fragment", + "search": "" + } } ], "hash": [ @@ -2048,6 +2168,42 @@ "href": "javascript:alert(1)#castle", "hash": "#castle" } + }, + { + "comment": "Drop trailing spaces from trailing opaque paths", + "href": "data:space #fragment", + "new_value": "", + "expected": { + "href": "data:space", + "pathname": "space", + "hash": "" + } + }, + { + "href": "sc:space #fragment", + "new_value": "", + "expected": { + "href": "sc:space", + "pathname": "space", + "hash": "" + } + }, + { + "comment": "Do not drop trailing spaces from non-trailing opaque paths", + "href": "data:space ?query#fragment", + "new_value": "", + "expected": { + "href": "data:space ?query", + "hash": "" + } + }, + { + "href": "sc:space ?query#fragment", + "new_value": "", + "expected": { + "href": "sc:space ?query", + "hash": "" + } } ] } diff --git a/test/fixtures/wpt/url/resources/toascii.json b/test/fixtures/wpt/url/resources/toascii.json index 
b9ceea310676d7..4cb41e94cd4a0f 100644 --- a/test/fixtures/wpt/url/resources/toascii.json +++ b/test/fixtures/wpt/url/resources/toascii.json @@ -61,6 +61,10 @@ "input": "xn--a.ß", "output": null }, + { + "input": "xn--ls8h=", + "output": null + }, { "comment": "Invalid Punycode (contains non-ASCII character)", "input": "xn--tešla", @@ -172,5 +176,18 @@ { "input": "xn--", "output": null + }, + { + "comment": "Interesting UseSTD3ASCIIRules=false cases", + "input": "≠", + "output": "xn--1ch" + }, + { + "input": "≮", + "output": "xn--gdh" + }, + { + "input": "≯", + "output": "xn--hdh" } ] diff --git a/test/fixtures/wpt/url/resources/urltestdata.json b/test/fixtures/wpt/url/resources/urltestdata.json index 3cf106965b1ffd..0265346a6a62d4 100644 --- a/test/fixtures/wpt/url/resources/urltestdata.json +++ b/test/fixtures/wpt/url/resources/urltestdata.json @@ -3917,6 +3917,22 @@ "search": "", "hash": "" }, + "Non-special domains with empty labels", + { + "input": "h://.", + "base": "about:blank", + "href": "h://.", + "origin": "null", + "protocol": "h:", + "username": "", + "password": "", + "host": ".", + "hostname": ".", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, "Broken IPv6", { "input": "http://[www.google.com]/", @@ -3948,6 +3964,16 @@ "base": "http://other.com/", "failure": true }, + { + "input": "http://[::%31]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://%5B::1]", + "base": "http://other.com/", + "failure": true + }, "Misc Unicode", { "input": "http://foo:💩@example.com/bar", @@ -8124,6 +8150,21 @@ "search": "", "hash": "" }, + "IDNA hostnames which get mapped to 'localhost'", + { + "input": "file://loC𝐀𝐋𝐇𝐨𝐬𝐭/usr/bin", + "base": "about:blank", + "href": "file:///usr/bin", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/usr/bin", + "search": "", + "hash": "" + }, "Empty host after the domain to ASCII", { "input": "file://\u00ad/p", @@ -8609,5 
+8650,212 @@ "input": "http://💩.123/", "base": "about:blank", "failure": true + }, + "U+0000 and U+FFFF in various places", + { + "input": "https://\u0000y", + "base": "about:blank", + "failure": true + }, + { + "input": "https://x/\u0000y", + "base": "about:blank", + "hash": "", + "host": "x", + "hostname": "x", + "href": "https://x/%00y", + "password": "", + "pathname": "/%00y", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": "https://x/?\u0000y", + "base": "about:blank", + "hash": "", + "host": "x", + "hostname": "x", + "href": "https://x/?%00y", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "?%00y", + "username": "" + }, + { + "input": "https://x/?#\u0000y", + "base": "about:blank", + "hash": "#%00y", + "host": "x", + "hostname": "x", + "href": "https://x/?#%00y", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": "https://\uFFFFy", + "base": "about:blank", + "failure": true + }, + { + "input": "https://x/\uFFFFy", + "base": "about:blank", + "hash": "", + "host": "x", + "hostname": "x", + "href": "https://x/%EF%BF%BFy", + "password": "", + "pathname": "/%EF%BF%BFy", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": "https://x/?\uFFFFy", + "base": "about:blank", + "hash": "", + "host": "x", + "hostname": "x", + "href": "https://x/?%EF%BF%BFy", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "?%EF%BF%BFy", + "username": "" + }, + { + "input": "https://x/?#\uFFFFy", + "base": "about:blank", + "hash": "#%EF%BF%BFy", + "host": "x", + "hostname": "x", + "href": "https://x/?#%EF%BF%BFy", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": "non-special:\u0000y", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": 
"non-special:%00y", + "password": "", + "pathname": "%00y", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:x/\u0000y", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:x/%00y", + "password": "", + "pathname": "x/%00y", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:x/?\u0000y", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:x/?%00y", + "password": "", + "pathname": "x/", + "port": "", + "protocol": "non-special:", + "search": "?%00y", + "username": "" + }, + { + "input": "non-special:x/?#\u0000y", + "base": "about:blank", + "hash": "#%00y", + "host": "", + "hostname": "", + "href": "non-special:x/?#%00y", + "password": "", + "pathname": "x/", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:\uFFFFy", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:%EF%BF%BFy", + "password": "", + "pathname": "%EF%BF%BFy", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:x/\uFFFFy", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:x/%EF%BF%BFy", + "password": "", + "pathname": "x/%EF%BF%BFy", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:x/?\uFFFFy", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:x/?%EF%BF%BFy", + "password": "", + "pathname": "x/", + "port": "", + "protocol": "non-special:", + "search": "?%EF%BF%BFy", + "username": "" + }, + { + "input": "non-special:x/?#\uFFFFy", + "base": "about:blank", + "hash": "#%EF%BF%BFy", + "host": "", + "hostname": "", + "href": "non-special:x/?#%EF%BF%BFy", + "password": "", + "pathname": "x/", + "port": 
"", + "protocol": "non-special:", + "search": "", + "username": "" } ] diff --git a/test/fixtures/wpt/url/url-constructor.any.js b/test/fixtures/wpt/url/url-constructor.any.js index dfa98092fa6475..297b8ffd9eaf37 100644 --- a/test/fixtures/wpt/url/url-constructor.any.js +++ b/test/fixtures/wpt/url/url-constructor.any.js @@ -1,4 +1,9 @@ +// META: script=/common/subset-tests-by-key.js // META: timeout=long +// META: variant=?include=file +// META: variant=?include=javascript +// META: variant=?include=mailto +// META: variant=?exclude=(file|javascript|mailto) function bURL(url, base) { return base ? new URL(url, base) : new URL(url) @@ -9,7 +14,17 @@ function runURLTests(urltests) { var expected = urltests[i] if (typeof expected === "string") continue // skip comments - test(function() { + function getKey(expected) { + if (expected.protocol) { + return expected.protocol.replace(":", ""); + } + if (expected.failure) { + return expected.input.split(":")[0]; + } + return "other"; + } + + subsetTestByKey(getKey(expected), test, function() { if (expected.failure) { assert_throws_js(TypeError, function() { bURL(expected.input, expected.base) diff --git a/test/fixtures/wpt/url/url-setters-a-area.window.js b/test/fixtures/wpt/url/url-setters-a-area.window.js index 8c66f2883d3c2e..6a5e762cd42fe8 100644 --- a/test/fixtures/wpt/url/url-setters-a-area.window.js +++ b/test/fixtures/wpt/url/url-setters-a-area.window.js @@ -1,3 +1,9 @@ +// META: script=/common/subset-tests-by-key.js +// META: variant=?include=file +// META: variant=?include=javascript +// META: variant=?include=mailto +// META: variant=?exclude=(file|javascript|mailto) + // Keep this file in sync with url-setters.any.js. 
promise_test(() => fetch("resources/setters_tests.json").then(res => res.json()).then(runURLSettersTests), "Loading data…"); @@ -15,7 +21,8 @@ function runURLSettersTests(all_test_cases) { if ("comment" in test_case) { name += " " + test_case.comment; } - test(function() { + const key = test_case.href.split(":")[0]; + subsetTestByKey(key, test, function() { var url = document.createElement("a"); url.href = test_case.href; url[attribute_to_be_set] = test_case.new_value; @@ -23,7 +30,7 @@ function runURLSettersTests(all_test_cases) { assert_equals(url[attribute], test_case.expected[attribute]) } }, ": " + name) - test(function() { + subsetTestByKey(key, test, function() { var url = document.createElement("area"); url.href = test_case.href; url[attribute_to_be_set] = test_case.new_value; diff --git a/test/fixtures/wpt/url/url-setters.any.js b/test/fixtures/wpt/url/url-setters.any.js index 1cddf94a8ecca9..fe88175ac63446 100644 --- a/test/fixtures/wpt/url/url-setters.any.js +++ b/test/fixtures/wpt/url/url-setters.any.js @@ -1,3 +1,9 @@ +// META: script=/common/subset-tests-by-key.js +// META: variant=?include=file +// META: variant=?include=javascript +// META: variant=?include=mailto +// META: variant=?exclude=(file|javascript|mailto) + // Keep this file in sync with url-setters-a-area.window.js. 
promise_test(() => fetch("resources/setters_tests.json").then(res => res.json()).then(runURLSettersTests), "Loading data…"); @@ -15,7 +21,8 @@ function runURLSettersTests(all_test_cases) { if ("comment" in test_case) { name += " " + test_case.comment; } - test(function() { + const key = test_case.href.split(":")[0]; + subsetTestByKey(key, test, function() { var url = new URL(test_case.href); url[attribute_to_be_set] = test_case.new_value; for (var attribute in test_case.expected) { diff --git a/test/fixtures/wpt/url/urlsearchparams-delete.any.js b/test/fixtures/wpt/url/urlsearchparams-delete.any.js index 1aa9b313736de2..28ebbce5f13bd8 100644 --- a/test/fixtures/wpt/url/urlsearchparams-delete.any.js +++ b/test/fixtures/wpt/url/urlsearchparams-delete.any.js @@ -43,3 +43,21 @@ test(function() { assert_equals(url.href, 'http://example.com/', 'url.href does not have ?'); assert_equals(url.search, '', 'url.search does not have ?'); }, 'Removing non-existent param removes ? from URL'); + +test(() => { + const url = new URL('data:space ?test'); + assert_true(url.searchParams.has('test')); + url.searchParams.delete('test'); + assert_false(url.searchParams.has('test')); + assert_equals(url.search, ''); + assert_equals(url.pathname, 'space'); + assert_equals(url.href, 'data:space'); +}, 'Changing the query of a URL with an opaque path can impact the path'); + +test(() => { + const url = new URL('data:space ?test#test'); + url.searchParams.delete('test'); + assert_equals(url.search, ''); + assert_equals(url.pathname, 'space '); + assert_equals(url.href, 'data:space #test'); +}, 'Changing the query of a URL with an opaque path can impact the path if the URL has no fragment'); diff --git a/test/fixtures/wpt/versions.json b/test/fixtures/wpt/versions.json index 26ef56f9792810..ca6504fad1338d 100644 --- a/test/fixtures/wpt/versions.json +++ b/test/fixtures/wpt/versions.json @@ -68,7 +68,7 @@ "path": "streams" }, "url": { - "commit": "0a187bc16933e67dfb8755935143a6dd5a9e12f2", + 
"commit": "f1ade799d04b72b0ff75c13e988744c2cd873741", "path": "url" }, "user-timing": { diff --git a/test/fuzzers/fuzz_url.cc b/test/fuzzers/fuzz_url.cc deleted file mode 100644 index 16c5f644893f86..00000000000000 --- a/test/fuzzers/fuzz_url.cc +++ /dev/null @@ -1,11 +0,0 @@ -#include - -#include "node.h" -#include "node_internals.h" -#include "node_url.h" - -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - node::url::URL url2(reinterpret_cast(data), size); - - return 0; -} diff --git a/test/parallel/test-process-versions.js b/test/parallel/test-process-versions.js index 91ffd644f01a93..23590d58a587d9 100644 --- a/test/parallel/test-process-versions.js +++ b/test/parallel/test-process-versions.js @@ -18,6 +18,7 @@ const expected_keys = [ 'uvwasi', 'acorn', 'simdutf', + 'ada', ]; const hasUndici = process.config.variables.node_builtin_shareable_builtins.includes('deps/undici/undici.js'); diff --git a/test/parallel/test-whatwg-url-custom-inspect.js b/test/parallel/test-whatwg-url-custom-inspect.js index ad77f5725d30ed..a7d30a6ab936c3 100644 --- a/test/parallel/test-whatwg-url-custom-inspect.js +++ b/test/parallel/test-whatwg-url-custom-inspect.js @@ -45,18 +45,17 @@ assert.strictEqual( search: '?que=ry', searchParams: URLSearchParams { 'que' => 'ry' }, hash: '#hash', - cannotBeBase: false, - special: true, [Symbol(context)]: URLContext { - flags: 2032, - scheme: 'https:', + href: 'https://username:password@host.name:8080/path/name/?que=ry#hash', + origin: 'https://host.name:8080', + protocol: 'https:', + hostname: 'host.name', + pathname: '/path/name/', + search: '?que=ry', username: 'username', password: 'password', - host: 'host.name', - port: 8080, - path: [ 'path', 'name', '', [length]: 3 ], - query: 'que=ry', - fragment: 'hash' + port: '8080', + hash: '#hash' } }`); diff --git a/test/parallel/test-whatwg-url-properties.js b/test/parallel/test-whatwg-url-properties.js index 98a16bdbbdcf6b..69ce14a431a9b7 100644 --- 
a/test/parallel/test-whatwg-url-properties.js +++ b/test/parallel/test-whatwg-url-properties.js @@ -1,7 +1,7 @@ 'use strict'; require('../common'); const assert = require('assert'); -const { URL, URLSearchParams } = require('url'); +const { URL, URLSearchParams, format } = require('url'); [ { name: 'toString' }, @@ -11,6 +11,17 @@ const { URL, URLSearchParams } = require('url'); testMethod(URL.prototype, name); }); +[ + 'http://www.google.com', + 'https://www.domain.com:443', + 'file:///Users/yagiz/Developer/node', +].forEach((url) => { + const u = new URL(url); + assert.strictEqual(JSON.stringify(u), `"${u.href}"`); + assert.strictEqual(u.toString(), u.href); + assert.strictEqual(format(u), u.href); +}); + [ { name: 'href' }, { name: 'protocol' }, diff --git a/test/wpt/status/url.json b/test/wpt/status/url.json index a1c90f210506db..a0957dccb53c73 100644 --- a/test/wpt/status/url.json +++ b/test/wpt/status/url.json @@ -7,7 +7,13 @@ "skip": "TODO: port from .window.js" }, "historical.any.js": { - "requires": ["small-icu"] + "requires": ["small-icu"], + "fail": { + "expected": [ + "URL: no structured serialize/deserialize support", + "URLSearchParams: no structured serialize/deserialize support" + ] + } }, "urlencoded-parser.any.js": { "requires": ["small-icu"] @@ -23,5 +29,8 @@ }, "url-setters-a-area.window.js": { "skip": "already tested in url-setters.any.js" + }, + "IdnaTestV2.window.js": { + "requires": ["small-icu"] } } diff --git a/test/wpt/test-url.js b/test/wpt/test-url.js index cca2184b47720b..1998ea5bf43798 100644 --- a/test/wpt/test-url.js +++ b/test/wpt/test-url.js @@ -14,4 +14,7 @@ runner.setScriptModifier((obj) => { } }); runner.pretendGlobalThisAs('Window'); +runner.setInitScript(` + globalThis.location ||= {}; +`); runner.runJsTests(); diff --git a/tools/dep_updaters/update-ada.sh b/tools/dep_updaters/update-ada.sh new file mode 100755 index 00000000000000..5b693520349313 --- /dev/null +++ b/tools/dep_updaters/update-ada.sh @@ -0,0 +1,51 @@ 
+#!/bin/sh +set -e +# Shell script to update ada in the source tree to a specific version + +BASE_DIR=$(cd "$(dirname "$0")/../.." && pwd) +DEPS_DIR="$BASE_DIR/deps" +ADA_VERSION=$1 + +if [ "$#" -le 0 ]; then + echo "Error: please provide an ada version to update to" + echo " e.g. $0 1.0.0" + exit 1 +fi + +echo "Making temporary workspace..." + +WORKSPACE=$(mktemp -d 2> /dev/null || mktemp -d -t 'tmp') + +cleanup () { + EXIT_CODE=$? + [ -d "$WORKSPACE" ] && rm -rf "$WORKSPACE" + exit $EXIT_CODE +} + +trap cleanup INT TERM EXIT + +ADA_REF="v$ADA_VERSION" +ADA_ZIP="ada-$ADA_VERSION.zip" +ADA_LICENSE="LICENSE-MIT" + +cd "$WORKSPACE" + +echo "Fetching ada source archive..." +curl -sL -o "$ADA_ZIP" "https://github.com/ada-url/ada/releases/download/$ADA_REF/singleheader.zip" +unzip "$ADA_ZIP" +rm "$ADA_ZIP" + +curl -sL -o "$ADA_LICENSE" "https://raw.githubusercontent.com/ada-url/ada/HEAD/LICENSE-MIT" + +echo "Replacing existing ada (except GYP build files)" +mv "$DEPS_DIR/ada/"*.gyp "$DEPS_DIR/ada/README.md" "$WORKSPACE/" +rm -rf "$DEPS_DIR/ada" +mv "$WORKSPACE" "$DEPS_DIR/ada" + +echo "All done!" +echo "" +echo "Please git add ada, commit the new version:" +echo "" +echo "$ git add -A deps/ada" +echo "$ git commit -m \"deps: update ada to $ADA_VERSION\"" +echo "" diff --git a/tools/license-builder.sh b/tools/license-builder.sh index 8389f24c681082..1b52a473a15bf2 100755 --- a/tools/license-builder.sh +++ b/tools/license-builder.sh @@ -81,6 +81,8 @@ licenseText="$(sed -e '/The data format used by the zlib library/,$d' -e 's/^\/\ addlicense "zlib" "deps/zlib" "$licenseText" licenseText="$(cat "${rootdir}/deps/simdutf/LICENSE-MIT")" addlicense "simdutf" "deps/simdutf" "$licenseText" +licenseText="$(curl -sL https://raw.githubusercontent.com/ada-url/ada/HEAD/LICENSE-MIT)" +addlicense "ada" "deps/ada" "$licenseText" # npm licenseText="$(cat "${rootdir}/deps/npm/LICENSE")"