Skip to content

Commit 5807351

Browse files
authored
ICU-free ada (#216)
* This is a prototype of what an ICU-free ada would look like. * Adding ada_idna files. * Updating to latest idna. * Updating ada idna * adding IDNA_TEST_V2 * Cleaning wpt_tests. * Trimming useless parameter * Updating the documentation.
1 parent caf31e7 commit 5807351

17 files changed

+9979
-405
lines changed

.github/workflows/lint_and_format_check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
ubuntu-build:
1616
runs-on: ubuntu-latest
1717
steps:
18-
- uses: actions/checkout@v3
18+
- uses: actions/checkout@v3
1919
- name: Install ICU
2020
run: sudo apt-get install -y libicu-dev pkg-config
2121

.github/workflows/ubuntu_install.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
- name: Setup Ninja
2525
run: sudo apt-get install ninja-build
2626
- name: Prepare
27-
run: cmake -G Ninja -DCMAKE_INSTALL_PREFIX:PATH=destination -B build
27+
run: cmake -G Ninja -DCMAKE_INSTALL_PREFIX:PATH=destination -B build
2828
- name: Build
2929
run: cmake --build build -j=2
3030
- name: Install

CMakeLists.txt

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ add_subdirectory(src)
2323
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake)
2424

2525
option(ADA_BENCHMARKS "Build benchmarks" OFF)
26-
option(ADA_USE_ICU "Use ICU if available" ON)
2726

2827
if(BUILD_TESTING OR ADA_BENCHMARKS)
2928
include(${PROJECT_SOURCE_DIR}/cmake/import.cmake)
@@ -40,60 +39,6 @@ else()
4039
endif()
4140
endif(BUILD_TESTING)
4241

43-
if(ADA_USE_ICU)
44-
45-
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
46-
message(STATUS "Apple system detected.")
47-
# People who run macOS often use brew.
48-
if(EXISTS /opt/homebrew/opt/icu4c)
49-
message(STATUS "icu is provided by homebrew at /opt/homebrew/opt/icu4c.")
50-
## This is a bit awkward, but it is a lot better than asking the
51-
## user to figure that out.
52-
list(APPEND CMAKE_PREFIX_PATH "/opt/homebrew/opt/icu4c/include")
53-
list(APPEND CMAKE_LIBRARY_PATH "/opt/homebrew/opt/icu4c/lib")
54-
elseif(EXISTS /usr/local/opt/icu4c)
55-
message(STATUS "icu is provided by homebrew at /usr/local/opt/icu4c.")
56-
list(APPEND CMAKE_PREFIX_PATH "/usr/local/opt/icu4c/include")
57-
list(APPEND CMAKE_LIBRARY_PATH "/usr/local/opt/icu4c/lib")
58-
endif()
59-
endif()
60-
61-
find_package(ICU COMPONENTS uc i18n)
62-
### If the user does not have ICU, let us help them with instructions:
63-
if(ICU_FOUND)
64-
target_link_libraries(ada PRIVATE ICU::uc ICU::i18n)
65-
else(ICU_FOUND)
66-
# Under Windows, we have a backup.
67-
if(MSVC) # could be WIN32 but there are other systems under Windows.
68-
if(MSVC_VERSION GREATER_EQUAL 1920)
69-
message(STATUS "You have a recent Visual Studio (2019 or better), congratulations!")
70-
else()
71-
message(SEND_ERROR "If you use Visual Studio, then Visual Studio 2019 or better is required.")
72-
return()
73-
endif()
74-
option(BUILD_SHARED_LIBS "Build using shared libraries" OFF)
75-
target_link_libraries(ada PRIVATE Normaliz)
76-
else(MSVC)
77-
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
78-
if(EXISTS /opt/homebrew)
79-
message(STATUS "Under macOS, you may install ICU with brew, using 'brew install icu4c'.")
80-
else()
81-
message(STATUS "Under macOS, you should install brew (see https://brew.sh) and then icu4c ('brew install icu4c').")
82-
endif()
83-
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
84-
message(STATUS "Under Linux, you may be able to install ICU with a command such as 'apt-get install libicu-dev'." )
85-
endif()
86-
message(SEND_ERROR "ICU with components uc and i18n/in is required for building ada.")
87-
return()
88-
endif(MSVC)
89-
endif(ICU_FOUND)
90-
91-
else(ADA_USE_ICU)
92-
target_compile_definitions(ada PUBLIC ADA_HAS_ICU=0)
93-
94-
### Going forward, we have ICU for sure, except under Windows.
95-
endif(ADA_USE_ICU)
96-
9742
If(ADA_BENCHMARKS)
9843
message(STATUS "Ada benchmarks enabled.")
9944
add_subdirectory(benchmarks)

README.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,15 @@ Ada is a fast and spec-compliant URL parser written in C++.
99
Specification for URL parser can be found from the
1010
[WHATWG](https://url.spec.whatwg.org/#url-parsing) website.
1111

12+
The Ada library passes the full range of tests from the specification,
13+
across a wide range of platforms (e.g., Windows, Linux, macOS). It fully
14+
supports the relevant [Unicode Technical Standard](https://www.unicode.org/reports/tr46/#ToUnicode).
15+
1216
## Requirements
1317

1418
- A recent C++ compiler supporting C++17. We test GCC 9 or better, LLVM 10 or better and Microsoft Visual Studio 2022.
15-
- We use [ICU](https://icu.unicode.org) when it is available.
19+
20+
The project is otherwise self-contained and has no dependencies.
1621

1722
## Usage
1823

@@ -131,14 +136,10 @@ Ada uses cmake as a build system. It's recommended you to run the following comm
131136
132137
Windows users need additional flags to specify the build configuration, e.g. `--config Release`.
133138
134-
Project can also be built via docker using default docker file of repository with following commands.
139+
The project can also be built via docker, using the default docker file of the repository, with the following command.
135140
136141
`docker build -t ada-builder . && docker run --rm -it -v ${PWD}:/repo ada-builder`
137142
138-
### Installing ICU
139-
140-
For macOS, you may install it with [brew](https://brew.sh) using `brew install icu4c`. Linux users may install ICU according to their distribution: under Ubuntu, the command is `apt-get install -y libicu-dev`.
141-
142143
### Amalgamation
143144
144145
You may amalgamate all source files into only two files (`ada.h` and `ada.cpp`) by executing the Python 3 script `singleheader/amalgamate.py`. By default, the files are created in the `singleheader` directory.

include/ada/common_defs.h

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,12 @@
104104
ADA_DISABLE_GCC_WARNING(-Wconversion) \
105105
ADA_DISABLE_GCC_WARNING(-Wextra) \
106106
ADA_DISABLE_GCC_WARNING(-Wattributes) \
107-
ADA_DISABLE_GCC_WARNING(-Wimplicit - fallthrough) \
108-
ADA_DISABLE_GCC_WARNING(-Wnon - virtual - dtor) \
109-
ADA_DISABLE_GCC_WARNING(-Wreturn - type) \
107+
ADA_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \
108+
ADA_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \
109+
ADA_DISABLE_GCC_WARNING(-Wreturn-type) \
110110
ADA_DISABLE_GCC_WARNING(-Wshadow) \
111-
ADA_DISABLE_GCC_WARNING(-Wunused - parameter) \
112-
ADA_DISABLE_GCC_WARNING(-Wunused - variable)
111+
ADA_DISABLE_GCC_WARNING(-Wunused-parameter) \
112+
ADA_DISABLE_GCC_WARNING(-Wunused-variable)
113113
#define ADA_PRAGMA(P) _Pragma(#P)
114114
#define ADA_DISABLE_GCC_WARNING(WARNING) \
115115
ADA_PRAGMA(GCC diagnostic ignored #WARNING)
@@ -219,23 +219,6 @@ namespace ada {
219219

220220
#endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
221221

222-
#ifndef ADA_HAS_ICU
223-
#if __has_include(<unicode/uidna.h>)
224-
#define ADA_HAS_ICU 1
225-
#else
226-
#define ADA_HAS_ICU 0
227-
#endif // __has_include(<unicode/uidna.h>)
228-
#endif // ADA_HAS_ICU
229-
230-
#if ADA_HAS_ICU
231-
#include <unicode/utypes.h>
232-
#include <unicode/uidna.h>
233-
#include <unicode/utf8.h>
234-
#endif // ADA_HAS_ICU
235-
236-
#define ADA_WINDOWS_TO_ASCII_FALLBACK \
237-
0 // we never use anything but ICU. No fallback.
238-
239222
// Unless the programmer has already set ADA_DEVELOPMENT_CHECKS,
240223
// we want to set it under debug builds. We detect a debug build
241224
// under Visual Studio when the _DEBUG macro is set. Under the other

include/ada/unicode.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@ namespace ada::unicode {
5050
* script.
5151
*
5252
* The resulting strings should not exceed 255 octets according to RFC 1035
53-
* section 2.3.4. ICU checks for label size and domain size, but if we pass
54-
* "be_strict = false", these errors are ignored.
53+
* section 2.3.4. ICU checks for label size and domain size, but these errors
54+
* are ignored.
5555
*
5656
* @see https://url.spec.whatwg.org/#concept-domain-to-ascii
5757
*
5858
*/
5959
bool to_ascii(std::optional<std::string>& out, std::string_view plain,
60-
bool be_strict, size_t first_percent);
60+
size_t first_percent);
6161

6262
/**
6363
* Checks if the input has tab or newline characters.

include/ada_idna.h

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/* auto-generated on 2023-03-27 13:47:27 -0400. Do not edit! */
2+
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
3+
// filename=idna.h
4+
/* begin file include/idna.h */
5+
#ifndef ADA_IDNA_H
6+
#define ADA_IDNA_H
7+
8+
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
9+
// filename=ada/idna/unicode_transcoding.h
10+
/* begin file include/ada/idna/unicode_transcoding.h */
11+
#ifndef ADA_IDNA_UNICODE_TRANSCODING_H
12+
#define ADA_IDNA_UNICODE_TRANSCODING_H
13+
14+
#include <string>
15+
#include <string_view>
16+
17+
namespace ada::idna {
18+
19+
size_t utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output);
20+
21+
size_t utf8_length_from_utf32(const char32_t* buf, size_t len);
22+
23+
size_t utf32_length_from_utf8(const char* buf, size_t len);
24+
25+
size_t utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output);
26+
27+
} // namespace ada::idna
28+
29+
#endif // ADA_IDNA_UNICODE_TRANSCODING_H
30+
/* end file include/ada/idna/unicode_transcoding.h */
31+
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
32+
// filename=ada/idna/mapping.h
33+
/* begin file include/ada/idna/mapping.h */
34+
#ifndef ADA_IDNA_MAPPING_H
35+
#define ADA_IDNA_MAPPING_H
36+
37+
#include <string>
38+
#include <string_view>
39+
namespace ada::idna {
40+
41+
// If the input is ascii, then the mapping is just -> lower case.
42+
void ascii_map(char* input, size_t length);
43+
// check whether an ascii string needs mapping
44+
bool ascii_has_upper_case(char* input, size_t length);
45+
// Map the characters according to IDNA, returning the empty string on error.
46+
std::u32string map(std::u32string_view input);
47+
48+
} // namespace ada::idna
49+
50+
#endif
51+
/* end file include/ada/idna/mapping.h */
52+
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
53+
// filename=ada/idna/normalization.h
54+
/* begin file include/ada/idna/normalization.h */
55+
#ifndef ADA_IDNA_NORMALIZATION_H
56+
#define ADA_IDNA_NORMALIZATION_H
57+
58+
#include <string>
59+
#include <string_view>
60+
namespace ada::idna {
61+
62+
// Normalize the characters according to IDNA (Unicode Normalization Form C).
63+
void normalize(std::u32string& input);
64+
65+
} // namespace ada::idna
66+
#endif
67+
/* end file include/ada/idna/normalization.h */
68+
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
69+
// filename=ada/idna/punycode.h
70+
/* begin file include/ada/idna/punycode.h */
71+
#ifndef ADA_IDNA_PUNYCODE_H
72+
#define ADA_IDNA_PUNYCODE_H
73+
74+
#include <string>
75+
#include <string_view>
76+
namespace ada::idna {
77+
78+
bool punycode_to_utf32(std::string_view input, std::u32string& out);
79+
bool verify_punycode(std::string_view input);
80+
bool utf32_to_punycode(std::u32string_view input, std::string& out);
81+
82+
} // namespace ada::idna
83+
84+
#endif // ADA_IDNA_PUNYCODE_H
85+
/* end file include/ada/idna/punycode.h */
86+
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
87+
// filename=ada/idna/validity.h
88+
/* begin file include/ada/idna/validity.h */
89+
#ifndef ADA_IDNA_VALIDITY_H
90+
#define ADA_IDNA_VALIDITY_H
91+
92+
#include <string>
93+
#include <string_view>
94+
95+
namespace ada::idna {
96+
97+
/**
98+
* @see https://www.unicode.org/reports/tr46/#Validity_Criteria
99+
*/
100+
bool is_label_valid(const std::u32string_view label);
101+
102+
} // namespace ada::idna
103+
104+
#endif // ADA_IDNA_VALIDITY_H
105+
/* end file include/ada/idna/validity.h */
106+
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
107+
// filename=ada/idna/to_ascii.h
108+
/* begin file include/ada/idna/to_ascii.h */
109+
#ifndef ADA_IDNA_TO_ASCII_H
110+
#define ADA_IDNA_TO_ASCII_H
111+
112+
#include <string>
113+
#include <string_view>
114+
115+
namespace ada::idna {
116+
// Converts a domain (e.g., www.google.com) possibly containing international
117+
// characters to an ascii domain (with punycode). It will not do percent
118+
// decoding: percent decoding should be done prior to calling this function. We
119+
// do not remove tabs and spaces, they should have been removed prior to calling
120+
// this function. We also do not trim control characters. We also assume that
121+
// the input is not empty. We return "" on error. For now.
122+
std::string to_ascii(std::string_view ut8_string);
123+
124+
bool constexpr begins_with(std::u32string_view view,
125+
std::u32string_view prefix);
126+
bool constexpr begins_with(std::string_view view, std::string_view prefix);
127+
128+
bool constexpr is_ascii(std::u32string_view view);
129+
bool constexpr is_ascii(std::string_view view);
130+
131+
static std::string from_ascii_to_ascii(std::string_view ut8_string);
132+
133+
} // namespace ada::idna
134+
135+
#endif // ADA_IDNA_TO_ASCII_H
136+
/* end file include/ada/idna/to_ascii.h */
137+
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
138+
// filename=ada/idna/to_unicode.h
139+
/* begin file include/ada/idna/to_unicode.h */
140+
141+
#ifndef ADA_IDNA_TO_UNICODE_H
142+
#define ADA_IDNA_TO_UNICODE_H
143+
144+
namespace ada::idna {
145+
std::string to_unicode(std::string_view input);
146+
} // namespace ada::idna
147+
148+
#endif // ADA_IDNA_TO_UNICODE_H
149+
/* end file include/ada/idna/to_unicode.h */
150+
151+
#endif
152+
/* end file include/idna.h */

singleheader/CMakeLists.txt

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,6 @@ if (Python3_Interpreter_FOUND)
4747
add_library(ada-singleheader-source INTERFACE)
4848
target_sources(ada-singleheader-source INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/ada.cpp>)
4949
target_link_libraries(ada-singleheader-source INTERFACE ada-singleheader-include-source)
50-
if(MSVC)
51-
target_link_libraries(ada-singleheader-include-source INTERFACE Normaliz)
52-
else()
53-
if(ADA_USE_ICU)
54-
target_link_libraries(ada-singleheader-include-source INTERFACE ICU::uc ICU::i18n)
55-
else(ADA_USE_ICU)
56-
target_compile_definitions(ada-singleheader-include-source INTERFACE ADA_HAS_ICU=0)
57-
endif(ADA_USE_ICU)
58-
endif()
5950
if (BUILD_TESTING)
6051
add_executable(demo $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/demo.cpp>)
6152
target_link_libraries(demo ada-singleheader-include-source)

0 commit comments

Comments
 (0)