Skip to content

ICU-free ada #216

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint_and_format_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
ubuntu-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v3
- name: Install ICU
run: sudo apt-get install -y libicu-dev pkg-config

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ubuntu_install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- name: Setup Ninja
run: sudo apt-get install ninja-build
- name: Prepare
run: cmake -G Ninja -DCMAKE_INSTALL_PREFIX:PATH=destination -B build
run: cmake -G Ninja -DCMAKE_INSTALL_PREFIX:PATH=destination -B build
- name: Build
run: cmake --build build -j=2
- name: Install
Expand Down
55 changes: 0 additions & 55 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ add_subdirectory(src)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake)

option(ADA_BENCHMARKS "Build benchmarks" OFF)
option(ADA_USE_ICU "Use ICU if available" ON)

if(BUILD_TESTING OR ADA_BENCHMARKS)
include(${PROJECT_SOURCE_DIR}/cmake/import.cmake)
Expand All @@ -40,60 +39,6 @@ else()
endif()
endif(BUILD_TESTING)

if(ADA_USE_ICU)

if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
message(STATUS "Apple system detected.")
# People who run macOS often use brew.
if(EXISTS /opt/homebrew/opt/icu4c)
message(STATUS "icu is provided by homebrew at /opt/homebrew/opt/icu4c.")
## This is a bit awkward, but it is a lot better than asking the
## user to figure that out.
list(APPEND CMAKE_PREFIX_PATH "/opt/homebrew/opt/icu4c/include")
list(APPEND CMAKE_LIBRARY_PATH "/opt/homebrew/opt/icu4c/lib")
elseif(EXISTS /usr/local/opt/icu4c)
message(STATUS "icu is provided by homebrew at /usr/local/opt/icu4c.")
list(APPEND CMAKE_PREFIX_PATH "/usr/local/opt/icu4c/include")
list(APPEND CMAKE_LIBRARY_PATH "/usr/local/opt/icu4c/lib")
endif()
endif()

find_package(ICU COMPONENTS uc i18n)
### If the user does not have ICU, let us help them with instructions:
if(ICU_FOUND)
target_link_libraries(ada PRIVATE ICU::uc ICU::i18n)
else(ICU_FOUND)
# Under Windows, we have a backup.
if(MSVC) # could be WIN32 but there are other systems under Windows.
if(MSVC_VERSION GREATER_EQUAL 1920)
message(STATUS "You have a recent Visual Studio (2019 or better), congratulations!")
else()
message(SEND_ERROR "If you use Visual Studio, then Visual Studio 2019 or better is required.")
return()
endif()
option(BUILD_SHARED_LIBS "Build using shared libraries" OFF)
target_link_libraries(ada PRIVATE Normaliz)
else(MSVC)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
if(EXISTS /opt/homebrew)
message(STATUS "Under macOS, you may install ICU with brew, using 'brew install icu4c'.")
else()
message(STATUS "Under macOS, you should install brew (see https://brew.sh) and then icu4c ('brew install icu4c').")
endif()
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
message(STATUS "Under Linux, you may be able to install ICU with a command such as 'apt-get install libicu-dev'." )
endif()
message(SEND_ERROR "ICU with components uc and i18n/in is required for building ada.")
return()
endif(MSVC)
endif(ICU_FOUND)

else(ADA_USE_ICU)
target_compile_definitions(ada PUBLIC ADA_HAS_ICU=0)

### Going forward, we have ICU for sure, except under Windows.
endif(ADA_USE_ICU)

If(ADA_BENCHMARKS)
message(STATUS "Ada benchmarks enabled.")
add_subdirectory(benchmarks)
Expand Down
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@ Ada is a fast and spec-compliant URL parser written in C++.
Specification for URL parser can be found from the
[WHATWG](https://url.spec.whatwg.org/#url-parsing) website.

The Ada library passes the full range of tests from the specification,
across a wide range of platforms (e.g., Windows, Linux, macOS). It fully
supports the relevant [Unicode Technical Standard](https://www.unicode.org/reports/tr46/#ToUnicode).

## Requirements

- A recent C++ compiler supporting C++17. We test GCC 9 or better, LLVM 10 or better and Microsoft Visual Studio 2022.
- We use [ICU](https://icu.unicode.org) when it is available.

The project is otherwise self-contained and has no dependencies.

## Usage

Expand Down Expand Up @@ -131,14 +136,10 @@ Ada uses cmake as a build system. It's recommended you to run the following comm

Windows users need additional flags to specify the build configuration, e.g. `--config Release`.

Project can also be built via docker using default docker file of repository with following commands.
The project can also be built via Docker using the repository's default Dockerfile with the following command.

`docker build -t ada-builder . && docker run --rm -it -v ${PWD}:/repo ada-builder`

### Installing ICU

For macOS, you may install it with [brew](https://brew.sh) using `brew install icu4c`. Linux users may install ICU according to their distribution: under Ubuntu, the command is `apt-get install -y libicu-dev`.

### Amalgamation

You may amalgamate all source files into only two files (`ada.h` and `ada.cpp`) by executing the Python 3 script `singleheader/amalgamate.py`. By default, the files are created in the `singleheader` directory.
27 changes: 5 additions & 22 deletions include/ada/common_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,12 @@
ADA_DISABLE_GCC_WARNING(-Wconversion) \
ADA_DISABLE_GCC_WARNING(-Wextra) \
ADA_DISABLE_GCC_WARNING(-Wattributes) \
ADA_DISABLE_GCC_WARNING(-Wimplicit - fallthrough) \
ADA_DISABLE_GCC_WARNING(-Wnon - virtual - dtor) \
ADA_DISABLE_GCC_WARNING(-Wreturn - type) \
ADA_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \
ADA_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \
ADA_DISABLE_GCC_WARNING(-Wreturn-type) \
ADA_DISABLE_GCC_WARNING(-Wshadow) \
ADA_DISABLE_GCC_WARNING(-Wunused - parameter) \
ADA_DISABLE_GCC_WARNING(-Wunused - variable)
ADA_DISABLE_GCC_WARNING(-Wunused-parameter) \
ADA_DISABLE_GCC_WARNING(-Wunused-variable)
#define ADA_PRAGMA(P) _Pragma(#P)
#define ADA_DISABLE_GCC_WARNING(WARNING) \
ADA_PRAGMA(GCC diagnostic ignored #WARNING)
Expand Down Expand Up @@ -219,23 +219,6 @@ namespace ada {

#endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__

#ifndef ADA_HAS_ICU
#if __has_include(<unicode/uidna.h>)
#define ADA_HAS_ICU 1
#else
#define ADA_HAS_ICU 0
#endif // __has_include(<unicode/uidna.h>)
#endif // ADA_HAS_ICU

#if ADA_HAS_ICU
#include <unicode/utypes.h>
#include <unicode/uidna.h>
#include <unicode/utf8.h>
#endif // ADA_HAS_ICU

#define ADA_WINDOWS_TO_ASCII_FALLBACK \
0 // we never use anything but ICU. No fallback.

// Unless the programmer has already set ADA_DEVELOPMENT_CHECKS,
// we want to set it under debug builds. We detect a debug build
// under Visual Studio when the _DEBUG macro is set. Under the other
Expand Down
6 changes: 3 additions & 3 deletions include/ada/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ namespace ada::unicode {
* script.
*
* The resulting strings should not exceed 255 octets according to RFC 1035
* section 2.3.4. ICU checks for label size and domain size, but if we pass
* "be_strict = false", these errors are ignored.
* section 2.3.4. ICU checks for label size and domain size, but these errors
* are ignored.
*
* @see https://url.spec.whatwg.org/#concept-domain-to-ascii
*
*/
bool to_ascii(std::optional<std::string>& out, std::string_view plain,
bool be_strict, size_t first_percent);
size_t first_percent);

/**
* Checks if the input has tab or newline characters.
Expand Down
152 changes: 152 additions & 0 deletions include/ada_idna.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/* auto-generated on 2023-03-27 13:47:27 -0400. Do not edit! */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
// filename=idna.h
/* begin file include/idna.h */
#ifndef ADA_IDNA_H
#define ADA_IDNA_H

// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
// filename=ada/idna/unicode_transcoding.h
/* begin file include/ada/idna/unicode_transcoding.h */
#ifndef ADA_IDNA_UNICODE_TRANSCODING_H
#define ADA_IDNA_UNICODE_TRANSCODING_H

#include <string>
#include <string_view>

// Declarations of the UTF-8 <-> UTF-32 transcoding helpers. Only the
// signatures are visible in this header; the notes below are inferred from the
// names and parameter types and should be confirmed against the implementation.
namespace ada::idna {

// Transcodes the `len` bytes of UTF-8 at `buf` into `utf32_output`.
// NOTE(review): presumably returns the number of char32_t values written and
// relies on the caller to size the output buffer (see utf32_length_from_utf8)
// - confirm, including what is returned on invalid input.
size_t utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output);

// NOTE(review): presumably the number of UTF-8 bytes needed to encode the
// `len` UTF-32 code points at `buf` - confirm.
size_t utf8_length_from_utf32(const char32_t* buf, size_t len);

// NOTE(review): presumably the number of UTF-32 code points encoded by the
// `len` UTF-8 bytes at `buf` - confirm.
size_t utf32_length_from_utf8(const char* buf, size_t len);

// Transcodes the `len` UTF-32 code points at `buf` into `utf8_output`.
// NOTE(review): presumably returns the number of bytes written and relies on
// the caller to size the output buffer (see utf8_length_from_utf32) - confirm.
size_t utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output);

} // namespace ada::idna

#endif // ADA_IDNA_UNICODE_TRANSCODING_H
/* end file include/ada/idna/unicode_transcoding.h */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
// filename=ada/idna/mapping.h
/* begin file include/ada/idna/mapping.h */
#ifndef ADA_IDNA_MAPPING_H
#define ADA_IDNA_MAPPING_H

#include <string>
#include <string_view>
// Declarations of the IDNA mapping step. The ASCII helpers operate on a raw
// character buffer of `length` chars; `map` operates on UTF-32 input.
namespace ada::idna {

// If the input is ascii, then the mapping is just -> lower case.
// The buffer is taken as non-const `char*`.
// NOTE(review): presumably lower-cased in place - confirm.
void ascii_map(char* input, size_t length);
// check whether an ascii string needs mapping
// NOTE(review): takes a non-const `char*` although the name suggests a
// read-only query - confirm whether `const char*` was intended.
bool ascii_has_upper_case(char* input, size_t length);
// Map the characters according to IDNA, returning the empty string on error.
std::u32string map(std::u32string_view input);

} // namespace ada::idna

#endif
/* end file include/ada/idna/mapping.h */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
// filename=ada/idna/normalization.h
/* begin file include/ada/idna/normalization.h */
#ifndef ADA_IDNA_NORMALIZATION_H
#define ADA_IDNA_NORMALIZATION_H

#include <string>
#include <string_view>
// Declaration of the IDNA normalization step.
namespace ada::idna {

// Normalize the characters according to IDNA (Unicode Normalization Form C).
// `input` is taken by non-const reference and the function returns void, so
// the result is delivered through `input` itself.
void normalize(std::u32string& input);

} // namespace ada::idna
#endif
/* end file include/ada/idna/normalization.h */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
// filename=ada/idna/punycode.h
/* begin file include/ada/idna/punycode.h */
#ifndef ADA_IDNA_PUNYCODE_H
#define ADA_IDNA_PUNYCODE_H

#include <string>
#include <string_view>
// Declarations of the punycode helpers. Only the signatures are visible here;
// the behavioral notes below are inferred from the names and must be confirmed
// against the implementation.
namespace ada::idna {

// NOTE(review): presumably decodes the punycode text `input` into UTF-32 code
// points stored in `out`, returning false on malformed input - confirm, and
// confirm whether `out` is appended to or overwritten.
bool punycode_to_utf32(std::string_view input, std::u32string& out);
// NOTE(review): presumably reports whether `input` is well-formed punycode
// without producing the decoded output - confirm.
bool verify_punycode(std::string_view input);
// NOTE(review): presumably encodes the UTF-32 `input` as punycode into `out`,
// returning false on failure - confirm, and confirm whether `out` is appended
// to or overwritten.
bool utf32_to_punycode(std::u32string_view input, std::string& out);

} // namespace ada::idna

#endif // ADA_IDNA_PUNYCODE_H
/* end file include/ada/idna/punycode.h */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
// filename=ada/idna/validity.h
/* begin file include/ada/idna/validity.h */
#ifndef ADA_IDNA_VALIDITY_H
#define ADA_IDNA_VALIDITY_H

#include <string>
#include <string_view>

// Declaration of the label validity check.
namespace ada::idna {

/**
 * Reports whether a single UTF-32 label is valid.
 *
 * NOTE(review): presumably implements the validity criteria linked below, and
 * presumably expects one label (no dots) rather than a whole domain - confirm
 * against the implementation.
 *
 * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
 */
bool is_label_valid(const std::u32string_view label);

} // namespace ada::idna

#endif // ADA_IDNA_VALIDITY_H
/* end file include/ada/idna/validity.h */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
// filename=ada/idna/to_ascii.h
/* begin file include/ada/idna/to_ascii.h */
#ifndef ADA_IDNA_TO_ASCII_H
#define ADA_IDNA_TO_ASCII_H

#include <string>
#include <string_view>

// Declarations of the domain-to-ASCII conversion and its small helpers.
namespace ada::idna {
// Converts a domain (e.g., www.google.com) possibly containing international
// characters to an ascii domain (with punycode). It will not do percent
// decoding: percent decoding should be done prior to calling this function. We
// do not remove tabs and spaces, they should have been removed prior to calling
// this function. We also do not trim control characters. We also assume that
// the input is not empty. We return "" on error. For now.
// (The parameter name `ut8_string` refers to the UTF-8 input string.)
std::string to_ascii(std::string_view ut8_string);

// Prefix tests, overloaded for UTF-32 and for narrow string views.
// NOTE(review): these are declared constexpr but no definition is visible in
// this header; a constexpr function can only be used in a translation unit
// that also sees its definition - confirm these are only used where defined.
bool constexpr begins_with(std::u32string_view view,
std::u32string_view prefix);
bool constexpr begins_with(std::string_view view, std::string_view prefix);

// ASCII tests, overloaded for UTF-32 and for narrow string views.
// NOTE(review): same constexpr-without-visible-definition caveat as above.
bool constexpr is_ascii(std::u32string_view view);
bool constexpr is_ascii(std::string_view view);

// NOTE(review): declared `static` in a header, so every translation unit that
// includes this file gets its own internal-linkage declaration; presumably
// only meant for the implementation file - confirm. Purpose inferred from the
// name only: the path taken when the input is already ASCII - confirm.
static std::string from_ascii_to_ascii(std::string_view ut8_string);

} // namespace ada::idna

#endif // ADA_IDNA_TO_ASCII_H
/* end file include/ada/idna/to_ascii.h */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/idna/include,
// filename=ada/idna/to_unicode.h
/* begin file include/ada/idna/to_unicode.h */

#ifndef ADA_IDNA_TO_UNICODE_H
#define ADA_IDNA_TO_UNICODE_H

// Include what we use: the declaration below names std::string and
// std::string_view, so this section must not rely on an earlier section of the
// amalgamated header having pulled them in.
#include <string>
#include <string_view>

namespace ada::idna {
// Counterpart of to_ascii: takes a domain as a string view and returns a
// std::string by value.
// NOTE(review): presumably decodes punycode ("xn--") labels of `input` back to
// their UTF-8 form - confirm against the implementation, including what is
// returned on invalid input.
std::string to_unicode(std::string_view input);
} // namespace ada::idna

#endif // ADA_IDNA_TO_UNICODE_H
/* end file include/ada/idna/to_unicode.h */

#endif
/* end file include/idna.h */
9 changes: 0 additions & 9 deletions singleheader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,6 @@ if (Python3_Interpreter_FOUND)
add_library(ada-singleheader-source INTERFACE)
target_sources(ada-singleheader-source INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/ada.cpp>)
target_link_libraries(ada-singleheader-source INTERFACE ada-singleheader-include-source)
if(MSVC)
target_link_libraries(ada-singleheader-include-source INTERFACE Normaliz)
else()
if(ADA_USE_ICU)
target_link_libraries(ada-singleheader-include-source INTERFACE ICU::uc ICU::i18n)
else(ADA_USE_ICU)
target_compile_definitions(ada-singleheader-include-source INTERFACE ADA_HAS_ICU=0)
endif(ADA_USE_ICU)
endif()
if (BUILD_TESTING)
add_executable(demo $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/demo.cpp>)
target_link_libraries(demo ada-singleheader-include-source)
Expand Down
Loading