Skip to content

Commit f6a56a1

Browse files
authored
Bidi Data Adapter (#1784)
Add Bidi Data Adapter
1 parent f5e2eec commit f6a56a1

File tree

4 files changed

+171
-2
lines changed

4 files changed

+171
-2
lines changed

Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/properties/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ icu_provider = { version = "0.5", path = "../../provider/core", features = ["mac
3838
icu_uniset = { version = "0.4.1", path = "../../utils/uniset"}
3939
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
4040
zerovec = { version = "0.6", path = "../../utils/zerovec", features = ["derive"] }
41+
unicode-bidi = { version = "0.3.8", optional = true , default-features = false }
4142

4243
[dev-dependencies]
4344
icu = { path = "../icu", default-features = false }
@@ -52,3 +53,4 @@ std = ["icu_provider/std"]
5253
default = []
5354
serde = ["dep:serde", "zerovec/serde", "icu_uniset/serde", "icu_codepointtrie/serde"]
5455
datagen = ["serde", "zerovec/serde_serialize", "icu_uniset/serde_serialize", "icu_codepointtrie/serde_serialize"]
56+
bidi = [ "dep:unicode-bidi" ]

components/properties/src/bidi.rs

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
// This file is part of ICU4X. For terms of use, please see the file
2+
// called LICENSE at the top level of the ICU4X source tree
3+
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4+
5+
//! This module exposes tooling for running the [Unicode Bidirectional Algorithm](https://unicode.org/reports/tr9/) using ICU4X data.
6+
//!
7+
//! [`BidiClassAdapter`] enables ICU4X to provide data to the [`unicode-bidi`](https://docs.rs/unicode-bidi) crate.
8+
//!
9+
//! # Examples
10+
//!
11+
//! ```
12+
//! use icu_properties::bidi::BidiClassAdapter;
13+
//! use icu_properties::{maps, BidiClass};
14+
//! use icu_codepointtrie::CodePointTrie;
15+
//! use unicode_bidi::BidiClass as DataSourceBidiClass;
16+
//! use unicode_bidi::BidiDataSource;
17+
//! use unicode_bidi::BidiInfo;
18+
//! // This example text is defined using `concat!` because some browsers
19+
//! // and text editors have trouble displaying bidi strings.
20+
//! let text = concat![
21+
//! "א",
22+
//! "ב",
23+
//! "ג",
24+
//! "a",
25+
//! "b",
26+
//! "c",
27+
//! ];
28+
//!
29+
//! // Create an adapter to provide the data to `BidiInfo`.
30+
//! let provider = icu_testdata::get_provider();
31+
//!
32+
//! let payload =
33+
//! maps::get_bidi_class(&provider)
34+
//! .expect("The data should be valid");
35+
//! let data_struct = payload.get();
36+
//! let bc = &data_struct.code_point_trie;
37+
//!
38+
//! let adapter = BidiClassAdapter::new(bc);
39+
//! // Resolve embedding levels within the text. Pass `None` to detect the
40+
//! // paragraph level automatically.
41+
//!
42+
//! let bidi_info = BidiInfo::new_with_data_source(&adapter, &text, None);
43+
//!
44+
//! // This paragraph has embedding level 1 because its first strong character is RTL.
45+
//! assert_eq!(bidi_info.paragraphs.len(), 1);
46+
//! let para = &bidi_info.paragraphs[0];
47+
//! assert_eq!(para.level.number(), 1);
48+
//! assert_eq!(para.level.is_rtl(), true);
49+
//!
50+
//! // Re-ordering is done after wrapping each paragraph into a sequence of
51+
//! // lines. For this example, I'll just use a single line that spans the
52+
//! // entire paragraph.
53+
//! let line = para.range.clone();
54+
//!
55+
//! let display = bidi_info.reorder_line(para, line);
56+
//! assert_eq!(display, concat![
57+
//! "a",
58+
//! "b",
59+
//! "c",
60+
//! "ג",
61+
//! "ב",
62+
//! "א",
63+
//! ]);
64+
//! ```
65+
66+
use crate::props::BidiClass;
67+
use icu_codepointtrie::CodePointTrie;
68+
use unicode_bidi::data_source::BidiDataSource;
69+
use unicode_bidi::BidiClass as DataSourceBidiClass;
70+
71+
/// An adapter that converts ICU4X `BidiClass` values into `unicode_bidi::BidiClass` values.
72+
///
/// It borrows a [`CodePointTrie`] of `BidiClass` values and implements
/// [`BidiDataSource`], so that trie can serve as the data source for `unicode_bidi`.
///
73+
/// # Examples
74+
///
75+
/// ```
76+
/// use icu_properties::bidi::BidiClassAdapter;
77+
/// use icu_properties::{maps, BidiClass};
78+
/// use icu_codepointtrie::CodePointTrie;
79+
/// use unicode_bidi::BidiClass as DataSourceBidiClass;
80+
/// use unicode_bidi::BidiDataSource;
81+
///
82+
/// let provider = icu_testdata::get_provider();
83+
///
84+
/// let payload =
85+
/// maps::get_bidi_class(&provider)
86+
/// .expect("The data should be valid");
87+
/// let data_struct = payload.get();
88+
/// let bc = &data_struct.code_point_trie;
89+
///
90+
/// let adapter = BidiClassAdapter::new(bc);
91+
/// assert_eq!(adapter.bidi_class('a'), DataSourceBidiClass::L);
92+
/// assert_eq!(adapter.bidi_class('ع'), DataSourceBidiClass::AL);
93+
/// ```
94+
pub struct BidiClassAdapter<'a> {
95+
/// Borrowed trie that is queried by code point to obtain the ICU4X `BidiClass`.
bidi_trie: &'a CodePointTrie<'a, BidiClass>,
96+
}
97+
98+
impl<'a> BidiClassAdapter<'a> {
99+
/// Creates a new instance of `BidiClassAdapter` that borrows `bidi_trie`.
///
/// The adapter only stores the reference, so the trie must outlive the adapter (`'a`).
100+
pub fn new(bidi_trie: &'a CodePointTrie<'a, BidiClass>) -> BidiClassAdapter<'a> {
101+
BidiClassAdapter { bidi_trie }
102+
}
103+
}
104+
105+
impl<'a> BidiDataSource for BidiClassAdapter<'a> {
106+
/// Returns the [`DataSourceBidiClass`] of the Unicode character `c`.
107+
///
/// The character is looked up in the wrapped [`CodePointTrie`], and the ICU4X
/// `BidiClass` found there is translated to the corresponding `unicode_bidi` class.
/// A value that matches none of the listed classes is reported as
/// `DataSourceBidiClass::ON`.
///
108+
/// # Examples
109+
///
110+
/// ```
111+
/// use icu_properties::bidi::BidiClassAdapter;
112+
/// use icu_properties::{maps, BidiClass};
113+
/// use icu_codepointtrie::CodePointTrie;
114+
/// use unicode_bidi::BidiClass as DataSourceBidiClass;
115+
/// use unicode_bidi::BidiDataSource;
116+
///
117+
/// let provider = icu_testdata::get_provider();
118+
///
119+
/// let payload =
120+
/// maps::get_bidi_class(&provider)
121+
/// .expect("The data should be valid");
122+
/// let data_struct = payload.get();
123+
/// let bc = &data_struct.code_point_trie;
124+
///
125+
/// let adapter = BidiClassAdapter::new(bc);
126+
/// assert_eq!(adapter.bidi_class('a'), DataSourceBidiClass::L);
127+
/// ```
128+
///
129+
/// [`CodePointTrie`]: icu_codepointtrie::CodePointTrie
130+
fn bidi_class(&self, c: char) -> DataSourceBidiClass {
131+
// The trie is queried with the character's value cast to `u32`.
let bidi_class = self.bidi_trie.get(c as u32);
132+
// One arm per ICU4X class, each mapped to the `unicode_bidi` class it stands for.
match bidi_class {
133+
BidiClass::LeftToRight => DataSourceBidiClass::L,
134+
BidiClass::RightToLeft => DataSourceBidiClass::R,
135+
BidiClass::EuropeanNumber => DataSourceBidiClass::EN,
136+
BidiClass::EuropeanSeparator => DataSourceBidiClass::ES,
137+
BidiClass::EuropeanTerminator => DataSourceBidiClass::ET,
138+
BidiClass::ArabicNumber => DataSourceBidiClass::AN,
139+
BidiClass::CommonSeparator => DataSourceBidiClass::CS,
140+
BidiClass::ParagraphSeparator => DataSourceBidiClass::B,
141+
BidiClass::SegmentSeparator => DataSourceBidiClass::S,
142+
BidiClass::WhiteSpace => DataSourceBidiClass::WS,
143+
BidiClass::OtherNeutral => DataSourceBidiClass::ON,
144+
BidiClass::LeftToRightEmbedding => DataSourceBidiClass::LRE,
145+
BidiClass::LeftToRightOverride => DataSourceBidiClass::LRO,
146+
BidiClass::ArabicLetter => DataSourceBidiClass::AL,
147+
BidiClass::RightToLeftEmbedding => DataSourceBidiClass::RLE,
148+
BidiClass::RightToLeftOverride => DataSourceBidiClass::RLO,
149+
BidiClass::PopDirectionalFormat => DataSourceBidiClass::PDF,
150+
BidiClass::NonspacingMark => DataSourceBidiClass::NSM,
151+
BidiClass::BoundaryNeutral => DataSourceBidiClass::BN,
152+
BidiClass::FirstStrongIsolate => DataSourceBidiClass::FSI,
153+
BidiClass::LeftToRightIsolate => DataSourceBidiClass::LRI,
154+
BidiClass::RightToLeftIsolate => DataSourceBidiClass::RLI,
155+
BidiClass::PopDirectionalIsolate => DataSourceBidiClass::PDI,
156+
_ =>
157+
// This must not happen. If it does, fall back to `ON` (the class used for
// `OtherNeutral` above) instead of panicking.
158+
{
159+
DataSourceBidiClass::ON
160+
}
161+
}
162+
}
163+
}

components/properties/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@
8282
)
8383
)]
8484

85+
#[cfg(feature = "bidi")]
86+
pub mod bidi;
87+
8588
mod error;
8689
pub mod maps;
8790
mod props;

0 commit comments

Comments
 (0)