Skip to content

Commit d6ea7c1

Browse files
quake and huacnlee authored
perf: Use Cow to avoid creating a String on format. (#248)
This pull request improves the performance and memory usage of the formatting functions by changing their return types to `Cow<str>`. This lets the functions return either borrowed or owned data, avoiding unnecessary allocations when the input is not modified.

Most benchmarks improve by roughly 10% to 20% on my local PC:

```
format_json             time:   [171.73 µs 172.00 µs 172.28 µs]
                        change: [-17.792% -17.409% -17.049%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 4 outliers among 100 measurements (4.00%)
  4 (4.00%) high mild

format_javascript       time:   [360.43 µs 360.88 µs 361.32 µs]
                        change: [-15.179% -13.906% -12.966%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
  3 (3.00%) high severe

format_json_2k          time:   [21.865 ms 21.882 ms 21.900 ms]
                        change: [-15.133% -14.999% -14.869%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
  2 (2.00%) high mild
  1 (1.00%) high severe

format_jupyter          time:   [275.67 µs 276.03 µs 276.38 µs]
                        change: [-7.5263% -7.2771% -7.0001%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
  4 (4.00%) high mild
  3 (3.00%) high severe

format_markdown         time:   [2.2873 ms 2.2896 ms 2.2923 ms]
                        change: [-14.583% -14.448% -14.325%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
  1 (1.00%) high mild
  2 (2.00%) high severe

lint_markdown           time:   [2.3867 ms 2.3939 ms 2.4021 ms]
                        change: [-15.337% -15.092% -14.799%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 10 outliers among 100 measurements (10.00%)
  5 (5.00%) high mild
  5 (5.00%) high severe

lint_json               time:   [180.12 µs 180.38 µs 180.64 µs]
                        change: [-19.382% -18.912% -18.449%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  3 (3.00%) high mild
  3 (3.00%) high severe

lint_html               time:   [694.02 µs 694.96 µs 695.95 µs]
                        change: [-15.000% -14.666% -14.362%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
  4 (4.00%) high mild
  1 (1.00%) high severe

lint_javascript         time:   [367.04 µs 367.55 µs 368.05 µs]
                        change: [-27.115% -23.420% -20.369%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
```

---------

Co-authored-by: Jason Lee <huacnlee@gmail.com>
1 parent d28cb09 commit d6ea7c1

File tree

8 files changed

+212
-139
lines changed

8 files changed

+212
-139
lines changed

autocorrect/src/format.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use crate::{
2323
/// // => "既に、世界中の数百という企業が Rust を採用し、高速で低リソースのクロスプラットフォームソリューションを実現しています。"
2424
/// ```
2525
pub fn format(text: &str) -> String {
26-
format_or_lint(text, false).out
26+
format_or_lint(text, false).out.into_owned()
2727
}
2828

2929
/// Format a html content.

autocorrect/src/rule/fullwidth.rs

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// autocorrect: false
22
use regex::Regex;
3-
use std::collections::HashMap;
3+
use std::{borrow::Cow, collections::HashMap};
44

55
const SPCIAL_PUNCTUATIONS: &str = "[.:!]([ ]*)";
66
const NORMAL_PUNCTUATIONS: &str = "[,?]([ ]*)";
@@ -31,24 +31,28 @@ lazy_static! {
3131
}
3232

3333
// fullwidth correct punctuations near the CJK chars
34-
pub fn format(text: &str) -> String {
35-
let out = PUNCTUATION_WITH_LEFT_CJK_RE.replace_all(text, |cap: &regex::Captures| {
36-
fullwidth_replace_part(&cap[0])
37-
});
34+
pub fn format(text: &str) -> Cow<str> {
35+
let patterns = [
36+
&*PUNCTUATION_WITH_LEFT_CJK_RE,
37+
&*PUNCTUATION_WITH_RIGHT_CJK_RE,
38+
&*PUNCTUATION_WITH_SPEICAL_CJK_RE,
39+
&*PUNCTUATION_WITH_SPEICAL_LAST_CJK_RE,
40+
];
3841

39-
let out = PUNCTUATION_WITH_RIGHT_CJK_RE.replace_all(&out, |cap: &regex::Captures| {
40-
fullwidth_replace_part(&cap[0])
41-
});
42-
43-
let out = PUNCTUATION_WITH_SPEICAL_CJK_RE.replace_all(&out, |cap: &regex::Captures| {
44-
fullwidth_replace_part(&cap[0])
45-
});
46-
47-
let out = PUNCTUATION_WITH_SPEICAL_LAST_CJK_RE.replace_all(&out, |cap: &regex::Captures| {
48-
fullwidth_replace_part(&cap[0])
49-
});
42+
let mut result = Cow::Borrowed(text);
43+
for pattern in &patterns {
44+
if let Cow::Owned(new_text) = pattern.replace_all(&result, |cap: &regex::Captures| {
45+
fullwidth_replace_part(&cap[0])
46+
}) {
47+
result = Cow::Owned(new_text);
48+
}
49+
}
5050

51-
out.to_string()
51+
if let Cow::Owned(new_text) = result {
52+
Cow::Owned(new_text)
53+
} else {
54+
Cow::Borrowed(text)
55+
}
5256
}
5357

5458
fn fullwidth_replace_part(part: &str) -> String {

autocorrect/src/rule/halfwidth.rs

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// autocorrect: false
22
use regex::Regex;
3-
use std::collections::HashMap;
3+
use std::{borrow::Cow, collections::HashMap};
44

55
use super::CJK_RE;
66

@@ -108,37 +108,59 @@ impl CharMatching for char {
108108
}
109109
}
110110

111-
pub fn format_punctuation(text: &str) -> String {
111+
pub fn format_punctuation(text: &str) -> Cow<str> {
112112
// Get first non space char as quote
113113
let wrap_quote = text.chars().find(|c| !c.is_whitespace()).unwrap_or(' ');
114114

115-
text.split_inclusive('\n')
116-
.map(|line| format_line(line, wrap_quote))
117-
.collect()
115+
let mut changed = false;
116+
let lines: Vec<_> = text
117+
.split_inclusive('\n')
118+
.map(|line| match format_line(line, wrap_quote) {
119+
Cow::Borrowed(s) => Cow::Borrowed(s),
120+
Cow::Owned(s) => {
121+
changed = true;
122+
Cow::Owned(s)
123+
}
124+
})
125+
.collect();
126+
127+
if changed {
128+
Cow::Owned(lines.into_iter().collect::<String>())
129+
} else {
130+
Cow::Borrowed(text)
131+
}
118132
}
119133

120134
/// Normalize chars to use general half width in Chinese contents.
121-
pub fn format_word(text: &str) -> String {
135+
pub fn format_word(text: &str) -> Cow<str> {
136+
let mut changed = false;
122137
let out = text
123138
.chars()
124139
.map(|c| match c {
125140
// Unicode Fullwidth ASCII variants (Only numbers and alphabetics)
126141
// 0 .. 9 | A .. Z | a .. z
127142
// https://www.unicode.org/charts/nameslist/n_FF00.html
128143
'\u{FF10}'..='\u{FF19}' | '\u{FF21}'..='\u{FF3A}' | '\u{FF41}'..='\u{FF5A}' => {
144+
changed = true;
129145
// checked char is in range of fullwidth number and alphabetic
130146
unsafe { char::from_u32_unchecked(c as u32 - 0xFEE0) }
131147
}
132148
// Ideographic Space:
133149
// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
134-
'\u{3000}' => ' ',
150+
'\u{3000}' => {
151+
changed = true;
152+
' '
153+
}
135154
_ => c,
136155
})
137156
.collect::<String>();
138157

139-
// Fix 12:00 -> 12:00
140-
let out = HALF_TIME_RE.replace_all(&out, |cap: &regex::Captures| cap[0].replace(':', ":"));
141-
out.into_owned()
158+
if changed {
159+
let out = HALF_TIME_RE.replace_all(&out, |cap: &regex::Captures| cap[0].replace(':', ":"));
160+
Cow::Owned(out.into_owned())
161+
} else {
162+
HALF_TIME_RE.replace_all(text, |cap: &regex::Captures| cap[0].replace(':', ":"))
163+
}
142164
}
143165

144166
fn is_may_only_english(text: &str) -> bool {
@@ -166,12 +188,13 @@ fn is_may_only_english(text: &str) -> bool {
166188
false
167189
}
168190

169-
fn format_line(text: &str, wrap_quote: char) -> String {
191+
fn format_line(text: &str, wrap_quote: char) -> Cow<str> {
170192
if !is_may_only_english(text) {
171-
return String::from(text);
193+
return Cow::Borrowed(text);
172194
}
173195

174196
let mut out = String::with_capacity(text.len());
197+
let mut changed = false;
175198

176199
let mut parts = text.chars().peekable();
177200
while let Some(part) = parts.next() {
@@ -202,12 +225,16 @@ fn format_line(text: &str, wrap_quote: char) -> String {
202225
escape_quote(&mut out, wrap_quote, rule.to);
203226
}
204227
}
228+
changed = true;
205229
} else {
206230
out.push(part);
207231
}
208232
}
209-
210-
out
233+
if changed {
234+
Cow::Owned(out)
235+
} else {
236+
Cow::Borrowed(text)
237+
}
211238
}
212239

213240
fn escape_quote(out: &mut String, wrap_quote: char, quote: char) {

autocorrect/src/rule/mod.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ mod word;
88
pub mod halfwidth;
99
pub mod spellcheck;
1010

11-
use std::collections::HashMap;
11+
use std::{borrow::Cow, collections::HashMap};
1212

1313
use regex::Regex;
1414
use rule::{Rule, RuleResult};
@@ -81,11 +81,11 @@ pub(crate) fn format_or_lint(text: &str, lint: bool) -> RuleResult {
8181
format_or_lint_with_disable_rules(text, lint, &map![])
8282
}
8383

84-
pub(crate) fn format_or_lint_with_disable_rules(
85-
text: &str,
84+
pub(crate) fn format_or_lint_with_disable_rules<'a>(
85+
text: &'a str,
8686
lint: bool,
8787
disable_rules: &HashMap<String, bool>,
88-
) -> RuleResult {
88+
) -> RuleResult<'a> {
8989
let mut result = RuleResult::default();
9090

9191
// skip if not has CJK
@@ -98,13 +98,13 @@ pub(crate) fn format_or_lint_with_disable_rules(
9898
if matches!(ch, ' ' | '\n' | '\r') {
9999
let mut sub_result = RuleResult::new(&part);
100100
sub_result.severity = result.severity;
101-
102-
part.clear();
103-
104101
format_part(&mut sub_result, lint, disable_rules);
105102

106-
result.out.push_str(&sub_result.out);
103+
let mut out = result.out.into_owned();
104+
out.push_str(&sub_result.out);
105+
result.out = Cow::Owned(out);
107106
result.severity = sub_result.severity;
107+
part.clear();
108108
}
109109
}
110110

@@ -114,11 +114,13 @@ pub(crate) fn format_or_lint_with_disable_rules(
114114

115115
format_part(&mut sub_result, lint, disable_rules);
116116

117-
result.out.push_str(&sub_result.out);
117+
let mut out = result.out.into_owned();
118+
out.push_str(&sub_result.out);
119+
result.out = Cow::Owned(out);
118120
result.severity = sub_result.severity;
119121
}
120122
} else {
121-
result.out = text.to_string();
123+
result.out = Cow::Borrowed(text);
122124
}
123125

124126
format_after_rules(&mut result, lint, disable_rules);

autocorrect/src/rule/rule.rs

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,31 @@
1+
use std::borrow::Cow;
2+
13
use crate::config::SeverityMode;
24
use crate::result::Severity;
35

46
pub(crate) struct Rule {
57
#[allow(dead_code)]
68
pub name: String,
7-
pub format_fn: fn(input: &str) -> String,
9+
pub format_fn: for<'a> fn(input: &'a str) -> Cow<'a, str>,
810
}
911

1012
#[derive(Default)]
11-
pub(crate) struct RuleResult {
12-
pub out: String,
13+
pub(crate) struct RuleResult<'a> {
14+
pub out: Cow<'a, str>,
1315
pub severity: Severity,
1416
}
1517

16-
impl RuleResult {
17-
pub fn new(input: &str) -> Self {
18+
impl<'a> RuleResult<'a> {
19+
pub fn new(input: &'a str) -> Self {
1820
Self {
19-
out: input.to_string(),
21+
out: Cow::Borrowed(input),
2022
..Default::default()
2123
}
2224
}
2325
}
2426

2527
impl Rule {
26-
pub fn new(name: &str, format: fn(input: &str) -> String) -> Self {
28+
pub fn new(name: &str, format: for<'a> fn(input: &'a str) -> Cow<'a, str>) -> Self {
2729
Rule {
2830
name: name.to_string(),
2931
format_fn: format,
@@ -35,27 +37,27 @@ impl Rule {
3537
return;
3638
}
3739

38-
let new = (self.format_fn)(&result.out);
39-
if result.out.ne(&new) {
40+
if let Cow::Owned(new) = (self.format_fn)(&result.out) {
4041
result.severity = Severity::Error;
42+
result.out = Cow::Owned(new);
4143
}
42-
result.out = new;
4344
}
4445

4546
pub fn lint(&self, result: &mut RuleResult) {
4647
if self.severity() == SeverityMode::Off {
4748
return;
4849
}
4950

50-
let new = (self.format_fn)(&result.out);
51-
if result.out.ne(&new) && result.severity == Severity::Pass {
52-
if self.severity() == SeverityMode::Warning {
53-
result.severity = Severity::Warning;
54-
} else {
55-
result.severity = Severity::Error;
51+
if let Cow::Owned(new) = (self.format_fn)(&result.out) {
52+
if result.severity == Severity::Pass {
53+
if self.severity() == SeverityMode::Warning {
54+
result.severity = Severity::Warning;
55+
} else {
56+
result.severity = Severity::Error;
57+
}
5658
}
59+
result.out = Cow::Owned(new);
5760
}
58-
result.out = new;
5961
}
6062

6163
fn severity(&self) -> SeverityMode {
@@ -75,7 +77,7 @@ mod tests {
7577

7678
#[test]
7779
fn test_rule_not_pass() {
78-
let rule = Rule::new("space-word", |input| format!("{input} - foo"));
80+
let rule = Rule::new("space-word", |input| Cow::Owned(format!("{input} - foo")));
7981
assert_eq!(rule.severity(), SeverityMode::Error);
8082
assert_eq!(rule.name, "space-word");
8183

@@ -92,7 +94,7 @@ mod tests {
9294

9395
#[test]
9496
fn test_rule_pass() {
95-
let rule = Rule::new("spellcheck", |input| input.to_string());
97+
let rule = Rule::new("spellcheck", |input| Cow::Borrowed(input));
9698

9799
let mut result = RuleResult::new("test");
98100
rule.format(&mut result);

0 commit comments

Comments
 (0)