Skip to content

Commit 5119ebe

Browse files
authored
feat: add url_aggregator::replace_and_resize (#302)
* feat: add url_aggregator::replace_and_resize * perf: improve update_base_pathname performance
1 parent 785a8e2 commit 5119ebe

File tree

2 files changed

+33
-23
lines changed

2 files changed

+33
-23
lines changed

include/ada/url_aggregator-inl.h

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,26 @@ inline void url_aggregator::update_unencoded_base_hash(std::string_view input) {
8787
ADA_ASSERT_TRUE(validate());
8888
}
8989

90+
/**
 * Overwrites the bytes of `buffer` in the half-open range [start, end) with
 * `input`, growing or shrinking the buffer as needed.
 *
 * @param start offset in `buffer` of the first byte to replace.
 * @param end   offset in `buffer` one past the last byte to replace.
 * @param input the replacement text; it may be shorter than, longer than, or
 *              the same size as the replaced range.
 * @return input.size() - (end - start), computed in uint32_t arithmetic. When
 *         the range shrinks the value wraps around (well-defined for unsigned
 *         types), so adding it to a uint32_t offset moves that offset back.
 *
 * NOTE(review): assumes start <= end <= buffer.size(); nothing here checks it.
 */
ada_really_inline uint32_t url_aggregator::replace_and_resize(uint32_t start, uint32_t end, std::string_view input) {
  const uint32_t old_size = end - start;
  const uint32_t new_size = static_cast<uint32_t>(input.size());

  if (old_size == 0) {
    // Nothing to overwrite: this is a plain insertion.
    buffer.insert(start, input);
  } else if (new_size > old_size) {
    // Growing: overwrite the existing bytes in place, then insert the tail.
    buffer.replace(start, old_size, input.substr(0, old_size));
    buffer.insert(start + old_size, input.substr(old_size));
  } else {
    // Same size or shrinking: drop the surplus bytes (if any), then
    // overwrite what is left with the whole input.
    if (new_size < old_size) {
      buffer.erase(start, old_size - new_size);
    }
    buffer.replace(start, new_size, input);
  }

  // Unsigned subtraction; wraps when new_size < old_size (see @return).
  return new_size - old_size;
}
109+
90110
inline void url_aggregator::update_base_hostname(const std::string_view input) {
91111
ada_log("url_aggregator::update_base_hostname ", input, " [", input.size(), " bytes], buffer is '", buffer, "' [", buffer.size()," bytes]");
92112
ADA_ASSERT_TRUE(validate());
@@ -95,21 +115,13 @@ inline void url_aggregator::update_base_hostname(const std::string_view input) {
95115
// This next line is required for when parsing a URL like `foo://`
96116
add_authority_slashes_if_needed();
97117

98-
bool has_credential = components.protocol_end + 2 < components.host_start;
99-
uint32_t current_length = components.host_end - components.host_start;
100-
// next line could overflow but unsigned arithmetic has well-defined overflows.
101-
uint32_t new_difference = uint32_t(input.size()) - current_length;
102-
// The common case is current_length == 0.
103-
buffer.erase(components.host_start, current_length);
104-
105-
uint32_t host_start = components.host_start;
106-
// The common case is components.host_start == buffer.size().
107-
if (has_credential) {
108-
buffer.insert(host_start, "@");
109-
host_start++;
118+
bool has_credentials = components.protocol_end + 2 < components.host_start;
119+
uint32_t new_difference = replace_and_resize(components.host_start, components.host_end, input);
120+
121+
if (has_credentials) {
122+
buffer.insert(components.host_start, "@");
110123
new_difference++;
111124
}
112-
buffer.insert(host_start, input);
113125
components.host_end += new_difference;
114126
components.pathname_start += new_difference;
115127
if (components.search_start != url_components::omitted) { components.search_start += new_difference; }
@@ -203,27 +215,22 @@ inline void url_aggregator::update_base_pathname(const std::string_view input) {
203215
ada_log("url_aggregator::update_base_pathname '", input, "' [", input.size(), " bytes] \n", to_diagram());
204216
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
205217
ADA_ASSERT_TRUE(validate());
218+
206219
const bool begins_with_dashdash = checkers::begins_with(input, "//");
207-
// uncommon branch:
208220
if(!begins_with_dashdash && has_dash_dot()) {
209221
ada_log("url_aggregator::update_base_pathname has /.: \n", to_diagram());
210222
// We must delete the /.
211223
delete_dash_dot();
212224
}
213-
uint32_t current_length = get_pathname_length();
214-
uint32_t difference = uint32_t(input.size()) - current_length;
215-
// The common case is current_length == 0.
216-
buffer.erase(components.pathname_start, current_length);
217-
// next line is very uncommon and we should seek to optimize it accordingly.
218-
if (begins_with_dashdash && !has_opaque_path && !has_authority()) {
225+
226+
if (begins_with_dashdash && !has_opaque_path && !has_authority() && !has_dash_dot()) {
219227
// If url’s host is null, url does not have an opaque path, url’s path’s size is greater than 1,
220228
// and url's path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output.
221229
buffer.insert(components.pathname_start, "/.");
222230
components.pathname_start += 2;
223-
difference += 2;
224231
}
225-
// The common case is components.pathname_start == buffer.size() so this is effectively an append.
226-
buffer.insert(components.pathname_start, input);
232+
233+
uint32_t difference = replace_and_resize(components.pathname_start, components.pathname_start + get_pathname_length(), input);
227234
if (components.search_start != url_components::omitted) { components.search_start += difference; }
228235
if (components.hash_start != url_components::omitted) { components.hash_start += difference; }
229236
ada_log("url_aggregator::update_base_pathname end '", input, "' [", input.size(), " bytes] \n", to_diagram());

include/ada/url_aggregator.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,9 @@ namespace ada {
231231
template <bool has_state_override = false>
232232
[[nodiscard]] ada_really_inline bool parse_scheme_with_colon(const std::string_view input);
233233

234+
/** @private */
235+
ada_really_inline uint32_t replace_and_resize(uint32_t start, uint32_t end, std::string_view input);
236+
234237
/**
235238
* Useful for implementing efficient serialization for the URL.
236239
*

0 commit comments

Comments
 (0)