Skip to content

Commit 57b1a57

Browse files
committed
Speed up passing ASCII-only strings to WASM
Some speed-up numbers from my string-heavy WASM benchmarks: - Firefox + encodeInto: +45% - Chrome + encodeInto: +80% - Firefox + encode: +29% - Chrome + encode: +62% Note that this helps specifically in the case of lots of small ASCII strings; for large strings there is no measurable difference in either direction.
1 parent befefe0 commit 57b1a57

File tree

1 file changed

+43
-18
lines changed
  • crates/cli-support/src/js

1 file changed

+43
-18
lines changed

crates/cli-support/src/js/mod.rs

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,18 +1445,51 @@ impl<'a> Context<'a> {
14451445
self.expose_text_encoder();
14461446
self.expose_uint8_memory();
14471447

1448+
// A fast path that directly writes char codes into WASM memory as long
1449+
// as it finds only ASCII characters.
1450+
//
1451+
// This is much faster for common ASCII strings because it can avoid
1452+
// calling out into C++ TextEncoder code.
1453+
//
1454+
// This might not be very intuitive, but such calls are usually more
1455+
// expensive in mainstream engines than staying in JS, and
1456+
// charCodeAt on ASCII strings is usually optimised to raw bytes.
1457+
let start_encoding_as_ascii = format!(
1458+
"
1459+
{}
1460+
let size = arg.length;
1461+
let ptr = wasm.__wbindgen_malloc(size);
1462+
let offset = 0;
1463+
{{
1464+
const mem = getUint8Memory();
1465+
for (; offset < arg.length; offset++) {{
1466+
const code = arg.charCodeAt(offset);
1467+
if (code > 0x7F) {{
1468+
arg = arg.slice(offset);
1469+
break;
1470+
}}
1471+
mem[ptr + offset] = code;
1472+
}}
1473+
}}
1474+
",
1475+
debug
1476+
);
1477+
14481478
// The first implementation we have for this is to use
14491479
// `TextEncoder#encode` which has been around for quite some time.
14501480
let use_encode = format!(
14511481
"
14521482
{}
1453-
const buf = cachedTextEncoder.encode(arg);
1454-
const ptr = wasm.__wbindgen_malloc(buf.length);
1455-
getUint8Memory().set(buf, ptr);
1456-
WASM_VECTOR_LEN = buf.length;
1483+
if (offset !== arg.length) {{
1484+
const buf = cachedTextEncoder.encode(arg);
1485+
ptr = wasm.__wbindgen_realloc(ptr, size, size += buf.length);
1486+
getUint8Memory().set(buf, ptr + offset);
1487+
offset += buf.length;
1488+
}}
1489+
WASM_VECTOR_LEN = offset;
14571490
return ptr;
14581491
",
1459-
debug
1492+
start_encoding_as_ascii
14601493
);
14611494

14621495
// Another possibility is to use `TextEncoder#encodeInto` which is much
@@ -1465,23 +1498,15 @@ impl<'a> Context<'a> {
14651498
let use_encode_into = format!(
14661499
"
14671500
{}
1468-
let size = arg.length;
1469-
let ptr = wasm.__wbindgen_malloc(size);
1470-
let writeOffset = 0;
1471-
while (true) {{
1472-
const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size);
1473-
const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view);
1474-
writeOffset += written;
1475-
if (read === arg.length) {{
1476-
break;
1477-
}}
1478-
arg = arg.substring(read);
1501+
if (offset !== arg.length) {{
14791502
ptr = wasm.__wbindgen_realloc(ptr, size, size += arg.length * 3);
1503+
const view = getUint8Memory().subarray(ptr + offset, ptr + size);
1504+
offset += cachedTextEncoder.encodeInto(arg, view).written;
14801505
}}
1481-
WASM_VECTOR_LEN = writeOffset;
1506+
WASM_VECTOR_LEN = offset;
14821507
return ptr;
14831508
",
1484-
debug
1509+
start_encoding_as_ascii
14851510
);
14861511

14871512
// Looks like `encodeInto` doesn't currently work when the memory passed

0 commit comments

Comments
 (0)