Skip to content

Commit 4fe9aa6

Browse files
anonrig and lemire committed
buffer: improve base64 and base64url performance
Co-authored-by: Daniel Lemire <[email protected]>
1 parent db17461 commit 4fe9aa6

File tree

1 file changed

+90
-14
lines changed

1 file changed

+90
-14
lines changed

src/string_bytes.cc

Lines changed: 90 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -346,14 +346,78 @@ size_t StringBytes::Write(Isolate* isolate,
346346
}
347347

348348
case BASE64URL:
349-
// Fall through
349+
if (str->IsExternalOneByte()) { // 8-bit case
350+
auto ext = str->GetExternalOneByteStringResource();
351+
// Try with WHATWG base64 standard first, adapted for base64url
352+
simdutf::result r = simdutf::base64_to_binary_safe(
353+
ext->data(), ext->length(), buf, buflen, simdutf::base64_url);
354+
if (r.error == simdutf::error_code::SUCCESS) {
355+
nbytes = buflen;
356+
} else {
357+
// The input does not follow the WHATWG forgiving-base64 specification
358+
// adapted for base64url
359+
// https://infra.spec.whatwg.org/#forgiving-base64-decode
360+
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
361+
}
362+
} else { // 16-bit case
363+
// Typically, a base64url string is stored as an 8-bit string within v8.
364+
// Thus str->IsOneByte() is typically true. The next line thus often
365+
// allocates a temporary 16-bit buffer to store a 16-bit copy of the
366+
// 8-bit v8 string. Hence the creation of the String::Value value is
367+
// likely a performance bottleneck.
368+
String::Value value(isolate, str);
369+
// Try with WHATWG base64 standard first
370+
simdutf::result r = simdutf::base64_to_binary_safe(
371+
reinterpret_cast<const char16_t*>(*value),
372+
value.length(),
373+
buf,
374+
buflen,
375+
simdutf::base64_url);
376+
if (r.error == simdutf::error_code::SUCCESS) {
377+
nbytes = buflen;
378+
} else {
379+
// The input does not follow the WHATWG forgiving-base64 specification
380+
// (adapted for base64url with + and / replaced by - and _).
381+
// https://infra.spec.whatwg.org/#forgiving-base64-decode
382+
nbytes = base64_decode(buf, buflen, *value, value.length());
383+
}
384+
}
385+
break;
386+
350387
case BASE64:
351-
if (str->IsExternalOneByte()) {
388+
if (str->IsExternalOneByte()) { // 8-bit case
352389
auto ext = str->GetExternalOneByteStringResource();
353-
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
354-
} else {
390+
// Try with WHATWG base64 standard first
391+
auto result = simdutf::base64_to_binary_safe(
392+
ext->data(), ext->length(), buf, buflen, simdutf::base64_default);
393+
if (result.error == simdutf::error_code::SUCCESS) {
394+
nbytes = buflen;
395+
} else {
396+
// The input does not follow the WHATWG forgiving-base64 specification
397+
// https://infra.spec.whatwg.org/#forgiving-base64-decode
398+
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
399+
}
400+
} else { // 16-bit case
401+
// Typically, a base64 string is stored as an 8-bit string within v8.
402+
// Thus str->IsOneByte() is typically true. The next line thus often
403+
// allocates a temporary 16-bit buffer to store a 16-bit copy of the
404+
// 8-bit v8 string. Hence, the creation of the String::Value value is
405+
// likely a performance bottleneck.
355406
String::Value value(isolate, str);
356-
nbytes = base64_decode(buf, buflen, *value, value.length());
407+
// Try with WHATWG base64 standard first
408+
auto result = simdutf::base64_to_binary_safe(
409+
reinterpret_cast<const char16_t*>(*value),
410+
value.length(),
411+
buf,
412+
buflen,
413+
simdutf::base64_default);
414+
if (result.error == simdutf::error_code::SUCCESS) {
415+
nbytes = buflen;
416+
} else {
417+
// The input does not follow the WHATWG forgiving-base64 specification
418+
// https://infra.spec.whatwg.org/#forgiving-base64-decode
419+
nbytes = base64_decode(buf, buflen, *value, value.length());
420+
}
357421
}
358422
break;
359423

@@ -411,9 +475,12 @@ Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
411475
break;
412476

413477
case BASE64URL:
414-
// Fall through
478+
data_size = simdutf::base64_length_from_binary(str->Length(),
479+
simdutf::base64_url);
480+
break;
481+
415482
case BASE64:
416-
data_size = base64_decoded_size_fast(str->Length());
483+
data_size = simdutf::base64_length_from_binary(str->Length());
417484
break;
418485

419486
case HEX:
@@ -452,11 +519,16 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
452519
case UCS2:
453520
return Just(str->Length() * sizeof(uint16_t));
454521

455-
case BASE64URL:
456-
// Fall through
522+
case BASE64URL: {
523+
String::Value value(isolate, str);
524+
return Just(simdutf::base64_length_from_binary(value.length(),
525+
simdutf::base64_url));
526+
}
527+
457528
case BASE64: {
458529
String::Value value(isolate, str);
459-
return Just(base64_decoded_size(*value, value.length()));
530+
return Just(simdutf::base64_length_from_binary(value.length(),
531+
simdutf::base64_default));
460532
}
461533

462534
case HEX:
@@ -609,28 +681,32 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
609681
return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error);
610682

611683
case BASE64: {
612-
size_t dlen = base64_encoded_size(buflen);
684+
size_t dlen =
685+
simdutf::base64_length_from_binary(buflen, simdutf::base64_default);
613686
char* dst = node::UncheckedMalloc(dlen);
614687
if (dst == nullptr) {
615688
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
616689
return MaybeLocal<Value>();
617690
}
618691

619-
size_t written = base64_encode(buf, buflen, dst, dlen);
692+
size_t written =
693+
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_default);
620694
CHECK_EQ(written, dlen);
621695

622696
return ExternOneByteString::New(isolate, dst, dlen, error);
623697
}
624698

625699
case BASE64URL: {
626-
size_t dlen = base64_encoded_size(buflen, Base64Mode::URL);
700+
size_t dlen =
701+
simdutf::base64_length_from_binary(buflen, simdutf::base64_url);
627702
char* dst = node::UncheckedMalloc(dlen);
628703
if (dst == nullptr) {
629704
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
630705
return MaybeLocal<Value>();
631706
}
632707

633-
size_t written = base64_encode(buf, buflen, dst, dlen, Base64Mode::URL);
708+
size_t written =
709+
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
634710
CHECK_EQ(written, dlen);
635711

636712
return ExternOneByteString::New(isolate, dst, dlen, error);

0 commit comments

Comments
 (0)