Skip to content

Commit be10b36

Browse files
committed
buffer: add buffer.isUtf8 for utf8 validation
1 parent 09f33c9 commit be10b36

File tree

4 files changed

+54
-1
lines changed

4 files changed

+54
-1
lines changed

doc/api/buffer.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5130,6 +5130,17 @@ For code running using Node.js APIs, converting between base64-encoded strings
51305130
and binary data should be performed using `Buffer.from(str, 'base64')` and
51315131
`buf.toString('base64')`.**
51325132

5133+
### `buffer.isUtf8(input)`
5134+
5135+
<!-- YAML
5136+
added: REPLACEME
5137+
-->
5138+
5139+
* input {Buffer | ArrayBuffer | TypedArray} The input to validate.
5140+
* Returns: {boolean} Returns true if and only if the input is valid UTF-8.
5141+
5142+
This function validates that `input` consists entirely of well-formed UTF-8 encoded data.
5143+
51335144
### `buffer.INSPECT_MAX_BYTES`
51345145

51355146
<!-- YAML

lib/buffer.js

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ const {
5757
compareOffset,
5858
createFromString,
5959
fill: bindingFill,
60+
isUtf8: bindingIsUtf8,
6061
indexOfBuffer,
6162
indexOfNumber,
6263
indexOfString,
@@ -84,7 +85,8 @@ const {
8485
const {
8586
isAnyArrayBuffer,
8687
isArrayBufferView,
87-
isUint8Array
88+
isUint8Array,
89+
isTypedArray,
8890
} = require('internal/util/types');
8991
const {
9092
inspect: utilInspect
@@ -1314,10 +1316,23 @@ function atob(input) {
13141316
return Buffer.from(input, 'base64').toString('latin1');
13151317
}
13161318

1319+
/**
 * Reports whether `input` holds well-formed UTF-8.
 *
 * Only the bytes that `input` actually exposes are validated: for a
 * `TypedArray`/`Buffer` that is the `[byteOffset, byteOffset + byteLength)`
 * window of its backing store, not the whole backing store. This matters
 * for views created with `subarray()`/an explicit offset and for Buffers
 * that share a larger backing `ArrayBuffer` with unrelated data.
 * @param {Buffer | TypedArray | ArrayBuffer} input The bytes to validate.
 * @returns {boolean} `true` if the bytes are valid UTF-8; `false` if they
 *   are not, or if `input` is not one of the supported types.
 */
function isUtf8(input) {
  if (isTypedArray(input) || Buffer.isBuffer(input)) {
    const { buffer, byteOffset, byteLength } = input;

    // Fast path: the view spans its entire (non-shared) backing store, so
    // the store can be handed to the binding as-is without copying.
    // `byteLength === buffer.byteLength` implies `byteOffset === 0`.
    if (buffer instanceof ArrayBuffer && byteLength === buffer.byteLength) {
      return bindingIsUtf8(buffer);
    }

    // The binding validates a whole ArrayBuffer, so copy just the viewed
    // bytes into a fresh, exactly-sized ArrayBuffer. A new Uint8Array view
    // is created first because `Buffer.prototype.slice()` does not copy.
    return bindingIsUtf8(
      new Uint8Array(buffer, byteOffset, byteLength).slice().buffer);
  }

  if (isAnyArrayBuffer(input)) {
    if (input instanceof ArrayBuffer) {
      return bindingIsUtf8(input);
    }
    // Anything else accepted by isAnyArrayBuffer() (e.g. a
    // SharedArrayBuffer) is copied into a plain ArrayBuffer, which is the
    // only type the binding accepts.
    return bindingIsUtf8(new Uint8Array(input).slice().buffer);
  }

  return false;
}
1330+
13171331
module.exports = {
13181332
Buffer,
13191333
SlowBuffer,
13201334
transcode,
1335+
isUtf8,
13211336

13221337
// Legacy
13231338
kMaxLength,

src/node_buffer.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,14 @@ static void EncodeInto(const FunctionCallbackInfo<Value>& args) {
12231223
results[1] = written;
12241224
}
12251225

1226+
static void IsUtf8(const FunctionCallbackInfo<Value>& args) {
1227+
CHECK_GE(args.Length(), 1);
1228+
CHECK(args[0]->IsArrayBuffer());
1229+
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>();
1230+
auto external = static_cast<const char*>(input->Data());
1231+
args.GetReturnValue().Set(
1232+
simdutf::validate_utf8(external, input->ByteLength()));
1233+
}
12261234

12271235
void SetBufferPrototype(const FunctionCallbackInfo<Value>& args) {
12281236
Environment* env = Environment::GetCurrent(args);
@@ -1358,6 +1366,8 @@ void Initialize(Local<Object> target,
13581366
SetMethod(context, target, "encodeInto", EncodeInto);
13591367
SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String);
13601368

1369+
SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8);
1370+
13611371
target
13621372
->Set(context,
13631373
FIXED_ONE_BYTE_STRING(isolate, "kMaxLength"),
@@ -1413,6 +1423,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
14131423
registry->Register(EncodeInto);
14141424
registry->Register(EncodeUtf8String);
14151425

1426+
registry->Register(IsUtf8);
1427+
14161428
registry->Register(StringSlice<ASCII>);
14171429
registry->Register(StringSlice<BASE64>);
14181430
registry->Register(StringSlice<BASE64URL>);

test/parallel/test-buffer-isutf8.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
'use strict';

// Checks buffer.isUtf8() against valid UTF-8, invalid UTF-8 and
// unsupported argument types.

require('../common');
const assert = require('assert');
const { isUtf8 } = require('buffer');
const { TextEncoder } = require('util');

const encoder = new TextEncoder();

// Each entry is [input, expected result of isUtf8(input)].
const cases = [
  // Valid UTF-8 produced by TextEncoder (ASCII and multi-byte).
  [encoder.encode('hello'), true],
  [encoder.encode('ğ'), true],
  // 0xf8 can never appear in well-formed UTF-8.
  [Buffer.from([0xf8]), false],
  // Unsupported input types are reported as not valid.
  [null, false],
  [undefined, false],
];

for (const [input, expected] of cases) {
  assert.strictEqual(isUtf8(input), expected);
}

0 commit comments

Comments
 (0)