Skip to content

src: use simdjson #59308

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 111 additions & 104 deletions src/json_parser.cc
Original file line number Diff line number Diff line change
@@ -1,156 +1,163 @@
#include "json_parser.h"
#include "node_errors.h"
#include "node_v8_platform-inl.h"
#include "util-inl.h"
#include <cstdio>
#include "debug_utils.h"

namespace node {
using v8::Array;
using v8::Context;
using v8::Isolate;
using v8::Local;
using v8::Object;
using v8::String;
using v8::Value;

JSONParser::JSONParser() {}

bool JSONParser::Parse(const std::string& content) {
DCHECK(!parsed_);

Isolate* isolate = isolate_.get();
v8::Locker locker(isolate);
v8::Isolate::Scope isolate_scope(isolate);
v8::HandleScope handle_scope(isolate);

Local<Context> context = Context::New(isolate);
Context::Scope context_scope(context);

// It's not a real script, so don't print the source line.
errors::PrinterTryCatch bootstrapCatch(
isolate, errors::PrinterTryCatch::kDontPrintSourceLine);
Local<Value> json_string_value;
Local<Value> result_value;
if (!ToV8Value(context, content).ToLocal(&json_string_value) ||
!json_string_value->IsString() ||
!v8::JSON::Parse(context, json_string_value.As<String>())
.ToLocal(&result_value) ||
!result_value->IsObject()) {
json_content_ = content;
size_t json_length = json_content_.size();
json_content_.append(simdjson::SIMDJSON_PADDING, ' ');

simdjson::padded_string_view json_view(
json_content_.data(), json_length, json_content_.size());

simdjson::ondemand::document document;
simdjson::error_code error = parser_.iterate(json_view).get(document);

if (error != simdjson::SUCCESS) {
error_message_ = simdjson::error_message(error);
std::fprintf(stderr, "%s\n", error_message_.c_str());
return false;
}

context_.Reset(isolate, context);
content_.Reset(isolate, result_value.As<Object>());
parsed_ = true;
simdjson::ondemand::object obj;
error = document.get_object().get(obj);
if (error != simdjson::SUCCESS) {
error_message_ = simdjson::error_message(error);
std::fprintf(stderr, "%s\n", error_message_.c_str());
return false;
}

parsed_ = true;
return true;
}

std::optional<std::string> JSONParser::GetTopLevelStringField(
std::string_view field) {
Isolate* isolate = isolate_.get();
v8::Locker locker(isolate);
v8::Isolate::Scope isolate_scope(isolate);
v8::HandleScope handle_scope(isolate);

Local<Context> context = context_.Get(isolate);
Context::Scope context_scope(context);

Local<Object> content_object = content_.Get(isolate);

Local<Value> value;
// It's not a real script, so don't print the source line.
errors::PrinterTryCatch bootstrapCatch(
isolate, errors::PrinterTryCatch::kDontPrintSourceLine);
Local<Value> field_local;
if (!ToV8Value(context, field, isolate).ToLocal(&field_local)) {
if (!parsed_) {
return {};
}
if (!content_object->Get(context, field_local).ToLocal(&value) ||
!value->IsString()) {

simdjson::padded_string_view json_view(
json_content_.data(),
json_content_.size() - simdjson::SIMDJSON_PADDING,
json_content_.size());

simdjson::ondemand::document document;
simdjson::error_code error = parser_.iterate(json_view).get(document);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, reading this, I realized that if we are moving towards simdjson, it's better to just use simdjson directly at the call site instead of shimming JSONParser over it, because simdjson does not really support random access, unlike the current JSON parser interface. As a result, trying to shim it is bound to lead to repetitive parsing, like what's happening here. The correct way to handle fields of unknown order is to just iterate over the fields and handle them for different specific key values on an ad-hoc basis, as suggested by the simdjson documentation: https://github.com/simdjson/simdjson/blob/master/doc/basics.md#using-the-parsed-json

For example, for SEA, this would be the recommended way to parse the document according to the simdjson documentation: #59323

if (error != simdjson::SUCCESS) {
return {};
}
Utf8Value utf8_value(isolate, value);
return utf8_value.ToString();

simdjson::ondemand::object obj;
error = document.get_object().get(obj);
if (error != simdjson::SUCCESS) {
return {};
}

std::string_view result;
error = obj[field].get_string().get(result);
if (error != simdjson::SUCCESS) {
return {};
}

return std::string(result);
}

std::optional<bool> JSONParser::GetTopLevelBoolField(std::string_view field) {
Isolate* isolate = isolate_.get();
v8::Locker locker(isolate);
v8::Isolate::Scope isolate_scope(isolate);
v8::HandleScope handle_scope(isolate);

Local<Context> context = context_.Get(isolate);
Context::Scope context_scope(context);

Local<Object> content_object = content_.Get(isolate);
Local<Value> value;
bool has_field;
// It's not a real script, so don't print the source line.
errors::PrinterTryCatch bootstrapCatch(
isolate, errors::PrinterTryCatch::kDontPrintSourceLine);
Local<Value> field_local;
if (!ToV8Value(context, field, isolate).ToLocal(&field_local)) {
if (!parsed_) {
return {};
}
if (!content_object->Has(context, field_local).To(&has_field)) {

simdjson::padded_string_view json_view(
json_content_.data(),
json_content_.size() - simdjson::SIMDJSON_PADDING,
json_content_.size());

simdjson::ondemand::document document;
simdjson::error_code error = parser_.iterate(json_view).get(document);
if (error != simdjson::SUCCESS) {
return {};
}
if (!has_field) {

simdjson::ondemand::object obj;
error = document.get_object().get(obj);
if (error != simdjson::SUCCESS) {
return {};
}

simdjson::ondemand::value val;
error = obj[field].get(val);
if (error != simdjson::SUCCESS) {
return false;
}
if (!content_object->Get(context, field_local).ToLocal(&value) ||
!value->IsBoolean()) {

bool result;
error = val.get_bool().get(result);
if (error != simdjson::SUCCESS) {
return {};
}
return value->BooleanValue(isolate);

return result;
}

std::optional<JSONParser::StringDict> JSONParser::GetTopLevelStringDict(
std::string_view field) {
Isolate* isolate = isolate_.get();
v8::Locker locker(isolate);
v8::Isolate::Scope isolate_scope(isolate);
v8::HandleScope handle_scope(isolate);
Local<Context> context = context_.Get(isolate);
Local<Object> content_object = content_.Get(isolate);
Local<Value> value;
bool has_field;
// It's not a real script, so don't print the source line.
errors::PrinterTryCatch bootstrapCatch(
isolate, errors::PrinterTryCatch::kDontPrintSourceLine);
Local<Value> field_local;
if (!ToV8Value(context, field, isolate).ToLocal(&field_local)) {
if (!parsed_) {
return std::nullopt;
}
if (!content_object->Has(context, field_local).To(&has_field)) {

simdjson::padded_string_view json_view(
json_content_.data(),
json_content_.size() - simdjson::SIMDJSON_PADDING,
json_content_.size());

simdjson::ondemand::document document;
simdjson::error_code error = parser_.iterate(json_view).get(document);
if (error != simdjson::SUCCESS) {
return std::nullopt;
}
if (!has_field) {
return StringDict();
}
if (!content_object->Get(context, field_local).ToLocal(&value) ||
!value->IsObject()) {

simdjson::ondemand::object obj;
error = document.get_object().get(obj);
if (error != simdjson::SUCCESS) {
return std::nullopt;
}
Local<Object> dict = value.As<Object>();
Local<Array> keys;
if (!dict->GetOwnPropertyNames(context).ToLocal(&keys)) {

simdjson::ondemand::value val;
error = obj[field].get(val);
if (error != simdjson::SUCCESS) {
return StringDict();
}

simdjson::ondemand::object dict;
error = val.get_object().get(dict);
if (error != simdjson::SUCCESS) {
return std::nullopt;
}
std::unordered_map<std::string, std::string> result;
uint32_t length = keys->Length();
for (uint32_t i = 0; i < length; ++i) {
Local<Value> key;
Local<Value> value;
if (!keys->Get(context, i).ToLocal(&key) || !key->IsString())

StringDict result;
for (auto field_value : dict) {
std::string_view key_view;
error = field_value.unescaped_key().get(key_view);
if (error != simdjson::SUCCESS) {
return StringDict();
if (!dict->Get(context, key).ToLocal(&value) || !value->IsString())
}

std::string_view value_view;
error = field_value.value().get_string().get(value_view);
if (error != simdjson::SUCCESS) {
return StringDict();
}

Utf8Value key_utf8(isolate, key);
Utf8Value value_utf8(isolate, value);
result.emplace(*key_utf8, *value_utf8);
result.emplace(std::string(key_view), std::string(value_view));
}

return result;
}

Expand Down
13 changes: 5 additions & 8 deletions src/json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
#include <optional>
#include <string>
#include <unordered_map>
#include "util.h"
#include "v8.h"
#include "simdjson.h"

namespace node {
// This is intended to be used to get some top-level fields out of a JSON
Expand All @@ -23,14 +22,12 @@ class JSONParser {
std::optional<std::string> GetTopLevelStringField(std::string_view field);
std::optional<bool> GetTopLevelBoolField(std::string_view field);
std::optional<StringDict> GetTopLevelStringDict(std::string_view field);
std::string GetErrorMessage() const { return error_message_; }

private:
// We might want a lighter-weight JSON parser for this use case. But for now
// using V8 is good enough.
RAIIIsolateWithoutEntering isolate_;

v8::Global<v8::Context> context_;
v8::Global<v8::Object> content_;
simdjson::ondemand::parser parser_;
std::string json_content_;
std::string error_message_;
bool parsed_ = false;
};
} // namespace node
Expand Down
4 changes: 1 addition & 3 deletions test/parallel/test-single-executable-blob-config-errors.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ const assert = require('assert');
['--experimental-sea-config', config], {
cwd: tmpdir.path,
});
const stderr = child.stderr.toString();
assert.strictEqual(child.status, 1);
assert.match(stderr, /SyntaxError: Expected ':' after property name/);
const stderr = child.stderr.toString(); assert.match(stderr, /INCOMPLETE_ARRAY_OR_OBJECT/);
assert(
stderr.includes(
`Cannot parse JSON from ${config}`
Expand Down
Loading