Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP faster hash building #6

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ GEM
remote: https://rubygems.org/
specs:
benchmark-ips (2.10.0)
fast_jsonparser (0.5.0)
json (2.6.1)
minitest (5.15.0)
oj (3.13.11)
fast_jsonparser (0.6.0)
json (2.6.3)
minitest (5.17.0)
oj (3.13.23)
rake (13.0.6)
rake-compiler (1.1.9)
rake-compiler (1.2.1)
rake
yajl-ruby (1.4.1)
yajl-ruby (1.4.3)

PLATFORMS
ruby
Expand Down
6 changes: 5 additions & 1 deletion benchmark/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@ def benchmark_parsing(name, json_output)
puts "== Parsing #{name} (#{json_output.size} bytes)"

Benchmark.ips do |x|
x.report("yajl") { Yajl::Parser.new.parse(json_output) } if RUN[:yajl]
x.config quiet: true if ENV["QUIET"]

x.report("json") { JSON.parse(json_output) } if RUN[:json]
x.report("yajl") { Yajl::Parser.new.parse(json_output) } if RUN[:yajl]
x.report("oj") { Oj.load(json_output) } if RUN[:oj]
x.report("oj strict") { Oj.strict_load(json_output) } if RUN[:oj]
x.report("Oj::Parser") { Oj::Parser.usual.parse(json_output) } if RUN[:oj]
x.report("fast_jsonparser") { FastJsonparser.parse(json_output) } if RUN[:fast_jsonparser]
x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson]

x.compare!
end
puts
end
Expand Down
84 changes: 72 additions & 12 deletions ext/rapidjson/parser.hh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ class NullHandler : public BaseReaderHandler<UTF8<>, NullHandler> {
};

struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {
// Tag recorded in stack_type[] for every container on the parse stack, so
// PutValue/PutKey can dispatch without inspecting the Ruby object itself.
enum class ObjectType : char {
// A Ruby Array; values are appended with rb_ary_push.
Array,
// A JSON object whose key/value pairs are still accumulating in hash_buffer;
// the stack slot holds Qundef until materialize_hash() builds the real Hash.
BufferedHash,
// A materialized Ruby Hash; values are stored with rb_hash_aset under last_key.
Hash,
};

bool Null() {
return PutValue(Qnil);
}
Expand Down Expand Up @@ -59,7 +65,9 @@ struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {
}

bool StartObject() {
return push(rb_hash_new());
//return push(rb_hash_new());
//return push(rb_hash_new(), ObjectType::Hash);
return push(Qundef, ObjectType::BufferedHash);
}

bool Key(const char* str, SizeType length, bool copy) {
Expand All @@ -72,12 +80,13 @@ struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {
}

// SAX callback for the end of a JSON object.
//
// memberCount is supplied by the reader and is not used here.
// Returns whatever PutValue reports for the finished object.
bool EndObject(SizeType memberCount) {
    // If the object on top of the stack is still buffered, turn it into a
    // real Hash first so the stack slot holds the finished object.
    materialize_hash();

    // NOTE(review): pop() is defined outside this hunk; presumably it removes
    // and returns the top stack entry — confirm.
    auto finished = pop();
    return PutValue(finished);
}

bool StartArray() {
VALUE array = rb_ary_new();
return push(array);
return push(array, ObjectType::Array);
}

bool EndArray(SizeType elementCount) {
Expand All @@ -86,9 +95,30 @@ struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {
return true;
}

bool push(VALUE val) {
void materialize_hash() {
auto top_type = stack_type[depth - 1];

if (top_type == ObjectType::BufferedHash) {
if (hash_buffer_idx & 1) {
// drop last key
hash_buffer_idx--;
}

VALUE hash = rb_hash_new_capa(hash_buffer_idx / 2);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added rb_hash_new_capa in 3.2, we'd likely need a fallback for older rubies.

rb_hash_bulk_insert(hash_buffer_idx, hash_buffer, hash);

stack[depth - 1] = hash;
stack_type[depth - 1] = ObjectType::Hash;
hash_buffer_idx = 0;
}
}

bool push(VALUE val, ObjectType type) {
if (depth < MAX_DEPTH) {
materialize_hash();

stack[depth] = val;
stack_type[depth] = type;
depth++;
return true;
} else {
Expand All @@ -108,8 +138,24 @@ struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {

bool PutKey(VALUE key) {
if (depth > 0) {
last_key[depth - 1] = key;
return true;
auto top_type = stack_type[depth - 1];

if (top_type == ObjectType::BufferedHash) {
if (hash_buffer_idx >= HASH_BUFFER_LEN) {
materialize_hash();
last_key[depth - 1] = key;
return true;
}
if (hash_buffer_idx & 1) {
rb_bug("rapidjson: key at odd offset");
}
hash_buffer[hash_buffer_idx++] = key;
last_key[depth - 1] = key;
return true;
} else {
last_key[depth - 1] = key;
return true;
}
} else {
rb_bug("rapidjson: key at depth 0");
return false;
Expand All @@ -121,12 +167,21 @@ struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {
stack[0] = val;
} else {
VALUE top_val = stack[depth - 1];
if (RB_TYPE_P(top_val, T_ARRAY)) {
rb_ary_push(top_val, val);
} else if (RB_TYPE_P(top_val, T_HASH)) {
rb_hash_aset(top_val, last_key[depth - 1], val);
} else {
rb_bug("rapidjson: bad type on stack");
auto top_type = stack_type[depth - 1];
switch(top_type) {
case ObjectType::Array:
rb_ary_push(top_val, val);
break;
case ObjectType::BufferedHash:
if (hash_buffer_idx >= HASH_BUFFER_LEN) {
rb_bug("rapidjson: FIXME: key would overflow buffer");
}
hash_buffer[hash_buffer_idx++] = val;
break;
materialize_hash();
case ObjectType::Hash:
rb_hash_aset(top_val, last_key[depth - 1], val);
break;
}
}
return true;
Expand All @@ -140,12 +195,17 @@ struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {
return stack[0];
}

RubyObjectHandler(): depth(0) {
RubyObjectHandler(): depth(0), hash_buffer_idx(0) {
stack[0] = Qundef;
}

static const int MAX_DEPTH = 256;
int depth;
VALUE stack[MAX_DEPTH];
ObjectType stack_type[MAX_DEPTH];
VALUE last_key[MAX_DEPTH];

static const int HASH_BUFFER_LEN = 16;
VALUE hash_buffer[HASH_BUFFER_LEN];
int hash_buffer_idx;
};