From 4065a9c86a671f8258c9d689963ab19fc6f994c9 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Wed, 18 Jan 2023 15:00:39 -0800 Subject: [PATCH 1/3] Update dependencies --- Gemfile.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 814d990..baa3a61 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -7,14 +7,14 @@ GEM remote: https://rubygems.org/ specs: benchmark-ips (2.10.0) - fast_jsonparser (0.5.0) - json (2.6.1) - minitest (5.15.0) - oj (3.13.11) + fast_jsonparser (0.6.0) + json (2.6.3) + minitest (5.17.0) + oj (3.13.23) rake (13.0.6) - rake-compiler (1.1.9) + rake-compiler (1.2.1) rake - yajl-ruby (1.4.1) + yajl-ruby (1.4.3) PLATFORMS ruby From cbb5d1d6839a1a4ec4bf8623f581b123427abf1d Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Wed, 18 Jan 2023 15:01:31 -0800 Subject: [PATCH 2/3] Update rapidjson --- ext/rapidjson/rapidjson | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/rapidjson/rapidjson b/ext/rapidjson/rapidjson index e4bde97..012be85 160000 --- a/ext/rapidjson/rapidjson +++ b/ext/rapidjson/rapidjson @@ -1 +1 @@ -Subproject commit e4bde977440d4a00f820b6586899e48a972d2493 +Subproject commit 012be8528783cdbf4b7a9e64f78bd8f056b97e24 From 58dea095a0eaedce27a8ed7a43122f8fd3fd6e4a Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Sun, 22 Jan 2023 21:49:16 -0800 Subject: [PATCH 3/3] WIP: cached hash building --- benchmark/parser.rb | 6 ++- ext/rapidjson/parser.hh | 84 +++++++++++++++++++++++++++++++++++------ 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/benchmark/parser.rb b/benchmark/parser.rb index e83f0b5..6603be5 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -19,13 +19,17 @@ def benchmark_parsing(name, json_output) puts "== Parsing #{name} (#{json_output.size} bytes)" Benchmark.ips do |x| - x.report("yajl") { Yajl::Parser.new.parse(json_output) } if RUN[:yajl] + x.config quiet: true if ENV["QUIET"] + x.report("json") { JSON.parse(json_output) } if RUN[:json] + x.report("yajl") { Yajl::Parser.new.parse(json_output) } if RUN[:yajl] x.report("oj") { Oj.load(json_output) } if RUN[:oj] x.report("oj strict") { Oj.strict_load(json_output) } if RUN[:oj] x.report("Oj::Parser") { Oj::Parser.usual.parse(json_output) } if RUN[:oj] x.report("fast_jsonparser") { FastJsonparser.parse(json_output) } if RUN[:fast_jsonparser] x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson] + + x.compare! end puts end diff --git a/ext/rapidjson/parser.hh b/ext/rapidjson/parser.hh index f11079c..9c6a0b3 100644 --- a/ext/rapidjson/parser.hh +++ b/ext/rapidjson/parser.hh @@ -25,6 +25,12 @@ class NullHandler : public BaseReaderHandler, NullHandler> { }; struct RubyObjectHandler : public BaseReaderHandler, RubyObjectHandler> { + enum class ObjectType : char { + Array, + BufferedHash, + Hash, + }; + bool Null() { return PutValue(Qnil); } @@ -59,7 +65,9 @@ struct RubyObjectHandler : public BaseReaderHandler, RubyObjectHandler> { } bool StartObject() { - return push(rb_hash_new()); + //return push(rb_hash_new()); + //return push(rb_hash_new(), ObjectType::Hash); + return push(Qundef, ObjectType::BufferedHash); } bool Key(const char* str, SizeType length, bool copy) { @@ -72,12 +80,13 @@ struct RubyObjectHandler : public BaseReaderHandler, RubyObjectHandler> { } bool EndObject(SizeType memberCount) { + materialize_hash(); return PutValue(pop()); } bool StartArray() { VALUE array = rb_ary_new(); - return push(array); + return push(array, ObjectType::Array); } bool EndArray(SizeType elementCount) { @@ -86,9 +95,30 @@ struct RubyObjectHandler : public BaseReaderHandler, RubyObjectHandler> { return true; } - bool push(VALUE val) { + void materialize_hash() { + auto top_type = stack_type[depth - 1]; + + if (top_type == ObjectType::BufferedHash) { + if (hash_buffer_idx & 1) { + // drop last key + hash_buffer_idx--; + } + + VALUE hash = rb_hash_new_capa(hash_buffer_idx / 2); + rb_hash_bulk_insert(hash_buffer_idx, hash_buffer, hash); + + stack[depth - 1] = hash; + stack_type[depth - 1] = ObjectType::Hash; + hash_buffer_idx = 0; + } + } + + bool push(VALUE val, ObjectType type) { if (depth < MAX_DEPTH) { + materialize_hash(); + stack[depth] = val; + stack_type[depth] = type; depth++; return true; } else { @@ -108,8 +138,24 @@ struct RubyObjectHandler : public BaseReaderHandler, RubyObjectHandler> { bool PutKey(VALUE key) { if (depth > 0) { - last_key[depth - 1] = key; - return true; + auto top_type = stack_type[depth - 1]; + + if (top_type == ObjectType::BufferedHash) { + if (hash_buffer_idx >= HASH_BUFFER_LEN) { + materialize_hash(); + last_key[depth - 1] = key; + return true; + } + if (hash_buffer_idx & 1) { + rb_bug("rapidjson: key at odd offset"); + } + hash_buffer[hash_buffer_idx++] = key; + last_key[depth - 1] = key; + return true; + } else { + last_key[depth - 1] = key; + return true; + } } else { rb_bug("rapidjson: key at depth 0"); return false; @@ -121,12 +167,21 @@ struct RubyObjectHandler : public BaseReaderHandler, RubyObjectHandler> { stack[0] = val; } else { VALUE top_val = stack[depth - 1]; - if (RB_TYPE_P(top_val, T_ARRAY)) { - rb_ary_push(top_val, val); - } else if (RB_TYPE_P(top_val, T_HASH)) { - rb_hash_aset(top_val, last_key[depth - 1], val); - } else { - rb_bug("rapidjson: bad type on stack"); + auto top_type = stack_type[depth - 1]; + switch(top_type) { + case ObjectType::Array: + rb_ary_push(top_val, val); + break; + case ObjectType::BufferedHash: + if (hash_buffer_idx >= HASH_BUFFER_LEN) { + rb_bug("rapidjson: FIXME: key would overflow buffer"); + } + hash_buffer[hash_buffer_idx++] = val; + break; + materialize_hash(); + case ObjectType::Hash: + rb_hash_aset(top_val, last_key[depth - 1], val); + break; } } return true; @@ -140,12 +195,17 @@ struct RubyObjectHandler : public BaseReaderHandler, RubyObjectHandler> { return stack[0]; } - RubyObjectHandler(): depth(0) { + RubyObjectHandler(): depth(0), hash_buffer_idx(0) { stack[0] = Qundef; } static const int MAX_DEPTH = 256; int depth; VALUE stack[MAX_DEPTH]; + ObjectType stack_type[MAX_DEPTH]; VALUE last_key[MAX_DEPTH]; + + static const int HASH_BUFFER_LEN = 16; + VALUE hash_buffer[HASH_BUFFER_LEN]; + int hash_buffer_idx; };