This repository has been archived by the owner on Apr 4, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
494: Only flatten the required objects r=Kerollmops a=irevoire

Instead of flattening every object to write them in the flattened sorter, we now check whether we need to flatten the object or whether we can insert it as-is.

```
group  indexing_flatten-what-is-needed_a6031f9a  indexing_main_6b073738
-----  ----------------------------------------  ----------------------
indexing/Indexing geo_point  1.00 25.1±0.20s ? ?/sec  1.00 25.2±0.20s ? ?/sec
indexing/Indexing movies in three batches  1.01 18.3±0.12s ? ?/sec  1.00 18.2±0.10s ? ?/sec
indexing/Indexing movies with default settings  1.00 17.6±0.11s ? ?/sec  1.00 17.5±0.09s ? ?/sec
indexing/Indexing songs in three batches with default settings  1.00 66.4±0.46s ? ?/sec  1.03 68.3±1.01s ? ?/sec
indexing/Indexing songs with default settings  1.00 55.7±1.15s ? ?/sec  1.14 63.2±0.78s ? ?/sec
indexing/Indexing songs without any facets  1.00 51.6±1.04s ? ?/sec  1.16 59.6±1.00s ? ?/sec
indexing/Indexing songs without faceted numbers  1.00 55.3±1.09s ? ?/sec  1.13 62.8±0.38s ? ?/sec
indexing/Indexing wiki  1.00 1006.6±26.89s ? ?/sec  1.00 1009.2±25.25s ? ?/sec
indexing/Indexing wiki in three batches  1.00 1140.5±11.97s ? ?/sec  1.00 1142.0±9.97s ? ?/sec
```

We now have performance similar to what we had before for the non-nested datasets 🎉

Co-authored-by: Tamo <tamo@meilisearch.com>
- Loading branch information
Showing
8 changed files
with
251 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
[package] | ||
name = "json-depth-checker" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
serde_json = "1.0" | ||
|
||
[dev-dependencies] | ||
criterion = "0.3" | ||
|
||
[[bench]] | ||
name = "depth" | ||
harness = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
use criterion::{criterion_group, criterion_main, Criterion}; | ||
use json_depth_checker::should_flatten_from_unchecked_slice; | ||
use serde_json::json; | ||
|
||
fn criterion_benchmark(c: &mut Criterion) { | ||
let null = serde_json::to_vec(&json!(null)).unwrap(); | ||
let bool_true = serde_json::to_vec(&json!(true)).unwrap(); | ||
let bool_false = serde_json::to_vec(&json!(false)).unwrap(); | ||
let integer = serde_json::to_vec(&json!(42)).unwrap(); | ||
let float = serde_json::to_vec(&json!(1456.258)).unwrap(); | ||
let string = serde_json::to_vec(&json!("hello world")).unwrap(); | ||
let object = serde_json::to_vec(&json!({ "hello": "world",})).unwrap(); | ||
let complex_object = serde_json::to_vec(&json!({ | ||
"doggos": [ | ||
{ "bernard": true }, | ||
{ "michel": 42 }, | ||
false, | ||
], | ||
"bouvier": true, | ||
"caniche": null, | ||
})) | ||
.unwrap(); | ||
let simple_array = serde_json::to_vec(&json!([ | ||
1, | ||
2, | ||
3, | ||
"viva", | ||
"l\"algeria", | ||
true, | ||
"[array]", | ||
"escaped string \"" | ||
])) | ||
.unwrap(); | ||
let array_of_array = serde_json::to_vec(&json!([1, [2, [3]]])).unwrap(); | ||
let array_of_object = serde_json::to_vec(&json!([1, [2, [3]], {}])).unwrap(); | ||
|
||
c.bench_function("null", |b| b.iter(|| should_flatten_from_unchecked_slice(&null))); | ||
c.bench_function("true", |b| b.iter(|| should_flatten_from_unchecked_slice(&bool_true))); | ||
c.bench_function("false", |b| b.iter(|| should_flatten_from_unchecked_slice(&bool_false))); | ||
c.bench_function("integer", |b| b.iter(|| should_flatten_from_unchecked_slice(&integer))); | ||
c.bench_function("float", |b| b.iter(|| should_flatten_from_unchecked_slice(&float))); | ||
c.bench_function("string", |b| b.iter(|| should_flatten_from_unchecked_slice(&string))); | ||
c.bench_function("object", |b| b.iter(|| should_flatten_from_unchecked_slice(&object))); | ||
c.bench_function("complex object", |b| { | ||
b.iter(|| should_flatten_from_unchecked_slice(&complex_object)) | ||
}); | ||
c.bench_function("simple array", |b| { | ||
b.iter(|| should_flatten_from_unchecked_slice(&simple_array)) | ||
}); | ||
c.bench_function("array of array", |b| { | ||
b.iter(|| should_flatten_from_unchecked_slice(&array_of_array)) | ||
}); | ||
c.bench_function("array of object", |b| { | ||
b.iter(|| should_flatten_from_unchecked_slice(&array_of_object)) | ||
}); | ||
} | ||
|
||
criterion_group!(benches, criterion_benchmark); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[package] | ||
name = "json-depth-checker" | ||
version = "0.0.0" | ||
authors = ["Automatically generated"] | ||
publish = false | ||
edition = "2018" | ||
|
||
[package.metadata] | ||
cargo-fuzz = true | ||
|
||
[dependencies] | ||
libfuzzer-sys = "0.4" | ||
arbitrary-json = "0.1.1" | ||
serde_json = "1.0.79" | ||
|
||
[dependencies.json-depth-checker] | ||
path = ".." | ||
|
||
# Prevent this from interfering with workspaces | ||
[workspace] | ||
members = ["."] | ||
|
||
[[bin]] | ||
name = "depth" | ||
path = "fuzz_targets/depth.rs" | ||
test = false | ||
doc = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#![no_main] | ||
use arbitrary_json::ArbitraryValue; | ||
use json_depth_checker::*; | ||
use libfuzzer_sys::fuzz_target; | ||
|
||
fuzz_target!(|value: ArbitraryValue| { | ||
let value = serde_json::Value::from(value); | ||
let left = should_flatten_from_value(&value); | ||
let value = serde_json::to_vec(&value).unwrap(); | ||
let right = should_flatten_from_unchecked_slice(&value); | ||
|
||
assert_eq!(left, right); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
use serde_json::Value; | ||
|
||
/// Cheaply decides whether a serialized JSON document needs to be flattened.
///
/// Your JSON MUST BE valid and generated by `serde_json::to_vec` before being
/// sent to this function. This function is DUMB and FAST: it never parses the
/// document and instead makes a lot of assumptions about the way `serde_json`
/// generates its output (no leading whitespace, backslashes only inside
/// strings). On any other input the answer is meaningless, but the function
/// still never panics.
///
/// Returns `true` if the JSON is an object, an array containing an array,
/// or an array containing an object. Returns `false` for everything else,
/// including an empty slice.
pub fn should_flatten_from_unchecked_slice(json: &[u8]) -> bool {
    // Since the JSON we receive has been generated by `serde_json` we know it
    // doesn't contain any whitespace at the beginning, thus the first byte
    // alone tells us which kind of value we are looking at.
    let rest = match json.split_first() {
        // an object always needs to be flattened.
        Some((b'{', _)) => return true,
        // an array needs a closer look at what it contains.
        Some((b'[', rest)) => rest,
        // empty input, or a simple value (null, boolean, number, string).
        _ => return false,
    };

    // The array case is a little bit more complex. We are looking for a `[`
    // or a `{` after the opening bracket, but we need to ensure it doesn't
    // appear inside of a string. Thus we keep track of whether we are in a
    // string or not.

    // set when we meet a `\` so the escaped character that follows is skipped.
    let mut skip_next = false;
    let mut in_string = false;

    for &byte in rest {
        // handle the byte following a backslash: it is never significant.
        if skip_next {
            skip_next = false;
            continue;
        }

        match byte {
            // handle the backslash.
            b'\\' => skip_next = true,
            // an unescaped quote either opens or closes a string.
            b'"' => in_string = !in_string,
            // a nested array or object outside of a string: we must flatten.
            // Since we know the JSON is valid we don't need to ensure the
            // array is correctly closed.
            b'[' | b'{' if !in_string => return true,
            // ignore everything else.
            _ => (),
        }
    }

    false
}
|
||
/// Consider using [`should_flatten_from_unchecked_slice`] when you can. | ||
/// Will returns `true` if the json contains an object, an array of array | ||
/// or an array containing an object. | ||
/// Returns `false` for everything else. | ||
/// This function has been written to test the [`should_flatten_from_unchecked_slice`]. | ||
pub fn should_flatten_from_value(json: &Value) -> bool { | ||
match json { | ||
Value::Object(..) => true, | ||
Value::Array(array) => array.iter().any(|value| value.is_array() || value.is_object()), | ||
_ => false, | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Scalars, strings (even ones containing brackets or escaped quotes) and
    /// arrays of such values must be left as-is by both implementations.
    #[test]
    fn test_shouldnt_flatten() {
        let flat_documents = [
            json!(null),
            json!(true),
            json!(false),
            json!("a superb string"),
            json!("a string escaping other \"string\""),
            json!([null, true, false]),
            json!(["hello", "world", "!"]),
            json!(["a \"string\" escaping 'an other'", "\"[\"", "\"{\""]),
        ];

        for document in flat_documents.iter() {
            assert!(!should_flatten_from_value(document));
            let bytes = serde_json::to_vec(document).unwrap();
            assert!(!should_flatten_from_unchecked_slice(&bytes));
        }
    }

    /// Objects and arrays containing an array or an object must be flattened
    /// according to both implementations.
    #[test]
    fn test_should_flatten() {
        let nested_documents = [
            json!({}),
            json!({ "hello": "world" }),
            json!(["hello", ["world"]]),
            json!([true, true, true, true, true, true, true, true, true, {}]),
        ];

        for document in nested_documents.iter() {
            assert!(should_flatten_from_value(document));
            let bytes = serde_json::to_vec(document).unwrap();
            assert!(should_flatten_from_unchecked_slice(&bytes));
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters