Skip to content

Commit

Permalink
61 regex perf (besok#62)
Browse files Browse the repository at this point in the history
* add rgex bench

* fix inter

* init impl

* add config

* fix complains

---------

Co-authored-by: Boris Zhguchev <boris.zhguchev@gropyus.com>
  • Loading branch information
besok and Boris Zhguchev authored Feb 19, 2024
1 parent 668b626 commit a07c7b6
Show file tree
Hide file tree
Showing 11 changed files with 405 additions and 86 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,8 @@
* **`0.3.5`**
* add `!` negation operation in filters
* allow using () in filters
* **`0.5`**
* add config for jsonpath
* add an option to add a regex cache for boosting performance


10 changes: 8 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "jsonpath-rust"
description = "The library provides the basic functionality to find the set of the data according to the filtering query."
version = "0.4.0"
version = "0.5.0"
authors = ["BorisZhguchev <zhguchev@gmail.com>"]
edition = "2018"
license-file = "LICENSE"
Expand All @@ -17,6 +17,12 @@ regex = "1"
pest = "2.0"
pest_derive = "2.0"
thiserror = "1.0.50"
lazy_static = "1.4"
once_cell = "1.19.0"

[dev-dependencies]
lazy_static = "1.0"
criterion = "0.5.1"

[[bench]]
name = "regex_bench"
harness = false
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,50 @@ fn test() {
** If the value has been modified during the search, there is no way to find a path of a new value.
It can happen if we try to find a length() of array, for instance.**

## Configuration

The JsonPath provides a way to configure the search by using `JsonPathConfig`.

```rust
pub fn main() {
// Build a config whose regex cache is the `DefaultRegexCacheInst` implementation.
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
}
```

### Regex cache
The configuration makes it possible to use a regex cache to improve [performance](https://github.com/besok/jsonpath-rust/issues/61).

To instantiate the cache, use the `RegexCache` enum with an implementation of the `RegexCacheInst` trait.
The default implementation, `DefaultRegexCacheInst`, uses `Arc<Mutex<HashMap<String,Regex>>>`.
A pair of `Box<Value>` (or `Value`) and a config can be used:
```rust
pub fn main() {
    // Wrap the default cache implementation into a config.
    let cache = RegexCache::Implemented(DefaultRegexCacheInst::default());
    let config = JsonPathConfig::new(cache);

    let document = Box::new(json!({
        "author":"abcd(Rees)",
    }));

    // A `(json, config)` pair is queried through the same `path` call as a bare json value.
    let query = "$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]";
    let _v = (document, config)
        .path(query)
        .expect("the path is correct");
}
```
or using `JsonPathFinder` :

```rust
fn main() {
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
let finder = JsonPathFinder::from_str_with_cfg(
r#"{"first":{"second":[{"active":1},{"passive":1}]}}"#,
"$.first.second[?(@.active)]",
cfg,
).unwrap();
let slice_of_data: Vec<&Value> = finder.find_slice();
let js = json!({"active":1});
assert_eq!(slice_of_data, vec![JsonPathValue::Slice(&js, "$.first.second[0]".to_string())]);
}
```

## The structure

Expand Down
40 changes: 40 additions & 0 deletions benches/regex_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use jsonpath_rust::path::config::cache::{DefaultRegexCacheInst, RegexCache};
use jsonpath_rust::path::config::JsonPathConfig;
use jsonpath_rust::{JsonPathFinder, JsonPathInst, JsonPathQuery};
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use std::str::FromStr;

fn regex_perf_test_with_cache(cfg: JsonPathConfig) {
let json = Box::new(json!({
"author":"abcd(Rees)",
}));

let _v = (json, cfg)
.path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]")
.expect("the path is correct");
}

fn regex_perf_test_without_cache() {
let json = Box::new(json!({
"author":"abcd(Rees)",
}));

let _v = json
.path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]")
.expect("the path is correct");
}

pub fn criterion_benchmark(c: &mut Criterion) {
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
c.bench_function("regex bench without cache", |b| {
b.iter(|| regex_perf_test_without_cache())
});
c.bench_function("regex bench with cache", |b| {
b.iter(|| regex_perf_test_with_cache(cfg.clone()))
});
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
60 changes: 50 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@

use crate::parser::model::JsonPath;
use crate::parser::parser::parse_json_path;
use crate::path::config::JsonPathConfig;
use crate::path::{json_path_instance, PathInstance};
use serde_json::Value;
use std::convert::TryInto;
Expand Down Expand Up @@ -182,8 +183,12 @@ impl FromStr for JsonPathInst {
}

impl JsonPathInst {
pub fn find_slice<'a>(&'a self, value: &'a Value) -> Vec<JsonPtr<'a, Value>> {
json_path_instance(&self.inner, value)
pub fn find_slice<'a>(
&'a self,
value: &'a Value,
cfg: JsonPathConfig,
) -> Vec<JsonPtr<'a, Value>> {
json_path_instance(&self.inner, value, cfg)
.find(JsonPathValue::from_root(value))
.into_iter()
.filter(|v| v.has_value())
Expand Down Expand Up @@ -224,13 +229,27 @@ impl JsonPathQuery for Box<Value> {
}
}

impl JsonPathQuery for (Box<Value>, JsonPathConfig) {
    /// Parses `query` and runs it over the boxed json, using the config from the pair.
    ///
    /// Returns `Err` with the parser message when `query` is not a valid path.
    fn path(self, query: &str) -> Result<Value, String> {
        let (json, cfg) = self;
        let inst = Box::new(JsonPathInst::from_str(query)?);
        let finder = JsonPathFinder::new_with_cfg(json, inst, cfg);
        Ok(finder.find())
    }
}

impl JsonPathQuery for Value {
    /// Parses `query` and runs it over this json value with a finder built by
    /// `JsonPathFinder::new`.
    ///
    /// Returns `Err` with the parser message when `query` is not a valid path.
    fn path(self, query: &str) -> Result<Value, String> {
        let inst = Box::new(JsonPathInst::from_str(query)?);
        let finder = JsonPathFinder::new(Box::new(self), inst);
        Ok(finder.find())
    }
}

impl JsonPathQuery for (Value, JsonPathConfig) {
    /// Parses `query` and runs it over the json value, using the config from the pair.
    ///
    /// Returns `Err` with the parser message when `query` is not a valid path.
    fn path(self, query: &str) -> Result<Value, String> {
        let (json, cfg) = self;
        let inst = Box::new(JsonPathInst::from_str(query)?);
        let finder = JsonPathFinder::new_with_cfg(Box::new(json), inst, cfg);
        Ok(finder.find())
    }
}

/// just to create a json path value of data
/// Example:
/// - json_path_value(&json) = `JsonPathValue::Slice(&json)`
Expand Down Expand Up @@ -294,6 +313,7 @@ type JsPathStr = String;
/// Appends an index selector to a path prefix, e.g. `$` and `3` give `$[3]`.
pub(crate) fn jsp_idx(prefix: &str, idx: usize) -> String {
    let index = idx.to_string();
    [prefix, "[", index.as_str(), "]"].concat()
}

/// Appends a quoted key selector to a path prefix, e.g. `$` and `first` give `$.['first']`.
pub(crate) fn jsp_obj(prefix: &str, key: &str) -> String {
    [prefix, ".['", key, "']"].concat()
}
Expand Down Expand Up @@ -337,7 +357,7 @@ impl<'a, Data: Clone + Debug + Default> JsonPathValue<'a, Data> {
}

impl<'a, Data> JsonPathValue<'a, Data> {
fn only_no_value(input: &Vec<JsonPathValue<'a, Data>>) -> bool {
/// True when `input` is non-empty and none of its elements reports `has_value()`.
fn only_no_value(input: &[JsonPathValue<'a, Data>]) -> bool {
    if input.is_empty() {
        return false;
    }
    input.iter().all(|item| !item.has_value())
}
fn map_vec(data: Vec<(&'a Data, JsPathStr)>) -> Vec<JsonPathValue<'a, Data>> {
Expand Down Expand Up @@ -407,12 +427,26 @@ impl<'a, Data> JsonPathValue<'a, Data> {
pub struct JsonPathFinder {
/// json document the path is evaluated against
json: Box<Value>,
/// parsed json path
path: Box<JsonPathInst>,
/// configuration passed on (as a clone) when the path instance is created
cfg: JsonPathConfig,
}

impl JsonPathFinder {
/// creates a new instance of [JsonPathFinder]
pub fn new(json: Box<Value>, path: Box<JsonPathInst>) -> Self {
JsonPathFinder { json, path }
JsonPathFinder {
json,
path,
cfg: JsonPathConfig::default(),
}
}

pub fn new_with_cfg(json: Box<Value>, path: Box<JsonPathInst>, cfg: JsonPathConfig) -> Self {
JsonPathFinder { json, path, cfg }
}

/// replaces the stored config with `cfg`
pub fn set_cfg(&mut self, cfg: JsonPathConfig) {
    self.cfg = cfg;
}

/// updates a path with a new one
Expand Down Expand Up @@ -440,10 +474,15 @@ impl JsonPathFinder {
let path = Box::new(JsonPathInst::from_str(path)?);
Ok(JsonPathFinder::new(json, path))
}
pub fn from_str_with_cfg(json: &str, path: &str, cfg: JsonPathConfig) -> Result<Self, String> {
let json = serde_json::from_str(json).map_err(|e| e.to_string())?;
let path = Box::new(JsonPathInst::from_str(path)?);
Ok(JsonPathFinder::new_with_cfg(json, path, cfg))
}

/// creates an instance to find a json slice from the json
pub fn instance(&self) -> PathInstance {
json_path_instance(&self.path.inner, &self.json)
json_path_instance(&self.path.inner, &self.json, self.cfg.clone())
}
/// finds a slice of data in the set json.
/// The result is a vector of references to the incoming structure.
Expand Down Expand Up @@ -494,6 +533,7 @@ impl JsonPathFinder {

#[cfg(test)]
mod tests {
use crate::path::config::JsonPathConfig;
use crate::JsonPathQuery;
use crate::JsonPathValue::{NoValue, Slice};
use crate::{jp_v, JsonPathFinder, JsonPathInst, JsonPathValue};
Expand Down Expand Up @@ -1194,7 +1234,7 @@ mod tests {
let query = JsonPathInst::from_str("$..book[?(@.author size 10)].title")
.expect("the path is correct");

let results = query.find_slice(&json);
let results = query.find_slice(&json, JsonPathConfig::default());
let v = results.first().expect("to get value");

// V can be implicitly converted to &Value
Expand Down Expand Up @@ -1257,7 +1297,7 @@ mod tests {
v,
vec![Slice(
&json!({"second":{"active": 1}}),
"$.['first']".to_string()
"$.['first']".to_string(),
)]
);

Expand All @@ -1271,7 +1311,7 @@ mod tests {
v,
vec![Slice(
&json!({"second":{"active": 1}}),
"$.['first']".to_string()
"$.['first']".to_string(),
)]
);

Expand All @@ -1285,7 +1325,7 @@ mod tests {
v,
vec![Slice(
&json!({"second":{"active": 1}}),
"$.['first']".to_string()
"$.['first']".to_string(),
)]
);

Expand All @@ -1299,7 +1339,7 @@ mod tests {
v,
vec![Slice(
&json!({"second":{"active": 1}}),
"$.['first']".to_string()
"$.['first']".to_string(),
)]
);
}
Expand Down
16 changes: 16 additions & 0 deletions src/path/config.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
pub mod cache;

use crate::path::config::cache::RegexCache;

/// Configuration to adjust the jsonpath search.
///
/// The only setting at the moment is the regex cache. The derived `Default`
/// uses whatever `RegexCache::default()` yields.
#[derive(Clone, Default)]
pub struct JsonPathConfig {
/// regex cache setting carried by this configuration, see [`RegexCache`]
pub regex_cache: RegexCache,
}

impl JsonPathConfig {
pub fn new(regex_cache: RegexCache) -> Self {
Self { regex_cache }
}
}
Loading

0 comments on commit a07c7b6

Please sign in to comment.