Skip to content

Commit

Permalink
Add configurability for unicode and skipping parsers (#14)
Browse files Browse the repository at this point in the history
* Add configurability for unicode and skipping parsers

- Unicode support greatly increases memory and latency and is not
generally needed.
- Compiling regex for unused parsers is just a waste of memory.

Both unicode support and device/os/user_agent parsers can now be
configured via `UserAgentParserBuilder`.

* Hide UserAgentParserBuilder behind builder method

* Update tests/bench/examples with new builder syntax

---------

Co-authored-by: David Armstrong Lewis <6754950+davidarmstronglewis@users.noreply.github.com>
  • Loading branch information
hamiltop and oceanlewis authored Jul 23, 2023
1 parent ad3af40 commit 71a5f66
Show file tree
Hide file tree
Showing 15 changed files with 453 additions and 96 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ keywords = ["user", "agent", "parser", "uap", "uaparser"]

[dependencies]
lazy_static = "1.4.0"
regex = "1.5.5"
regex = "1.9.1"
serde = "1.0.137"
serde_yaml = "0.8.24"
serde_derive = "1.0.137"
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,13 @@ To get to the docs, clone the repo and run `cargo doc --open` to build the docum
- `git submodule update --init` to get started
- `cargo test`
- `cargo test -- --nocapture` for the full results

## Performance and Benchmarking
`cargo bench` will run a criterion benchmark suite.

To see the memory usage of the compiled regex list, run the examples under a tool that tracks memory usage.

Example (on macOS):
```
/usr/bin/time -l cargo run --example full_parser
```
68 changes: 64 additions & 4 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ struct TestCases {
}

fn bench_os(c: &mut Criterion) {
let parser = UserAgentParser::from_yaml("./src/core/regexes.yaml")
let parser = UserAgentParser::builder()
.with_device(false)
.with_os(true)
.with_user_agent(false)
.build_from_yaml("./src/core/regexes.yaml")
.expect("Parser creation failed");

let file = File::open("./src/core/tests/test_os.yaml").unwrap();
Expand All @@ -28,10 +32,30 @@ fn bench_os(c: &mut Criterion) {
}
})
});

let parser = UserAgentParser::builder()
.with_device(false)
.with_os(true)
.with_user_agent(false)
.with_unicode_support(false)
.build_from_yaml("./src/core/regexes.yaml")
.expect("Parser creation failed");

c.bench_function("parse_os unicode disabled", |b| {
b.iter(|| {
for case in &test_cases.test_cases {
black_box(parser.parse_os(&case.user_agent_string));
}
})
});
}

fn bench_device(c: &mut Criterion) {
let parser = UserAgentParser::from_yaml("./src/core/regexes.yaml")
let parser = UserAgentParser::builder()
.with_device(true)
.with_os(false)
.with_user_agent(false)
.build_from_yaml("./src/core/regexes.yaml")
.expect("Parser creation failed");

let file = File::open("./src/core/tests/test_device.yaml").unwrap();
Expand All @@ -44,13 +68,33 @@ fn bench_device(c: &mut Criterion) {
}
})
});

let parser = UserAgentParser::builder()
.with_device(true)
.with_os(false)
.with_user_agent(false)
.with_unicode_support(false)
.build_from_yaml("./src/core/regexes.yaml")
.expect("Parser creation failed");

c.bench_function("parse_device unicode disabled", |b| {
b.iter(|| {
for case in &test_cases.test_cases {
black_box(parser.parse_device(&case.user_agent_string));
}
})
});
}

fn bench_ua(c: &mut Criterion) {
let parser = UserAgentParser::from_yaml("./src/core/regexes.yaml")
let parser = UserAgentParser::builder()
.with_device(false)
.with_os(false)
.with_user_agent(true)
.build_from_yaml("./src/core/regexes.yaml")
.expect("Parser creation failed");

let file = std::fs::File::open("./src/core/tests/test_ua.yaml").unwrap();
let file = File::open("./src/core/tests/test_ua.yaml").unwrap();
let test_cases: TestCases = serde_yaml::from_reader(file).unwrap();

c.bench_function("parse_user_agent", |b| {
Expand All @@ -60,6 +104,22 @@ fn bench_ua(c: &mut Criterion) {
}
})
});

let parser = UserAgentParser::builder()
.with_device(false)
.with_os(false)
.with_user_agent(true)
.with_unicode_support(false)
.build_from_yaml("./src/core/regexes.yaml")
.expect("Parser creation failed");

c.bench_function("parse_user_agent unicode disabled", |b| {
b.iter(|| {
for case in &test_cases.test_cases {
black_box(parser.parse_user_agent(&case.user_agent_string));
}
})
});
}

criterion_group!(
Expand Down
9 changes: 9 additions & 0 deletions examples/full_parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use uaparser::{Parser, UserAgentParser};

/// Example entry point: builds a `UserAgentParser` from the regex definitions
/// at `./src/core/regexes.yaml` without overriding any builder options, then
/// prints the `Debug` form of the parse result for a sample user agent string.
///
/// Panics with "Parser creation failed" if the parser cannot be built.
fn main() {
    let regexes_path = "./src/core/regexes.yaml";
    let sample_user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36";

    let parser = UserAgentParser::builder()
        .build_from_yaml(regexes_path)
        .expect("Parser creation failed");

    let parsed = parser.parse(sample_user_agent);
    println!("{:?}", parsed);
}
10 changes: 10 additions & 0 deletions examples/no_unicode_parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use uaparser::{Parser, UserAgentParser};

/// Example entry point: builds a `UserAgentParser` from the regex definitions
/// at `./src/core/regexes.yaml` with unicode support switched off, then prints
/// the `Debug` form of the parse result for a sample user agent string.
///
/// Panics with "Parser creation failed" if the parser cannot be built.
fn main() {
    let regexes_path = "./src/core/regexes.yaml";
    let sample_user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36";

    let parser = UserAgentParser::builder()
        .with_unicode_support(false)
        .build_from_yaml(regexes_path)
        .expect("Parser creation failed");

    let parsed = parser.parse(sample_user_agent);
    println!("{:?}", parsed);
}
13 changes: 13 additions & 0 deletions examples/os_only_no_unicode_parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use uaparser::{Parser, UserAgentParser};

/// Example entry point: builds a `UserAgentParser` from the regex definitions
/// at `./src/core/regexes.yaml` with unicode support, the device parser, and
/// the user-agent parser switched off and only the OS parser switched on, then
/// prints the `Debug` form of the parse result for a sample user agent string.
///
/// Panics with "Parser creation failed" if the parser cannot be built.
fn main() {
    let regexes_path = "./src/core/regexes.yaml";
    let sample_user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36";

    // The builder calls are kept in the same order as the original example.
    let parser = UserAgentParser::builder()
        .with_unicode_support(false)
        .with_device(false)
        .with_os(true)
        .with_user_agent(false)
        .build_from_yaml(regexes_path)
        .expect("Parser creation failed");

    let parsed = parser.parse(sample_user_agent);
    println!("{:?}", parsed);
}
66 changes: 51 additions & 15 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion nix/shell.nix
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,15 @@ mkShell {

nativeBuildInputs = [
(rust-bin.stable.latest.default.override {
extensions = [ "rust-src" ];
extensions = [
"rust-src"
"rust-analysis"
"clippy"
];
})
cargo-criterion
cargo-edit
cargo-watch
gnuplot
];
}
5 changes: 4 additions & 1 deletion src/client.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use super::{Deserialize, Device, Serialize, UserAgent, OS};
use super::Device;
use super::UserAgent;
use super::OS;
use super::{Deserialize, Serialize};

/// Houses the `Device`, `OS`, and `UserAgent` structs, which each get parsed
/// out from a user agent string by a `UserAgentParser`.
Expand Down
Loading

0 comments on commit 71a5f66

Please sign in to comment.