Skip to content

Commit

Permalink
Migrate to web_accessible_resources and scriptlets.js (#50)
Browse files Browse the repository at this point in the history
* allow parsing web_accessible_resources and scriptlets.js into JSON format

* integrate new resources into existing structures

* expose resource assembly from neon bindings

* add redirect performance benchmark

* moves resources into a module
  • Loading branch information
antonok-edm authored and Andrius committed Oct 28, 2019
1 parent 0b21a81 commit b1310df
Show file tree
Hide file tree
Showing 54 changed files with 4,096 additions and 2,698 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ criterion = "0.2"
csv = "1"
serde_json = "1.0"
reqwest = "0.9"
psl = "0.4.1"

[lib]
bench = false
Expand All @@ -61,6 +62,10 @@ harness = false
name = "bench_rules"
harness = false

[[bench]]
name = "bench_redirect_performance"
harness = false

[features]
default = ["full-regex-handling", "object-pooling"]
full-domain-matching = [] # feature has no explicit dependencies
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ const AdBlockClient = require('adblock-rs');
let el_rules = fs.readFileSync('./data/easylist.to/easylist/easylist.txt', { encoding: 'utf-8' }).split('\n');
let ubo_unbreak_rules = fs.readFileSync('./data/uBlockOrigin/unbreak.txt', { encoding: 'utf-8' }).split('\n');
let rules = el_rules.concat(ubo_unbreak_rules);
let resources = fs.readFileSync('./data/uBlockOrigin/resources.txt', { encoding: 'utf-8' });
let resources = AdBlockClient.uBlockResources('uBlockOrigin/src/web_accessible_resources', 'uBlockOrigin/src/js/redirect-engine.js', 'uBlockOrigin/assets/resources/scriptlets.js');

// create client with debug = true
const client = new AdBlockClient.Engine(rules, true);
Expand Down
179 changes: 179 additions & 0 deletions benches/bench_redirect_performance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
use criterion::*;
use std::path::Path;
use psl::Psl;
use lazy_static::lazy_static;

use adblock;
use adblock::filters::network::{NetworkFilter, NetworkFilterMask};
use adblock::request::Request;
use adblock::blocker::{Blocker, BlockerOptions};
use adblock::resource_assembler::{assemble_web_accessible_resources, assemble_scriptlet_resources};

// Shared `psl::List` instance, created through `lazy_static!` so it is constructed once and
// reused. `build_custom_requests` queries it via `PSL_LIST.suffix(..)` to find the public suffix
// of a rule's source hostname.
lazy_static! {
static ref PSL_LIST: psl::List = psl::List::new();
}

/// Collects every redirect rule from the default filter lists and rewrites each one so that it
/// targets its own synthetic resource, `a{index}.com/bad.js`.
///
/// Each default list is fetched with `reqwest`, split into lines and parsed into network
/// filters. Only rules whose `redirect` option is present and different from `none` are kept.
/// `index` counts the kept rules across all lists, so every returned rule gets a distinct
/// hostname.
///
/// # Panics
///
/// Panics if a list cannot be requested or its body cannot be read as text.
fn get_redirect_rules() -> Vec<NetworkFilter> {
    let mut redirect_rules: Vec<NetworkFilter> = Vec::new();

    for list in adblock::filter_lists::default::default_lists() {
        let body = reqwest::get(&list.url)
            .expect("Could not request rules")
            .text()
            .expect("Could not get rules as text");
        let lines: Vec<String> = body.lines().map(String::from).collect();

        let (network_filters, _) = adblock::lists::parse_filters(&lines, true, false, true);

        for mut rule in network_filters {
            let has_redirect = rule
                .redirect
                .as_ref()
                .map_or(false, |target| target != "none");
            if !has_redirect {
                continue;
            }

            // The number of rules kept so far is the running index of this rule.
            let index = redirect_rules.len();

            // Replace the original pattern with an empty one and clear the
            // hostname-anchor, regex and fuzzy-match flags that went with it.
            rule.filter = adblock::filters::network::FilterPart::Empty;
            rule.mask.remove(NetworkFilterMask::IS_HOSTNAME_ANCHOR);
            rule.mask.remove(NetworkFilterMask::IS_HOSTNAME_REGEX);
            rule.mask.remove(NetworkFilterMask::IS_REGEX);
            rule.mask.remove(NetworkFilterMask::IS_COMPLETE_REGEX);
            rule.mask.remove(NetworkFilterMask::FUZZY_MATCH);

            // Anchor the rule on both sides and point it at the synthetic resource.
            rule.hostname = Some(format!("a{}.com/bad.js", index));
            rule.mask.insert(NetworkFilterMask::IS_LEFT_ANCHOR);
            rule.mask.insert(NetworkFilterMask::IS_RIGHT_ANCHOR);

            redirect_rules.push(rule);
        }
    }

    redirect_rules
}

/// Builds a `Blocker` from `rules` and loads the test resource set into it.
///
/// The blocker is created with network filters and optimizations enabled, and with cosmetic
/// filters and debug mode disabled. Resources are assembled from the fake uBlock Origin files
/// under `data/test/fake-uBO-files`: first the web accessible resources, then the scriptlets.
fn get_preloaded_blocker(rules: Vec<NetworkFilter>) -> Blocker {
    let options = BlockerOptions {
        load_network_filters: true,
        load_cosmetic_filters: false,
        enable_optimizations: true,
        debug: false,
    };
    let mut blocker = Blocker::new(rules, &options);

    let web_accessible_dir = Path::new("data/test/fake-uBO-files/web_accessible_resources");
    let redirect_engine_file = Path::new("data/test/fake-uBO-files/redirect-engine.js");
    let scriptlets_file = Path::new("data/test/fake-uBO-files/scriptlets.js");

    let mut resources =
        assemble_web_accessible_resources(web_accessible_dir, redirect_engine_file);
    let mut scriptlets = assemble_scriptlet_resources(scriptlets_file);
    resources.append(&mut scriptlets);

    blocker.with_resources(&resources);
    blocker
}

/// Maps network filter rules into `Request`s that would trigger those rules
pub fn build_custom_requests(rules: Vec<NetworkFilter>) -> Vec<Request> {
rules.iter().map(|rule| {
let raw_type = if rule.mask.contains(NetworkFilterMask::FROM_IMAGE) {
"image"
} else if rule.mask.contains(NetworkFilterMask::FROM_MEDIA) {
"media"
} else if rule.mask.contains(NetworkFilterMask::FROM_OBJECT) {
"object"
} else if rule.mask.contains(NetworkFilterMask::FROM_OTHER) {
"other"
} else if rule.mask.contains(NetworkFilterMask::FROM_PING) {
"ping"
} else if rule.mask.contains(NetworkFilterMask::FROM_SCRIPT) {
"script"
} else if rule.mask.contains(NetworkFilterMask::FROM_STYLESHEET) {
"stylesheet"
} else if rule.mask.contains(NetworkFilterMask::FROM_SUBDOCUMENT) {
"subdocument"
} else if rule.mask.contains(NetworkFilterMask::FROM_DOCUMENT) {
"main_frame"
} else if rule.mask.contains(NetworkFilterMask::FROM_XMLHTTPREQUEST) {
"xhr"
} else if rule.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) {
"websocket"
} else if rule.mask.contains(NetworkFilterMask::FROM_FONT) {
"font"
} else {
unreachable!()
};

let rule_hostname = rule.hostname.clone().unwrap();
let url = format!("https://{}", rule_hostname.clone());
let domain = &rule_hostname[..rule_hostname.find('/').unwrap()];
let hostname = domain;

let raw_line = rule.raw_line.clone().unwrap();
let (source_hostname, source_domain) = if rule.opt_domains.is_some() {
let domain_start = raw_line.rfind("domain=").unwrap() + "domain=".len();
let from_start = &raw_line[domain_start..];
let domain_end = from_start.find('|').or_else(|| from_start.find(",")).or_else(|| Some(from_start.len())).unwrap() + domain_start;
let source_hostname = &raw_line[domain_start..domain_end];

let suffix = PSL_LIST.suffix(source_hostname).unwrap();
let suffix = suffix.to_str();
let domain_start = source_hostname[..source_hostname.len()-suffix.len()-1].rfind('.');
let source_domain = if let Some(domain_start) = domain_start {
&source_hostname[domain_start+1..]
} else {
source_hostname
};
(source_hostname, source_domain)
} else {
(hostname, domain)
};

Request::new(
raw_type,
&url,
"https",
hostname,
domain,
source_hostname,
source_domain,
)
}).collect::<Vec<_>>()
}

/// Checks every request against `blocker` and asserts that each result carries a redirect.
///
/// # Panics
///
/// Panics on the first request whose check result has no redirect.
fn bench_fn(blocker: &Blocker, requests: &[Request]) {
    for request in requests {
        let block_result = blocker.check(request);
        assert!(block_result.redirect.is_some());
    }
}

/// Registers the `redirect_performance` / `without_alias_lookup` benchmark with Criterion.
///
/// The redirect rules are fetched once; a blocker preloaded with them and one request per rule
/// are built up front, and each benchmark iteration runs all requests through the blocker.
/// Throughput is reported in elements, one per request, with a sample size of 10.
fn redirect_performance(c: &mut Criterion) {
    let rules = get_redirect_rules();

    // The blocker takes a copy of the rules; the request builder consumes the original.
    let blocker = get_preloaded_blocker(rules.clone());
    let requests = build_custom_requests(rules);
    let requests_len = requests.len() as u32;

    let benchmark = Benchmark::new("without_alias_lookup", move |b| {
        b.iter(|| bench_fn(&blocker, &requests))
    })
    .throughput(Throughput::Elements(requests_len))
    .sample_size(10);

    c.bench("redirect_performance", benchmark);
}

// Declares the `benches` Criterion group, containing only `redirect_performance`, and hands
// that group to `criterion_main!`.
criterion_group!(
benches,
redirect_performance,
);
criterion_main!(benches);
Loading

0 comments on commit b1310df

Please sign in to comment.