Skip to content

Commit

Permalink
handle cases with wildcard in adblock style syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
jellizaveta committed Dec 18, 2024
1 parent 22e49a9 commit a2c8adc
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 27 deletions.
76 changes: 50 additions & 26 deletions src/transformations/validate.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,35 @@ const tldts = require('tldts');
const utils = require('../utils');
const ruleUtils = require('../rule');

// Marker that opens a domain-anchored adblock-style pattern (e.g. `||example.org^`).
const DOMAIN_PREFIX = '||';
// Separator that closes the domain part of an adblock-style pattern.
const DOMAIN_SEPARATOR = '^';
// Wildcard character that may appear inside a pattern.
const WILDCARD = '*';
// Leading wildcard label of a domain, as in `||*.org^`.
const WILDCARD_DOMAIN_PART = '*.';
// Delimiter used to split a domain into its labels.
const DOT = '.';

/**
* The list of modifiers that limit the rule for specific domains.
*
* NOTE(review): presumably this list drives the `hasLimitModifier` flag that is
* passed to `validHostname` — confirm against the rest of the file.
*/
const LIMITING_MODIFIERS = [
'denyallow',
'badfilter',
// DNS-related modifiers.
'client',
];

/**
* The list of modifiers supported by hosts-level blockers.
*/
const SUPPORTED_MODIFIERS = [
'important',
'~important',
'badfilter',
'ctag',
'denyallow',
// DNS-related modifiers.
'client',
'dnstype',
'dnsrewrite',
'ctag',
];

/**
* The list of modifiers that limit the rule.
*/
const LIMITING_MODIFIERS = [
'denyallow',
'client',
'badfilter',
// modifiers that limit the rule for specific domains
...LIMITING_MODIFIERS,
];

/**
Expand Down Expand Up @@ -168,26 +174,44 @@ function validAdblockRule(ruleText, allowedIP) {

// 4. Validate domain name
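// Note that rules with wildcard characters are only checked when the wildcard
// covers everything but the TLD (e.g. ||*.org^); other wildcard rules are accepted as is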
const sepIdx = props.pattern.indexOf('^');
const wildcardIdx = props.pattern.indexOf('*');
const sepIdx = props.pattern.indexOf(DOMAIN_SEPARATOR);
const wildcardIdx = props.pattern.indexOf(WILDCARD);
if (sepIdx !== -1 && wildcardIdx !== -1 && wildcardIdx > sepIdx) {
// Something like ||example.org^test* is invalid: the wildcard comes after the separator
return false;
}

if (_.startsWith(props.pattern, '||')
&& sepIdx !== -1
&& wildcardIdx === -1) {
const hostname = utils.substringBetween(ruleText, '||', '^');
if (!validHostname(hostname, ruleText, allowedIP, hasLimitModifier)) {
return false;
}
// Check if the pattern starts with the domain prefix and contains a domain separator
if (_.startsWith(props.pattern, DOMAIN_PREFIX) && sepIdx !== -1) {
// Extract the domain to check from the rule text
const domainToCheck = utils.substringBetween(ruleText, DOMAIN_PREFIX, DOMAIN_SEPARATOR);

// If there's something after ^ in the pattern - something went wrong
// unless it's `^|`, which is a fairly common case
if (props.pattern.length > (sepIdx + 1)
&& props.pattern[sepIdx + 1] !== '|') {
return false;
// If there are no wildcard characters in the pattern
if (wildcardIdx === -1) {
// Validate the domain
if (!validHostname(domainToCheck, ruleText, allowedIP, hasLimitModifier)) {
return false;
}

// Ensure there's nothing after the domain separator unless it's `^|`
if (props.pattern.length > (sepIdx + 1) && props.pattern[sepIdx + 1] !== '|') {
return false;
}
} else {
// Check if the rule has wildcard characters but includes only TLD (e.g., ||*.org^)
const isWildcardOnlyTLD = domainToCheck
.startsWith(WILDCARD_DOMAIN_PART) && domainToCheck.split(DOT).length === 2;

// If the rule has wildcard characters but is not a TLD (e.g., ||*.example.org^)
// return true
if (!isWildcardOnlyTLD) {
return true;
}
// If it's a wildcard with TLD, validate the cleaned TLD
const cleanedDomain = domainToCheck.replace(WILDCARD_DOMAIN_PART, '');
if (!validHostname(cleanedDomain, ruleText, allowedIP, hasLimitModifier)) {
return false;
}
}
}

Expand Down
17 changes: 16 additions & 1 deletion test/transformations/validate.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,30 @@ describe('Validate', () => {
});

it('adblock-style rules', () => {
const rules = `||asia^$denyallow=amzn.asia
const rules = `||example.*.asia^
||asia^
||asia^$denyallow=amzn.asia
||bar^$denyallow=fap.bar
||beauty^$denyallow=homelab.beauty|nic.beauty|vipbj.beauty`.split(/\r?\n/);
const filtered = validate(rules);

expect(filtered).toEqual([
'||example.*.asia^',
'||asia^$denyallow=amzn.asia',
'||bar^$denyallow=fap.bar',
'||beauty^$denyallow=homelab.beauty|nic.beauty|vipbj.beauty',
]);
});

// Wildcard rules: a wildcard over a bare TLD (`||*.asia^`) is expected to be
// filtered out, while a wildcard over a longer domain, or a TLD wildcard
// combined with `$denyallow`, is expected to be kept.
// NOTE: this must be a plain `it` — `it.only` would make Jest skip every other
// test in this file.
it('adblock-style rules with wildcard', () => {
const rules = `||*.asia^
||*.example.org^
||*.asia^$denyallow=fap.bar`.split(/\r?\n/);
const filtered = validate(rules);

expect(filtered).toEqual([
'||*.example.org^',
'||*.asia^$denyallow=fap.bar',
]);
});
});

0 comments on commit a2c8adc

Please sign in to comment.