Skip to content

Commit

Permalink
Add optional heuristic pattern detection (#555)
Browse files Browse the repository at this point in the history
* Set waits log default value

* Increase the timeout for the prehide filterlist check

* Add optional check for heuristic patterns

* Make sure the heuristics are tested after a CMP is detected
  • Loading branch information
muodov authored Dec 6, 2024
1 parent 0a46026 commit 892fb30
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 1 deletion.
1 change: 1 addition & 0 deletions addon/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ export async function initConfig() {
const storedConfig = (await storageGet('config')) || {};
console.log('storedConfig', storedConfig);
const updatedConfig = normalizeConfig(storedConfig);
updatedConfig.enableHeuristicDetection = true;
console.log('updated config', updatedConfig);
await storageSet({
config: updatedConfig,
Expand Down
39 changes: 39 additions & 0 deletions lib/heuristics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
 * Case-insensitive, global regexes for wording that commonly appears in
 * cookie-consent notices. Order matters: consumers report matches in list order.
 *
 * TODO: check for false positive detections per pattern
 */
export const DETECT_PATTERNS: RegExp[] = [
    // button-style labels
    /accept cookies/gi,
    /accept all/gi,
    /reject all/gi,
    /only necessary cookies/gi, // "only necessary" is probably too broad

    // implied-consent phrasing
    /by clicking.*(accept|agree|allow)/gi,
    /by continuing/gi,

    // statements about cookie usage
    /we (use|serve)( optional)? cookies/gi,
    /we are using cookies/gi,
    /use of cookies/gi,
    /(this|our) (web)?site.*cookies/gi,
    /cookies (and|or) .* technologies/gi,
    /such as cookies/gi,
    /read more about.*cookies/gi,
    /consent to.*cookies/gi,

    // partner / device-storage / ad-personalisation wording
    /we and our partners.*cookies/gi,
    /we.*store.*information.*such as.*cookies/gi,
    /store and\/or access information.*on a device/gi,
    /personalised ads and content, ad and content measurement/gi,

    // it might be tempting to add the patterns below, but they cause too many false positives. Don't do it :)
    // /cookies? settings/i,
    // /cookies? preferences/i,
];

/**
 * Scan the rendered text of the current document for cookie-consent wording.
 *
 * Each regex in `DETECT_PATTERNS` is matched against
 * `document.documentElement.innerText`.
 *
 * @returns `patterns`: the string form (`RegExp#toString`) of every regex that
 * matched at least once, in list order; `snippets`: every matched substring,
 * each truncated to at most 200 characters, grouped in the same order.
 * Both arrays are empty when nothing matches or the page has no readable text.
 */
export function checkHeuristicPatterns(): { patterns: string[]; snippets: string[] } {
    // `documentElement` may be missing, and non-HTML roots (e.g. a standalone SVG)
    // have no `innerText`; treat both as "no text" instead of throwing.
    const allText = document.documentElement?.innerText || '';
    const patterns: string[] = [];
    const snippets: string[] = [];
    for (const p of DETECT_PATTERNS) {
        // with a global regex, match() yields all occurrences, or null when there are none
        const matches = allText.match(p);
        if (matches) {
            patterns.push(p.toString());
            // plain loop rather than push(...spread) so a huge match list can't exceed the argument limit
            for (const m of matches) {
                snippets.push(m.substring(0, 200));
            }
        }
    }
    return { patterns, snippets };
}
3 changes: 3 additions & 0 deletions lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export type Config = {
isMainWorld: boolean;
prehideTimeout: number;
enableFilterList: boolean;
enableHeuristicDetection: boolean;
logs: {
lifecycle: boolean;
rulesteps: boolean;
Expand Down Expand Up @@ -88,5 +89,7 @@ export type ConsentState = {
findCmpAttempts: number; // Number of times we tried to find CMPs in this frame.
detectedCmps: string[]; // Names of CMP rules where `detectCmp` returned true.
detectedPopups: string[]; // Names of CMP rules where `detectPopup` returned true.
heuristicPatterns: string[]; // Matched heuristic patterns
heuristicSnippets: string[]; // Matched heuristic snippets
selfTest: boolean; // null if no self test was run, otherwise it holds the result of the self test.
};
2 changes: 2 additions & 0 deletions lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ export function normalizeConfig(providedConfig: any): Config {
disabledCmps: [],
enablePrehide: true,
enableCosmeticRules: true,
enableHeuristicDetection: false,
detectRetries: 20,
isMainWorld: false,
prehideTimeout: 2000,
Expand All @@ -84,6 +85,7 @@ export function normalizeConfig(providedConfig: any): Config {
evals: false,
errors: true,
messages: false,
waits: false,
},
};
const updatedConfig: Config = copyObject(defaultConfig);
Expand Down
21 changes: 20 additions & 1 deletion lib/web.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { normalizeConfig, scheduleWhenIdle } from './utils';
import { deserializeFilterList, getCosmeticStylesheet, getFilterlistSelectors } from './filterlist-utils';
import { FiltersEngine } from '@ghostery/adblocker';
import serializedEngine from './filterlist-engine';
import { checkHeuristicPatterns } from './heuristics';

function filterCMPs(rules: AutoCMP[], config: Config) {
return rules.filter((cmp) => {
Expand All @@ -34,6 +35,8 @@ export default class AutoConsent {
findCmpAttempts: 0,
detectedCmps: [],
detectedPopups: [],
heuristicPatterns: [],
heuristicSnippets: [],
selfTest: null,
};
domActions: DomActions;
Expand Down Expand Up @@ -244,6 +247,8 @@ export default class AutoConsent {
}
}

this.detectHeuristics();

if (foundCMPs.length === 0 && retries > 0) {
await this.domActions.wait(500);
return this.findCmp(retries - 1);
Expand All @@ -252,6 +257,19 @@ export default class AutoConsent {
return foundCMPs;
}

/**
 * Run the heuristic text-pattern check (when `enableHeuristicDetection` is set)
 * and store the result in state if the set of matched patterns differs from
 * the one currently recorded. An empty result never overwrites stored state.
 */
detectHeuristics() {
    if (!this.config.enableHeuristicDetection) {
        return;
    }

    const { patterns, snippets } = checkHeuristicPatterns();
    if (patterns.length === 0) {
        return;
    }

    const recorded = this.state.heuristicPatterns;
    const sameAsRecorded = patterns.length === recorded.length && !recorded.some((prev, idx) => prev !== patterns[idx]);
    if (sameAsRecorded) {
        return;
    }

    if (this.config.logs.lifecycle) {
        console.log('Heuristic patterns found', patterns, snippets);
    }
    // the new result replaces the stored one wholesale; earlier matches are not merged in
    this.updateState({ heuristicPatterns: patterns, heuristicSnippets: snippets });
}

/**
* Detect if a CMP has a popup open. Fulfills with the CMP if a popup is open, otherwise rejects.
*/
Expand Down Expand Up @@ -281,6 +299,7 @@ export default class AutoConsent {

await Promise.any(tasks)
.then((cmp) => {
this.detectHeuristics();
onFirstPopupAppears(cmp);
})
.catch(() => null);
Expand Down Expand Up @@ -489,7 +508,7 @@ export default class AutoConsent {
logsConfig?.lifecycle && console.log("Prehide cosmetic filters didn't match", location.href);
}
}
}, 1000);
}, 2000);

this.updateState({ cosmeticFiltersOn: true });
try {
Expand Down

0 comments on commit 892fb30

Please sign in to comment.