Skip to content

Commit

Permalink
Update CDN & Reject Hosts
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed Oct 22, 2024
1 parent e3d03f4 commit b8f4169
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 36 deletions.
17 changes: 10 additions & 7 deletions Build/constants/reject-data-source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,7 @@ export const HOSTS_EXTRA: HostsSource[] = [
// Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller
['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()],
// ad-wars is not actively maintained, so we set a two-week cache TTL
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()],
[
'https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt',
[],
true, TTL.TWLVE_HOURS()
]
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()]
];

export const DOMAIN_LISTS: HostsSource[] = [
Expand Down Expand Up @@ -84,6 +79,10 @@ export const DOMAIN_LISTS_EXTRA: HostsSource[] = [
['https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt', [], true, TTL.TEN_DAYS()]
];

/**
 * Hosts-file sources used only for phishing detection.
 *
 * `getPhishingDomains` feeds every entry through `processHosts`, alongside the
 * plain domain lists in `PHISHING_DOMAIN_LISTS_EXTRA`. The last tuple element
 * is the cache TTL for the download.
 */
export const PHISHING_HOSTS_EXTRA: HostsSource[] = [
  [
    'https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt',
    [],
    true,
    TTL.THREE_DAYS()
  ]
];

export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [
[
'https://phishing-filter.pages.dev/phishing-filter-domains.txt',
Expand Down Expand Up @@ -396,5 +395,9 @@ export const PREDEFINED_WHITELIST = [
'ab.chatgpt.com', // EasyPrivacy blocks this
'jnn-pa.googleapis.com', // ad-wars
'imasdk.googleapis.com', // ad-wars
'.l.qq.com' // ad-wars
'.l.qq.com', // ad-wars
'.clients.your-server.de', // rDNS .static.183.213.201.138.clients.your-server.de
'.bc.googleusercontent.com', // rDNS 218.178.172.34.bc.googleusercontent.com
'.host.secureserver.net', // rDNS .64.149.167.72.host.secureserver.net,
'.ip.linodeusercontent.com' // rDNS 45-79-169-153.ip.linodeusercontent.com
];
62 changes: 34 additions & 28 deletions Build/lib/get-phishing-domains.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { processDomainLists } from './parse-filter';
import { processDomainLists, processHosts } from './parse-filter';
import * as tldts from 'tldts-experimental';

import { dummySpan } from '../trace';
import type { Span } from '../trace';
import { appendArrayInPlaceCurried } from './append-array-in-place';
import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source';
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
import picocolors from 'picocolors';
import createKeywordFilter from './aho-corasick';
Expand All @@ -22,7 +22,7 @@ const BLACK_TLD = new Set([
'ga', 'gd', 'gives', 'gq', 'group', 'host',
'icu', 'id', 'info', 'ink',
'lat', 'life', 'live', 'link', 'loan', 'lol', 'ltd',
'me', 'men', 'ml', 'mobi', 'mom',
'me', 'men', 'ml', 'mobi', 'mom', 'monster',
'net.pl',
'one', 'online',
'party', 'pro', 'pl', 'pw',
Expand All @@ -48,6 +48,12 @@ const WHITELIST_MAIN_DOMAINS = new Set([
'zendesk.com'
]);

/**
 * Keywords that condemn a domain outright: `calcDomainAbuseScore` returns 100
 * as soon as the full domain contains any of them, skipping the weighted scoring.
 * NOTE(review): 'vlnted-' looks like a look-alike spelling of 'vinted-' — confirm.
 */
const leathalKeywords = createKeywordFilter(
  ['vinted-', 'inpost-pl', 'vlnted-']
);

const sensitiveKeywords = createKeywordFilter([
'.amazon-',
'-amazon',
Expand All @@ -65,14 +71,15 @@ const sensitiveKeywords = createKeywordFilter([
'booking-com',
'booking.com-',
'booking-eu',
'vinted-cz',
'vinted-',
'inpost-pl',
'login.microsoft',
'login-microsoft',
'microsoftonline',
'google.com-',
'minecraft',
'staemco'
'staemco',
'oferta'
]);
const lowKeywords = createKeywordFilter([
'transactions-',
Expand All @@ -96,7 +103,8 @@ const lowKeywords = createKeywordFilter([
'microsof',
'passwordreset',
'.google-',
'recover'
'recover',
'banking'
]);

const cacheKey = createCacheKey(__filename);
Expand Down Expand Up @@ -154,20 +162,15 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
if (
// !WHITELIST_MAIN_DOMAINS.has(apexDomain)
(domainScoreMap[apexDomain] >= 24)
|| (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 4)
|| (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 7)
|| (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 10)
|| (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 16)
|| (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 7)
|| (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 11)
|| (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 14)
|| (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 20)
) {
domainArr.push('.' + apexDomain);
}
}

// console.log(
// domainScoreMap['wordpress.com'],
// domainCountMap['wordpress.com']
// );

return Promise.resolve(domainArr);
}, {
serializer: serializeArray,
Expand All @@ -179,8 +182,10 @@ export function getPhishingDomains(parentSpan: Span) {
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
const domainArr: string[] = [];

(await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey))))
.forEach(appendArrayInPlaceCurried(domainArr));
await Promise.all([
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)),
...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey))
]).then(domainGroups => domainGroups.forEach(appendArrayInPlaceCurried(domainArr)));

return domainArr;
});
Expand All @@ -193,6 +198,10 @@ export function getPhishingDomains(parentSpan: Span) {
}

export function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
}

let weight = 0;

const hitLowKeywords = lowKeywords(fullDomain);
Expand All @@ -209,17 +218,14 @@ export function calcDomainAbuseScore(subdomain: string, fullDomain: string = sub

const subdomainLength = subdomain.length;

if (subdomainLength > 6) {
weight += 0.25;
if (subdomainLength > 11) {
weight += 0.6;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 2;
if (subdomainLength > 40) {
weight += 4;
}
if (subdomainLength > 13) {
weight += 0.2;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 5;
if (subdomainLength > 40) {
weight += 10;
}
}
}
Expand Down
8 changes: 7 additions & 1 deletion Source/domainset/cdn.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1045,9 +1045,12 @@ cdn.consentmanager.net
widget.usersnap.com
cdn.playwire.com
widget.usepylon.com

app.groove.cm
app.groovefunnels.com
loader.mantis-intelligence.com
mantisloader.mantis-awx.com
conversiongorilla.com
.patientpop.com

cdn.transcend.io
.transcend-cdn.com
Expand Down Expand Up @@ -3094,3 +3097,6 @@ assets.raspberrypi.com
assets.brevo.com
corp-backend.brevo.com
designsystem.brevo.com
assets.grooveapps.com
assets.clickfunnels.com
.wixstudio.io
9 changes: 9 additions & 0 deletions Source/domainset/reject_sukka.conf
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ inst.360safe.com
.dkonto.pl
.googleplay.pro
.printondemandmerchandise.com
.thebitmeister.com
# Phishing domain like `www-facebook.to-kr.com`
.to-kr.com
# Poland Hosting dhosting.pl's free subdomain
Expand Down Expand Up @@ -718,6 +719,10 @@ ads-shopping.shouqianba.com
ad.maoyan.com
e.jparking.cn
adapi.izuiyou.com
.sponsor.printondemandagency.com
.whatisaweekend.com
.mob.com
.duomeng.cn

adimg.daumcdn.net
live.tvpot.daum.net
Expand Down Expand Up @@ -892,6 +897,10 @@ metrics.brevo.com
.adfunlink.com
.ubixioe.com

# CNAME: dualstack.beaconserver-ce-vpc0-1537565064.eu-west-1.elb.amazonaws.com
# note "beaconserver"
.internal.dradis.netflix.com

.adjust.io
.airbrake.io
.apsalar.com
Expand Down

0 comments on commit b8f4169

Please sign in to comment.