From 2d4c07d62fcb767f7a005beca9b9c6e6412b46d5 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Tue, 22 Oct 2024 21:23:08 +0800 Subject: [PATCH] Update CDN & Reject Hosts --- Build/constants/reject-data-source.ts | 17 +++++--- Build/lib/get-phishing-domains.ts | 62 +++++++++++++++------------ Source/domainset/cdn.conf | 8 +++- Source/domainset/reject_sukka.conf | 10 +++++ 4 files changed, 61 insertions(+), 36 deletions(-) diff --git a/Build/constants/reject-data-source.ts b/Build/constants/reject-data-source.ts index 748cfd8a2..06996b968 100644 --- a/Build/constants/reject-data-source.ts +++ b/Build/constants/reject-data-source.ts @@ -22,12 +22,7 @@ export const HOSTS_EXTRA: HostsSource[] = [ // Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller ['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()], // ad-wars is not actively maintained, so we set a 7 days cache ttl - ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()], - [ - 'https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt', - [], - true, TTL.TWLVE_HOURS() - ] + ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()] ]; export const DOMAIN_LISTS: HostsSource[] = [ @@ -84,6 +79,10 @@ export const DOMAIN_LISTS_EXTRA: HostsSource[] = [ ['https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt', [], true, TTL.TEN_DAYS()] ]; +export const PHISHING_HOSTS_EXTRA: HostsSource[] = [ + ['https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt', [], true, TTL.THREE_DAYS()] +]; + export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [ [ 'https://phishing-filter.pages.dev/phishing-filter-domains.txt', @@ -396,5 +395,9 @@ export const PREDEFINED_WHITELIST = [ 'ab.chatgpt.com', // EasyPrivacy blocks this 'jnn-pa.googleapis.com', // ad-wars 'imasdk.googleapis.com', // ad-wars - '.l.qq.com' // ad-wars + '.l.qq.com', // ad-wars + '.clients.your-server.de', // rDNS .static.183.213.201.138.clients.your-server.de + '.bc.googleusercontent.com', // rDNS 218.178.172.34.bc.googleusercontent.com + '.host.secureserver.net', // rDNS .64.149.167.72.host.secureserver.net, + '.ip.linodeusercontent.com' // rDNS 45-79-169-153.ip.linodeusercontent.com ]; diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index 75e203bd7..2a6a111ba 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -1,10 +1,10 @@ -import { processDomainLists } from './parse-filter'; +import { processDomainLists, processHosts } from './parse-filter'; import * as tldts from 'tldts-experimental'; import { dummySpan } from '../trace'; import type { Span } from '../trace'; import { appendArrayInPlaceCurried } from './append-array-in-place'; -import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source'; +import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source'; import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt'; import picocolors from 'picocolors'; import createKeywordFilter from './aho-corasick'; @@ -22,7 +22,7 @@ const BLACK_TLD = new Set([ 'ga', 'gd', 'gives', 'gq', 'group', 'host', 'icu', 'id', 'info', 'ink', 'lat', 'life', 'live', 'link', 'loan', 'lol', 'ltd', - 'me', 'men', 'ml', 'mobi', 'mom', + 'me', 'men', 'ml', 'mobi', 'mom', 'monster', 'net.pl', 'one', 'online', 'party', 'pro', 'pl', 'pw', @@ -48,6 +48,12 @@ const WHITELIST_MAIN_DOMAINS = new Set([ 'zendesk.com' ]); +const leathalKeywords = createKeywordFilter([ + 'vinted-', + 'inpost-pl', + 'vlnted-' +]); + const sensitiveKeywords = createKeywordFilter([ '.amazon-', '-amazon', @@ -65,14 +71,15 @@ const sensitiveKeywords = createKeywordFilter([ 'booking-com', 'booking.com-', 'booking-eu', - 'vinted-cz', + 'vinted-', 'inpost-pl', 'login.microsoft', 'login-microsoft', 'microsoftonline', 'google.com-', 'minecraft', - 'staemco' + 'staemco', + 'oferta' ]); const lowKeywords = createKeywordFilter([ 'transactions-', @@ -96,7 +103,8 @@ const lowKeywords = createKeywordFilter([ 'microsof', 'passwordreset', '.google-', - 'recover' + 'recover', + 'banking' ]); const cacheKey = createCacheKey(__filename); @@ -154,20 +162,15 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr: if ( // !WHITELIST_MAIN_DOMAINS.has(apexDomain) (domainScoreMap[apexDomain] >= 24) - || (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 4) - || (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 7) - || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 10) - || (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 16) + || (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 7) + || (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 11) + || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 14) + || (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 20) ) { domainArr.push('.' + apexDomain); } } - // console.log( - // domainScoreMap['wordpress.com'], - // domainCountMap['wordpress.com'] - // ); - return Promise.resolve(domainArr); }, { serializer: serializeArray, @@ -179,8 +182,10 @@ export function getPhishingDomains(parentSpan: Span) { const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => { const domainArr: string[] = []; - (await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)))) - .forEach(appendArrayInPlaceCurried(domainArr)); + await Promise.all([ + ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)), + ...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey)) + ]).then(domainGroups => domainGroups.forEach(appendArrayInPlaceCurried(domainArr))); return domainArr; }); @@ -193,6 +198,10 @@ export function getPhishingDomains(parentSpan: Span) { } export function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) { + if (leathalKeywords(fullDomain)) { + return 100; + } + let weight = 0; const hitLowKeywords = lowKeywords(fullDomain); @@ -209,17 +218,14 @@ export function calcDomainAbuseScore(subdomain: string, fullDomain: string = sub const subdomainLength = subdomain.length; - if (subdomainLength > 6) { - weight += 0.25; - if (subdomainLength > 11) { - weight += 0.6; - if (subdomainLength > 20) { - weight += 1; - if (subdomainLength > 30) { - weight += 2; - if (subdomainLength > 40) { - weight += 4; - } + if (subdomainLength > 13) { + weight += 0.2; + if (subdomainLength > 20) { + weight += 1; + if (subdomainLength > 30) { + weight += 5; + if (subdomainLength > 40) { + weight += 10; } } } diff --git a/Source/domainset/cdn.conf b/Source/domainset/cdn.conf index 09fd9055b..bf637f345 100644 --- a/Source/domainset/cdn.conf +++ b/Source/domainset/cdn.conf @@ -1045,9 +1045,12 @@ cdn.consentmanager.net widget.usersnap.com cdn.playwire.com widget.usepylon.com - +app.groove.cm +app.groovefunnels.com loader.mantis-intelligence.com mantisloader.mantis-awx.com +conversiongorilla.com +.patientpop.com cdn.transcend.io .transcend-cdn.com @@ -3094,3 +3097,6 @@ assets.raspberrypi.com assets.brevo.com corp-backend.brevo.com designsystem.brevo.com +assets.grooveapps.com +assets.clickfunnels.com +.wixstudio.io diff --git a/Source/domainset/reject_sukka.conf b/Source/domainset/reject_sukka.conf index 3d957e350..b4e79afdf 100644 --- a/Source/domainset/reject_sukka.conf +++ b/Source/domainset/reject_sukka.conf @@ -319,6 +319,7 @@ inst.360safe.com .dkonto.pl .googleplay.pro .printondemandmerchandise.com +.thebitmeister.com # Phishing domain like `www-facebook.to-kr.com` .to-kr.com # Poland Hosting dhosting.pl's free subdomain @@ -718,6 +719,10 @@ ads-shopping.shouqianba.com ad.maoyan.com e.jparking.cn adapi.izuiyou.com +.sponsor.printondemandagency.com +.whatisaweekend.com +.mob.com +.duomeng.cn adimg.daumcdn.net live.tvpot.daum.net @@ -892,6 +897,10 @@ metrics.brevo.com .adfunlink.com .ubixioe.com +# CNAME: dualstack.beaconserver-ce-vpc0-1537565064.eu-west-1.elb.amazonaws.com +# note "beaconserver" +.internal.dradis.netflix.com + .adjust.io .airbrake.io .apsalar.com @@ -1968,6 +1977,7 @@ xadx.file.market.xiaomi.com .pandora.xiaomi.com mi-stat.gslb.mi-idc.com mlog.search.xiaomi.net +# verify.sec.xiaomi.com # 验证码 captcha # 小米云扫描组件 .avlyun.sec.intl.miui.com .avlyun.sec.miui.com