Skip to content

Commit

Permalink
Create tld plus one helper
Browse files Browse the repository at this point in the history
  • Loading branch information
wardpeet committed May 20, 2018
1 parent 61f7038 commit ed6d37d
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 5 deletions.
51 changes: 47 additions & 4 deletions lighthouse-core/lib/url-shim.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,24 @@ const Util = require('../report/html/renderer/util.js');
const URL = /** @type {!Window["URL"]} */ (typeof self !== 'undefined' && self.URL) ||
require('url').URL;

/**
 * Second-level labels that behave like a public suffix under a given country-code TLD.
 *
 * Keys are the TLD (last hostname label); values are the labels that may directly precede it.
 * For a hostname ending in `<value>.<key>` (e.g. `co.uk`) the registrable root domain spans
 * three labels (`example.co.uk`) instead of the usual two.
 *
 * This is a hand-maintained subset, not the full Public Suffix List.
 * NOTE: 'gov.nc' under `tr` is a two-label entry (gov.nc.tr); all others are single labels.
 *
 * @type {Object<string, Array<string>>}
 */
const tldPlusOne = {
  ar: ['com', 'edu', 'gob', 'int', 'mil', 'mar', 'net', 'org', 'tur', 'musica'],
  at: ['co', 'or', 'priv', 'ac'],
  fr: ['avocat', 'aeroport', 'veterinaire'],
  nz: ['ac', 'co', 'school', 'cri', 'govt', 'mil', 'parliament'],
  il: ['ac', 'co', 'net', 'org', 'k12', 'gov', 'muni', 'idf'],
  // Previously a verbatim copy of the `ar` list (gob/tur/musica/mar are Argentine labels).
  ru: ['ac', 'com', 'edu', 'gov', 'int', 'mil', 'net', 'org', 'pp'],
  za: ['ac', 'co', 'edu', 'gov', 'law', 'mil', 'nom', 'org', 'school', 'net', 'web'],
  kr: ['ac', 'co', 'es', 'go', 'hs', 'kg', 'mil', 'ms', 'ne', 'or', 'pe', 're', 'sc', 'busan',
    'chungbuk', 'chungnam', 'daegu', 'daejeon', 'gangwon', 'gwangju', 'gyeongbuk', 'gyeonggi',
    'gyeongnam', 'incheon', 'jeju', 'jeonbuk', 'jeonnam', 'seoul', 'ulsan'],
  es: ['com', 'nom', 'org', 'gob', 'edu'],
  tr: ['com', 'info', 'biz', 'net', 'org', 'web', 'gen', 'tv', 'av', 'dr', 'bbs', 'name', 'tel',
    'gov', 'bel', 'pol', 'mil', 'k12', 'edu', 'kep', 'nc', 'gov.nc'],
  ua: ['gov', 'com', 'in', 'org', 'net', 'edu'],
  uk: ['co', 'org', 'me', 'ltd', 'plc', 'net', 'sch', 'ac', 'gov', 'mod', 'mil', 'nhs', 'police'],
};

/**
* There is fancy URL rewriting logic for the chrome://settings page that we need to work around.
* Why? Special handling was added by Chrome team to allow a pushState transition between chrome:// pages.
Expand All @@ -32,6 +50,31 @@ function rewriteChromeInternalUrl(url) {
return url.replace(/^chrome:\/\/chrome\//, 'chrome://');
}

/**
 * Checks whether the hostname of a URL ends in a known two-level public suffix
 * (e.g. `co.uk`), meaning its registrable root domain spans three labels instead of two.
 *
 * Only the parsed hostname is inspected and the suffix must sit at the very end of it, so
 * look-alike text in the path, query or in earlier hostname labels never matches.
 *
 * @param {string} url Absolute URL to inspect.
 * @return {boolean} True when the hostname ends in `.<second-level>.<tld>` for a second-level
 *     entry listed in `tldPlusOne`; false otherwise, including when `url` cannot be parsed.
 */
function isTldPlusDomain(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname;
  } catch (err) {
    // Unparseable input (relative path, bare hostname, garbage) has no public suffix.
    return false;
  }
  if (!hostname) {
    return false;
  }

  const tld = hostname.split('.').slice(-1)[0];
  // Own-property check so labels such as "constructor" never resolve to Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(tldPlusOne, tld)) {
    return false;
  }

  // Table entries are literal labels, not patterns: escape them (e.g. the dot in "gov.nc").
  const secondLevels = tldPlusOne[tld]
    .map(label => label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');

  // Anchor at the end of the hostname; the leading dot requires a registrable label before it.
  const suffixRegex = new RegExp(`\\.(?:${secondLevels})\\.${tld}$`);
  return suffixRegex.test(hostname);
}

class URLShim extends URL {
/**
* @param {string} url
Expand Down Expand Up @@ -107,11 +150,11 @@ class URLShim extends URL {
return false;
}

const isTldA = isTldPlusDomain(urlAInfo.hostname);
const isTldB = isTldPlusDomain(urlBInfo.hostname);
const isTldPlusOneA = isTldPlusDomain(urlA);
const isTldPlusOneB = isTldPlusDomain(urlB);

const urlARootDomain = urlAInfo.hostname.split('.').slice(isTldA ? -3 : -2).join('.');
const urlBRootDomain = urlBInfo.hostname.split('.').slice(isTldB ? -3 : -2).join('.');
const urlARootDomain = urlAInfo.hostname.split('.').slice(isTldPlusOneA ? -3 : -2).join('.');
const urlBRootDomain = urlBInfo.hostname.split('.').slice(isTldPlusOneB ? -3 : -2).join('.');

return urlARootDomain === urlBRootDomain;
}
Expand Down
17 changes: 16 additions & 1 deletion lighthouse-core/test/lib/url-shim-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ describe('URL Shim', () => {

describe('rootDomainsMatch', () => {
it('matches a subdomain and a root domain', () => {

const urlA = 'http://example.com/js/test.js';
const urlB = 'http://example.com/';
const urlC = 'http://sub.example.com/js/test.js';
Expand All @@ -121,6 +120,22 @@ describe('URL Shim', () => {
assert.ok(!URL.rootDomainsMatch(urlB, urlD));
assert.ok(!URL.rootDomainsMatch(urlB, urlE));
});

it(`matches tld plus domains`, () => {
  // Listed two-level suffix (co.uk): root domain is the last three labels.
  const coUkA = 'http://example.co.uk/js/test.js';
  const coUkB = 'http://sub.example.co.uk/js/test.js';
  // Unlisted second level (test.uk) and unlisted TLD (br): last two labels are compared.
  const testUkA = 'http://example.test.uk/js/test.js';
  const testUkB = 'http://sub.example.test.uk/js/test.js';
  const ltdBrA = 'http://example.ltd.br/js/test.js';
  const ltdBrB = 'http://sub.example.ltd.br/js/test.js';
  // "priv" appears inside a label, not as its own label, so priv.at must not be detected.
  const privAtA = 'http://examplepriv.at/js/test.js';
  const privAtB = 'http://sub.examplepriv.at/js/test.js';

  assert.ok(URL.rootDomainsMatch(coUkA, coUkB));
  assert.ok(URL.rootDomainsMatch(testUkA, testUkB));
  assert.ok(URL.rootDomainsMatch(ltdBrA, ltdBrB));
  assert.ok(URL.rootDomainsMatch(privAtA, privAtB));

  // Different registrants under the same listed suffix must not be treated as one site.
  // These only pass when the three-label root is used; the positive cases above would
  // also pass with plain two-label comparison.
  const otherCoUk = 'http://other.co.uk/js/test.js';
  const privAtC = 'http://example.priv.at/js/test.js';
  const privAtD = 'http://other.priv.at/js/test.js';

  assert.ok(!URL.rootDomainsMatch(coUkA, otherCoUk));
  assert.ok(!URL.rootDomainsMatch(coUkB, otherCoUk));
  assert.ok(!URL.rootDomainsMatch(privAtC, privAtD));
});
});

describe('getURLDisplayName', () => {
Expand Down

0 comments on commit ed6d37d

Please sign in to comment.