Skip to content

Commit

Permalink
Keep track of the document URL post-redirects (#582)
Browse files Browse the repository at this point in the history
* Add enableUrlUpdateIfRedirected which keeps track of the document URL after
redirects.
* `options.url` will be kept up-to-date, and `options.initialUrl` will retain the initial URL provided to Lighthouse
* `driver.url` removed.
* networkrecorder emits a 'requestloaded' event whenever any request finishes.
  • Loading branch information
paulirish authored Aug 20, 2016
1 parent a6b548a commit ac70731
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 86 deletions.
32 changes: 22 additions & 10 deletions lighthouse-core/gather/drivers/driver.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,12 @@ const log = require('../../lib/log.js');
class Driver {

constructor() {
this._url = null;
this.PAUSE_AFTER_LOAD = 500;
this._traceEvents = [];
this._traceCategories = Driver.traceCategories;
this._eventEmitter = null;
}

get url() {
return this._url;
}

set url(_url) {
this._url = _url;
}

static get traceCategories() {
return [
'-*', // exclude default
Expand Down Expand Up @@ -218,6 +209,26 @@ class Driver {
});
}

/**
* If our main document URL redirects, we will update options.url accordingly
* As such, options.url will always represent the post-redirected URL.
* options.initialUrl is the pre-redirect URL that things started with
*
* Caveat: only works when network recording enabled for a pass
*/
enableUrlUpdateIfRedirected(opts) {
this._networkRecorder.on('requestloaded', redirectRequest => {
// Quit if this is not a redirected request
if (!redirectRequest.redirectSource) {
return;
}
const earlierRequest = redirectRequest.redirectSource;
if (earlierRequest.url === opts.url) {
opts.url = redirectRequest.url;
}
});
}

/**
* Navigate to the given URL. Use of this method directly isn't advised: if
* the current page is already at the given URL, navigation will not occur and
Expand Down Expand Up @@ -328,10 +339,11 @@ class Driver {
});
}

beginNetworkCollect() {
beginNetworkCollect(opts) {
return new Promise((resolve, reject) => {
this._networkRecords = [];
this._networkRecorder = new NetworkRecorder(this._networkRecords);
this.enableUrlUpdateIfRedirected(opts);

this.on('Network.requestWillBeSent', this._networkRecorder.onRequestWillBeSent);
this.on('Network.requestServedFromCache', this._networkRecorder.onRequestServedFromCache);
Expand Down
17 changes: 7 additions & 10 deletions lighthouse-core/gather/drivers/extension.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ class ExtensionDriver extends Driver {

_detachCleanup() {
this._tabId = null;
this.url = null;
chrome.debugger.onEvent.removeListener(this._onEvent);
chrome.debugger.onDetach.removeListener(this._onUnexpectedDetach);
this._eventEmitter.removeAllListeners();
Expand All @@ -60,8 +59,8 @@ class ExtensionDriver extends Driver {
}

return this.queryCurrentTab_()
.then(tabId => {
this._tabId = tabId;
.then(tab => {
const tabId = this._tabId = tab.id;
chrome.debugger.onEvent.addListener(this._onEvent);
chrome.debugger.onDetach.addListener(this._onUnexpectedDetach);

Expand Down Expand Up @@ -142,18 +141,16 @@ class ExtensionDriver extends Driver {
return reject(chrome.runtime.lastError);
}

this.url = tabs[0].url;
resolve(tabs[0].id);
resolve(tabs[0]);
});
});
}

/**
* Used by lighthouse-background to kick off the run on the current page
*/
getCurrentTabURL() {
if (this.url === undefined || this.url === null) {
return this.queryCurrentTab_().then(_ => this.url);
}

return Promise.resolve(this.url);
return this.queryCurrentTab_().then(tab => tab.url);
}
}

Expand Down
29 changes: 18 additions & 11 deletions lighthouse-core/gather/gather-runner.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class GatherRunner {
.then(_ => new Promise((resolve, reject) => setTimeout(resolve, 300)))
// Begin tracing and network recording if required.
.then(_ => options.config.trace && driver.beginTrace())
.then(_ => options.config.network && driver.beginNetworkCollect())
.then(_ => options.config.network && driver.beginNetworkCollect(options))
// Navigate.
.then(_ => driver.gotoURL(options.url, {
waitForLoad: true,
Expand Down Expand Up @@ -219,18 +219,25 @@ class GatherRunner {

// Run each pass
.then(_ => {
return passes.reduce((chain, config) => {
// If the main document redirects, we'll update this to keep track
let urlAfterRedirects;
return passes.reduce((chain, config, passIndex) => {
const runOptions = Object.assign({}, options, {config});
return chain
.then(_ => GatherRunner.beforePass(runOptions))
.then(_ => GatherRunner.pass(runOptions))
.then(_ => GatherRunner.afterPass(runOptions))
.then(loadData => {
// Merge pass trace and network data into tracingData.
config.trace && Object.assign(tracingData.traces, loadData.traces);
config.network && (tracingData.networkRecords = loadData.networkRecords);
});
}, Promise.resolve());
.then(_ => GatherRunner.beforePass(runOptions))
.then(_ => GatherRunner.pass(runOptions))
.then(_ => GatherRunner.afterPass(runOptions))
.then(loadData => {
// Merge pass trace and network data into tracingData.
config.trace && Object.assign(tracingData.traces, loadData.traces);
config.network && (tracingData.networkRecords = loadData.networkRecords);
if (passIndex === 0) {
urlAfterRedirects = runOptions.url;
}
});
}, Promise.resolve()).then(_ => {
options.url = urlAfterRedirects;
});
})
.then(_ => {
// We dont need to hold up the reporting for the reload/disconnect,
Expand Down
5 changes: 5 additions & 0 deletions lighthouse-core/gather/gatherers/http-redirect.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@

const Gatherer = require('./gatherer');

/**
* This gatherer changes the options.url so that its pass loads the http page.
* After load it detects if its on a crypographic scheme.
* TODO: Instead of abusing a loadPage pass for this test, it could likely just do an XHR instead
*/
class HTTPRedirect extends Gatherer {

constructor() {
Expand Down
8 changes: 6 additions & 2 deletions lighthouse-core/gather/gatherers/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ const Gatherer = require('./gatherer');

class URL extends Gatherer {

beforePass(options) {
this.artifact = options.url || options.driver.url;
afterPass(options) {
// Used currently by cache-start-url audit, which wants to know if the start_url
// in the manifest is stored in the cache.
// Instead of the originally inputted URL (options.initialUrl), we want the resolved
// post-redirect URL (which is here at options.url)
this.artifact = options.url;
}
}

Expand Down
6 changes: 0 additions & 6 deletions lighthouse-core/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@ module.exports = function(url, flags, configJSON) {
flags.logLevel = flags.logLevel || 'error';
log.setLevel(flags.logLevel);

// If the URL isn't https or localhost complain to the user.
if (url.indexOf('https') !== 0 && url.indexOf('http://localhost') !== 0) {
log.warn('Lighthouse', 'The URL provided should be on HTTPS');
log.warn('Lighthouse', 'Performance stats will be skewed redirecting from HTTP to HTTPS.');
}

// Use ConfigParser to generate a valid config file
const config = new Config(configJSON, flags.auditWhitelist, flags.configPath);

Expand Down
15 changes: 10 additions & 5 deletions lighthouse-core/lib/network-recorder.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,19 @@
'use strict';

const NetworkManager = require('./web-inspector').NetworkManager;
const EventEmitter = require('events').EventEmitter;

const REQUEST_FINISHED = NetworkManager.EventTypes.RequestFinished;

class NetworkRecorder {
class NetworkRecorder extends EventEmitter {
constructor(recordArray) {
this._records = recordArray;
super();

this._records = recordArray;
this.networkManager = NetworkManager.createWithFakeTarget();

// TODO(bckenny): loadingFailed calls are not recorded in REQUEST_FINISHED.
this.networkManager.addEventListener(REQUEST_FINISHED, request => {
this.networkManager.addEventListener(this.EventTypes.RequestFinished, request => {
this._records.push(request.data);
this.emit('requestloaded', request.data);
});

this.onRequestWillBeSent = this.onRequestWillBeSent.bind(this);
Expand All @@ -40,6 +41,10 @@ class NetworkRecorder {
this.onResourceChangedPriority = this.onResourceChangedPriority.bind(this);
}

get EventTypes() {
return NetworkManager.EventTypes;
}

// There are a few differences between the debugging protocol naming and
// the parameter naming used in NetworkManager. These are noted below.

Expand Down
24 changes: 19 additions & 5 deletions lighthouse-core/runner.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,11 @@ const assetSaver = require('./lib/asset-saver');
const log = require('./lib/log');
const fs = require('fs');
const path = require('path');
const url = require('url');

class Runner {
static run(driver, opts) {
// Clean opts input.
if (typeof opts.url !== 'string' || opts.url.length === 0) {
return Promise.reject(new Error('You must provide a url to the driver'));
}

opts.flags = opts.flags || {};

// Default mobile emulation and page loading to true.
Expand All @@ -44,6 +41,20 @@ class Runner {

const config = opts.config;

// save the initialUrl provided by the user
opts.initialUrl = opts.url;
if (typeof opts.initialUrl !== 'string' || opts.initialUrl.length === 0) {
return Promise.reject(new Error('You must provide a url to the driver'));
}
const parsedURL = url.parse(opts.url);
// canonicalize URL with any trailing slashes neccessary
opts.url = url.format(parsedURL);
// If the URL isn't https and is also not localhost complain to the user.
if (!parsedURL.protocol.includes('https') && parsedURL.hostname !== 'localhost') {
log.warn('Lighthouse', 'The URL provided should be on HTTPS');
log.warn('Lighthouse', 'Performance stats will be skewed redirecting from HTTP to HTTPS.');
}

// Check that there are passes & audits...
const validPassesAndAudits = config.passes && config.audits;

Expand All @@ -57,8 +68,9 @@ class Runner {
// to check that there are artifacts specified in the config, and throw if not.
if (validPassesAndAudits || validArtifactsAndAudits) {
if (validPassesAndAudits) {
opts.driver = driver;
// Finally set up the driver to gather.
run = run.then(_ => GatherRunner.run(config.passes, Object.assign({}, opts, {driver})));
run = run.then(_ => GatherRunner.run(config.passes, opts));
} else if (validArtifactsAndAudits) {
run = run.then(_ => config.artifacts);
}
Expand Down Expand Up @@ -108,7 +120,9 @@ class Runner {
formatted[audit.name] = audit;
return formatted;
}, {});

return {
initialUrl: opts.initialUrl,
url: opts.url,
audits: formattedAudits,
aggregations
Expand Down
35 changes: 3 additions & 32 deletions lighthouse-core/test/gather/gatherers/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,42 +19,13 @@

const URLGather = require('../../../gather/gatherers/url');
const assert = require('assert');
let urlGather;

describe('URL gatherer', () => {
// Reset the Gatherer before each test.
beforeEach(() => {
urlGather = new URLGather();
});

it('returns the correct URL from options', () => {
const urlGather = new URLGather();
const url = 'https://example.com';
urlGather.beforePass({
url
});

return assert.equal(urlGather.artifact, url);
});

it('returns the correct URL from options.driver', () => {
const url = 'https://example.com';
urlGather.beforePass({
driver: {
url
}
});

return assert.equal(urlGather.artifact, url);
});

it('chooses the URL from options over options.driver', () => {
const url = 'https://example.com';
const driverUrl = 'https://example2.com';
urlGather.beforePass({
url,
driver: {
url: driverUrl
}
urlGather.afterPass({
url: url
});

return assert.equal(urlGather.artifact, url);
Expand Down
39 changes: 35 additions & 4 deletions lighthouse-core/test/lib/drivers/driver.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@

const Driver = require('../../../gather/drivers/cri.js');
const Element = require('../../../lib/element.js');
const NetworkRecorder = require('../../../lib/network-recorder');
const assert = require('assert');

let DriverStub = new Driver();
let driverStub = new Driver();

DriverStub.sendCommand = function(command, params) {
driverStub.sendCommand = function(command, params) {
switch (command) {
case 'DOM.getDocument':
return Promise.resolve({root: {nodeId: 249}});
Expand All @@ -36,17 +37,47 @@ DriverStub.sendCommand = function(command, params) {
}
};

// mock redirects to test out enableUrlUpdateIfRedirected
const req1 = {
url: 'http://aliexpress.com/'
};
const req2 = {
redirectSource: req1,
url: 'http://www.aliexpress.com/'
};
const req3 = {
redirectSource: req2,
url: 'http://m.aliexpress.com/?tracelog=wwwhome2mobilesitehome'
};
const mockRedirects = [req1, req2, req3];

/* global describe, it */
describe('Browser Driver', () => {
it('returns null when DOM.querySelector finds no node', () => {
return DriverStub.querySelector('invalid').then(value => {
return driverStub.querySelector('invalid').then(value => {
assert.equal(value, null);
});
});

it('returns element when DOM.querySelector finds node', () => {
return DriverStub.querySelector('meta head').then(value => {
return driverStub.querySelector('meta head').then(value => {
assert.equal(value instanceof Element, true);
});
});

it('will update the options.url through redirects', () => {
const networkRecorder = driverStub._networkRecorder = new NetworkRecorder([]);
let opts = {url: req1.url};
driverStub.enableUrlUpdateIfRedirected(opts);

// Fake some reqFinished events
const networkManager = networkRecorder.networkManager;
mockRedirects.forEach(request => {
networkManager.dispatchEventToListeners(networkRecorder.EventTypes.RequestFinished, request);
});

// The above event is handled synchronously by enableUrlUpdateIfRedirected and will be all set
assert.notEqual(opts.url, req1.url, 'opts.url changed after the redirects');
assert.equal(opts.url, req3.url, 'opts.url matches the last redirect');
});
});
Loading

0 comments on commit ac70731

Please sign in to comment.