Skip to content

Commit

Permalink
misc: share markdown parsing in collect-strings and the report (#9514)
Browse files Browse the repository at this point in the history
  • Loading branch information
brendankenny authored and paulirish committed Nov 6, 2019
1 parent 6c46ef3 commit ecb729e
Show file tree
Hide file tree
Showing 6 changed files with 315 additions and 84 deletions.
63 changes: 29 additions & 34 deletions lighthouse-core/report/html/renderer/dom.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
'use strict';

/* globals URL self */
/* globals URL self Util */

/** @typedef {HTMLElementTagNameMap & {[id: string]: HTMLElement}} HTMLElementByTagName */

Expand Down Expand Up @@ -117,52 +117,47 @@ class DOM {
convertMarkdownLinkSnippets(text) {
const element = this.createElement('span');

// Split on markdown links (e.g. [some link](https://...)).
const parts = text.split(/\[([^\]]*?)\]\((https?:\/\/.*?)\)/g);

while (parts.length) {
// Pop off the same number of elements as there are capture groups.
const [preambleText, linkText, linkHref] = parts.splice(0, 3);
element.appendChild(this._document.createTextNode(preambleText));

// Append link if there are any.
if (linkText && linkHref) {
const url = new URL(linkHref);

const DEVELOPERS_GOOGLE_ORIGIN = 'https://developers.google.com';
if (url.origin === DEVELOPERS_GOOGLE_ORIGIN) {
url.searchParams.set('utm_source', 'lighthouse');
url.searchParams.set('utm_medium', this._lighthouseChannel);
}

const a = this.createElement('a');
a.rel = 'noopener';
a.target = '_blank';
a.textContent = linkText;
a.href = url.href;
element.appendChild(a);
for (const segment of Util.splitMarkdownLink(text)) {
if (!segment.isLink) {
// Plain text segment.
element.appendChild(this._document.createTextNode(segment.text));
continue;
}

// Otherwise, append any links found.
const url = new URL(segment.linkHref);

const DEVELOPERS_GOOGLE_ORIGIN = 'https://developers.google.com';
if (url.origin === DEVELOPERS_GOOGLE_ORIGIN) {
url.searchParams.set('utm_source', 'lighthouse');
url.searchParams.set('utm_medium', this._lighthouseChannel);
}

const a = this.createElement('a');
a.rel = 'noopener';
a.target = '_blank';
a.textContent = segment.text;
a.href = url.href;
element.appendChild(a);
}

return element;
}

/**
* @param {string} text
* @param {string} markdownText
* @return {Element}
*/
convertMarkdownCodeSnippets(text) {
convertMarkdownCodeSnippets(markdownText) {
const element = this.createElement('span');

const parts = text.split(/`(.*?)`/g); // Split on markdown code slashes
while (parts.length) {
// Pop off the same number of elements as there are capture groups.
const [preambleText, codeText] = parts.splice(0, 2);
element.appendChild(this._document.createTextNode(preambleText));
if (codeText) {
for (const segment of Util.splitMarkdownCodeSpans(markdownText)) {
if (segment.isCode) {
const pre = this.createElement('code');
pre.textContent = codeText;
pre.textContent = segment.text;
element.appendChild(pre);
} else {
element.appendChild(this._document.createTextNode(segment.text));
}
}

Expand Down
67 changes: 67 additions & 0 deletions lighthouse-core/report/html/renderer/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,73 @@ class Util {
return parts.join(' ');
}

/**
* Split a string by markdown code spans (enclosed in `backticks`), splitting
* into segments that were enclosed in backticks (marked as `isCode === true`)
* and those that outside the backticks (`isCode === false`).
* @param {string} text
* @return {Array<{isCode: true, text: string}|{isCode: false, text: string}>}
*/
static splitMarkdownCodeSpans(text) {
/** @type {Array<{isCode: true, text: string}|{isCode: false, text: string}>} */
const segments = [];

// Split on backticked code spans.
const parts = text.split(/`(.*?)`/g);
for (let i = 0; i < parts.length; i ++) {
const text = parts[i];

// Empty strings are an artifact of splitting, not meaningful.
if (!text) continue;

// Alternates between plain text and code segments.
const isCode = i % 2 !== 0;
segments.push({
isCode,
text,
});
}

return segments;
}

/**
* Split a string on markdown links (e.g. [some link](https://...)) into
* segments of plain text that weren't part of a link (marked as
* `isLink === false`), and segments with text content and a URL that did make
* up a link (marked as `isLink === true`).
* @param {string} text
* @return {Array<{isLink: true, text: string, linkHref: string}|{isLink: false, text: string}>}
*/
static splitMarkdownLink(text) {
/** @type {Array<{isLink: true, text: string, linkHref: string}|{isLink: false, text: string}>} */
const segments = [];

const parts = text.split(/\[([^\]]+?)\]\((https?:\/\/.*?)\)/g);
while (parts.length) {
// Shift off the same number of elements as the pre-split and capture groups.
const [preambleText, linkText, linkHref] = parts.splice(0, 3);

if (preambleText) { // Skip empty text as it's an artifact of splitting, not meaningful.
segments.push({
isLink: false,
text: preambleText,
});
}

// Append link if there are any.
if (linkText && linkHref) {
segments.push({
isLink: true,
text: linkText,
linkHref,
});
}
}

return segments;
}

/**
* @param {URL} parsedUrl
* @param {{numPathParts?: number, preserveQuery?: boolean, preserveHost?: boolean}=} options
Expand Down
68 changes: 36 additions & 32 deletions lighthouse-core/scripts/i18n/collect-strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const path = require('path');
const assert = require('assert');
const tsc = require('typescript');
const collectAndBakeCtcStrings = require('./bake-ctc-to-lhl.js');
const Util = require('../../report/html/renderer/util.js');

const LH_ROOT = path.join(__dirname, '../../../');
const UISTRINGS_REGEX = /UIStrings = .*?\};\n/s;
Expand Down Expand Up @@ -157,28 +158,27 @@ function convertMessageToCtc(message, examples = {}) {
* @param {IncrementalCtc} icu
*/
function _processPlaceholderMarkdownCode(icu) {
const message = icu.message;

// Check that number of backticks is even.
const match = icu.message.match(/`/g);
const match = message.match(/`/g);
if (match && match.length % 2 !== 0) {
throw Error(`Open backtick in message "${icu.message}"`);
throw Error(`Open backtick in message "${message}"`);
}

// Split on backticked code spans
const parts = icu.message.split(/`(.*?)`/g);
icu.message = '';
let idx = 0;
while (parts.length) {
// Pop off the same number of elements as there are capture groups.
const [preambleText, codeText] = parts.splice(0, 2);
icu.message += preambleText;
if (codeText) {
for (const segment of Util.splitMarkdownCodeSpans(message)) {
if (segment.isCode) {
const placeholderName = `MARKDOWN_SNIPPET_${idx++}`;
// Nesting backticks in a template literal looks unreadable here, so concatenate with + instead.
icu.message += '$' + placeholderName + '$';
icu.placeholders[placeholderName] = {
content: '`' + codeText + '`',
example: codeText,
content: '`' + segment.text + '`',
example: segment.text,
};
} else {
icu.message += segment.text;
}
}
}
Expand All @@ -189,35 +189,39 @@ function _processPlaceholderMarkdownCode(icu) {
* @param {IncrementalCtc} icu
*/
function _processPlaceholderMarkdownLink(icu) {
const message = icu.message;

// Check for markdown link common errors, ex:
// * [extra] (space between brackets and parens)
if (icu.message.match(/\[.*\] \(.*\)/)) {
throw Error(`Bad Link syntax in message "${icu.message}"`);
if (message.match(/\[.*\] \(.*\)/)) {
throw Error(`Bad Link spacing in message "${message}"`);
}
// * [](empty link text)
if (message.match(/\[\]\(.*\)/)) {
throw Error(`markdown link text missing in message "${message}"`);
}

// Split on markdown links (e.g. [some link](https://...)).
const parts = icu.message.split(/\[([^\]]*?)\]\((https?:\/\/.*?)\)/g);
icu.message = '';
let idx = 0;

while (parts.length) {
// Pop off the same number of elements as there are capture groups.
const [preambleText, linkText, linkHref] = parts.splice(0, 3);
icu.message += preambleText;

// Append link if there are any.
if (linkText && linkHref) {
const startPlaceholder = `LINK_START_${idx}`;
const endPlaceholder = `LINK_END_${idx}`;
icu.message += '$' + startPlaceholder + '$' + linkText + '$' + endPlaceholder + '$';
idx++;
icu.placeholders[startPlaceholder] = {
content: '[',
};
icu.placeholders[endPlaceholder] = {
content: `](${linkHref})`,
};
for (const segment of Util.splitMarkdownLink(message)) {
if (!segment.isLink) {
// Plain text segment.
icu.message += segment.text;
continue;
}

// Otherwise, append any links found.
const startPlaceholder = `LINK_START_${idx}`;
const endPlaceholder = `LINK_END_${idx}`;
icu.message += '$' + startPlaceholder + '$' + segment.text + '$' + endPlaceholder + '$';
idx++;
icu.placeholders[startPlaceholder] = {
content: '[',
};
icu.placeholders[endPlaceholder] = {
content: `](${segment.linkHref})`,
};
}
}

Expand Down
3 changes: 3 additions & 0 deletions lighthouse-core/test/report/html/renderer/dom-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const fs = require('fs');
const jsdom = require('jsdom');
const URL = require('../../../../lib/url-shim.js');
const DOM = require('../../../../report/html/renderer/dom.js');
const Util = require('../../../../report/html/renderer/util.js');

const TEMPLATE_FILE = fs.readFileSync(__dirname +
'/../../../../report/html/templates.html', 'utf8');
Expand All @@ -21,13 +22,15 @@ describe('DOM', () => {

// One-time setup for the suite: install the globals and build the DOM helper under test.
beforeAll(() => {
// Expose the URL shim and the report's Util class on the global object.
global.URL = URL;
global.Util = Util;
// Build a jsdom document from the report templates file and wrap it in a DOM instance.
const {document} = new jsdom.JSDOM(TEMPLATE_FILE).window;
dom = new DOM(document);
dom.setLighthouseChannel('someChannel');
});

// Teardown: clear the globals that were installed for this suite.
afterAll(() => {
global.URL = undefined;
global.Util = undefined;
});

describe('createElement', () => {
Expand Down
Loading

0 comments on commit ecb729e

Please sign in to comment.