-
Notifications
You must be signed in to change notification settings - Fork 9.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
misc: share markdown parsing in collect-strings and the report #9514
Changes from 4 commits
b52ae97
e2b102e
1860f8c
7953ee3
e09a47c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -280,6 +280,79 @@ class Util { | |||||
return parts.join(' '); | ||||||
} | ||||||
|
||||||
/** | ||||||
* Split a string by markdown code spans (enclosed in `backticks`), splitting | ||||||
* into segments that were enclosed in backticks (marked as `isCode === true`) | ||||||
* and those that outside the backticks (`isCode === false`). | ||||||
* @param {string} text | ||||||
* @return {Array<{isCode: true, codeText: string}|{isCode: false, plainText: string}>} | ||||||
*/ | ||||||
static splitMarkdownCodeSpans(text) { | ||||||
/** @type {Array<{isCode: true, codeText: string}|{isCode: false, plainText: string}>} */ | ||||||
const segments = []; | ||||||
|
||||||
// Split on backticked code spans. | ||||||
const parts = text.split(/`(.*?)`/g); | ||||||
for (let i = 0; i < parts.length; i ++) { | ||||||
const text = parts[i]; | ||||||
|
||||||
// Empty strings are an artifact of splitting, not meaningful. | ||||||
if (!text) continue; | ||||||
|
||||||
// Alternates between plain text and code segments. | ||||||
if (i % 2 === 0) { | ||||||
segments.push({ | ||||||
isCode: false, | ||||||
plainText: text, | ||||||
}); | ||||||
} else { | ||||||
segments.push({ | ||||||
isCode: true, | ||||||
codeText: text, | ||||||
}); | ||||||
} | ||||||
} | ||||||
|
||||||
return segments; | ||||||
} | ||||||
|
||||||
/** | ||||||
* Split a string on markdown links (e.g. [some link](https://...)) into | ||||||
* segments of plain text that weren't part of a link (marked as | ||||||
* `isLink === false`), and segments with text content and a URL that did make | ||||||
* up a link (marked as `isLink === true`). | ||||||
* @param {string} text | ||||||
* @return {Array<{isLink: true, linkText: string, linkHref: string}|{isLink: false, plainText: string}>} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thing about this. Seems like |
||||||
*/ | ||||||
static splitMarkdownLink(text) { | ||||||
/** @type {Array<{isLink: true, linkText: string, linkHref: string}|{isLink: false, plainText: string}>} */ | ||||||
const segments = []; | ||||||
|
||||||
const parts = text.split(/\[([^\]]+?)\]\((https?:\/\/.*?)\)/g); | ||||||
while (parts.length) { | ||||||
// Pop off the same number of elements as there are capture groups. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
;) largely a pedantic suggestion and pre-existing, so can reject if you'd like |
||||||
const [preambleText, linkText, linkHref] = parts.splice(0, 3); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried rewriting this with |
||||||
|
||||||
if (preambleText) { // Empty plain text is an artifact of splitting, not meaningful. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the empty explanation feels a bit weird here, maybe invert it or at least say "We can skip empty b/c ..."? |
||||||
segments.push({ | ||||||
isLink: false, | ||||||
plainText: preambleText, | ||||||
}); | ||||||
} | ||||||
|
||||||
// Append link if there are any. | ||||||
if (linkText && linkHref) { | ||||||
segments.push({ | ||||||
isLink: true, | ||||||
linkText, | ||||||
linkHref, | ||||||
}); | ||||||
} | ||||||
} | ||||||
|
||||||
return segments; | ||||||
} | ||||||
|
||||||
/** | ||||||
* @param {URL} parsedUrl | ||||||
* @param {{numPathParts?: number, preserveQuery?: boolean, preserveHost?: boolean}=} options | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ const path = require('path'); | |
const assert = require('assert'); | ||
const tsc = require('typescript'); | ||
const collectAndBakeCtcStrings = require('./bake-ctc-to-lhl.js'); | ||
const Util = require('../../report/html/renderer/util.js'); | ||
|
||
const LH_ROOT = path.join(__dirname, '../../../'); | ||
const UISTRINGS_REGEX = /UIStrings = .*?\};\n/s; | ||
|
@@ -157,28 +158,27 @@ function convertMessageToCtc(message, examples = {}) { | |
* @param {IncrementalCtc} icu | ||
*/ | ||
function _processPlaceholderMarkdownCode(icu) { | ||
const message = icu.message; | ||
|
||
// Check that number of backticks is even. | ||
const match = icu.message.match(/`/g); | ||
const match = message.match(/`/g); | ||
if (match && match.length % 2 !== 0) { | ||
throw Error(`Open backtick in message "${icu.message}"`); | ||
throw Error(`Open backtick in message "${message}"`); | ||
} | ||
|
||
// Split on backticked code spans | ||
const parts = icu.message.split(/`(.*?)`/g); | ||
icu.message = ''; | ||
let idx = 0; | ||
while (parts.length) { | ||
// Pop off the same number of elements as there are capture groups. | ||
const [preambleText, codeText] = parts.splice(0, 2); | ||
icu.message += preambleText; | ||
if (codeText) { | ||
for (const segment of Util.splitMarkdownCodeSpans(message)) { | ||
if (segment.isCode) { | ||
const placeholderName = `MARKDOWN_SNIPPET_${idx++}`; | ||
// Backtick replacement looks unreadable here, so .join() instead. | ||
icu.message += '$' + placeholderName + '$'; | ||
icu.placeholders[placeholderName] = { | ||
content: '`' + codeText + '`', | ||
example: codeText, | ||
content: '`' + segment.codeText + '`', | ||
example: segment.codeText, | ||
}; | ||
} else { | ||
icu.message += segment.plainText; | ||
} | ||
} | ||
} | ||
|
@@ -189,35 +189,39 @@ function _processPlaceholderMarkdownCode(icu) { | |
* @param {IncrementalCtc} icu | ||
*/ | ||
function _processPlaceholderMarkdownLink(icu) { | ||
const message = icu.message; | ||
|
||
// Check for markdown link common errors, ex: | ||
// * [extra] (space between brackets and parens) | ||
if (icu.message.match(/\[.*\] \(.*\)/)) { | ||
throw Error(`Bad Link syntax in message "${icu.message}"`); | ||
if (message.match(/\[.*\] \(.*\)/)) { | ||
throw Error(`Bad Link spacing in message "${message}"`); | ||
} | ||
// * [](empty link text) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @exterkamp and I discussed this empty linkText case and decided while maybe there's some vague possibility of a need for invisible links at some point, it's almost certainly an accident today, so let's alert the author |
||
if (message.match(/\[\]\(.*\)/)) { | ||
throw Error(`markdown link text missing in message "${message}"`); | ||
} | ||
|
||
// Split on markdown links (e.g. [some link](https://...)). | ||
const parts = icu.message.split(/\[([^\]]*?)\]\((https?:\/\/.*?)\)/g); | ||
icu.message = ''; | ||
let idx = 0; | ||
|
||
while (parts.length) { | ||
// Pop off the same number of elements as there are capture groups. | ||
const [preambleText, linkText, linkHref] = parts.splice(0, 3); | ||
icu.message += preambleText; | ||
|
||
// Append link if there are any. | ||
if (linkText && linkHref) { | ||
const startPlaceholder = `LINK_START_${idx}`; | ||
const endPlaceholder = `LINK_END_${idx}`; | ||
icu.message += '$' + startPlaceholder + '$' + linkText + '$' + endPlaceholder + '$'; | ||
idx++; | ||
icu.placeholders[startPlaceholder] = { | ||
content: '[', | ||
}; | ||
icu.placeholders[endPlaceholder] = { | ||
content: `](${linkHref})`, | ||
}; | ||
for (const segment of Util.splitMarkdownLink(message)) { | ||
if (!segment.isLink) { | ||
// Plain text segment. | ||
icu.message += segment.plainText; | ||
continue; | ||
} | ||
|
||
// Otherwise, append any links found. | ||
const startPlaceholder = `LINK_START_${idx}`; | ||
const endPlaceholder = `LINK_END_${idx}`; | ||
icu.message += '$' + startPlaceholder + '$' + segment.linkText + '$' + endPlaceholder + '$'; | ||
idx++; | ||
icu.placeholders[startPlaceholder] = { | ||
content: '[', | ||
}; | ||
icu.placeholders[endPlaceholder] = { | ||
content: `](${segment.linkHref})`, | ||
}; | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems weird to me to include a boolean flag and use different text fields. Makes more sense to me to have
`{isCode: boolean, text: string}`? Is this less canonical for JS?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We had a massive discussion about this very thing when discussing the shape of the proto and the hardened LHR API.
I 100% agree this feels anti-JS :)
OTOH, it's for internal use only, never goes over any wire, and we never really need to use just the text, so I don't have a strong reason to object yet.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
haha everyone lives with "preambleText" (does code typically come with a preamble?) including preambleText that occurs after the content and may not have any text in it, but I try to make self describing property names and everyone loses their minds :P
I think there is value in API safety like this (in this case tsc requires the `isCode` discriminator to be checked for the consuming code to be able to know if it can use `plainText` or `codeText`) but as you say it's internal only, for a minor feature, and I was really just playing around with the interface (not sure I really like it), so I can switch back to `text` or whatever :)