Skip to content

Commit

Permalink
Headings extraction - ignore empty IDs (#1423)
Browse files Browse the repository at this point in the history
Clunky specs may wrap a heading with an ID in a section that has an empty ID,
as currently happens for one heading in the EME spec.

The code thought the presence of an `id` attribute was sufficient and extracted
the heading without id as a result. It now makes sure that it is not empty.
  • Loading branch information
tidoust authored Nov 15, 2023
1 parent 3e84a6e commit ddc6b4f
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 6 deletions.
4 changes: 2 additions & 2 deletions src/browserlib/extract-headings.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ export default function (spec, idToHeading) {
// headings not to create a mess in the outline). In practice, this only
// really happens so far for WHATWG spec titles that (correctly) group the
// title and subtitle headings in a <hgroup>.
const idAttr = n.hasAttribute('id') ? 'id' : 'name';
const headingEl = n.hasAttribute('id') ? n : n.parentNode;
const idAttr = n.id ? 'id' : 'name';
const headingEl = n.id ? n : n.parentNode;
const href = getAbsoluteUrl(n, { singlePage, attribute: idAttr });
const heading = idToHeading[href] || {
id: n.getAttribute(idAttr),
Expand Down
6 changes: 3 additions & 3 deletions src/browserlib/map-ids-to-headings.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,15 @@ export default function () {
// Compute the absolute URL with fragment
// (Note the crawler merges pages of a multi-page spec in the first page
// to ease parsing logic, and we want to get back to the URL of the page)
const idAttr = node.hasAttribute('id') ? 'id' : 'name';
const idAttr = node.id ? 'id' : 'name';
const nodeid = getAbsoluteUrl(node, { singlePage, attribute: idAttr });
let href = nodeid;

if (parentSection) {
let id;

const heading = parentSection.heading;
if (heading.hasAttribute('id')) {
if (heading.id) {
id = heading.id;
href = getAbsoluteUrl(heading, { singlePage });
}
Expand All @@ -101,7 +101,7 @@ export default function () {
}
}

if (parentSection.root && parentSection.root.hasAttribute('id')) {
if (parentSection.root && parentSection.root.id) {
id = parentSection.root.id;
href = getAbsoluteUrl(parentSection.root, { singlePage });
}
Expand Down
7 changes: 6 additions & 1 deletion tests/extract-headings.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@ const testHeadings = [
title: "ignores test annotations in the heading",
html: "<h2 id=title><div class='annotation'>18 tests</div>2.3 Title</a></h2>",
res: [{id: "title", "href": "about:blank#title", title: "Title", number: "2.3", level: 2}]
}
},
{
title: "ignores an empty id if there's a better one",
html: "<section id><h1 id=title>Heading in a section with empty id</h1>",
res: [{id: "title", "href": "about:blank#title", title: "Heading in a section with empty id", level: 1}]
},
];

describe("Test headings extraction", function () {
Expand Down

0 comments on commit ddc6b4f

Please sign in to comment.