Skip to content

Commit

Permalink
Fix and improve processing of IETF specs (#1138)
Browse files Browse the repository at this point in the history
Take 3 :)

PR #1135 actually had a couple of issues that made the code essentially useless
because it only ran on a handful of IETF specs:
- the code favored info from Specref over info from IETF
- the code only really applied to drafts due to a buggy RegExp

Fixing these problems yielded a new issue: the assumption that HTTP WG specs
are always available under `httpwg.org` turns out to be wrong. Also, there are
other specs that are not published by the HTTP WG but that still have an
`httpwg.org` version. The code now looks at the actual list of specs in the
underlying GitHub repository: https://github.com/httpwg/httpwg.github.io.

As a result, the nightly URL of all IETF specs that have an `httpwg.org`
version now targets that version, implementing the suggestion in #937.
A companion PR was sent to Specref to implement a similar switch there:
tobie/specref#766

The code also looks at the obsolescence data in datatracker and sets the
`standing` and `obsoletedBy` properties accordingly. This fixes #327.
  • Loading branch information
tidoust committed Nov 23, 2023
1 parent 8fafc48 commit e67ff26
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 64 deletions.
35 changes: 5 additions & 30 deletions specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -600,36 +600,11 @@
"https://www.rfc-editor.org/rfc/rfc8297",
"https://www.rfc-editor.org/rfc/rfc8470",
"https://www.rfc-editor.org/rfc/rfc8942",
{
"url": "https://www.rfc-editor.org/rfc/rfc9110",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
{
"url": "https://www.rfc-editor.org/rfc/rfc9111",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
{
"url": "https://www.rfc-editor.org/rfc/rfc9112",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
{
"url": "https://www.rfc-editor.org/rfc/rfc9113",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
{
"url": "https://www.rfc-editor.org/rfc/rfc9114",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
"https://www.rfc-editor.org/rfc/rfc9110",
"https://www.rfc-editor.org/rfc/rfc9111",
"https://www.rfc-editor.org/rfc/rfc9112",
"https://www.rfc-editor.org/rfc/rfc9113",
"https://www.rfc-editor.org/rfc/rfc9114",
{
"url": "https://www.rfc-editor.org/rfc/rfc9163",
"nightly": {
Expand Down
163 changes: 131 additions & 32 deletions src/fetch-info.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const puppeteer = require("puppeteer");
const throttle = require("./throttle");
const throttledFetch = throttle(fetch, 2);
const computeShortname = require("./compute-shortname");
const Octokit = require("./octokit");

// Map spec statuses returned by Specref to those used in specs
// Note we typically won't get /TR statuses from Specref, since all /TR URLs
Expand Down Expand Up @@ -241,9 +242,85 @@ async function fetchInfoFromSpecref(specs, options) {


async function fetchInfoFromIETF(specs, options) {
// Retrieve the `doc.json` description that IETF datatracker exposes for the
// given document name (a draft name or an "rfcXXXX" name).
//
// Resolves with the parsed JSON body. Rejects with an Error when datatracker
// answers with a status code other than 200, or when the response body cannot
// be parsed as JSON.
async function fetchJSONDoc(draftName) {
  const docUrl = `https://datatracker.ietf.org/doc/${draftName}/doc.json`;
  const response = await throttledFetch(docUrl, options);
  const succeeded = response.status === 200;
  if (!succeeded) {
    throw new Error(`IETF datatracker returned an error for ${docUrl}, status code is ${response.status}`);
  }

  // Parse separately so that any parsing failure gets reported with the URL
  // that was requested.
  let parsed;
  try {
    parsed = await response.json();
  }
  catch {
    throw new Error(`IETF datatracker returned invalid JSON for ${docUrl}`);
  }
  return parsed;
}

// Fetch the JSON resource at `docUrl` and return the RFC name it describes,
// built as "rfc" followed by the `rfc` property of the response.
// NOTE(review): presumably `docUrl` targets a datatracker document resource
// whose JSON carries the RFC number in `rfc` - confirm against the API.
//
// Rejects with an Error when:
// - the response status code is not 200,
// - the response body is not valid JSON,
// - the response has no `rfc` property.
async function fetchRFCName(docUrl) {
  const res = await fetch(docUrl, options);
  if (res.status !== 200) {
    throw new Error(`IETF datatracker returned an error for ${docUrl}, status code is ${res.status}`);
  }

  // Only JSON parsing is guarded here, so that the "no RFC name" error below
  // does not get reported as an "invalid JSON" error.
  let body;
  try {
    body = await res.json();
  }
  catch (err) {
    throw new Error(`IETF datatracker returned invalid JSON for ${docUrl}`);
  }

  if (!body?.rfc) {
    throw new Error(`Could not find an RFC name in ${docUrl}`);
  }
  return `rfc${body.rfc}`;
}

// Return the names of the RFCs that obsolete the given IETF document.
//
// Names that do not start with "rfc" yield an empty list without any network
// request. For RFCs, the function queries the datatracker "relateddocument"
// endpoint for "obs" relationships that target the RFC, then resolves the
// `source` of each returned object to an RFC name through `fetchRFCName`.
//
// Rejects with an Error when datatracker answers with a status code other
// than 200 or with a body that is not valid JSON. Errors raised while
// resolving a source document propagate as-is.
async function fetchObsoletedBy(draftName) {
  if (draftName.startsWith('rfc')) {
    const relationsUrl = `https://datatracker.ietf.org/api/v1/doc/relateddocument/?format=json&relationship__slug__in=obs&target__name__in=${draftName}`;
    const response = await throttledFetch(relationsUrl, options);
    if (response.status !== 200) {
      throw new Error(`IETF datatracker returned an error for ${relationsUrl}, status code is ${response.status}`);
    }

    let relations;
    try {
      relations = await response.json();
    }
    catch {
      throw new Error(`IETF datatracker returned invalid JSON for ${relationsUrl}`);
    }

    // The `source` values are appended to the datatracker origin to build the
    // URLs of the obsoleting documents, which are then resolved in parallel.
    const sourceUrls = relations.objects
      .map(relation => `https://datatracker.ietf.org${relation.source}`);
    return Promise.all(sourceUrls.map(fetchRFCName));
  }

  return [];
}

// Most RFCs published by the HTTP WG have a friendly version under:
// https://httpwg.org/specs
// ... but not all (e.g., not rfc9292) and some related specs from other
// groups are also published under httpwg.org. To get a current list of specs
// published under https://httpwg.org/specs, let's look at the contents of
// the underlying GitHub repository:
// https://github.com/httpwg/httpwg.github.io/
//
// Resolves with the list of RFC names (e.g. "rfc9110") for which the
// repository tree at HEAD contains a `specs/rfcXXXX.html` file. The GitHub
// token in `options.githubToken` is used to authenticate the request.
// NOTE(review): the response is used as returned; whether GitHub may return a
// partial listing for a recursive tree request is not checked here - confirm
// against the "Get a tree" API documentation.
async function getHttpwgRFCs() {
  const octokit = new Octokit({ auth: options.githubToken });
  const { data } = await octokit.git.getTree({
    owner: 'httpwg',
    repo: 'httpwg.github.io',
    tree_sha: "HEAD",
    recursive: true
  });

  // A single anchored pattern both selects the spec files and captures the
  // RFC name, so that each path only needs to be matched once.
  const specFile = /^specs\/(rfc\d+)\.html$/;
  return data.tree
    .map(entry => entry.path.match(specFile))
    .filter(Boolean)
    .map(found => found[1]);
}
const httpwgRFCs = await getHttpwgRFCs();

const info = await Promise.all(specs.map(async spec => {
// IETF can only provide information about IETF specs
if (!spec.url.match(/\.ietf\.org/)) {
if (!spec.url.match(/\.rfc-editor\.org/) &&
!spec.url.match(/datatracker\.ietf\.org/)) {
return;
}

Expand All @@ -254,44 +331,66 @@ async function fetchInfoFromIETF(specs, options) {
if (!draftName) {
throw new Error(`IETF document follows an unexpected URL pattern: ${spec.url}`);
}
const url = `https://datatracker.ietf.org/doc/${draftName[1]}/doc.json`;
const res = await throttledFetch(url, options);
if (res.status !== 200) {
throw new Error(`IETF datatracker returned an error, status code is ${res.status}`);
const jsonDoc = await fetchJSONDoc(draftName[1]);
const lastRevision = jsonDoc.rev_history.pop();
if (lastRevision.name !== draftName[1]) {
throw new Error(`IETF spec ${spec.url} published under a new name "${lastRevision.name}". Canonical URL must be updated accordingly.`);
}
let body;
try {
body = await res.json();

// Compute the nightly URL from the spec name, publication status, and
// the group that develops it.
// Note we prefer the httpwg.org version for HTTP WG RFCs and drafts.
let nightly;
if (lastRevision.name.startsWith('rfc')) {
if (httpwgRFCs.includes(lastRevision.name)) {
nightly = `https://httpwg.org/specs/${lastRevision.name}.html`
}
else {
nightly = `https://www.rfc-editor.org/rfc/${lastRevision.name}`;
}
}
catch (err) {
throw new Error(`IETF datatracker returned invalid JSON for ${url}`);
else if (jsonDoc.group?.acronym === 'httpbis' || jsonDoc.group?.acronym === 'httpstate') {
nightly = `https://httpwg.org/http-extensions/${lastRevision.name}.html`
}
else {
nightly = `https://www.ietf.org/archive/id/${lastRevision.name}-${lastRevision.rev}.html`;
}

const lastRevision = body.rev_history.pop();
if (lastRevision.name !== body.name) {
throw new Error(`IETF spec ${spec.url} published under a new name "${lastRevision.name}". Canonical URL must be updated accordingly.`);
// For the status, use the std_level property, which contains one of the
// statuses in https://datatracker.ietf.org/api/v1/name/stdlevelname/
// The property is null for an unpublished Editor's Draft.
const status = jsonDoc.std_level ?? "Editor's Draft";

const specInfo = { title: jsonDoc.title, nightly, status };

// RFCs may have been obsoleted by another IETF spec. When that happens, we
// should flag the spec as discontinued and obsoleted by the other spec(s).
const obsoletedBy = await fetchObsoletedBy(draftName[1]);
const missingRFC = obsoletedBy.find(shortname => !specs.find(spec => spec.shortname === shortname));
if (missingRFC) {
throw new Error(`IETF spec at ${spec.url} is obsoleted by ${missingRFC} which is not in the list.`);
}

// Prefer the httpwg.org version for HTTP WG drafts
const nightly = (body.group?.acronym === 'httpbis') ?
`https://httpwg.org/http-extensions/${lastRevision.name}.html` :
`https://www.ietf.org/archive/id/${lastRevision.name}-${lastRevision.rev}.html`;
if (obsoletedBy.length > 0) {
specInfo.standing = "discontinued";
specInfo.obsoletedBy = obsoletedBy;
}

return {
title: body.title,
nightly: nightly,
state: body.state
};
return specInfo;
}));

// TODO: use "state" to return a better status than "Editor's Draft".
const results = {};
specs.forEach((spec, idx) => {
if (info[idx]) {
const specInfo = info[idx];
if (specInfo) {
results[spec.shortname] = {
nightly: { url: info[idx].nightly, status: "Editor's Draft" },
title: info[idx].title
nightly: { url: specInfo.nightly, status: specInfo.status },
title: specInfo.title
};
if (specInfo.standing === "discontinued") {
results[spec.shortname].standing = specInfo.standing;
results[spec.shortname].obsoletedBy = specInfo.obsoletedBy;
}
}
});
return results;
Expand Down Expand Up @@ -489,14 +588,14 @@ async function fetchInfo(specs, options) {
let remainingSpecs = specs;
const w3cInfo = await fetchInfoFromW3CApi(remainingSpecs, options);

// Compute information from Specref for remaining specs
remainingSpecs = remainingSpecs.filter(spec => !w3cInfo[spec.shortname]);
const specrefInfo = await fetchInfoFromSpecref(remainingSpecs, options);

// Extract information from IETF datatracker for remaining specs
remainingSpecs = remainingSpecs.filter(spec => !specrefInfo[spec.shortname]);
remainingSpecs = remainingSpecs.filter(spec => !w3cInfo[spec.shortname]);
const ietfInfo = await fetchInfoFromIETF(remainingSpecs, options);

// Compute information from Specref for remaining specs
remainingSpecs = remainingSpecs.filter(spec => !ietfInfo[spec.shortname]);
const specrefInfo = await fetchInfoFromSpecref(remainingSpecs, options);

// Extract information directly from the spec for remaining specs
remainingSpecs = remainingSpecs.filter(spec => !ietfInfo[spec.shortname]);
const specInfo = await fetchInfoFromSpecs(remainingSpecs, options);
Expand All @@ -505,8 +604,8 @@ async function fetchInfo(specs, options) {
const results = {};
specs.map(spec => spec.shortname).forEach(name => results[name] =
(w3cInfo[name] ? Object.assign(w3cInfo[name], { source: "w3c" }) : null) ||
(specrefInfo[name] ? Object.assign(specrefInfo[name], { source: "specref" }) : null) ||
(ietfInfo[name] ? Object.assign(ietfInfo[name], { source: "ietf" }) : null) ||
(specrefInfo[name] ? Object.assign(specrefInfo[name], { source: "specref" }) : null) ||
(specInfo[name] ? Object.assign(specInfo[name], { source: "spec" }) : null));

// Add series info from W3C API
Expand Down
69 changes: 67 additions & 2 deletions test/fetch-info.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,37 @@ describe("fetch-info module", function () {
});

describe("fetch from IETF datatracker", () => {
it("fetches info about RFCs from datatracker", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc7578",
shortname: "rfc7578"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].title, "Returning Values from Forms: multipart/form-data");
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, "https://www.rfc-editor.org/rfc/rfc7578");
});

it("fetches info about HTTP WG RFCs from datatracker", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc9110",
shortname: "rfc9110"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].title, "HTTP Semantics");
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, "https://httpwg.org/specs/rfc9110.html");
});

it("extracts a suitable nightly URL from an IETF draft", async () => {
const spec = {
url: "https://datatracker.ietf.org/doc/html/draft-davidben-http-client-hint-reliability",
shortname: "client-hint-reliability"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].title, "Client Hint Reliability");
assert.equal(info[spec.shortname].source, "ietf");
assert.match(info[spec.shortname].nightly.url, /^https:\/\/www\.ietf\.org\/archive\/id\/draft-davidben-http-client-hint-reliability-\d+\.html/);
});
Expand All @@ -79,11 +102,53 @@ describe("fetch-info module", function () {
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].title, "Digest Fields");
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, "https://httpwg.org/http-extensions/draft-ietf-httpbis-digest-headers.html");
});

it("extracts a suitable nightly URL from an IETF HTTP State Management Mechanism WG RFC", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc6265",
shortname: "rfc6265"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, "https://httpwg.org/specs/rfc6265.html");
});

it("uses the rfc-editor URL as nightly for an IETF HTTP WG RFC not published under httpwg.org", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc9163",
shortname: "rfc9163"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, spec.url);
});

it("identifies discontinued IETF specs", async () => {
const info = await fetchInfo([
{ url: "https://www.rfc-editor.org/rfc/rfc7230", shortname: "rfc7230" },
{ url: "https://www.rfc-editor.org/rfc/rfc9110", shortname: "rfc9110" },
{ url: "https://www.rfc-editor.org/rfc/rfc9112", shortname: "rfc9112" }
]);
assert.ok(info["rfc7230"]);
assert.equal(info["rfc7230"].standing, "discontinued");
assert.deepStrictEqual(info["rfc7230"].obsoletedBy, ["rfc9110", "rfc9112"]);
});

it("throws when a discontinued IETF spec is obsoleted by an unknown spec", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc7230",
shortname: "rfc7230"
};
await assert.rejects(
fetchInfo([spec]),
/^Error: IETF spec at (.*)rfc7230 is obsoleted by rfc9110 which is not in the list.$/);
});

it("throws when an IETF URL needs to be updated", async () => {
const spec = {
url: "https://datatracker.ietf.org/doc/html/draft-ietf-websec-strict-transport-sec",
Expand Down

0 comments on commit e67ff26

Please sign in to comment.