[algorithms] Fix handling of sub-steps (#1626)

There were a couple of cases where algorithm extraction got confused while parsing sub-steps: either a `<dfn>` located inside one of the algorithm's own sub-steps was picked up as the anchor for the introductory paragraph, or the search for that paragraph walked all the way back up to the root of the algorithm instead of stopping at the parent step (the enclosing `<li>`).
w3c · Jul 23, 2024 · 7f5341c · 7f5341c
1 parent 8c3c67f
commit 7f5341c
Show file tree

Hide file tree

Showing 2 changed files with 134 additions and 8 deletions.
diff --git a/src/browserlib/extract-algorithms.mjs b/src/browserlib/extract-algorithms.mjs
@@ -404,14 +404,17 @@ function getDefinedNameIn(el) {
  */
 function findIntroParagraph(algo) {
   let paragraph;
-  let container = algo.root.closest('.algorithm');
+  let container = algo.root.closest('li,.algorithm');
   while (container) {
     const dfn = container.querySelector('dfn');
-    if (dfn) {
-      paragraph = dfn.closest('p,div');
+    if (dfn && !algo.root.contains(dfn)) {
+      paragraph = dfn.closest('p,div,li');
+      break;
+    }
+    if (container.nodeName === 'LI') {
       break;
     }
-    container = container.parentElement.closest('.algorithm');
+    container = container.parentElement.closest('li,.algorithm');
   }
 
   if (!paragraph) {
@@ -441,9 +444,10 @@ function getAlgorithmInfo(algo, context) {
   // Note some specs add the "algorithm" class to the `<ol>` and to the
   // wrapping container, and define the name in the wrapping container.
   let info = {};
+
   let container = algo.root.closest('.algorithm');
-  while (container) {
-    if (container && !context?.nested) {
+  if (!context?.nested) {
+    while (container) {
       if (container.getAttribute('data-algorithm')) {
         info.name = normalize(container.getAttribute('data-algorithm'));
         if (container.getAttribute('data-algorithm-for')) {
@@ -462,13 +466,15 @@ function getAlgorithmInfo(algo, context) {
             info.href = dfn.href;
           }
         }
-        break;
       }
       else {
         info = getDefinedNameIn(container);
+        if (info.name || info.href) {
+          break;
+        }
       }
+      container = container.parentElement.closest('.algorithm');
     }
-    container = container.parentElement.closest('.algorithm');
   }
 
   // Get the introductory prose from the previous paragraph

diff --git a/tests/extract-algorithms.js b/tests/extract-algorithms.js
@@ -380,6 +380,126 @@ const tests = [
     ]
   },
 
+  {
+    title: 'does not get confused by weirdly nested algorithms',
+    html: `
+      <div class="algorithm">
+        <p>To <dfn data-export="" data-dfn-type="dfn" id="do-something">do something</dfn>, run these steps:</p>
+        <ol>
+          <li>Do something.</li>
+          <li>Then run the following steps to <dfn data-export="" data-dfn-type="dfn" id="do-something-else">do something else</dfn>:
+            <ol class="algorithm">
+              <li>Do something else.</li>
+            </ol>
+          </li>
+        </ol>
+      </div>`,
+    algorithms: [
+      {
+        name: 'do something',
+        href: 'about:blank#do-something',
+        rationale: '.algorithm',
+        html: 'To <dfn data-export="" data-dfn-type="dfn" id="do-something">do something</dfn>, run these steps:',
+        steps: [
+          { html: 'Do something.' },
+          {
+            html: 'Then run the following steps to <dfn data-export="" data-dfn-type="dfn" id="do-something-else">do something else</dfn>:',
+            rationale: '.algorithm',
+            steps: [ { html: 'Do something else.' } ]
+          }
+        ]
+      }
+    ]
+  },
+
+  {
+    title: 'uses the list item prose as introductory prose for an algorithm step',
+    html: `
+      <div class="algorithm">
+        <p>To <dfn data-export="" data-dfn-type="dfn" id="do-something">do something</dfn>, run these steps:</p>
+        <ol>
+          <li>Do something.</li>
+          <li>Then run the following steps:
+            <ol>
+              <li>Do something else.</li>
+            </ol>
+          </li>
+        </ol>
+      </div>`,
+    algorithms: [
+      {
+        name: 'do something',
+        href: 'about:blank#do-something',
+        rationale: '.algorithm',
+        html: 'To <dfn data-export="" data-dfn-type="dfn" id="do-something">do something</dfn>, run these steps:',
+        steps: [
+          { html: 'Do something.' },
+          {
+            html: 'Then run the following steps:',
+            rationale: 'do',
+            steps: [ { html: 'Do something else.' } ]
+          }
+        ]
+      }
+    ]
+  },
+
+  {
+    title: 'stops at the first container that has the algorithm name',
+    html: `
+      <div class="algorithm">
+        <p>To <dfn data-export="" data-dfn-type="dfn">do something</dfn>, just do something.</p>
+        <div class="algorithm">
+          <p>To <dfn data-export="" data-dfn-type="dfn">do something else</dfn>, just do something else.</p>
+        </div>
+      </div>`,
+    algorithms: [
+      {
+        name: 'do something',
+        rationale: 'To <dfn>',
+        html: 'To <dfn data-export="" data-dfn-type="dfn">do something</dfn>, just do something.',
+      },
+      {
+        name: 'do something else',
+        rationale: 'To <dfn>',
+        html: 'To <dfn data-export="" data-dfn-type="dfn">do something else</dfn>, just do something else.',
+      }
+    ]
+  },
+
+  {
+    title: 'avoids anchoring on dfns treacherously hidden in algorithm sub-steps',
+    html: `
+      <div class="algorithm">
+        <p>To <dfn data-dfn-type="dfn">do something</dfn>:</p>
+        <ol>
+          <li>Run the following substeps:
+            <ol>
+              <li>Do <dfn data-dfn-type="dfn">something</dfn>.</li>
+              <li>And something else.</li>
+            </ol>
+          </li>
+        </ol>
+      </div>`,
+    algorithms: [
+      {
+        name: 'do something',
+        rationale: '.algorithm',
+        html: 'To <dfn data-dfn-type="dfn">do something</dfn>:',
+        steps: [
+          {
+            html: 'Run the following substeps:',
+            rationale: 'do',
+            steps: [
+              { html: 'Do <dfn data-dfn-type="dfn">something</dfn>.' },
+              { html: 'And something else.' }
+            ]
+          }
+        ]
+      }
+    ]
+  },
+
 ];
 
 describe('The algorithms extraction module', function () {