mdn · caugner · Mar 1, 2024 · Feb 5, 2023 · Feb 5, 2023 · Feb 8, 2023
@@ -8,7 +8,7 @@ import sizeOf from "image-size";
 
 import { Document, Image } from "../content";
 import { FLAW_LEVELS } from "../libs/constants";
-import { findMatchesInText } from "./matches-in-text";
+import { findMatchesInText, findMatchesInMarkdown } from "./matches";
 import { DEFAULT_LOCALE } from "../libs/constants";
 
 /**
@@ -21,50 +21,56 @@ export function checkImageReferences(doc, $, options, { url, rawContent }) {
 
   const checkImages = options.flawLevels.get("images") !== FLAW_LEVELS.IGNORE;
 
-  const checked = new Map();
+  const checked = new Map<string, number>();
+
+  const isMarkdown = doc.isMarkdown;
 
   function addImageFlaw(
     $img,
     src,
-    { explanation, externalImage = false, suggestion = null }
+    {
+      explanation,
+      externalImage = false,
+      suggestion = null,
+    }: {
+      explanation: string;
+      externalImage?: boolean;
+      suggestion?: string;
+    }
   ) {
     // If the document has *two* `<img src="XXX">` tags, this function
     // (addImageFlaw) is called two times. We can then assume the
     // findMatchesInText() will find it two times too. For each call,
     // we need to match the call based in counting matches from findMatchesInText().
-    const matches = [
-      ...findMatchesInText(src, rawContent, {
-        attribute: "src",
-      }),
-    ];
-    const checkedBefore = checked.get(src) || 0;
-    matches.forEach((match, i) => {
-      if (i !== checkedBefore) {
-        return;
-      }
-      if (!("images" in doc.flaws)) {
-        doc.flaws.images = [];
-      }
-      let fixable = false;
-      if (suggestion) {
-        $img.attr("src", suggestion);
-        fixable = true;
-      }
-      const id = `image${doc.flaws.images.length + 1}`;
-      $img.attr("data-flaw", id);
-      doc.flaws.images.push({
-        id,
-        src,
-        fixable,
-        suggestion,
-        explanation,
-        externalImage,
-        ...match,
-      });
-
-      // Use this to remember which in the list of matches we've dealt with.
-      checked.set(src, checkedBefore + 1);
+    const matches = isMarkdown
+      ? findMatchesInMarkdown(src, rawContent, { type: "image" })
+      : findMatchesInText(src, rawContent, { attribute: "src" });
+    const checkedBefore = checked.get(src) ?? 0;
+    if (matches.length <= checkedBefore) {
+      return; // we haven't found the match. We may need to deal with the match finder function.
+    }
+    const match = matches[checkedBefore];
+    if (!("images" in doc.flaws)) {
+      doc.flaws.images = [];
+    }
+    const fixable = Boolean(suggestion);
+    if (suggestion) {
+      $img.attr("src", suggestion);
+    }
+    const id = `image${doc.flaws.images.length + 1}`;
+    $img.attr("data-flaw", id);
+    doc.flaws.images.push({
+      id,
+      src,
+      fixable,
+      suggestion,
+      explanation,
+      externalImage,
+      ...match,
     });
+
+    // Use this to remember which in the list of matches we've dealt with.
+    checked.set(src, checkedBefore + 1);
   }
 
   $("img[src]").each((i, element) => {
@@ -243,11 +249,9 @@ export function checkImageWidths(doc, $, options, { rawContent }) {
     }
     const id = `image_widths${doc.flaws.image_widths.length + 1}`;
     $img.attr("data-flaw", id);
-    const matches = [
-      ...findMatchesInText(style, rawContent, {
-        attribute: "style",
-      }),
-    ];
+    const matches = findMatchesInText(style, rawContent, {
+      attribute: "style",
+    });
     const checkedBefore = checked.get(style) || 0;
     matches.forEach((match, i) => {
       if (i !== checkedBefore) {

@@ -1,11 +1,8 @@
 import fs from "node:fs";
 import path from "node:path";
 
-import fromMarkdown from "mdast-util-from-markdown";
-import visit from "unist-util-visit";
-
 import { Document, Redirect, Image } from "../../content";
-import { findMatchesInText } from "../matches-in-text";
+import { findMatchesInText, findMatchesInMarkdown } from "../matches";
 import {
   DEFAULT_LOCALE,
   FLAW_LEVELS,
@@ -15,17 +12,6 @@ import { isValidLocale } from "../../libs/locale-utils";
 
 const dirname = __dirname;
 
-function findMatchesInMarkdown(rawContent, href) {
-  const matches = [];
-  visit(fromMarkdown(rawContent), "link", (node: any) => {
-    if (node.url == href) {
-      const { line, column } = node.position.start;
-      matches.push({ line, column });
-    }
-  });
-  return matches;
-}
-
 const _safeToHttpsDomains = new Map();
 function getSafeToHttpDomains() {
   if (!_safeToHttpsDomains.size) {
@@ -120,17 +106,13 @@ export function getBrokenLinksFlaws(doc, $, { rawContent }, level) {
         href,
 
         doc.isMarkdown
-          ? findMatchesInMarkdown(rawContent, href)
-          : Array.from(
-              findMatchesInText(href, rawContent, {
-                attribute: "href",
-              })
-            )
+          ? findMatchesInMarkdown(href, rawContent, { type: "link" })
+          : findMatchesInText(href, rawContent, {
+              attribute: "href",
+            })
       );
     }
-    // findMatchesInText() is a generator function so use `Array.from()`
-    // to turn it into an array so we can use `.forEach()` because that
-    // gives us an `i` for every loop.
+
     matches.get(href).forEach((match, i) => {
       if (i !== index) {
         return;

@@ -1,4 +1,4 @@
-import { findMatchesInText } from "../matches-in-text";
+import { findMatchesInText } from "../matches";
 
 // You're not allowed to have `<a>` elements inside `<h2>` or `<h3>` elements
 // because those will be rendered out as "links to themselves".

@@ -8,7 +8,7 @@ import { DEFAULT_LOCALE } from "../../libs/constants";
 import {
   replaceMatchesInText,
   replaceMatchingLinksInMarkdown,
-} from "../matches-in-text";
+} from "../matches";
 import { forceExternalURL, downloadAndResizeImage } from "../utils";
 import { getBadBCDQueriesFlaws } from "./bad-bcd-queries";
 import { getBrokenLinksFlaws } from "./broken-links";

@@ -1,6 +1,6 @@
 import { Flaw } from ".";
 
-import { getFirstMatchInText } from "../matches-in-text";
+import { getFirstMatchInText } from "../matches";
 const escapeHTML = (s) =>
   s
     .replace(/&/g, "&amp;")

@@ -4,7 +4,7 @@ import {
   INTERACTIVE_EXAMPLES_BASE_URL,
   LIVE_SAMPLES_BASE_URL,
 } from "../../libs/env";
-import { findMatchesInText } from "../matches-in-text";
+import { findMatchesInText } from "../matches";
 
 const safeIFrameSrcs = [
   // EmbedGHLiveSample.ejs

@@ -1,30 +1,68 @@
+import fromMarkdown from "mdast-util-from-markdown";
+import visit from "unist-util-visit";
+
 const ESCAPE_CHARS_RE = /[.*+?^${}()|[\]\\]/g;
 
-export function* findMatchesInText(
-  needle,
-  haystack,
+type Match = { line: number; column: number };
+
+export function findMatchesInText(
+  needle: string,
+  haystack: string,
   { attribute = null } = {}
-) {
+): Array<Match> {
   // Need to remove any characters that can affect a regex if we're going
   // use the string in a manually constructed regex.
   const escaped = needle.replace(ESCAPE_CHARS_RE, "\\$&");
-  let rex;
-  if (attribute) {
-    rex = new RegExp(`${attribute}=['"](${escaped})['"]`, "g");
-  } else {
-    rex = new RegExp(`(${escaped})`, "g");
-  }
+  const rex = attribute
+    ? new RegExp(`${attribute}=['"](${escaped})['"]`, "g")
+    : new RegExp(`(${escaped})`, "g");
+  const matches: Array<Match> = [];
   for (const match of haystack.matchAll(rex)) {
     const left = haystack.slice(0, match.index);
     const line = (left.match(/\n/g) || []).length + 1;
     const lastIndexOf = left.lastIndexOf("\n") + 1;
     const column =
       match.index - lastIndexOf + 1 + (attribute ? attribute.length + 2 : 0);
-    yield { line, column };
+    matches.push({ line, column });
   }
+  return matches;
+}
+
+// find links or images in markdown content that match the given URL
+export function findMatchesInMarkdown(
+  url: string,
+  rawContent: string,
+  { type }: { type: "link" | "image" }
+): Array<Match> {
+  const matches: Array<Match> = [];
+  const attributeType = type === "link" ? "href" : "src";
+  const tree = fromMarkdown(rawContent);
+  // Find all the links and images in the markdown
+  // we should also find any HTML elements that contain links or images
+  visit(tree, [type, "html"], (node: any) => {
+    if (node.type === "html") {
+      const matchesInHtml = findMatchesInText(url, node.value, {
+        attribute: attributeType,
+      });
+      const correctedMatches = matchesInHtml.map(({ line, column }): Match => {
+        if (line === 1) {
+          // if it's the first line, we need to add the column offset
+          column += node.position.start.column - 1;
+        }
+        line += node.position.start.line - 1;
+        return { line, column };
+      });
+      matches.push(...correctedMatches);
+    } else if (node.url === url) {
+      // else this would be a markdown link or image
+      const { line, column } = node.position.start;
+      matches.push({ line, column });
+    }
+  });
+  return matches;
 }
 
-export function getFirstMatchInText(needle, haystack) {
+export function getFirstMatchInText(needle, haystack): Match {
   const index = haystack.indexOf(needle);
   const left = haystack.substring(0, index);
   const line = left.split("\n").length;

@@ -0,0 +1,28 @@
+---
+title: <img> tag with src that isn't a valid image
+slug: Web/Images/Bad_src
+---
+
+<!-- resolves, intially, to a directory -->
+![](/en-us/web)
+
+<!-- actually isn't a valid PNG -->
+![](actuallynota.png)
+
+<!--
+is a zero byte image, and as a check for html in markdown file
+add a space at the start of the <img> as a check for image finder function
+-->
+ <img src="empty.gif" alt=""/>
+
+<!--
+multi-line html with broken image,
+exists on disk but isn't actually a real SVG file.
+-->
+<table>
+  <tr>
+    <td>
+      <img src="actuallynota.svg" alt="broken image" />
+    </td>
+  </tr>
+</table>
@@ -1070,6 +1070,21 @@ test("image flaws with bad images", () => {
         "File not present on disk, an empty file, or not an image"
     ).length
   ).toBe(4);
+  // Check the line and column numbers for html <img> tags in markdown
+  expect({
+    line: flaws.images[2].line,
+    column: flaws.images[2].column,
+  }).toEqual({
+    line: 16,
+    column: 12,
+  });
+  expect({
+    line: flaws.images[3].line,
+    column: flaws.images[3].column,
+  }).toEqual({
+    line: 25,
+    column: 17,
+  });
 });
 
 test("linked to local files", () => {