diff --git a/lib/check/check-files.browser.js b/lib/check-files.browser.js similarity index 100% rename from lib/check/check-files.browser.js rename to lib/check-files.browser.js diff --git a/lib/check-files.js b/lib/check-files.js new file mode 100644 index 0000000..94f501f --- /dev/null +++ b/lib/check-files.js @@ -0,0 +1,49 @@ +/** + * @typedef {import('./index.js').Landmarks} Landmarks + */ + +import {constants, promises as fs} from 'node:fs' + +/** + * @param {Landmarks} landmarks + * Landmarks. + * @param {ReadonlyArray} references + * References. + * @returns {Promise} + * Nothing. + */ +export async function checkFiles(landmarks, references) { + /** @type {Array>} */ + const promises = [] + + for (const filePath of references) { + const marks = landmarks.get(filePath) + + if (!marks) { + /** @type {Map} */ + const map = new Map() + + landmarks.set(filePath, map) + + promises.push( + fs.access(filePath, constants.F_OK).then( + /** + * @returns {undefined} + */ + function () { + map.set('', true) + }, + /** + * @param {NodeJS.ErrnoException} error + * @returns {undefined} + */ + function (error) { + map.set('', error.code !== 'ENOENT' && error.code !== 'ENOTDIR') + } + ) + ) + } + } + + await Promise.all(promises) +} diff --git a/lib/check/check-files.js b/lib/check/check-files.js deleted file mode 100644 index a24f388..0000000 --- a/lib/check/check-files.js +++ /dev/null @@ -1,41 +0,0 @@ -/** - * @typedef {import('../types.js').VFile} VFile - * @typedef {import('../types.js').Landmarks} Landmarks - * @typedef {import('../types.js').References} References - */ - -import {constants, promises as fs} from 'node:fs' - -/** - * @param {{files: VFile[], landmarks: Landmarks, references: References}} ctx - */ -export async function checkFiles(ctx) { - const landmarks = ctx.landmarks - const references = ctx.references - /** @type {Array.>} */ - const promises = [] - /** @type {string} */ - let filePath - - for (filePath in references) { - if (landmarks[filePath] === undefined) { - /** @type {Record} */ - const map = Object.create(null) - - landmarks[filePath] = map - - promises.push( - fs.access(filePath, constants.F_OK).then( - () => { - map[''] = true - }, - (/** @type {NodeJS.ErrnoException} */ error) => { - map[''] = error.code !== 'ENOENT' && error.code !== 'ENOTDIR' - } - ) - ) - } - } - - await Promise.all(promises) -} diff --git a/lib/check/index.js b/lib/check/index.js deleted file mode 100644 index 66e7cbc..0000000 --- a/lib/check/index.js +++ /dev/null @@ -1,11 +0,0 @@ -import {trough} from 'trough' -import {mergeLandmarks} from './merge-landmarks.js' -import {mergeReferences} from './merge-references.js' -import {checkFiles} from './check-files.js' -import {validate} from './validate.js' - -export const check = trough() - .use(mergeLandmarks) - .use(mergeReferences) - .use(checkFiles) - .use(validate) diff --git a/lib/check/merge-landmarks.js b/lib/check/merge-landmarks.js deleted file mode 100644 index 1ae1b52..0000000 --- a/lib/check/merge-landmarks.js +++ /dev/null @@ -1,40 +0,0 @@ -/** - * @typedef {import('../types.js').VFile} VFile - * @typedef {import('../types.js').Landmarks} Landmarks - */ - -import {constants} from '../constants.js' - -const own = {}.hasOwnProperty - -/** - * @param {{files: VFile[], landmarks?: Landmarks}} ctx - */ -export function mergeLandmarks(ctx) { - /** @type {Landmarks} */ - const result = Object.create(null) - const files = ctx.files - let index = -1 - - while (++index < files.length) { - const file = files[index] - const landmarks = /** @type {Landmarks|undefined} */ ( - file.data[constants.landmarkId] - ) - /** @type {string} */ - let landmark - - if (landmarks) { - for (landmark in landmarks) { - if (own.call(landmarks, landmark)) { - result[landmark] = Object.assign( - Object.create(null), - landmarks[landmark] - ) - } - } - } - } - - ctx.landmarks = result -} diff --git a/lib/check/merge-references.js b/lib/check/merge-references.js deleted file mode 100644 index 5e777a9..0000000 --- a/lib/check/merge-references.js +++ /dev/null @@ -1,62 +0,0 @@ -/** - * @typedef {import('../types.js').VFile} VFile - * @typedef {import('../types.js').Landmarks} Landmarks - * @typedef {import('../types.js').References} References - * @typedef {import('../types.js').ReferenceMap} ReferenceMap - * @typedef {import('../types.js').Resource} Resource - */ - -import {constants} from '../constants.js' - -const own = {}.hasOwnProperty - -/** - * @param {{files: VFile[], landmarks: Landmarks, references?: References}} ctx - */ -export function mergeReferences(ctx) { - /** @type {References} */ - const result = {} - const files = ctx.files - let index = -1 - - while (++index < files.length) { - const file = files[index] - const references = - /** @type {Record>|undefined} */ ( - file.data[constants.referenceId] - ) - /** @type {string} */ - let reference - - if (!references) { - continue - } - - for (reference in references) { - if (own.call(references, reference)) { - const internal = references[reference] - /** @type {Record} */ - const all = - reference in result - ? result[reference] - : (result[reference] = Object.create(null)) - /** @type {string} */ - let hash - - for (hash in internal) { - // eslint-disable-next-line max-depth - if (own.call(internal, hash)) { - const list = all[hash] || (all[hash] = []) - list.push({ - file, - reference: {filePath: reference, hash}, - nodes: internal[hash] - }) - } - } - } - } - } - - ctx.references = result -} diff --git a/lib/check/validate.js b/lib/check/validate.js deleted file mode 100644 index 7392237..0000000 --- a/lib/check/validate.js +++ /dev/null @@ -1,138 +0,0 @@ -/** - * @typedef {import('../types.js').VFile} VFile - * @typedef {import('../types.js').Landmarks} Landmarks - * @typedef {import('../types.js').References} References - * @typedef {import('../types.js').ReferenceMap} ReferenceMap - */ - -import path from 'node:path' -// @ts-expect-error: untyped. -import propose from 'propose' -import {constants} from '../constants.js' - -const own = {}.hasOwnProperty - -/** - * @param {{files: VFile[], landmarks: Landmarks, references: References}} ctx - */ -export function validate(ctx) { - const landmarks = ctx.landmarks - const references = ctx.references - /** @type {ReferenceMap[]} */ - const missing = [] - /** @type {string} */ - let key - - for (key in references) { - if (own.call(references, key)) { - const refs = references[key] - /** @type {Landmarks} */ - const lands = - // `else` could happen in browser. - /* c8 ignore next */ - key in landmarks ? landmarks[key] : Object.create(null) - /** @type {string} */ - let hash - - for (hash in refs) { - if (!lands[hash]) { - missing.push(...refs[hash]) - } - } - } - } - - let index = -1 - - while (++index < missing.length) { - warn(ctx, missing[index]) - } -} - -/** - * @param {{files: VFile[], landmarks: Landmarks, references: References}} ctx - * @param {ReferenceMap} reference - */ -function warn(ctx, reference) { - const landmarks = ctx.landmarks - const absolute = reference.file.path - ? path.resolve(reference.file.cwd, reference.file.path) - : '' - const base = absolute ? path.dirname(absolute) : null - const filePath = base - ? path.relative(base, reference.reference.filePath) - : reference.reference.filePath - const hash = reference.reference.hash - /** @type {string[]} */ - const dictionary = [] - /** @type {string} */ - let reason - /** @type {string} */ - let ruleId - - if (hash) { - reason = 'Link to unknown heading' - ruleId = constants.headingRuleId - - if (base && path.join(base, filePath) !== absolute) { - reason += ' in `' + filePath + '`' - ruleId = constants.headingInFileRuleId - } - - reason += ': `' + hash + '`' - } else { - reason = 'Link to unknown file: `' + filePath + '`' - ruleId = constants.fileRuleId - } - - const origin = [constants.sourceId, ruleId].join(':') - /** @type {string} */ - let landmark - - for (landmark in landmarks) { - // Only suggest if file exists. - if (!(landmark in landmarks) || !landmarks[landmark]['']) { - continue - } - - const relativeLandmark = base ? path.relative(base, landmark) : landmark - - if (!hash) { - dictionary.push(relativeLandmark) - continue - } - - if (relativeLandmark !== filePath) { - continue - } - - /** @type {string} */ - let subhash - - for (subhash in landmarks[landmark]) { - if (subhash !== '') { - dictionary.push(subhash) - } - } - } - - /** @type {string} */ - const suggestion = propose(hash ? hash : filePath, dictionary, { - threshold: 0.7 - }) - - if (suggestion) { - reason += '. Did you mean `' + suggestion + '`' - } - - let index = -1 - - while (++index < reference.nodes.length) { - const message = reference.file.message( - reason, - reference.nodes[index], - origin - ) - message.url = 'https://github.com/remarkjs/remark-validate-links#readme' - } -} diff --git a/lib/constants.js b/lib/constants.js index 3f60aa0..a74a2f5 100644 --- a/lib/constants.js +++ b/lib/constants.js @@ -1,8 +1,8 @@ -export const constants = { - sourceId: 'remark-validate-links', - headingRuleId: 'missing-heading', - headingInFileRuleId: 'missing-heading-in-file', +export const constants = /** @type {const} */ ({ fileRuleId: 'missing-file', + headingInFileRuleId: 'missing-heading-in-file', + headingRuleId: 'missing-heading', landmarkId: 'remarkValidateLinksLandmarks', - referenceId: 'remarkValidateLinksReferences' -} + referenceId: 'remarkValidateLinksReferences', + sourceId: 'remark-validate-links' +}) diff --git a/lib/find/find-repo.browser.js b/lib/find-repo.browser.js similarity index 100% rename from lib/find/find-repo.browser.js rename to lib/find-repo.browser.js diff --git a/lib/find-repo.js b/lib/find-repo.js new file mode 100644 index 0000000..29a6300 --- /dev/null +++ b/lib/find-repo.js @@ -0,0 +1,59 @@ +/** + * @typedef {import('vfile').VFile} VFile + * @typedef {import('../index.js').Options} Options + */ + +import path from 'node:path' +import {promisify} from 'node:util' +import {exec as execCb} from 'node:child_process' + +const exec = promisify(execCb) + +/** + * @param {VFile} file + * File. + * @param {Readonly} options + * Configuration. + * @returns {Promise<[repo: string | false | undefined, root: string] | undefined>} + * Info. + */ +export async function findRepo(file, options) { + const givenRepo = options.repository + const givenRoot = options.root + let base = file.cwd + /** @type {string | false | undefined} */ + let repo + /** @type {string | undefined} */ + let root + + if (file.path) { + base = path.dirname(path.resolve(base, file.path)) + } + + if (givenRepo === null || givenRepo === undefined) { + const result = await exec('git remote -v', {cwd: base}) + const match = result.stdout.match(/origin\t(.+?) \(fetch\)/) + + if (match) { + repo = match[1] + } + + if (!repo) { + throw new Error('Could not find remote origin') + } + } else { + repo = givenRepo + } + + if (givenRoot) { + root = path.resolve(file.cwd, givenRoot) + } else if (givenRepo === null || givenRepo === undefined) { + const {stdout} = await exec('git rev-parse --show-cdup', {cwd: base}) + const out = stdout.trim() + root = out ? path.join(base, out) : base + } else { + root = file.cwd + } + + return [repo, root] +} diff --git a/lib/find/config.js b/lib/find/config.js deleted file mode 100644 index 09b166c..0000000 --- a/lib/find/config.js +++ /dev/null @@ -1,69 +0,0 @@ -/** - * @typedef {import('mdast').Root} Root - * @typedef {import('vfile').VFile} VFile - * @typedef {import('unified-engine').FileSet} FileSet - * @typedef {import('hosted-git-info').Hosts} Hosts - * @typedef {import('../index.js').Options} Options - * @typedef {import('../index.js').UrlConfig} UrlConfig - */ - -import hostedGitInfo from 'hosted-git-info' - -/** @type {Partial>} */ -const viewPaths = {github: 'blob', gitlab: 'blob', bitbucket: 'src'} -/** @type {Partial>} */ -const headingPrefixes = { - github: '#', - gitlab: '#', - bitbucket: '#markdown-header-' -} -/** @type {Partial>} */ -const topAnchors = {github: '#readme', gitlab: '#readme'} -/** @type {Partial>} */ -const lineLinks = {github: true, gitlab: true} - -/** - * @param {{tree: Root, file: VFile, fileSet?: FileSet, options: Options}} ctx - */ -export function config(ctx) { - const repo = ctx.options.repository - - if (ctx.options.urlConfig) { - return - } - - /** @type {UrlConfig} */ - const urlConfig = { - prefix: '', - headingPrefix: '#', - lines: false, - hostname: undefined, - topAnchor: undefined - } - - if (repo) { - const info = hostedGitInfo.fromUrl(repo) - - if (info && info.type !== 'gist') { - if (info.type in viewPaths) { - urlConfig.prefix = '/' + info.path() + '/' + viewPaths[info.type] + '/' - } - - if (info.type in headingPrefixes) { - urlConfig.headingPrefix = headingPrefixes[info.type] - } - - if (info.type in lineLinks) { - urlConfig.lines = lineLinks[info.type] - } - - if (info.type in topAnchors) { - urlConfig.topAnchor = topAnchors[info.type] - } - - urlConfig.hostname = info.domain - } - } - - ctx.options.urlConfig = urlConfig -} diff --git a/lib/find/find-references.js b/lib/find/find-references.js deleted file mode 100644 index 953dec3..0000000 --- a/lib/find/find-references.js +++ /dev/null @@ -1,349 +0,0 @@ -/** - * @typedef {import('mdast').Root} Root - * @typedef {import('hast').Properties} Properties - * @typedef {import('vfile').VFile} VFile - * @typedef {import('unified-engine').FileSet} FileSet - * @typedef {import('../types.js').Landmarks} Landmarks - * @typedef {import('../types.js').References} References - * @typedef {import('../types.js').ReferenceMap} ReferenceMap - * @typedef {import('../types.js').Resource} Resource - * @typedef {import('../index.js').UrlConfig} UrlConfig - * @typedef {import('../index.js').Options} Options - */ - -import {promises as fs} from 'node:fs' -import path from 'node:path' -import {URL} from 'node:url' -import {toVFile} from 'to-vfile' -import {visit} from 'unist-util-visit' -import {toString} from 'mdast-util-to-string' -import BananaSlug from 'github-slugger' -import {constants} from '../constants.js' - -const slugs = new BananaSlug() - -const slash = '/' -const numberSign = '#' -const questionMark = '?' - -const https = 'https:' -const http = 'http:' -const slashes = '//' - -const lineExpression = /^#l\d/i - -// List from: https://github.com/github/markup#markups -const readmeExtensions = new Set(['.markdown', '.mdown', '.mkdn', '.md']) -const readmeBasename = /^readme$/i - -/** - * @param {{tree: Root, file: VFile, fileSet?: FileSet, options: Options}} ctx - */ -export async function findReferences(ctx) { - const file = ctx.file - const fileSet = ctx.fileSet - const absolute = file.path ? path.resolve(file.cwd, file.path) : '' - const space = file.data - /** @type {Record>} */ - const references = Object.create(null) - /** @type {Landmarks} */ - const landmarks = Object.create(null) - const config = { - // Always set at this point. - /* c8 ignore next */ - urlConfig: ctx.options.urlConfig || {}, - path: absolute, - base: absolute ? path.dirname(absolute) : file.cwd, - root: ctx.options.root - } - /** @type {string[]} */ - const statted = [] - /** @type {string[]} */ - const added = [] - /** @type {Array.>} */ - const promises = [] - - space[constants.referenceId] = references - space[constants.landmarkId] = landmarks - - addLandmarks(absolute, '') - - slugs.reset() - - visit(ctx.tree, (node) => { - const data = node.data || {} - const props = /** @type {Properties} */ (data.hProperties || {}) - let id = String(props.name || props.id || data.id || '') - - if (!id && node.type === 'heading') { - id = slugs.slug( - toString(node, {includeImageAlt: false, includeHtml: false}) - ) - } - - if (id) { - addLandmarks(absolute, id) - } - - if ('url' in node && node.url) { - const info = urlToPath(node.url, config, node.type) - - if (info) { - const fp = info.filePath - const hash = info.hash - - addReference(fp, '', node) - - if (hash) { - if (fileSet || fp === absolute) { - addReference(fp, hash, node) - } - - if (fileSet && fp && !statted.includes(fp)) { - promises.push(addFile(fp)) - } - } - } - } - }) - - await Promise.all(promises) - - /** - * @param {string} filePath - * @param {string} hash - */ - function addLandmarks(filePath, hash) { - addLandmark(filePath, hash) - - // Note: this may add marks too many anchors as defined. - // For example, if there is both a `readme.md` and a `readme.markdown` in a - // folder, both their landmarks will be defined for their parent folder. - // To solve this, we could check whichever sorts first, and ignore the - // others. - // This is an unlikely scenario though, and adds a lot of complexity, so - // we’re ignoring it. - if (readme(filePath)) { - addLandmark(path.dirname(filePath), hash) - } - } - - /** - * @param {string} filePath - * @param {string} hash - */ - function addLandmark(filePath, hash) { - /** @type {Record} */ - const marks = - filePath in landmarks - ? landmarks[filePath] - : (landmarks[filePath] = Object.create(null)) - - marks[hash] = true - } - - /** - * @param {string} filePath - * @param {string} hash - * @param {Resource} node - */ - function addReference(filePath, hash, node) { - /** @type {Record} */ - const refs = - filePath in references - ? references[filePath] - : (references[filePath] = Object.create(null)) - const hashes = hash in refs ? refs[hash] : (refs[hash] = []) - - hashes.push(node) - } - - /** - * @param {string} filePath - */ - async function addFile(filePath) { - statted.push(filePath) - - try { - const stats = await fs.stat(filePath) - - if (stats.isDirectory()) { - /** @type {string[]} */ - let entries = [] - - try { - entries = await fs.readdir(filePath) - // Seems to never happen after a stat. - /* c8 ignore next */ - } catch {} - - const files = entries.sort() - let index = -1 - /** @type {string|undefined} */ - let file - - while (++index < files.length) { - const entry = entries[index] - - if (readme(entry)) { - file = entry - break - } - } - - // To do: test for no readme in directory. - - // Else, there’s no readme that we can parse, so add the directory. - if (file) { - filePath = path.join(filePath, file) - statted.push(filePath) - } - } - } catch {} - - if (fileSet && !added.includes(filePath)) { - added.push(filePath) - fileSet.add( - toVFile({cwd: file.cwd, path: path.relative(file.cwd, filePath)}) - ) - } - } -} - -/** - * @param {string} value - * @param {{urlConfig: UrlConfig, path: string, base: string, root: string|undefined}} config - * @param {string} type - */ -// eslint-disable-next-line complexity -function urlToPath(value, config, type) { - // Absolute paths: `/wooorm/test/blob/main/directory/example.md`. - if (value.charAt(0) === slash) { - if (!config.urlConfig.hostname) { - return - } - - // Create a URL. - value = https + slashes + config.urlConfig.hostname + value - } - - /** @type {URL|undefined} */ - let url - - try { - url = new URL(value) - } catch {} - - // URLs: `https://github.com/wooorm/test/blob/main/directory/example.md`. - if (url && config.root) { - // Exit if we don’t have hosted Git info or this is not a URL to the repo. - if ( - !config.urlConfig.prefix || - !config.urlConfig.hostname || - (url.protocol !== https && url.protocol !== http) || - url.hostname !== config.urlConfig.hostname || - url.pathname.slice(0, config.urlConfig.prefix.length) !== - config.urlConfig.prefix - ) { - return - } - - value = url.pathname.slice(config.urlConfig.prefix.length) - - // Things get interesting here: branches: `foo/bar/baz` could be `baz` on - // the `foo/bar` branch, or, `baz` in the `bar` directory on the `foo` - // branch. - // Currently, we’re ignoring this and just not supporting branches. - value = value.split(slash).slice(1).join(slash) - - return normalize( - path.resolve(config.root, value + (type === 'image' ? '' : url.hash)), - config - ) - } - - // Remove the search: `?foo=bar`. - // But don’t remove stuff if it’s in the hash: `readme.md#heading?`. - let numberSignIndex = value.indexOf(numberSign) - const questionMarkIndex = value.indexOf(questionMark) - - if ( - questionMarkIndex !== -1 && - (numberSignIndex === -1 || numberSignIndex > questionMarkIndex) - ) { - value = - value.slice(0, questionMarkIndex) + - (numberSignIndex === -1 ? '' : value.slice(numberSignIndex)) - numberSignIndex = value.indexOf(numberSign) - } - - // Ignore "headings" in image links: `image.png#metadata` - if (numberSignIndex !== -1 && type === 'image') { - value = value.slice(0, numberSignIndex) - } - - // Local: `#heading`. - if (value.charAt(0) === numberSign) { - value = config.path ? config.path + value : value - } - // Anything else, such as `readme.md`. - else { - value = config.path ? path.resolve(config.base, value) : '' - } - - return normalize(value, config) -} - -/** - * @param {string} url - * @param {{urlConfig: UrlConfig, path: string, base: string, root: string|undefined}} config - */ -function normalize(url, config) { - const numberSignIndex = url.indexOf(numberSign) - const lines = config.urlConfig.lines - const prefix = config.urlConfig.headingPrefix - const topAnchor = config.urlConfig.topAnchor - /** @type {string} */ - let filePath - /** @type {string|undefined} */ - let hash - - if (numberSignIndex === -1) { - filePath = url - } else { - filePath = url.slice(0, numberSignIndex) - hash = url.slice(numberSignIndex).toLowerCase() - - // Ignore the hash if it references the top anchor of the environment - if (topAnchor && hash === topAnchor) { - hash = undefined - } - // Ignore the hash if it references lines in a file or doesn’t start - // with a heading prefix. - else if ( - prefix && - ((lines && lineExpression.test(hash)) || - hash.slice(0, prefix.length) !== prefix) - ) { - hash = undefined - } - // Use the hash if it starts with a heading prefix. - else if (prefix) { - hash = hash.slice(prefix.length) - } - } - - return {filePath: decodeURIComponent(filePath), hash} -} - -/** - * @param {string} filePath - */ -function readme(filePath) { - const ext = path.extname(filePath) - - return ( - readmeExtensions.has(ext) && - readmeBasename.test(path.basename(filePath, ext)) - ) -} diff --git a/lib/find/find-repo.js b/lib/find/find-repo.js deleted file mode 100644 index 31ea482..0000000 --- a/lib/find/find-repo.js +++ /dev/null @@ -1,47 +0,0 @@ -/** - * @typedef {import('vfile').VFile} VFile - * @typedef {import('../index.js').Options} Options - */ - -import path from 'node:path' -import {promisify} from 'node:util' -import {exec as execCb} from 'node:child_process' - -const exec = promisify(execCb) - -/** - * @param {{file: VFile, options: Options}} ctx - */ -export async function findRepo(ctx) { - const repo = ctx.options.repository - const file = ctx.file - let base = file.cwd - - if (file.path) { - base = path.dirname(path.resolve(base, file.path)) - } - - if (repo === null || repo === undefined) { - const {stdout} = await exec('git remote -v', {cwd: base}) - const remote = stdout.match(/origin\t(.+?) \(fetch\)/) - - // To do: don’t mutate. - ctx.options.repository = remote ? remote[1] : undefined - - if (!ctx.options.repository) { - throw new Error('Could not find remote origin') - } - } - - if (ctx.options.root === null || ctx.options.root === undefined) { - if (repo === null || repo === undefined) { - const {stdout} = await exec('git rev-parse --show-cdup', {cwd: base}) - const out = stdout.trim() - ctx.options.root = out ? path.join(base, out) : base - } else { - ctx.options.root = ctx.file.cwd - } - } else { - ctx.options.root = path.resolve(file.cwd, ctx.options.root) - } -} diff --git a/lib/find/index.js b/lib/find/index.js deleted file mode 100644 index 4715225..0000000 --- a/lib/find/index.js +++ /dev/null @@ -1,6 +0,0 @@ -import {trough} from 'trough' -import {findRepo} from './find-repo.js' -import {config} from './config.js' -import {findReferences} from './find-references.js' - -export const find = trough().use(findRepo).use(config).use(findReferences) diff --git a/lib/index.js b/lib/index.js index 4cce6de..450bec2 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,79 +1,401 @@ +/// + /** + * @typedef {import('hosted-git-info').Hosts} Hosts + * @typedef {import('mdast').Nodes} Nodes + * @typedef {import('mdast').Resource} Resource * @typedef {import('mdast').Root} Root - * @typedef {import('vfile').VFile} VFile * @typedef {import('unified-engine').FileSet} FileSet - * @typedef {import('unified').TransformCallback} Callback + */ + +/** + * @typedef {Map>} Landmarks + * Landmarks. + * + * @typedef Options + * Configuration. + * @property {string | false | null | undefined} [repository] + * URL to hosted Git (default: detected from Git remote); + * if you’re not in a Git repository, you must pass `false`; + * if the repository resolves to something npm understands as a Git host such + * as GitHub, GitLab, or Bitbucket, full URLs to that host (say, + * `https://github.com/remarkjs/remark-validate-links/readme.md#install`) are + * checked. + * @property {string | null | undefined} [root] + * Path to Git root folder (default: local Git folder); + * if both `root` and `repository` are nullish, the Git root is detected; + * if `root` is not given but `repository` is, `file.cwd` is used. + * @property {Readonly | null | undefined} [urlConfig] + * Config on how hosted Git works (default: detected from repo); + * `github.com`, `gitlab.com`, or `bitbucket.org` work automatically; + * otherwise, pass `urlConfig` manually. + * + * @callback Propose + * @param {string} value + * @param {ReadonlyArray} dictionary + * @param {Readonly | null | undefined} [options] + * @returns {string | undefined} + * + * @typedef ProposeOptions + * Configuration for `propose`. + * @property {number| null | undefined} [threshold] + * Threshold. + * + * @typedef {Extract} Resources + * Resources. + * + * @typedef Reference + * Reference to something. + * @property {string} filePath + * Path to file. + * @property {string | undefined} hash + * Hash. + * + * @typedef ReferenceInfo + * Info on a reference. + * @property {VFile} file + * File. + * @property {Readonly} reference + * Reference. + * @property {ReadonlyArray>} nodes + * Nodes that reference it. + * + * @typedef State + * Info passed around. + * @property {string} base + * Folder of file. + * @property {string} path + * Path to file. + * @property {string | null | undefined} root + * Path to Git folder. + * @property {Readonly} urlConfig + * Configuration. * * @typedef UrlConfig * Hosted Git info - * @property {string|undefined} [hostname] + * @property {string | null | undefined} [headingPrefix] + * Prefix of headings (example: `'#'`, `'#markdown-header-'`). + * @property {string | null | undefined} [hostname] * Domain of URLs (example: `'github.com'`, `'bitbucket.org'`). - * @property {string|undefined} [prefix] + * @property {boolean | null | undefined} [lines] + * Whether lines in files can be linked. + * @property {string | null | undefined} [prefix] * Path prefix before files (example: * `'/remarkjs/remark-validate-links/blob/'`, * `'/remarkjs/remark-validate-links/src/'`). - * @property {string|undefined} [headingPrefix] - * Prefix of headings (example: `'#'`, `'#markdown-header-'`). - * @property {string|undefined} [topAnchor] + * @property {string | null | undefined} [topAnchor] * Hash to top of readme (example: `#readme`). - * @property {boolean|undefined} [lines] - * Whether lines in files can be linked. - * - * @typedef Options - * Configuration. - * @property {string | false | undefined} [repository] - * URL to hosted Git. - * If `repository` is nullish, the Git origin remote is detected. - * If the repository resolves to something npm understands as a Git host such - * as GitHub, GitLab, or Bitbucket, full URLs to that host (say, - * `https://github.com/remarkjs/remark-validate-links/readme.md#install`) can - * also be checked. - * If you’re not in a Git repository, you must pass `repository: false` - * explicitly. - * @property {string} [root] - * A `root` (`string?`) can also be passed, referencing the local Git root - * directory (the folder that contains `.git`). - * If both `root` and `repository` are nullish, the Git root is detected. - * If `root` is not given but `repository` is, `file.cwd` is used. - * @property {UrlConfig} [urlConfig] - * If your project is hosted on `github.com`, `gitlab.com`, or `bitbucket.org`, - * this plugin can automatically detect the url configuration. - * Otherwise, use `urlConfig` to specify this manually. */ -import {check} from './check/index.js' -import {find} from './find/index.js' +import fs from 'node:fs/promises' +import path from 'node:path' +import GithubSlugger from 'github-slugger' +import hostedGitInfo from 'hosted-git-info' +import {toString} from 'mdast-util-to-string' +// @ts-expect-error: untyped. +import propose_ from 'propose' +import {visit} from 'unist-util-visit' +import {VFile} from 'vfile' +import {checkFiles} from './check-files.js' import {constants} from './constants.js' +import {findRepo} from './find-repo.js' + +const propose = /** @type {Propose} */ (propose_) cliCompleter.pluginId = constants.sourceId +/** @type {Readonly>>} */ +const viewPaths = {github: 'blob', gitlab: 'blob', bitbucket: 'src'} +/** @type {Readonly>>} */ +const headingPrefixes = { + bitbucket: '#markdown-header-', + github: '#', + gitlab: '#' +} +/** @type {Readonly>>} */ +const topAnchors = {github: '#readme', gitlab: '#readme'} +/** @type {Readonly>>} */ +const lineLinks = {github: true, gitlab: true} + +const slugger = new GithubSlugger() + +const slash = '/' +const numberSign = '#' +const questionMark = '?' +const https = 'https:' +const http = 'http:' +const slashes = '//' +const lineExpression = /^#l\d/i + +// List from: https://github.com/github/markup#markups +const readmeExtensions = new Set(['.markdown', '.mdown', '.mkdn', '.md']) +const readmeBasename = /^readme$/i + /** - * Plugin to validate that Markdown links and images reference existing local - * files and headings. + * Validate that Markdown links and images reference existing local files and + * headings. * - * @type {import('unified').Plugin<[Options?, FileSet?]|[Options?]|void[], Root>} + * @param {Readonly | null | undefined} [options] + * Configuration (optional). + * @param {FileSet | null | undefined} [fileSet] + * File set (optional). + * @returns + * Transform. */ export default function remarkValidateLinks(options, fileSet) { + const settings = options || {} + // Attach a `completer`. if (fileSet) { fileSet.use(cliCompleter) } - // Find references and landmarks. - return (tree, file, next) => { - find.run( - {tree, file, fileSet, options: {...options}}, - /** @type {Callback} */ - (error) => { - if (error) { - next(error) - } else if (fileSet) { - next() - } else { - checkAll([file], next) + /** + * Transform. + * + * @param {Root} tree + * Tree. + * @param {VFile} file + * File. + * @returns {Promise} + * Nothing. + * + * Note: `void` needed because `unified` doesn’t seem to accept `undefined`. + */ + return async function (tree, file) { + /* c8 ignore next -- this yields `undefined` in browsers. */ + const [repo, root] = (await findRepo(file, settings)) || [] + let urlConfig = settings.urlConfig + + if (!urlConfig) { + /** @type {UrlConfig} */ + const config = { + headingPrefix: '#', + hostname: undefined, + lines: false, + prefix: '', + topAnchor: undefined + } + + if (repo) { + const info = hostedGitInfo.fromUrl(repo) + + if (info && info.type !== 'gist') { + if (info.type in viewPaths) { + config.prefix = '/' + info.path() + '/' + viewPaths[info.type] + '/' + } + + if (info.type in headingPrefixes) { + config.headingPrefix = headingPrefixes[info.type] + } + + if (info.type in lineLinks) { + config.lines = lineLinks[info.type] + } + + if (info.type in topAnchors) { + config.topAnchor = topAnchors[info.type] + } + + config.hostname = info.domain } } - ) + + urlConfig = config + } + + const absolute = file.path ? path.resolve(file.cwd, file.path) : '' + const space = file.data + /** @type {Map>>} */ + const references = new Map() + /** @type {Landmarks} */ + const landmarks = new Map() + /** @type {State} */ + const state = { + base: absolute ? path.dirname(absolute) : file.cwd, + path: absolute, + root, + urlConfig + } + /** @type {Array} */ + const statted = [] + /** @type {Array} */ + const added = [] + /** @type {Array>} */ + const promises = [] + + space[constants.referenceId] = references + space[constants.landmarkId] = landmarks + + addLandmarks(absolute, '') + + slugger.reset() + + visit(tree, function (node) { + const data = node.data || {} + const props = data.hProperties || {} + let id = String(props.name || props.id || data.id || '') + + if (!id && node.type === 'heading') { + id = slugger.slug( + toString(node, {includeHtml: false, includeImageAlt: false}) + ) + } + + if (id) { + addLandmarks(absolute, id) + } + + if ('url' in node && node.url) { + const info = urlToPath(node.url, state, node.type) + + if (info) { + const fp = info.filePath + const hash = info.hash + + addReference(fp, '', node) + + if (hash) { + if (fileSet || fp === absolute) { + addReference(fp, hash, node) + } + + if (fileSet && fp && !statted.includes(fp)) { + promises.push(addFile(fp)) + } + } + } + } + }) + + await Promise.all(promises) + + if (!fileSet) { + await checkAll([file]) + } + + /** + * @param {string} filePath + * Absolute path to file. + * @param {string} hash + * Hash. + * @returns {undefined} + * Nothing. + */ + function addLandmarks(filePath, hash) { + addLandmark(filePath, hash) + + // Note: this may add marks too many anchors as defined. + // For example, if there is both a `readme.md` and a `readme.markdown` in a + // folder, both their landmarks will be defined for their parent folder. + // To solve this, we could check whichever sorts first, and ignore the + // others. + // This is an unlikely scenario though, and adds a lot of complexity, so + // we’re ignoring it. + if (readme(filePath)) { + addLandmark(path.dirname(filePath), hash) + } + } + + /** + * @param {string} filePath + * Absolute path to file. + * @param {string} hash + * Hash. + * @returns {undefined} + * Nothing. + */ + function addLandmark(filePath, hash) { + let marks = landmarks.get(filePath) + + if (!marks) { + marks = new Map() + landmarks.set(filePath, marks) + } + + marks.set(hash, true) + } + + /** + * @param {string} filePath + * Absolute path to file. + * @param {string} hash + * Hash. + * @param {Resources} node + * Node. + * @returns {undefined} + * Nothing. + */ + function addReference(filePath, hash, node) { + let refs = references.get(filePath) + + if (!refs) { + refs = new Map() + references.set(filePath, refs) + } + + let hashes = refs.get(hash) + + if (!hashes) { + hashes = [] + refs.set(hash, hashes) + } + + hashes.push(node) + } + + /** + * @param {string} filePath + * Absolute path to file. + * @returns {Promise} + * Nothing. + */ + async function addFile(filePath) { + statted.push(filePath) + + try { + const stats = await fs.stat(filePath) + + if (stats.isDirectory()) { + /** @type {Array} */ + let entries = [] + + try { + entries = await fs.readdir(filePath) + /* c8 ignore next -- seems to never happen after a stat. */ + } catch {} + + const files = entries.sort() + let index = -1 + /** @type {string | undefined} */ + let file + + while (++index < files.length) { + const entry = entries[index] + + if (readme(entry)) { + file = entry + break + } + } + + // To do: test for no readme in directory. + + // Else, there’s no readme that we can parse, so add the directory. + if (file) { + filePath = path.join(filePath, file) + statted.push(filePath) + } + } + } catch {} + + if (fileSet && !added.includes(filePath)) { + added.push(filePath) + fileSet.add( + new VFile({cwd: file.cwd, path: path.relative(file.cwd, filePath)}) + ) + } + } } } @@ -81,27 +403,333 @@ export default function remarkValidateLinks(options, fileSet) { * Completer for the CLI (multiple files, supports parsing more files). * * @param {FileSet} set - * @param {Callback} next - * @returns {void} + * @returns {Promise} */ -function cliCompleter(set, next) { - checkAll(set.valueOf(), next) +async function cliCompleter(set) { + await checkAll(set.valueOf()) } /** * Completer for the CLI (multiple files, supports parsing more files). * - * @param {VFile[]} files - * @param {Callback} next - * @returns {void} + * @param {ReadonlyArray} files + * Files. + * @returns {Promise} + * Nothing. + */ +async function checkAll(files) { + // Merge landmarks. + /** @type {Landmarks} */ + const landmarks = new Map() + let index = -1 + + while (++index < files.length) { + const file = files[index] + const fileLandmarks = /** @type {Landmarks | undefined} */ ( + file.data[constants.landmarkId] + ) + + if (fileLandmarks) { + for (const [filePath, marks] of fileLandmarks) { + landmarks.set(filePath, new Map(marks)) + } + } + } + + // Merge references. + /** @type {Map>>} */ + const references = new Map() + index = -1 + + while (++index < files.length) { + const file = files[index] + const fileReferences = + /** @type {Map>> | undefined} */ ( + file.data[constants.referenceId] + ) + + if (!fileReferences) { + continue + } + + for (const [reference, internal] of fileReferences) { + let all = references.get(reference) + if (!all) { + all = new Map() + references.set(reference, all) + } + + for (const [hash, nodes] of internal) { + let list = all.get(hash) + + if (!list) { + list = [] + all.set(hash, list) + } + + list.push({ + file, + nodes, + reference: {filePath: reference, hash} + }) + } + } + } + + // Access files to see whether they exist. + await checkFiles(landmarks, [...references.keys()]) + + /** @type {Array} */ + const missing = [] + + for (const [key, refs] of references) { + const lands = landmarks.get(key) + + for (const [hash, infos] of refs) { + /* c8 ignore next -- `else` can only happen in browser. */ + const exists = lands ? lands.get(hash) : false + + if (!exists) { + missing.push(...infos) + } + } + } + + index = -1 + + while (++index < missing.length) { + warn(landmarks, missing[index]) + } +} + +/** + * @param {Landmarks} landmarks + * Landmarks. + * @param {ReferenceInfo} reference + * Reference. + */ +function warn(landmarks, reference) { + const absolute = reference.file.path + ? path.resolve(reference.file.cwd, reference.file.path) + : '' + const base = absolute ? path.dirname(absolute) : null + const filePath = base + ? path.relative(base, reference.reference.filePath) + : reference.reference.filePath + const hash = reference.reference.hash + /** @type {Array} */ + const dictionary = [] + /** @type {string} */ + let reason + /** @type {string} */ + let ruleId + + if (hash) { + reason = 'Link to unknown heading' + ruleId = constants.headingRuleId + + if (base && path.join(base, filePath) !== absolute) { + reason += ' in `' + filePath + '`' + ruleId = constants.headingInFileRuleId + } + + reason += ': `' + hash + '`' + } else { + reason = 'Link to unknown file: `' + filePath + '`' + ruleId = constants.fileRuleId + } + + const origin = [constants.sourceId, ruleId].join(':') + for (const [landmark, marks] of landmarks) { + // Only suggest if file exists. + if (!marks || !marks.get('')) { + continue + } + + const relativeLandmark = base ? path.relative(base, landmark) : landmark + + if (!hash) { + dictionary.push(relativeLandmark) + continue + } + + if (relativeLandmark !== filePath) { + continue + } + + for (const [subhash] of marks) { + if (subhash !== '') { + dictionary.push(subhash) + } + } + } + + const suggestion = propose(hash ? hash : filePath, dictionary, { + threshold: 0.7 + }) + + if (suggestion) { + reason += '. Did you mean `' + suggestion + '`' + } + + let index = -1 + + while (++index < reference.nodes.length) { + const message = reference.file.message( + reason, + reference.nodes[index], + origin + ) + message.url = 'https://github.com/remarkjs/remark-validate-links#readme' + } +} + +/** + * @param {string} value + * URL. + * @param {State} state + * State. + * @param {Resources['type']} type + * Type of node (`'link'` or `'image'`). + * @returns {Reference | undefined} + * Reference. */ -function checkAll(files, next) { - // Check all references and landmarks. - check.run( - {files}, - /** @type {Callback} */ - (error) => { - next(error) +// eslint-disable-next-line complexity +function urlToPath(value, state, type) { + // Absolute paths: `/wooorm/test/blob/main/directory/example.md`. + if (value.charAt(0) === slash) { + if (!state.urlConfig.hostname) { + return + } + + // Create a URL. + value = https + slashes + state.urlConfig.hostname + value + } + + /** @type {URL | undefined} */ + let url + + try { + url = new URL(value) + } catch {} + + // URLs: `https://github.com/wooorm/test/blob/main/directory/example.md`. + if (url && state.root) { + // Exit if we don’t have hosted Git info or this is not a URL to the repo. + if ( + !state.urlConfig.prefix || + !state.urlConfig.hostname || + (url.protocol !== https && url.protocol !== http) || + url.hostname !== state.urlConfig.hostname || + url.pathname.slice(0, state.urlConfig.prefix.length) !== + state.urlConfig.prefix + ) { + return } + + value = url.pathname.slice(state.urlConfig.prefix.length) + + // Things get interesting here: branches: `foo/bar/baz` could be `baz` on + // the `foo/bar` branch, or, `baz` in the `bar` directory on the `foo` + // branch. + // Currently, we’re ignoring this and just not supporting branches. + value = value.split(slash).slice(1).join(slash) + + return normalize( + path.resolve(state.root, value + (type === 'image' ? '' : url.hash)), + state + ) + } + + // Remove the search: `?foo=bar`. + // But don’t remove stuff if it’s in the hash: `readme.md#heading?`. + let numberSignIndex = value.indexOf(numberSign) + const questionMarkIndex = value.indexOf(questionMark) + + if ( + questionMarkIndex !== -1 && + (numberSignIndex === -1 || numberSignIndex > questionMarkIndex) + ) { + value = + value.slice(0, questionMarkIndex) + + (numberSignIndex === -1 ? '' : value.slice(numberSignIndex)) + numberSignIndex = value.indexOf(numberSign) + } + + // Ignore "headings" in image links: `image.png#metadata` + if (numberSignIndex !== -1 && type === 'image') { + value = value.slice(0, numberSignIndex) + } + + // Local: `#heading`. + if (value.charAt(0) === numberSign) { + value = state.path ? state.path + value : value + } + // Anything else, such as `readme.md`. + else { + value = state.path ? path.resolve(state.base, value) : '' + } + + return normalize(value, state) +} + +/** + * @param {string} url + * URL. + * @param {State} state + * State. + * @returns {Reference} + * Reference. + */ +function normalize(url, state) { + const numberSignIndex = url.indexOf(numberSign) + const lines = state.urlConfig.lines + const prefix = state.urlConfig.headingPrefix + const topAnchor = state.urlConfig.topAnchor + /** @type {string} */ + let filePath + /** @type {string | undefined} */ + let hash + + if (numberSignIndex === -1) { + filePath = url + } else { + filePath = url.slice(0, numberSignIndex) + hash = url.slice(numberSignIndex).toLowerCase() + + // Ignore the hash if it references the top anchor of the environment + if (topAnchor && hash === topAnchor) { + hash = undefined + } + // Ignore the hash if it references lines in a file or doesn’t start + // with a heading prefix. + else if ( + prefix && + ((lines && lineExpression.test(hash)) || + hash.slice(0, prefix.length) !== prefix) + ) { + hash = undefined + } + // Use the hash if it starts with a heading prefix. + else if (prefix) { + hash = hash.slice(prefix.length) + } + } + + return {filePath: decodeURIComponent(filePath), hash} +} + +/** + * @param {string} filePath + * Absolute path to file. + * @returns {boolean} + * Whether `filePath` is a readme. + */ +function readme(filePath) { + const ext = path.extname(filePath) + + return ( + readmeExtensions.has(ext) && + readmeBasename.test(path.basename(filePath, ext)) ) } diff --git a/lib/types.js b/lib/types.js deleted file mode 100644 index 3115c32..0000000 --- a/lib/types.js +++ /dev/null @@ -1,20 +0,0 @@ -/** - * @typedef {import('vfile').VFile} VFile - * @typedef {import('mdast').Root} Root - * @typedef {import('mdast').Content} Content - * @typedef {Extract} Resource - * - * @typedef Reference - * @property {string} filePath - * @property {string} hash - * - * @typedef ReferenceMap - * @property {VFile} file - * @property {Reference} reference - * @property {Resource[]} nodes - * - * @typedef {Record>} Landmarks - * @typedef {Record>} References - */ - -export {} diff --git a/package.json b/package.json index 5286080..a826e91 100644 --- a/package.json +++ b/package.json @@ -42,24 +42,22 @@ "index.js" ], "browser": { - "./lib/check/check-files.js": "./lib/check/check-files.browser.js", - "./lib/find/find-repo.js": "./lib/find/find-repo.browser.js" + "./lib/check-files.js": "./lib/check-files.browser.js", + "./lib/find-repo.js": "./lib/find-repo.browser.js" }, "dependencies": { "@types/mdast": "^4.0.0", "github-slugger": "^2.0.0", "hosted-git-info": "^7.0.0", + "mdast-util-to-hast": "^13.0.0", "mdast-util-to-string": "^4.0.0", "propose": "0.0.5", - "to-vfile": "^8.0.0", "trough": "^2.0.0", - "unified": "^11.0.0", "unified-engine": "^11.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" }, "devDependencies": { - "@types/hast": "^3.0.0", "@types/hosted-git-info": "^3.0.0", "@types/node": "^20.0.0", "c8": "^8.0.0", @@ -68,6 +66,7 @@ "remark-cli": "^11.0.0", "remark-preset-wooorm": "^9.0.0", "strip-ansi": "^7.0.0", + "to-vfile": "^8.0.0", "type-coverage": "^2.0.0", "typescript": "^5.0.0", "vfile-sort": "^4.0.0", diff --git a/test/index.js b/test/index.js index 9dad3ba..5df79bc 100644 --- a/test/index.js +++ b/test/index.js @@ -1,5 +1,11 @@ /** - * @typedef {import('node:child_process').ExecException & {stdout: string, stderr: string}} ExecError + * @typedef {import('node:child_process').ExecException} ExecException + * Exec error. + */ + +/** + * @typedef {ExecException & {stderr: string, stdout: string}} ExecError + * Exec error (with fields that exist). */ import assert from 'node:assert/strict' @@ -29,6 +35,12 @@ const bin = fileURLToPath( test('remark-validate-links', async function (t) { process.chdir(fileURLToPath(fakeBaseUrl)) + await t.test('should expose the public api', async function () { + assert.deepEqual(Object.keys(await import('../index.js')).sort(), [ + 'default' + ]) + }) + await t.test('should work on the API', async function () { const file = await remark() .use(remarkValidateLinks) @@ -63,18 +75,6 @@ test('remark-validate-links', async function (t) { '# \\_\\_proto__\n# constructor\n# toString\n[](#__proto__), [](#constructor), [](#toString)' ) - assert.deepEqual( - // @ts-expect-error: to do: type. - Object.keys(file.data.remarkValidateLinksLandmarks['']), - ['', '__proto__', 'constructor', 'tostring'] - ) - - assert.deepEqual( - // @ts-expect-error: to do: type. - Object.keys(file.data.remarkValidateLinksReferences['']), - ['', '__proto__', 'constructor', 'tostring'] - ) - assert.deepEqual(file.messages, []) } ) @@ -1080,6 +1080,7 @@ test('remark-validate-links', async function (t) { ) }) - await fs.rm('./.git', {recursive: true}) + await fs.rm('./.git', {recursive: true, force: true}) + process.chdir(fileURLToPath(baseUrl)) }) diff --git a/test/sort.js b/test/sort.js index f11b36e..e431418 100644 --- a/test/sort.js +++ b/test/sort.js @@ -5,12 +5,24 @@ import {compareMessage} from 'vfile-sort' +/** + * Sort messages. + * + * @returns + * Transform. + */ export default function unifiedSort() { /** + * Transform. + * * @param {Root} _ + * Tree. * @param {VFile} file + * File. + * @returns {undefined} + * Nothing. */ - return (_, file) => { + return function (_, file) { file.messages.sort(compareMessage) } } diff --git a/tsconfig.json b/tsconfig.json index 870d82c..7f250e5 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -5,7 +5,7 @@ "declaration": true, "emitDeclarationOnly": true, "exactOptionalPropertyTypes": true, - "lib": ["es2020"], + "lib": ["es2022"], "module": "node16", "strict": true, "target": "es2020"