Skip to content

Commit

Permalink
feat(docs-linter): add lint for local links (gnolang#2416)
Browse files Browse the repository at this point in the history
<!-- please provide a detailed description of the changes made in this
pull request. -->

## Description

This PR improves the docs linter by adding an `os.Stat` check for local
files. It will notify the user if a link to a local file is broken by
trying to find the file in question on the filesystem.

<details><summary>Contributors' checklist...</summary>

- [x] Added new tests, or not needed, or not feasible
- [x] Provided an example (e.g. screenshot) to aid review or the PR is
self-explanatory
- [x] Updated the official documentation or not needed
- [x] No breaking changes were made, or a `BREAKING CHANGE: xxx` message
was included in the description
- [x] Added references to related issues and PRs
- [x] Provided any useful hints for running manual tests
- [x] Added new benchmarks to [generated
graphs](https://gnoland.github.io/benchmarks), if any. More info
[here](https://github.com/gnolang/gno/blob/master/.benchmarks/README.md).
</details>

---------

Co-authored-by: deelawn <dboltz03@gmail.com>
  • Loading branch information
leohhhn and deelawn authored Jun 28, 2024
1 parent f547d7d commit 10a6f7e
Show file tree
Hide file tree
Showing 10 changed files with 352 additions and 116 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "docs / 404 checker"
name: "docs / lint"

on:
push:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ In this tutorial, you will learn how to start a local Gno node and connect to an
## 1. Initialize the node directory

To initialize a new Gno.land node working directory (configuration and secrets), make sure to
follow [Step 1](./validators-setting-up-a-new-chain#1-generate-the-node-directory-secrets--config) from the
follow [Step 1](./setting-up-a-new-chain.md#1-generate-the-node-directory-secrets--config) from the
chain setup tutorial.

## 2. Obtain the `genesis.json` of the remote chain
Expand Down
2 changes: 1 addition & 1 deletion docs/how-to-guides/connecting-from-go.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ The `gnoclient` package exposes a `Client` struct containing a `Signer` and
`RPCClient` connector. `Client` exposes all available functionality for talking
to a Gno.land chain.

```go
```go
type Client struct {
Signer Signer // Signer for transaction authentication
RPCClient rpcclient.Client // gnolang/gno/tm2/pkg/bft/rpc/client
Expand Down
6 changes: 3 additions & 3 deletions docs/reference/stdlibs/std/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ func TestSetOrigPkgAddr(addr Address)
func TestSetOrigSend(sent, spent Coins)
func TestIssueCoins(addr Address, coins Coins)
func TestSetRealm(realm Realm)
func NewUserRealm(address Address)
func NewCodeRealm(pkgPath string)
func NewUserRealm(address Address) Realm
func NewCodeRealm(pkgPath string) Realm
```

---
Expand Down Expand Up @@ -132,7 +132,7 @@ userRealm := std.NewUserRealm(addr)
## NewCodeRealm

```go
func NewCodeRealm(pkgPath string)
func NewCodeRealm(pkgPath string) Realm
```

Creates a new code realm for testing purposes.
Expand Down
11 changes: 6 additions & 5 deletions misc/docs-linter/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ package main
import "errors"

var (
errEmptyPath = errors.New("you need to pass in a path to scan")
err404Link = errors.New("link returned a 404")
errFound404Links = errors.New("found links resulting in a 404 response status")
errFoundUnescapedJSXTags = errors.New("found unescaped JSX tags")
errFoundLintItems = errors.New("found items that need linting")
errEmptyPath = errors.New("you need to pass in a path to scan")
err404Link = errors.New("link returned a 404")
errFound404Links = errors.New("found links resulting in a 404 response status")
errFoundUnescapedJSXTags = errors.New("found unescaped JSX tags")
errFoundUnreachableLocalLinks = errors.New("found local links that stat fails on")
errFoundLintItems = errors.New("found items that need linting")
)
28 changes: 17 additions & 11 deletions misc/docs-linter/jsx.go
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
package main

import (
"context"
"bytes"
"fmt"
"regexp"
"strings"
)

var (
reCodeBlocks = regexp.MustCompile("(?s)```.*?```")
reInlineCode = regexp.MustCompile("`[^`]*`")
)

// extractJSX extracts JSX tags from given file content
func extractJSX(fileContent []byte) []string {
text := string(fileContent)

// Remove code blocks
reCodeBlocks := regexp.MustCompile("(?s)```.*?```")
contentNoCodeBlocks := reCodeBlocks.ReplaceAllString(text, "")

// Remove inline code
reInlineCode := regexp.MustCompile("`[^`]*`")
contentNoInlineCode := reInlineCode.ReplaceAllString(contentNoCodeBlocks, "")

// Extract JSX/HTML elements
Expand All @@ -34,23 +38,25 @@ func extractJSX(fileContent []byte) []string {
return filteredMatches
}

func lintJSX(fileUrlMap map[string][]string, ctx context.Context) error {
found := false
for filePath, tags := range fileUrlMap {
filePath := filePath
func lintJSX(filepathToJSX map[string][]string) (string, error) {
var (
found bool
output bytes.Buffer
)
for filePath, tags := range filepathToJSX {
for _, tag := range tags {
if !found {
fmt.Println("Tags that need checking:")
output.WriteString("Tags that need checking:\n")
found = true
}

fmt.Printf(">>> %s (found in file: %s)\n", tag, filePath)
output.WriteString(fmt.Sprintf(">>> %s (found in file: %s)\n", tag, filePath))
}
}

if found {
return errFoundUnescapedJSXTags
return output.String(), errFoundUnescapedJSXTags
}

return nil
return "", nil
}
128 changes: 57 additions & 71 deletions misc/docs-linter/links.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,105 +3,91 @@ package main
import (
"bufio"
"bytes"
"context"
"fmt"
"golang.org/x/sync/errgroup"
"io"
"mvdan.cc/xurls/v2"
"net/http"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
)

// Valid start to an embedmd link
const embedmd = `[embedmd]:# `

// Regular expression to match markdown links to local (relative) files,
// i.e. targets starting with ./ or ../
var regex = regexp.MustCompile(`]\((\.\.?/.+?)\)`)

// extractLocalLinks extracts links to local files from the given file content.
// It returns the raw relative paths (markdown anchors stripped) so the caller
// can stat them on the filesystem.
func extractLocalLinks(fileContent []byte) []string {
	scanner := bufio.NewScanner(bytes.NewReader(fileContent))
	links := make([]string, 0)

	// Scan file line by line
	for scanner.Scan() {
		line := scanner.Text()

		// Check for embedmd links
		if embedmdPos := strings.Index(line, embedmd); embedmdPos != -1 {
			// Skip past the marker and the opening parenthesis
			link := line[embedmdPos+len(embedmd)+1:]

			// Find closing parentheses
			if closePar := strings.LastIndex(link, ")"); closePar != -1 {
				link = link[:closePar]
			}

			// Remove space (drops trailing embedmd arguments, e.g. the language)
			if pos := strings.Index(link, " "); pos != -1 {
				link = link[:pos]
			}

			// Add link to be checked
			links = append(links, link)
			continue
		}

		// Find all matches
		matches := regex.FindAllString(line, -1)

		// Extract the local file links
		for _, match := range matches {
			// Remove ]( from the beginning and ) from end of link
			match = match[2 : len(match)-1]

			// Remove markdown headers in links
			if pos := strings.Index(match, "#"); pos != -1 {
				match = match[:pos]
			}

			links = append(links, match)
		}
	}

	return links
}

func lintLinks(fileUrlMap map[string][]string, ctx context.Context) error {
// Filter links by prefix & ignore localhost
// Setup parallel checking for links
g, _ := errgroup.WithContext(ctx)

func lintLocalLinks(filepathToLinks map[string][]string, docsPath string) (string, error) {
var (
lock sync.Mutex
notFoundUrls []string
found bool
output bytes.Buffer
)

for filePath, urls := range fileUrlMap {
filePath := filePath
for _, url := range urls {
url := url
g.Go(func() error {
if err := checkUrl(url); err != nil {
lock.Lock()
notFoundUrls = append(notFoundUrls, fmt.Sprintf(">>> %s (found in file: %s)", url, filePath))
lock.Unlock()
}

return nil
})
}
}
for filePath, links := range filepathToLinks {
for _, link := range links {
path := filepath.Join(docsPath, filepath.Dir(filePath), link)

if err := g.Wait(); err != nil {
return err
}
if _, err := os.Stat(path); err != nil {
if !found {
output.WriteString("Could not find files with the following paths:\n")
found = true
}

// Print out the URLs that returned a 404 along with the file names
if len(notFoundUrls) > 0 {
fmt.Println("Links that need checking:")
for _, result := range notFoundUrls {
fmt.Println(result)
output.WriteString(fmt.Sprintf(">>> %s (found in file: %s)\n", link, filePath))
}
}

return errFound404Links
}

return nil
}

// checkUrl checks if a URL is a 404
func checkUrl(url string) error {
// Attempt to retrieve the HTTP header
resp, err := http.Get(url)
if err != nil || resp.StatusCode == http.StatusNotFound {
return err404Link
}

// Ensure the response body is closed properly
cleanup := func(Body io.ReadCloser) error {
if err := Body.Close(); err != nil {
return fmt.Errorf("could not close response properly: %w", err)
}

return nil
if found {
return output.String(), errFoundUnreachableLocalLinks
}

return cleanup(resp.Body)
return "", nil
}
Loading

0 comments on commit 10a6f7e

Please sign in to comment.