Skip to content

Commit

Permalink
Retry count for validate links task #488
Browse files Browse the repository at this point in the history
Selectively retry status codes 500, 502, 503, 504 and SocketTimeoutException up to a configured max retries when validating links.
  • Loading branch information
kpritam authored Jun 10, 2021
1 parent ab19526 commit 218750f
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 6 deletions.
35 changes: 29 additions & 6 deletions core/src/main/scala/com/lightbend/paradox/ParadoxProcessor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ package com.lightbend.paradox
import com.lightbend.paradox.template.PageTemplate
import com.lightbend.paradox.markdown._
import com.lightbend.paradox.tree.Tree.{ Forest, Location }

import java.io.{ File, FileOutputStream, OutputStreamWriter }
import java.nio.charset.StandardCharsets

import org.jsoup.Jsoup
import org.jsoup.{ Connection, Jsoup }
import org.jsoup.nodes.Document
import org.pegdown.ast._

import java.net.SocketTimeoutException
import scala.annotation.tailrec
import scala.collection.JavaConverters._
import scala.util.control.NonFatal
Expand Down Expand Up @@ -108,6 +109,7 @@ class ParadoxProcessor(reader: Reader = new Reader, writer: Writer = new Writer,
groups: Map[String, Seq[String]],
properties: Map[String, String],
ignorePaths: List[Regex],
retryCount: Int,
validateAbsolute: Boolean,
logger: ParadoxLogger): Int = {

Expand Down Expand Up @@ -151,7 +153,7 @@ class ParadoxProcessor(reader: Reader = new Reader, writer: Writer = new Writer,
reportErrorOnSources(errorCollector, c.allSources)(s"Could not find path [${uri.getPath}] in site")
}
case absolute if validateAbsolute =>
validateExternalLink(absolute, errorCollector, logger)
validateExternalLink(absolute, retryCount, errorCollector, logger)
case _ =>
// Ignore
}
Expand All @@ -160,19 +162,20 @@ class ParadoxProcessor(reader: Reader = new Reader, writer: Writer = new Writer,
errorCollector.errorCount
}

private def validateExternalLink(capturedLink: CapturedLink, errorContext: ErrorContext, logger: ParadoxLogger) = {
private def validateExternalLink(capturedLink: CapturedLink, retryCount: Int, errorContext: ErrorContext, logger: ParadoxLogger) = {
logger.info(s"Validating external link: ${capturedLink.link}")

def reportError = reportErrorOnSources(errorContext, capturedLink.allSources)(_)
val url = capturedLink.link.toString

try {
val response = Jsoup.connect(url)
val request = Jsoup.connect(url)
.userAgent("Paradox Link Validator <https://github.com/lightbend/paradox>")
.followRedirects(false)
.ignoreHttpErrors(true)
.ignoreContentType(true)
.execute()

val response = Validator.validateWithRetries(request, retryCount)

// jsoup doesn't offer any simple way to clean up, the only way to close is to get the body stream and close it,
// but if you've already read the response body, that will throw an exception, and there's no way to check if
Expand Down Expand Up @@ -493,3 +496,23 @@ class ParadoxProcessor(reader: Reader = new Reader, writer: Writer = new Writer,
}

}

/**
 * Helper for executing link-validation requests with a bounded number of retries.
 */
object Validator {

  // Status codes treated as possibly transient, and therefore worth retrying:
  //   500 Internal Server Error
  //   502 Bad Gateway
  //   503 Service Unavailable
  //   504 Gateway Timeout
  private val retryableStatusCodes = Set(500, 502, 503, 504)

  /**
   * Executes `request`, re-executing it while the outcome is a retryable failure and
   * retries remain.
   *
   * An attempt is retried when it either throws a `SocketTimeoutException` or returns a
   * response whose status code is one of 500, 502, 503 or 504. Any other response is
   * returned immediately, and any other exception propagates immediately.
   *
   * At most `retryCount + 1` requests are executed in total. A `retryCount` of zero or
   * less means the request is executed exactly once.
   *
   * @param request    the prepared connection to execute
   * @param retryCount maximum number of additional attempts after the first one
   * @return the first non-retryable response, or the last response received once the
   *         retries are exhausted
   * @throws java.net.SocketTimeoutException if the final permitted attempt times out
   */
  def validateWithRetries(request: Connection, retryCount: Int): Connection.Response = {

    @tailrec
    def attempt(remaining: Int): Connection.Response = {
      // Capture the outcome as a value so that the retry below happens outside the
      // try/catch. Retrying from inside the try would let outer frames catch a timeout
      // that had already exhausted its budget and start retrying all over again.
      val outcome: Either[SocketTimeoutException, Connection.Response] =
        try Right(request.execute())
        catch { case e: SocketTimeoutException => Left(e) }

      outcome match {
        // `<= 0` rather than `== 0` so that a negative count cannot recurse forever.
        case Right(res) if remaining <= 0 || !retryableStatusCodes.contains(res.statusCode()) => res
        case Left(e) if remaining <= 0 => throw e
        case _ => attempt(remaining - 1)
      }
    }

    attempt(retryCount)
  }

}
8 changes: 8 additions & 0 deletions docs/src/main/paradox/validation.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,11 @@ paradoxValidationIgnorePaths ++= Seq(
"/docs/version/(?!latest).*"
)
```

## Retrying links check

The `paradoxValidateLinksRetryCount` setting makes the link check retry up to the given number of times when the linked server does not reply in time (socket timeout) or returns a possibly temporary failure status code (500, 502, 503 or 504).

```scala
paradoxValidateLinksRetryCount := 3 // retries the link check up to 3 times on a timeout or a 500, 502, 503 or 504 response
```
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ trait ParadoxKeys {
val paradoxBrowse = taskKey[Unit]("Open the docs in the default browser")
val paradoxValidateInternalLinks = taskKey[Unit]("Validate internal, non ref paradox links.")
val paradoxValidateLinks = taskKey[Unit]("Validate all non ref paradox links.")
val paradoxValidateLinksRetryCount = taskKey[Int]("Number of retries for validate links task.")
val paradoxValidationIgnorePaths = settingKey[List[Regex]]("List of regular expressions to apply to paths to determine if they should be ignored.")
val paradoxValidationSiteBasePath = settingKey[Option[String]]("The base path that the documentation is deployed to, allows validating links on the docs site that are outside of the documentation root tree")
val paradoxSingle = taskKey[File]("Build the single page HTML Paradox site")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ object ParadoxPlugin extends AutoPlugin {
paradoxLeadingBreadcrumbs := Nil,
paradoxGroups := Map.empty,
libraryDependencies ++= paradoxTheme.value.toSeq map (_ % ParadoxTheme),
paradoxValidateLinksRetryCount := 0,
paradoxValidationIgnorePaths := List("http://localhost.*".r),
paradoxValidationSiteBasePath := None
)
Expand Down Expand Up @@ -378,6 +379,7 @@ object ParadoxPlugin extends AutoPlugin {
paradoxGroups.value,
paradoxProperties.value,
paradoxValidationIgnorePaths.value,
paradoxValidateLinksRetryCount.value,
validateAbsolute,
new SbtParadoxLogger(strms.log)
)
Expand Down

0 comments on commit 218750f

Please sign in to comment.