diff --git a/config/index.js b/config/index.js
index 48a36afd..907a2bb7 100644
--- a/config/index.js
+++ b/config/index.js
@@ -3,7 +3,7 @@ import { combineConfigs, validateConfig } from './util.js'
/**
*
- * @returns {{defaultConfig, environment, url, checkService: { userAgent: string }}}
+ * @returns {{defaultConfig, environment, url, checkService: { userAgent: string }, datasetsConfig}}
*/
const getConfig = () => {
const environment = process.env.NODE_ENV || process.env.ENVIRONMENT || 'production'
diff --git a/src/controllers/OrganisationsController.js b/src/controllers/OrganisationsController.js
index 0deeff7a..7617c16f 100644
--- a/src/controllers/OrganisationsController.js
+++ b/src/controllers/OrganisationsController.js
@@ -7,6 +7,7 @@ import organisationsMiddleware from '../middleware/organisations.middleware.js'
import getStartedMiddleware from '../middleware/getStarted.middleware.js'
import overviewMiddleware from '../middleware/overview.middleware.js'
import datasetDataviewMiddleware from '../middleware/dataview.middleware.js'
+import datasetEndpointIssueMiddleware from '../middleware/datasetEndpointIssue.middleware.js'
const organisationsController = {
organisationsMiddleware,
@@ -17,7 +18,8 @@ const organisationsController = {
issueTableMiddleware,
getStartedMiddleware,
overviewMiddleware,
- datasetDataviewMiddleware
+ datasetDataviewMiddleware,
+ datasetEndpointIssueMiddleware
}
export default organisationsController
diff --git a/src/middleware/common.middleware.js b/src/middleware/common.middleware.js
index 2f1958ae..e259e808 100644
--- a/src/middleware/common.middleware.js
+++ b/src/middleware/common.middleware.js
@@ -1,7 +1,7 @@
import logger from '../utils/logger.js'
import { types } from '../utils/logging.js'
import performanceDbApi from '../services/performanceDbApi.js'
-import { fetchOne, FetchOptions, FetchOneFallbackPolicy, fetchMany, renderTemplate } from './middleware.builders.js'
+import { fetchMany, fetchOne, FetchOneFallbackPolicy, FetchOptions, renderTemplate } from './middleware.builders.js'
import * as v from 'valibot'
import { pagination } from '../utils/pagination.js'
import datasette from '../services/datasette.js'
@@ -307,10 +307,11 @@ export const extractJsonFieldFromEntities = (req, res, next) => {
return next(new Error('Invalid entities format'))
}
+ let numEntitiesWithNoJson = 0
req.entities = entities.map(entity => {
const jsonField = entity.json
if (!jsonField || jsonField === '') {
- logger.info(`common.middleware/extractJsonField: No json field for entity ${entity.toString()}`)
+ numEntitiesWithNoJson += 1
return entity
}
entity.json = undefined
@@ -318,11 +319,17 @@ export const extractJsonFieldFromEntities = (req, res, next) => {
const parsedJson = JSON.parse(jsonField)
entity = Object.assign({}, parsedJson, entity)
} catch (err) {
- logger.warn(`common.middleware/extractJsonField: Error parsing JSON for entity ${entity.toString()}: ${err.message}`)
+ logger.warn('common.middleware/extractJsonField: Error parsing JSON',
+ { type: types.App, json: jsonField, entity: entity.entity, errorMessage: err.message })
}
return entity
})
+  if (numEntitiesWithNoJson > 0) {
+    // numEntitiesWithNoJson is a numeric counter, so interpolate it directly (a number has no `.length`).
+    logger.info(`Got ${numEntitiesWithNoJson} entities with no json field`, { type: types.App, endpoint: req.originalUrl })
+  }
+
next()
}
@@ -629,3 +636,74 @@ export const prepareIssueDetailsTemplateParams = (req, res, next) => {
next()
}
+
+// Schema requires `lpa` and `dataset` to be strings; `resource` is not validated here (NOTE(review): confirm that is intended).
+export const validateOrgAndDatasetQueryParams = validateQueryParams({
+ schema: v.object({
+ lpa: v.string(),
+ dataset: v.string()
+ })
+})
+
+/**
+ * Fetches the latest `reporting_historic_endpoints` row for each endpoint URL of the organisation
+ * (`params.lpa`) and dataset (`params.dataset`), skipping rows whose resource or endpoint end date has
+ * passed. `status` is returned as an integer, or NULL when blank. Result key: `sources`.
+ */
+export const fetchSources = fetchMany({
+  query: ({ params }) => {
+    // Params are spliced into SQL string literals, so double any single quote to keep them inside the literal.
+    const quoted = (value) => String(value).replace(/'/g, "''")
+    return `
+    WITH RankedEndpoints AS (
+      SELECT
+        rhe.endpoint,
+        rhe.endpoint_url,
+        case when rhe.status = '' or rhe.status is null then null else cast(rhe.status as int) end as status,
+        rhe.exception,
+        rhe.resource,
+        rhe.latest_log_entry_date,
+        rhe.endpoint_entry_date,
+        rhe.endpoint_end_date,
+        rhe.resource_start_date as resource_start_date,
+        rhe.resource_end_date,
+        s.documentation_url,
+        ROW_NUMBER() OVER (
+          PARTITION BY rhe.endpoint_url ORDER BY rhe.latest_log_entry_date DESC
+        ) AS row_num
+      FROM
+        reporting_historic_endpoints rhe
+        LEFT JOIN source s ON rhe.endpoint = s.endpoint
+      WHERE
+        REPLACE(rhe.organisation, '-eng', '') = '${quoted(params.lpa)}'
+        AND rhe.pipeline = '${quoted(params.dataset)}'
+        AND (
+          rhe.resource_end_date >= current_timestamp OR rhe.resource_end_date IS NULL OR rhe.resource_end_date = ''
+        )
+        AND (
+          rhe.endpoint_end_date >= current_timestamp OR rhe.endpoint_end_date IS NULL OR rhe.endpoint_end_date = ''
+        )
+    )
+    SELECT
+      endpoint,
+      endpoint_url,
+      status,
+      exception,
+      resource,
+      latest_log_entry_date,
+      endpoint_entry_date,
+      endpoint_end_date,
+      resource_start_date,
+      resource_end_date,
+      documentation_url
+    FROM
+      RankedEndpoints
+    WHERE
+      row_num = 1
+    ORDER BY
+      latest_log_entry_date DESC;
+  `
+  },
+  result: 'sources'
+})
+
diff --git a/src/middleware/datasetEndpointIssue.middleware.js b/src/middleware/datasetEndpointIssue.middleware.js
new file mode 100644
index 00000000..20d3c420
--- /dev/null
+++ b/src/middleware/datasetEndpointIssue.middleware.js
@@ -0,0 +1,89 @@
+import * as v from 'valibot'
+import {
+ fetchDatasetInfo,
+ getDatasetTaskListError,
+ validateOrgAndDatasetQueryParams, validateQueryParams
+} from './common.middleware.js'
+import { fetchOne } from './middleware.builders.js'
+
+/** @typedef {import('../types/datasette')} Types */
+
+// Looks up name, organisation and statistical_geography for the organisation in `params.lpa`; result key is `orgInfo`.
+// The value is spliced into a SQL string literal, so single quotes are doubled to keep it inside the literal.
+const fetchOrgInfoWithStatGeo = fetchOne({
+  query: ({ params }) => /* sql */ `SELECT name, organisation, statistical_geography FROM organisation WHERE organisation = '${String(params.lpa).replace(/'/g, "''")}'`,
+  result: 'orgInfo'
+})
+
+// Fetches the most recent reporting_historic_endpoints row for `params.endpoint`, joined with reporting_latest_endpoints
+// for days_since_200; result key is `source`. `status` is cast to an integer (NULL when blank), as fetchSources does.
+const fetchSourceByEndpoint = fetchOne({
+  query: ({ params }) => {
+    // The endpoint comes from the URL and lands in a SQL string literal, so double any single quote.
+    const endpoint = String(params.endpoint).replace(/'/g, "''")
+    return /* sql */ `
+      SELECT
+        rhe.endpoint,
+        rhe.endpoint_url,
+        CASE WHEN rhe.status = '' OR rhe.status IS NULL THEN NULL ELSE CAST(rhe.status AS int) END AS status,
+        rhe.exception,
+        rhe.latest_log_entry_date,
+        rle.days_since_200
+      FROM reporting_historic_endpoints rhe
+        LEFT JOIN reporting_latest_endpoints rle ON rhe.endpoint = rle.endpoint
+      WHERE rhe.endpoint = '${endpoint}'
+      ORDER BY rhe.latest_log_entry_date DESC
+      LIMIT 1`
+  },
+  result: 'source'
+})
+
+/**
+ * Middleware. Builds `req.templateParams` for the endpoint error page from `req.orgInfo`, `req.dataset`
+ * and `req.source`, then calls `next()`. `latest_200_date` is now minus `source.days_since_200` days.
+ * Unknown values are passed as undefined rather than null: the OrgEndpointError schema declares
+ * `http_status` and `latest_200_date` with `v.optional`, which accepts undefined but not null.
+ * @param { { orgInfo: Types.OrgInfo, dataset: Types.DatasetInfo, source: Types.Source }} req
+ * @param res
+ * @param next
+ */
+export const prepareDatasetEndpointIssueTemplateParams = (req, res, next) => {
+  const { orgInfo: organisation, dataset, source } = req
+  /** @type {number|null} */
+  const daysSince200 = source.days_since_200
+  /** @type {string|undefined} */
+  let last200Datetime
+  if (Number.isSafeInteger(daysSince200) && daysSince200 >= 0) {
+    // keep only the date part (YYYY-MM-DD): the input is a whole number of days
+    last200Datetime = new Date(Date.now() - daysSince200 * 24 * 60 * 60 * 1000).toISOString().split('T')[0]
+  }
+
+  req.templateParams = {
+    organisation,
+    dataset,
+    errorData: {
+      endpoint_url: source.endpoint_url,
+      http_status: source.status ?? undefined,
+      latest_log_entry_date: source.latest_log_entry_date,
+      latest_200_date: last200Datetime
+    }
+  }
+
+  next()
+}
+
+// Schema requires a non-empty `endpoint` string.
+// NOTE(review): presumably applied to the `:endpoint` route segment - confirm in validateQueryParams.
+const validateEndpointQueryParam = validateQueryParams({
+  schema: v.object({ endpoint: v.pipe(v.string(), v.minLength(1)) })
+})
+
+// Middleware chain for the endpoint-error page, in execution order: validate params, run the org/dataset/source
+// fetchers, build the template params, then getDatasetTaskListError (presumably the renderer - confirm).
+export default [
+  validateOrgAndDatasetQueryParams, validateEndpointQueryParam,
+  fetchOrgInfoWithStatGeo, fetchDatasetInfo,
+  fetchSourceByEndpoint,
+  prepareDatasetEndpointIssueTemplateParams,
+  getDatasetTaskListError
+]
diff --git a/src/middleware/datasetOverview.middleware.js b/src/middleware/datasetOverview.middleware.js
index 373b9785..7f8903a6 100644
--- a/src/middleware/datasetOverview.middleware.js
+++ b/src/middleware/datasetOverview.middleware.js
@@ -1,6 +1,17 @@
-import { fetchDatasetInfo, fetchLatestResource, fetchLpaDatasetIssues, fetchOrgInfo, getDatasetTaskListError, isResourceAccessible, isResourceIdInParams, isResourceNotAccessible, logPageError, pullOutDatasetSpecification, takeResourceIdFromParams } from './common.middleware.js'
-import { fetchOne, fetchIf, fetchMany, renderTemplate, FetchOptions, onlyIf } from './middleware.builders.js'
-import { fetchResourceStatus, prepareDatasetTaskListErrorTemplateParams } from './datasetTaskList.middleware.js'
+import {
+ fetchDatasetInfo,
+ fetchLatestResource,
+ fetchLpaDatasetIssues,
+ fetchOrgInfo,
+ fetchSources,
+ isResourceAccessible,
+ isResourceIdInParams,
+ logPageError,
+ pullOutDatasetSpecification,
+ takeResourceIdFromParams
+} from './common.middleware.js'
+import { fetchIf, fetchMany, fetchOne, FetchOptions, renderTemplate } from './middleware.builders.js'
+import { fetchResourceStatus } from './datasetTaskList.middleware.js'
import performanceDbApi from '../services/performanceDbApi.js'
import { getDeadlineHistory, requiredDatasets } from '../utils/utils.js'
import logger from '../utils/logger.js'
@@ -39,65 +50,6 @@ const fetchSpecification = fetchOne({
result: 'specification'
})
-const fetchSources = fetchMany({
- query: ({ params }) => `
- WITH RankedEndpoints AS (
- SELECT
- rhe.endpoint,
- rhe.endpoint_url,
- rhe.status,
- rhe.exception,
- rhe.resource,
- rhe.latest_log_entry_date,
- rhe.endpoint_entry_date,
- rhe.endpoint_end_date,
- rhe.resource_start_date as resource_start_date,
- rhe.resource_end_date,
- s.documentation_url,
- ROW_NUMBER() OVER (
- PARTITION BY rhe.endpoint_url
- ORDER BY
- rhe.latest_log_entry_date DESC
- ) AS row_num
- FROM
- reporting_historic_endpoints rhe
- LEFT JOIN source s ON rhe.endpoint = s.endpoint
- WHERE
- REPLACE(rhe.organisation, '-eng', '') = '${params.lpa}'
- AND rhe.pipeline = '${params.dataset}'
- AND (
- rhe.resource_end_date >= current_timestamp
- OR rhe.resource_end_date IS NULL
- OR rhe.resource_end_date = ''
- )
- AND (
- rhe.endpoint_end_date >= current_timestamp
- OR rhe.endpoint_end_date IS NULL
- OR rhe.endpoint_end_date = ''
- )
- )
- SELECT
- endpoint,
- endpoint_url,
- status,
- exception,
- resource,
- latest_log_entry_date,
- endpoint_entry_date,
- endpoint_end_date,
- resource_start_date,
- resource_end_date,
- documentation_url
- FROM
- RankedEndpoints
- WHERE
- row_num = 1
- ORDER BY
- latest_log_entry_date DESC;
- `,
- result: 'sources'
-})
-
/**
* Sets notices from a source key in the request object.
*
@@ -175,6 +127,12 @@ const fetchEntityCount = fetchOne({
dataset: FetchOptions.fromParams
})
+/**
+ *
+ * @param req {{ orgInfo: OrgInfo, sources: Source[], issues?: Issue[] }} request object
+ * @param res {import('express').Response}
+ * @param next {import('express').NextFunction}
+ */
export const prepareDatasetOverviewTemplateParams = (req, res, next) => {
const { orgInfo, datasetSpecification, columnSummary, entityCount, sources, dataset, issues, notice } = req
@@ -192,24 +150,26 @@ export const prepareDatasetOverviewTemplateParams = (req, res, next) => {
const numberOfExpectedFields = specFields.length
- // I'm pretty sure every endpoint has a separate documentation-url, but this isn't currently represented in the performance db. need to double check this and update if so
+ let endpointErrorIssues = 0
const endpoints = sources.sort((a, b) => {
- if (a.status >= 200 && a.status < 300) return -1
- if (b.status >= 200 && b.status < 300) return 1
+ if (a.status && a.status >= 200 && a.status < 300) return -1
+ if (b.status && b.status >= 200 && b.status < 300) return 1
return 0
}).map((source, index) => {
let error
- if (parseInt(source.status) < 200 || parseInt(source.status) >= 300) {
+ if (!source.status || source.status < 200 || source.status >= 300) {
error = {
- code: parseInt(source.status),
+ code: source.status,
exception: source.exception
}
+ endpointErrorIssues += 1
}
return {
name: `Data Url ${index}`,
- endpoint: source.endpoint_url,
+ endpoint: source.endpoint,
+ endpoint_url: source.endpoint_url,
documentation_url: source.documentation_url,
lastAccessed: source.latest_log_entry_date,
lastUpdated: source.resource_start_date, // as in: when was the _resource_ updated, not data under that resource
@@ -220,7 +180,7 @@ export const prepareDatasetOverviewTemplateParams = (req, res, next) => {
req.templateParams = {
organisation: orgInfo,
dataset,
- taskCount: issues.length ?? 0,
+ taskCount: (issues ?? []).length + endpointErrorIssues,
stats: {
numberOfFieldsSupplied: numberOfFieldsSupplied ?? 0,
numberOfFieldsMatched: numberOfFieldsMatched ?? 0,
@@ -249,8 +209,6 @@ export default [
fetchResourceStatus,
fetchIf(isResourceIdInParams, fetchLatestResource, takeResourceIdFromParams),
fetchIf(isResourceAccessible, fetchLpaDatasetIssues),
- onlyIf(isResourceNotAccessible, prepareDatasetTaskListErrorTemplateParams),
- onlyIf(isResourceNotAccessible, getDatasetTaskListError),
fetchSpecification,
pullOutDatasetSpecification,
fetchSources,
diff --git a/src/middleware/datasetTaskList.middleware.js b/src/middleware/datasetTaskList.middleware.js
index 284dd649..95ca0721 100644
--- a/src/middleware/datasetTaskList.middleware.js
+++ b/src/middleware/datasetTaskList.middleware.js
@@ -1,19 +1,19 @@
import {
+ and,
fetchDatasetInfo,
- isResourceAccessible,
- isResourceNotAccessible,
- fetchLatestResource,
fetchEntityCount,
+ fetchLatestResource,
+ fetchLpaDatasetIssues, fetchSources,
+ isResourceAccessible,
+ isResourceIdValid,
logPageError,
- fetchLpaDatasetIssues,
- validateQueryParams,
- getDatasetTaskListError,
- isResourceIdValid, and
+ validateOrgAndDatasetQueryParams
} from './common.middleware.js'
-import { fetchOne, fetchIf, onlyIf, renderTemplate } from './middleware.builders.js'
+import { fetchIf, fetchOne, renderTemplate } from './middleware.builders.js'
import performanceDbApi from '../services/performanceDbApi.js'
import { statusToTagClass } from '../filters/filters.js'
-import * as v from 'valibot'
+
+/** @typedef {import('../types/datasette')} Types */
/**
* Fetches the resource status
@@ -48,13 +48,13 @@ function getStatusTag (status) {
/**
* Middleware. Updates req with `templateParams`
*
- * @param {*} req
+ * @param {{ orgInfo: Types.OrgInfo, sources: Types.Source[], entityCount: undefined | { entity_count: number }, issues: Types.Issue[] }} req
* @param {*} res
* @param {*} next
* @returns { { templateParams: object }}
*/
export const prepareDatasetTaskListTemplateParams = (req, res, next) => {
- const { issues, entityCount: entityCountRow, params, dataset, orgInfo: organisation } = req
+ const { issues, entityCount: entityCountRow, params, dataset, orgInfo: organisation, sources } = req
const { entity_count: entityCount } = entityCountRow ?? { entity_count: 0 }
const { lpa, dataset: datasetId } = params
console.assert(typeof entityCount === 'number', 'entityCount should be a number')
@@ -69,6 +69,19 @@ export const prepareDatasetTaskListTemplateParams = (req, res, next) => {
}
})
+  // Include sources which couldn't be accessed: no recorded status, or a status outside 2xx.
+  // This is the same test prepareDatasetOverviewTemplateParams applies when it adds endpoint
+  // errors to its task count, so the overview count and this list stay in step.
+  for (const source of sources ?? []) {
+    if (!source.status || source.status < 200 || source.status >= 300) {
+      taskList.push({
+        title: { text: 'There was an error accessing the URL' },
+        href: `/organisations/${encodeURIComponent(lpa)}/${encodeURIComponent(datasetId)}/endpoint-error/${encodeURIComponent(source.endpoint)}`,
+        status: getStatusTag('Error')
+      })
+    }
+  }
+
req.templateParams = {
taskList,
organisation,
@@ -84,59 +97,22 @@ const getDatasetTaskList = renderTemplate({
handlerName: 'getDatasetTaskList'
})
-/**
- * Middleware. Updates req with `templateParams`
- *
- * @param {*} req
- * @param {*} res
- * @param {} next
- * @returns {{ templateParams: object }}
- */
-export const prepareDatasetTaskListErrorTemplateParams = (req, res, next) => {
- const { orgInfo: organisation, dataset, resourceStatus: resource } = req
-
- const daysSince200 = resource.days_since_200
- const today = new Date()
- const last200Date = new Date(
- today.getTime() - daysSince200 * 24 * 60 * 60 * 1000
- )
- const last200Datetime = last200Date.toISOString().slice(0, 19) + 'Z'
-
- req.templateParams = {
- organisation,
- dataset,
- errorData: {
- endpoint_url: resource.endpoint_url,
- http_status: resource.status,
- latest_log_entry_date: resource.latest_log_entry_date,
- latest_200_date: last200Datetime
- }
- }
-
- next()
-}
-
-const validateParams = validateQueryParams({
- schema: v.object({
- lpa: v.string(),
- dataset: v.string()
- })
-})
+/* eslint-disable-next-line no-return-assign */
+const emptyIssuesList = (req) => req.issues = []
/* eslint-disable-next-line no-return-assign */
const zeroEntityCount = (req) => req.entityCount = { entity_count: 0 }
export default [
- validateParams,
+ validateOrgAndDatasetQueryParams,
fetchResourceStatus,
+ fetchSources,
fetchOrgInfoWithStatGeo,
fetchDatasetInfo,
fetchIf(isResourceAccessible, fetchLatestResource),
- fetchIf(isResourceAccessible, fetchLpaDatasetIssues),
+ fetchIf(isResourceAccessible, fetchLpaDatasetIssues, emptyIssuesList),
fetchIf(and(isResourceAccessible, isResourceIdValid), fetchEntityCount, zeroEntityCount),
- onlyIf(isResourceAccessible, prepareDatasetTaskListTemplateParams),
- onlyIf(isResourceAccessible, getDatasetTaskList),
- onlyIf(isResourceNotAccessible, prepareDatasetTaskListErrorTemplateParams),
- onlyIf(isResourceNotAccessible, getDatasetTaskListError),
+ prepareDatasetTaskListTemplateParams,
+ getDatasetTaskList,
logPageError
]
diff --git a/src/routes/organisations.js b/src/routes/organisations.js
index 62b18553..047f77fc 100644
--- a/src/routes/organisations.js
+++ b/src/routes/organisations.js
@@ -5,6 +5,7 @@ const router = express.Router()
router.get('/:lpa/:dataset/get-started', OrganisationsController.getStartedMiddleware)
router.get('/:lpa/:dataset/overview', OrganisationsController.datasetOverviewMiddleware)
+router.get('/:lpa/:dataset/endpoint-error/:endpoint', OrganisationsController.datasetEndpointIssueMiddleware)
router.get('/:lpa/:dataset/data/:pageNumber', OrganisationsController.datasetDataviewMiddleware)
router.get('/:lpa/:dataset/data', OrganisationsController.datasetDataviewMiddleware)
router.get('/:lpa/:dataset/:issue_type/:issue_field/entity/:pageNumber', OrganisationsController.entityIssueDetailsMiddleware)
diff --git a/src/routes/schemas.js b/src/routes/schemas.js
index 77447993..5a5117bf 100644
--- a/src/routes/schemas.js
+++ b/src/routes/schemas.js
@@ -153,7 +153,8 @@ export const OrgDatasetOverview = v.strictObject({
endpoints: v.array(v.strictObject({
name: v.string(),
documentation_url: v.optional(v.string()),
- endpoint: v.string(),
+ endpoint_url: v.string(),
+ endpoint: NonEmptyString,
lastAccessed: v.string(),
lastUpdated: v.nullable(v.string()),
error: v.optional(v.strictObject({
@@ -198,9 +199,9 @@ export const OrgEndpointError = v.strictObject({
dataset: DatasetNameField,
errorData: v.strictObject({
endpoint_url: v.url(),
- http_status: v.integer(),
+ http_status: v.optional(v.integer()),
latest_log_entry_date: v.isoDateTime(),
- latest_200_date: v.isoDateTime()
+ latest_200_date: v.optional(v.isoDateTime())
})
})
diff --git a/src/services/performanceDbApi.js b/src/services/performanceDbApi.js
index 525ed2af..1b053a36 100644
--- a/src/services/performanceDbApi.js
+++ b/src/services/performanceDbApi.js
@@ -177,7 +177,7 @@ export default {
resourceStatusQuery (lpa, datasetId) {
return /* sql */ `
- select resource, endpoint_url, status, latest_log_entry_date, days_since_200
+ select resource, endpoint_url, endpoint, status, latest_log_entry_date, days_since_200
from reporting_latest_endpoints
WHERE REPLACE(organisation, '-eng', '') = '${lpa}'
AND pipeline = '${datasetId}'`
diff --git a/src/types/datasette.js b/src/types/datasette.js
new file mode 100644
index 00000000..e8623578
--- /dev/null
+++ b/src/types/datasette.js
@@ -0,0 +1,33 @@
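+/** @file Type definitions for data coming out of digital-land datasette instance. */
+
+/**
+ * One endpoint row from the reporting_historic_endpoints queries; not every query selects every field.
+ * @typedef Source
+ * @property {string} endpoint
+ * @property {string} endpoint_url
+ * @property {number | null} status
+ * @property {string} exception
+ * @property {string} latest_log_entry_date
+ * @property {string} resource_start_date
+ * @property {string} documentation_url
+ * @property {number | null} [days_since_200] only on rows joined with reporting_latest_endpoints
+ */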
+
+/**
+ * @typedef OrgInfo
+ * @property {string} name Full name of the organisation
+ * @property {string} organisation
+ */
+
+/**
+ * @typedef DatasetInfo
+ * @property {string} name Full name of the dataset
+ * @property {string} dataset Short name of the dataset (aka 'slug')
+ */
+
+/**
+ * @typedef Issue
+ * @property {string} status
+ * @property {string} issue_type
+ * @property {string} field
+ */
diff --git a/src/views/organisations/dataset-overview.html b/src/views/organisations/dataset-overview.html
index 6dd538b8..33241015 100644
--- a/src/views/organisations/dataset-overview.html
+++ b/src/views/organisations/dataset-overview.html
@@ -4,6 +4,29 @@
{% from "components/deadlineNotice.html" import deadlineNotice %}
{% extends "layouts/main.html" %}
+{# Renders the access-error sentence for an endpoint, naming the HTTP code when endpoint.error.code is truthy. #}{% macro endpointErrorMessage(endpoint) %}
+ {% if endpoint.error.code %}
+ There was a {{ endpoint.error.code }} error accessing the data URL
+ {% else %}
+ There was an error accessing the data URL
+ {% endif %}
+{% endmacro %}
+
+{# Builds the path of the endpoint-error page for the given organisation, dataset and endpoint, URL-encoding each segment. #}{% macro datasetUrl(organisation, dataset, endpoint) %}
+ /organisations/{{ organisation.organisation | urlencode }}/{{ dataset.dataset | urlencode }}/endpoint-error/{{ endpoint.endpoint | urlencode }}
+{% endmacro %}
+
+{% macro lastAccessedInfo(organisation, dataset, endpoint) %}
+ {{endpoint.lastAccessed | govukDateTime}}
+
+