Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: upgrade tabular data checker to frictionless v5 #1611

Merged
merged 6 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ import './frictionless-components.css';
function getReportErrors(task) {
const reportErrors = {};
for (const error of task.errors) {
const header = task.resource.schema.fields.map((field) => field.name);
const header = task.resource ? task.resource.schema.fields.map((field) => field.name) : task.labels;

// Prepare reportError
let reportError = reportErrors[error.code];
let reportError = reportErrors[error.type || error.code];
if (!reportError) {
reportError = {
count: 0,
code: error.code,
name: error.name,
type: error.type || error.code,
name: error.title || error.name,
tags: error.tags,
description: error.description,
header,
Expand All @@ -25,24 +25,28 @@ function getReportErrors(task) {
}

// Prepare cells
let data = reportError.data[error.rowPosition || 0];
let data = reportError.data[error.rowNumber || error.rowPosition || 0];
if (!data) {
const values = error.cells || error.labels || [];
data = {values, errors: new Set()};
}

// Ensure blank row
if (error.code === 'blank-row') {
if (error.type === 'blank-row' || error.code === 'blank-row') {
data.values = header.map(() => '');
}

// Ensure missing cell
if (error.code === 'missing-cell') {
if (error.type === 'missing-cell') {
data.values[error.fieldNumber - 1] = '';
} else if (error.code === 'missing-cell') {
data.values[error.fieldPosition - 1] = '';
}

// Add row errors
if (error.fieldPosition) {
if (error.fieldNumber) {
data.errors.add(error.fieldNumber);
} else if (error.fieldPosition) {
data.errors.add(error.fieldPosition);
} else if (data.values) {
data.errors = new Set(data.values.map((_, index) => index + 1));
Expand All @@ -51,8 +55,8 @@ function getReportErrors(task) {
// Save reportError
reportError.count += 1;
reportError.messages.push(error.message);
reportError.data[error.rowPosition || 0] = data;
reportErrors[error.code] = reportError;
reportError.data[error.rowNumber || error.rowPosition || 0] = data;
reportErrors[error.type || error.code] = reportError;
}

return reportErrors;
Expand All @@ -70,7 +74,7 @@ function HandleReport(jsReport) {
<p>The report shows the maximum of 10 alerts. More alerts may appear if these 10 are corrected and the file is re-uploaded.</p>
)}
{Object.values(reportErrors).map((reportError) => (
<ReportError key={reportError.code} reportError={reportError} />
<ReportError key={reportError.type} reportError={reportError} />
))}
</div>
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ function getRowPositions(reportError) {

function ReportError(props) {
const {reportError} = props;
console.log(reportError);
const [visibleRowsCount, setVisibleRowsCount] = useState(10);
const rowPositions = getRowPositions(reportError);

Expand Down Expand Up @@ -53,7 +52,7 @@ function ReportError(props) {
</div>

{/* Table view */}
{!['source-error'].includes(reportError.code) && (
{!['source-error'].includes(reportError.type) && (
<div className="table-view">
<div className="inner">
<ReportTable
Expand Down
4 changes: 2 additions & 2 deletions app/models/stash_engine/generic_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def trigger_frictionless
})

resp = client.invoke(
{ function_name: 'frictionless311',
{ function_name: 'frictionless',
invocation_type: 'Event',
log_type: 'None',
payload: payload }
Expand Down Expand Up @@ -265,7 +265,7 @@ def trigger_excel_to_csv
processor_obj: pr.as_json })

resp = client.invoke(
{ function_name: 'excelToCsv311',
{ function_name: 'excelToCsv',
invocation_type: 'Event',
log_type: 'None',
payload: payload }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ it zips up correctly for the Lambda Layer.

```
pandas
frictionless # the last 4.x version
frictionless[excel] # the last 4.x version
frictionless[csv] # the last 4.x version
frictionless[json] # the last 4.x version
frictionless # the last 5.x version
frictionless[excel] # the last 5.x version
frictionless[csv] # the last 5.x version
frictionless[json] # the last 5.x version
requests
```

Expand Down
15 changes: 7 additions & 8 deletions script/py-frictionless/lambda.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import time
from pprint import pprint
from frictionless import Detector, validate, validate_resource
from frictionless import Detector, validate
from urllib.request import urlopen
import xml.etree.ElementTree as ET
import requests
Expand Down Expand Up @@ -40,25 +39,25 @@ def lambda_handler(event, context):
else:
detector = Detector(field_missing_values=", ,na,n/a,.,none,NA,N/A,N.A.,n.a.,-,empty,blank".split(","))
try:
report = validate(event["download_url"], "resource", detector=detector, limit_errors=10)
report = validate(event["download_url"], limit_errors=10, detector=detector)
except Exception as e:
update(token=event["token"], status='error', report=str(e), callback=event["callback_url"] )
return {"status": 200, "message": "Error parsing file with Frictionless"}

# These report-level errors mean Frictionless could not process the file at all; they are not linting results.
if report["errors"]:
update(token=event["token"], status='error', report=report, callback=event["callback_url"] )
if report.errors:
update(token=event["token"], status='error', report=json.dumps({'report': report.to_dict()}), callback=event["callback_url"] )
return {"status": 200, "message": "Error parsing file with Frictionless"}

lint_status = "issues" if report["tasks"][0].get("errors") else 'noissues'
lint_status = "noissues" if report.valid else 'issues'
poss_error_msg = ''
if lint_status == 'issues':
poss_error_msg = report["tasks"][0]["errors"][0].get("description", "")
poss_error_msg = report.tasks[0].errors[0].description

if poss_error_msg.startswith("Data reading error"):
lint_status = "error"

update(token=event["token"], status=lint_status, report=json.dumps({'report': report}), callback=event['callback_url'])
update(token=event["token"], status=lint_status, report=json.dumps({'report': report.to_dict()}), callback=event['callback_url'])

return report

Expand Down
Loading