From 496cc4dabc8724edbb8c30f4bbbe4aeefb61a803 Mon Sep 17 00:00:00 2001 From: ArthurKordes <75675106+ArthurKordes@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:26:34 +0200 Subject: [PATCH 1/6] Update README.md --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index db0a8ad..84e1b70 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,16 @@ dfs = [df] results, brontabel_df, bronattribute_df, dqRegel_df = dq_suite.df_check(dfs, dq_rules, "showcase") ``` +# Validate the schema of a table +In order to output the schema from Unity Catalog, use the following commands (using the required schema name): + +``` +schema_output = dq_suite.export_schema('schema_name', spark) +print(schema_output) +``` + +Copy the string to the Input Form to quickly ingest the schema in Excel. + # Validate the schema of a table It is possible to validate the schema of an entire table to a schema definition from Amsterdam Schema in one go. This is done by adding two fields to the "dq_rules" JSON when describing the table (See: https://github.com/Amsterdam/dq-suite-amsterdam/blob/main/dq_rules_example.json). From 5ecc9ad8fcc85c53b626ada66f73cb9fab4d644b Mon Sep 17 00:00:00 2001 From: ArthurKordes <75675106+ArthurKordes@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:27:07 +0200 Subject: [PATCH 2/6] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 84e1b70..b899ace 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ dfs = [df] results, brontabel_df, bronattribute_df, dqRegel_df = dq_suite.df_check(dfs, dq_rules, "showcase") ``` + # Validate the schema of a table In order to output the schema from Unity Catalog, use the following commands (using the required schema name): @@ -41,6 +42,7 @@ print(schema_output) Copy the string to the Input Form to quickly ingest the schema in Excel. + # Validate the schema of a table It is possible to validate the schema of an entire table to a schema definition from Amsterdam Schema in one go. This is done by adding two fields to the "dq_rules" JSON when describing the table (See: https://github.com/Amsterdam/dq-suite-amsterdam/blob/main/dq_rules_example.json). @@ -50,6 +52,7 @@ You will need: The schema definition is converted into column level expectations (expect_column_values_to_be_of_type) on run time. + # Known exceptions The functions can run on Databricks using a Personal Compute Cluster or using a Job Cluster. Using a Shared Compute Cluster will results in an error, as it does not have the permissions that Great Expectations requires. From c5595459d9c7ff86f780254522dfdb911b12f4c6 Mon Sep 17 00:00:00 2001 From: ArthurKordes <75675106+ArthurKordes@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:28:08 +0200 Subject: [PATCH 3/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b899ace..ed510e3 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ results, brontabel_df, bronattribute_df, dqRegel_df = dq_suite.df_check(dfs, dq_ ``` -# Validate the schema of a table +# Export the schema from Unity Catalog to the Input Form In order to output the schema from Unity Catalog, use the following commands (using the required schema name): ``` From d0309e9374924ea2a3459627549e959837a1e6fc Mon Sep 17 00:00:00 2001 From: ArthurKordes <75675106+ArthurKordes@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:39:49 +0200 Subject: [PATCH 4/6] Update df_checker.py --- src/dq_suite/df_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dq_suite/df_checker.py b/src/dq_suite/df_checker.py index 308c14a..8bf57d2 100644 --- a/src/dq_suite/df_checker.py +++ b/src/dq_suite/df_checker.py @@ -7,7 +7,7 @@ import great_expectations as gx from great_expectations.checkpoint import Checkpoint -from dq_suite.input_validator import validate_dqrules, expand_input, generate_dq_rules_from_schema, fetch_schema_from_github +from dq_suite.input_helpers import validate_dqrules, expand_input, export_schema, generate_dq_rules_from_schema, fetch_schema_from_github from dq_suite.output_transformations import extract_dq_validatie_data, extract_dq_afwijking_data, create_brontabel, create_bronattribute, create_dqRegel def df_check(dfs: list, dq_rules: str, check_name: str) -> Tuple[Dict[str, Any], Dict[str, Tuple[Any, Any]], pd.DataFrame, pd.DataFrame, pd.DataFrame]: From 15ea78591cbdb34f15a43639d4d5a326d43d4128 Mon Sep 17 00:00:00 2001 From: ArthurKordes <75675106+ArthurKordes@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:44:34 +0200 Subject: [PATCH 5/6] Update input_helpers.py --- src/dq_suite/input_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dq_suite/input_helpers.py b/src/dq_suite/input_helpers.py index 49de508..8285533 100644 --- a/src/dq_suite/input_helpers.py +++ b/src/dq_suite/input_helpers.py @@ -41,7 +41,7 @@ def expand_input(rule_json): :rtype: dict """ - for table in rule_json["dataframe_parameters"]: + for table in rule_json["tables"]: for rule in table["rules"]: for parameter in rule["parameters"]: if "row_condition" in parameter: From ead37ad524727497fb9f3d98243e26a2e9dc475c Mon Sep 17 00:00:00 2001 From: ArthurKordes <75675106+ArthurKordes@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:35:57 +0200 Subject: [PATCH 6/6] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0eda86a..5afde49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "dq-suite-amsterdam" -version = "0.5.1" +version = "0.5.2" authors = [ { name="Arthur Kordes", email="a.kordes@amsterdam.nl" }, { name="Aysegul Cayir Aydar", email="a.cayiraydar@amsterdam.nl" }