From 2dcfad1b1327dfdd34e232ce91e608783e6d8c9d Mon Sep 17 00:00:00 2001 From: aysegulcayir <49029525+aysegulcayir@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:51:58 +0200 Subject: [PATCH 1/4] Update input_helpers.py --- src/dq_suite/input_helpers.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/dq_suite/input_helpers.py b/src/dq_suite/input_helpers.py index 8285533..df3111f 100644 --- a/src/dq_suite/input_helpers.py +++ b/src/dq_suite/input_helpers.py @@ -96,6 +96,15 @@ def export_schema(dataset: str, spark: SparkSession): def fetch_schema_from_github(dq_rules): + """ + Function fetches a schema from the Github Amsterdam schema using the Excel input form. + + :param rule_json: A dictionary with all DQ configuration. + :type rule_json: dict + :return: schemas: A dictionary with the schema of the required tables. + :rtype: dict + """ + schemas = {} for table in dq_rules['tables']: if 'validate_table_schema_url' in table: @@ -108,6 +117,17 @@ def fetch_schema_from_github(dq_rules): def generate_dq_rules_from_schema(dq_rules: dict, schemas: dict) -> dict: + """ + Function adds expect_column_values_to_be_of_type rule for each column of tables having schema_id and schema_url in rule_json. + + :param rule_json: A dictionary with all DQ configuration. + :type rule_json: dict + : param: schemas: A dictionary with the schemas of the required tables. + : type: dict + :return: dq_rules: A dictionary with all DQ configuration. + :rtype: dict + """ + for table in dq_rules['tables']: if 'validate_table_schema' in table: schema_id = table['validate_table_schema'] From 42f2a500180a78578e1c10476f11cee4a1dac4c7 Mon Sep 17 00:00:00 2001 From: aysegulcayir <49029525+aysegulcayir@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:02:45 +0200 Subject: [PATCH 2/4] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5afde49..483863d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "dq-suite-amsterdam" -version = "0.5.2" +version = "0.5.3" authors = [ { name="Arthur Kordes", email="a.kordes@amsterdam.nl" }, { name="Aysegul Cayir Aydar", email="a.cayiraydar@amsterdam.nl" } From 813ddf895869aab22618fe987bb252cd72c1584f Mon Sep 17 00:00:00 2001 From: aysegulcayir <49029525+aysegulcayir@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:09:28 +0200 Subject: [PATCH 3/4] Update src/dq_suite/input_helpers.py Co-authored-by: ArthurKordes <75675106+ArthurKordes@users.noreply.github.com> --- src/dq_suite/input_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dq_suite/input_helpers.py b/src/dq_suite/input_helpers.py index df3111f..83ff530 100644 --- a/src/dq_suite/input_helpers.py +++ b/src/dq_suite/input_helpers.py @@ -122,7 +122,7 @@ def generate_dq_rules_from_schema(dq_rules: dict, schemas: dict) -> dict: :param rule_json: A dictionary with all DQ configuration. :type rule_json: dict - : param: schemas: A dictionary with the schemas of the required tables. + :param schemas: A dictionary with the schemas of the required tables. : type: dict :return: dq_rules: A dictionary with all DQ configuration. :rtype: dict From af359a33105b91dbbc096dd4b711c97f30bfa531 Mon Sep 17 00:00:00 2001 From: aysegulcayir <49029525+aysegulcayir@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:13:54 +0200 Subject: [PATCH 4/4] Update input_helpers.py --- src/dq_suite/input_helpers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dq_suite/input_helpers.py b/src/dq_suite/input_helpers.py index 83ff530..6410c9f 100644 --- a/src/dq_suite/input_helpers.py +++ b/src/dq_suite/input_helpers.py @@ -97,10 +97,10 @@ def export_schema(dataset: str, spark: SparkSession): def fetch_schema_from_github(dq_rules): """ - Function fetches a schema from the Github Amsterdam schema using the Excel input form. + Function fetches a schema from the Github Amsterdam schema using the dq_rules. - :param rule_json: A dictionary with all DQ configuration. - :type rule_json: dict + :param dq_rules: A dictionary with all DQ configuration. + :type dq_rules: dict :return: schemas: A dictionary with the schema of the required tables. :rtype: dict """ @@ -118,10 +118,10 @@ def fetch_schema_from_github(dq_rules): def generate_dq_rules_from_schema(dq_rules: dict, schemas: dict) -> dict: """ - Function adds expect_column_values_to_be_of_type rule for each column of tables having schema_id and schema_url in rule_json. + Function adds expect_column_values_to_be_of_type rule for each column of tables having schema_id and schema_url in dq_rules. - :param rule_json: A dictionary with all DQ configuration. - :type rule_json: dict + :param dq_rules: A dictionary with all DQ configuration. + :type dq_rules: dict :param schemas: A dictionary with the schemas of the required tables. : type: dict :return: dq_rules: A dictionary with all DQ configuration.