diff --git a/pyproject.toml b/pyproject.toml index 5afde49..483863d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "dq-suite-amsterdam" -version = "0.5.2" +version = "0.5.3" authors = [ { name="Arthur Kordes", email="a.kordes@amsterdam.nl" }, { name="Aysegul Cayir Aydar", email="a.cayiraydar@amsterdam.nl" } diff --git a/src/dq_suite/input_helpers.py b/src/dq_suite/input_helpers.py index 8285533..6410c9f 100644 --- a/src/dq_suite/input_helpers.py +++ b/src/dq_suite/input_helpers.py @@ -96,6 +96,15 @@ def export_schema(dataset: str, spark: SparkSession): def fetch_schema_from_github(dq_rules): + """ + Function fetches a schema from the GitHub Amsterdam schema using the dq_rules. + + :param dq_rules: A dictionary with all DQ configuration. + :type dq_rules: dict + :return: schemas: A dictionary with the schema of the required tables. + :rtype: dict + """ + schemas = {} for table in dq_rules['tables']: if 'validate_table_schema_url' in table: @@ -108,6 +117,17 @@ def fetch_schema_from_github(dq_rules): def generate_dq_rules_from_schema(dq_rules: dict, schemas: dict) -> dict: + """ + Function adds expect_column_values_to_be_of_type rule for each column of tables having schema_id and schema_url in dq_rules. + + :param dq_rules: A dictionary with all DQ configuration. + :type dq_rules: dict + :param schemas: A dictionary with the schemas of the required tables. + :type schemas: dict + :return: dq_rules: A dictionary with all DQ configuration. + :rtype: dict + """ + for table in dq_rules['tables']: if 'validate_table_schema' in table: schema_id = table['validate_table_schema']