Skip to content

Commit

Permalink
First unit test and spark conf
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurKordes committed Oct 17, 2024
1 parent 424546b commit c4fe323
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 4 deletions.
6 changes: 3 additions & 3 deletions src/dq_suite/df_checker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
from typing import List

import great_expectations
import datetime
from great_expectations import Checkpoint, ValidationDefinition
from great_expectations.checkpoint.actions import CheckpointAction
from great_expectations.checkpoint.checkpoint import CheckpointResult
Expand Down Expand Up @@ -217,7 +217,7 @@ def run(
validation_settings_obj=validation_settings_obj,
)
validation_output = checkpoint_result.describe_dict()
run_time = datetime.datetime.now() #TODO: get from RunIdentifier object
run_time = datetime.datetime.now() # TODO: get from RunIdentifier object

# 3) write results to unity catalog
write_non_validation_tables(
Expand All @@ -230,5 +230,5 @@ def run(
df=df,
dataset_name=validation_dict["dataset"]["name"],
unique_identifier=rules_dict["unique_identifier"],
run_time = run_time,
run_time=run_time,
)
2 changes: 1 addition & 1 deletion src/dq_suite/output_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def extract_dq_afwijking_data(

# "validation_results" is typed List[Dict[str, Any]] in GX
dq_result = dq_result["validation_results"]

extracted_data = []
if not isinstance(unique_identifier, list):
unique_identifier = [unique_identifier]
Expand Down
5 changes: 5 additions & 0 deletions tests/test_data/test_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from pyspark.sql.types import StructType

# Two-column schema used as test input: a string column named "the_string"
# followed by a timestamp column named "the_timestamp".
SCHEMA = (
    StructType()
    .add(field="the_string", data_type="string")
    .add(field="the_timestamp", data_type="timestamp")
)
22 changes: 22 additions & 0 deletions tests/test_output_transformations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

import pytest
from pyspark.sql import SparkSession

from src.dq_suite.output_transformations import create_empty_dataframe

from .test_data.test_schema import SCHEMA as AFWIJKING_SCHEMA


@pytest.fixture(scope="session")
def spark():
    """Provide a local SparkSession for the tests and stop it afterwards.

    ``getOrCreate()`` returns the already-active session when one exists, so
    the session was effectively shared between tests even with a
    function-scoped fixture. Session scope makes that sharing explicit, and
    the code after ``yield`` runs once at teardown so the session is not
    left running when the test run finishes.

    Yields:
        The SparkSession built with master ``"local"`` and app name
        ``"chispa"``.
    """
    session = (
        SparkSession.builder.master("local").appName("chispa").getOrCreate()
    )
    yield session
    # Teardown: release the resources held by the session.
    session.stop()


@pytest.mark.usefixtures("spark")
class TestCreateEmptyDataframe:
    """Tests for ``create_empty_dataframe``."""

    def test_create_empty_dataframe_returns_empty_dataframe(self, spark):
        """The DataFrame built from the test schema must contain no rows."""
        result = create_empty_dataframe(
            spark_session=spark, schema=AFWIJKING_SCHEMA
        )
        # head(1) returns a list holding at most one row; an empty list
        # means the DataFrame has no rows at all.
        first_rows = result.head(1)
        assert not first_rows

0 comments on commit c4fe323

Please sign in to comment.