Skip to content

Commit

Permalink
Merge branch 'main' into 107-feature-enable-alerts-and-threshold-for-…
Browse files Browse the repository at this point in the history
…ignore-scenario
  • Loading branch information
Suatguzl authored Dec 19, 2024
2 parents ee6e301 + 5c7be0a commit 73e755a
Show file tree
Hide file tree
Showing 16 changed files with 374 additions and 41 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ se_user_conf = {
#Below two params are optional and need to be enabled to capture the detailed stats in the <stats_table_name>_detailed.
#user_config.enable_query_dq_detailed_result: True,
#user_config.enable_agg_dq_detailed_result: True,
#Below two params are optional and need to be enabled to pass the custom email body
#user_config.se_notifications_enable_custom_email_body: True,
#user_config.se_notifications_email_custom_body: "Custom statistics: 'product_id': {}",

}
```
Expand Down
2 changes: 2 additions & 0 deletions docs/bigquery.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ se: SparkExpectations = SparkExpectations(
# Commented fields are optional or required when notifications are enabled
user_conf = {
user_config.se_notifications_enable_email: False,
# user_config.se_notifications_enable_custom_email_body: True,
# user_config.se_notifications_email_smtp_host: "mailhost.com",
# user_config.se_notifications_email_smtp_port: 25,
# user_config.se_notifications_email_from: "",
# user_config.se_notifications_email_to_other_mail_id: "",
# user_config.se_notifications_email_subject: "spark expectations - data quality - notifications",
# user_config.se_notifications_email_custom_body: "Custom statistics: 'product_id': {}",
user_config.se_notifications_enable_slack: False,
# user_config.se_notifications_slack_webhook_url: "",
# user_config.se_notifications_on_start: True,
Expand Down
2 changes: 2 additions & 0 deletions docs/delta.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,13 @@ se: SparkExpectations = SparkExpectations(
# Commented fields are optional or required when notifications are enabled
user_conf = {
user_config.se_notifications_enable_email: False,
# user_config.se_notifications_enable_custom_email_body: True,
# user_config.se_notifications_email_smtp_host: "mailhost.com",
# user_config.se_notifications_email_smtp_port: 25,
# user_config.se_notifications_email_from: "",
# user_config.se_notifications_email_to_other_mail_id: "",
# user_config.se_notifications_email_subject: "spark expectations - data quality - notifications",
# user_config.se_notifications_email_custom_body: "Custom statistics: 'product_id': {}",
user_config.se_notifications_enable_slack: False,
# user_config.se_notifications_slack_webhook_url: "",
# user_config.se_notifications_on_start: True,
Expand Down
75 changes: 39 additions & 36 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,49 +7,52 @@ from spark_expectations.config.user_config import Constants as user_config

se_user_conf = {
user_config.se_notifications_enable_email: False, # (1)!
user_config.se_notifications_email_smtp_host: "mailhost.com", # (2)!
user_config.se_notifications_email_smtp_port: 25, # (3)!
user_config.se_notifications_email_from: "<sender_email_id>", # (4)!
user_config.se_notifications_email_to_other_mail_id: "<receiver_email_id's>", # (5)!
user_config.se_notifications_email_subject: "spark expectations - data quality - notifications", # (6)!
user_config.se_notifications_enable_slack: True, # (7)!
user_config.se_notifications_slack_webhook_url: "<slack-webhook-url>", # (8)!
user_config.se_notifications_on_start: True, # (9)!
user_config.se_notifications_on_completion: True, # (10)!
user_config.se_notifications_on_fail: True, # (11)!
user_config.se_notifications_on_error_drop_exceeds_threshold_breach: True, # (12)!
user_config.se_notifications_on_rules_action_if_failed_set_ignore: True, # (13)!
user_config.se_notifications_on_error_drop_threshold: 15, # (14)!
user_config.se_enable_error_table: True, # (15)!
user_config.enable_query_dq_detailed_result: True, # (16)!
user_config.enable_agg_dq_detailed_result: True, # (17)!
user_config.querydq_output_custom_table_name: "<catalog.schema.table-name>", #18
user_config.se_notifications_enable_custom_email_body: False, # (2)
user_config.se_notifications_email_smtp_host: "mailhost.com", # (3)!
user_config.se_notifications_email_smtp_port: 25, # (4)!
user_config.se_notifications_email_from: "<sender_email_id>", # (5)!
user_config.se_notifications_email_to_other_mail_id: "<receiver_email_id's>", # (6)!
user_config.se_notifications_email_subject: "spark expectations - data quality - notifications", # (7)!
user_config.se_notifications_email_custom_body: "custom stats: 'product_id': {}", # (8)!
user_config.se_notifications_enable_slack: True, # (9)!
user_config.se_notifications_slack_webhook_url: "<slack-webhook-url>", # (10)!
user_config.se_notifications_on_start: True, # (11)!
user_config.se_notifications_on_completion: True, # (12)!
user_config.se_notifications_on_fail: True, # (13)!
user_config.se_notifications_on_error_drop_exceeds_threshold_breach: True, # (14)!
user_config.se_notifications_on_error_drop_threshold: 15, # (15)!
user_config.se_enable_error_table: True, # (16)!
user_config.enable_query_dq_detailed_result: True, # (17)!
user_config.enable_agg_dq_detailed_result: True, # (18)!
user_config.querydq_output_custom_table_name: "<catalog.schema.table-name>", #19
user_config.se_dq_rules_params: {
"env": "local",
"table": "product",
}, # (19)!
}, # (20)!
}
}
```

1. The `user_config.se_notifications_enable_email` parameter, which controls whether notifications are sent via email, is set to false by default
2. The `user_config.se_notifications_email_smtp_host` parameter is set to "mailhost.com" by default and is used to specify the email SMTP domain host
3. The `user_config.se_notifications_email_smtp_port` parameter, which accepts a port number, is set to "25" by default
4. The `user_config.se_notifications_email_from` parameter is used to specify the email ID that will trigger the email notification
5. The `user_config.se_notifications_email_to_other_mail_id` parameter accepts a list of recipient email IDs
6. The `user_config.se_notifications_email_subject` parameter captures the subject line of the email
7. The `user_config.se_notifications_enable_slack` parameter, which controls whether notifications are sent via slack, is set to false by default
8. The `user_config/se_notifications_slack_webhook_url` parameter accepts the webhook URL of a Slack channel for sending notifications
9. When `user_config.se_notifications_on_start` parameter set to `True` enables notification on start of the spark-expectations, variable by default set to `False`
10. When `user_config.se_notifications_on_completion` parameter set to `True` enables notification on completion of spark-expectations framework, variable by default set to `False`
11. When `user_config.se_notifications_on_fail` parameter set to `True` enables notification on failure of spark-expectations data quality framework, variable by default set to `True`
12. When `user_config.se_notifications_on_error_drop_exceeds_threshold_breach` parameter set to `True` enables notification when error threshold reaches above the configured value
13. When `user_config.se_notifications_on_rules_action_if_failed_set_ignore` parameter set to `True` enables notification when rules action is set to ignore if failed
14. The `user_config.se_notifications_on_error_drop_threshold` parameter captures error drop threshold value
15. The `user_config.se_enable_error_table` parameter, which controls whether error data to load into error table, is set to true by default
16. When `user_config.enable_query_dq_detailed_result` parameter set to `True`, enables the option to capture the query_dq detailed stats to detailed_stats table. By default set to `False`
17. When `user_config.enable_agg_dq_detailed_result` parameter set to `True`, enables the option to capture the agg_dq detailed stats to detailed_stats table. By default set to `False`
18. The `user_config.querydq_output_custom_table_name` parameter is used to specify the name of the custom query_dq output table which captures the output of the alias queries passed in the query dq expectation. Default is `<stats_table>_custom_output`
19. The `user_config.se_dq_rules_params` parameter, which are required to dynamically update dq rules
2. The `user_config.se_notifications_enable_custom_email_body` optional parameter, which controls whether custom email body is enabled, is set to false by default
3. The `user_config.se_notifications_email_smtp_host` parameter is set to "mailhost.com" by default and is used to specify the email SMTP domain host
4. The `user_config.se_notifications_email_smtp_port` parameter, which accepts a port number, is set to "25" by default
5. The `user_config.se_notifications_email_from` parameter is used to specify the email ID that will trigger the email notification
6. The `user_config.se_notifications_email_to_other_mail_id` parameter accepts a list of recipient email IDs
7. The `user_config.se_notifications_email_subject` parameter captures the subject line of the email
8. The `user_config.se_notifications_email_custom_body` optional parameter, captures the custom email body, need to be compliant with certain syntax
9. The `user_config.se_notifications_enable_slack` parameter, which controls whether notifications are sent via slack, is set to false by default
10. The `user_config/se_notifications_slack_webhook_url` parameter accepts the webhook URL of a Slack channel for sending notifications
11. When `user_config.se_notifications_on_start` parameter set to `True` enables notification on start of the spark-expectations, variable by default set to `False`
12. When `user_config.se_notifications_on_completion` parameter set to `True` enables notification on completion of spark-expectations framework, variable by default set to `False`
13. When `user_config.se_notifications_on_fail` parameter set to `True` enables notification on failure of spark-expectations data quality framework, variable by default set to `True`
14. When `user_config.se_notifications_on_error_drop_exceeds_threshold_breach` parameter set to `True` enables notification when error threshold reaches above the configured value
15. The `user_config.se_notifications_on_error_drop_threshold` parameter captures error drop threshold value
16. The `user_config.se_enable_error_table` parameter, which controls whether error data to load into error table, is set to true by default
17. When `user_config.enable_query_dq_detailed_result` parameter set to `True`, enables the option to cature the query_dq detailed stats to detailed_stats table. By default set to `False`
18. When `user_config.enable_agg_dq_detailed_result` parameter set to `True`, enables the option to cature the agg_dq detailed stats to detailed_stats table. By default set to `False`
19. The `user_config.querydq_output_custom_table_name` parameter is used to specify the name of the custom query_dq output table which captures the output of the alias queries passed in the query dq expectation. Default is <stats_table>_custom_output
20. The `user_config.se_dq_rules_params` parameter, which are required to dynamically update dq rules


### Spark Expectations Initialization
Expand Down
2 changes: 2 additions & 0 deletions docs/iceberg.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,13 @@ se: SparkExpectations = SparkExpectations(
# Commented fields are optional or required when notifications are enabled
user_conf = {
user_config.se_notifications_enable_email: False,
# user_config.se_notifications_enable_custom_email_body: True,
# user_config.se_notifications_email_smtp_host: "mailhost.com",
# user_config.se_notifications_email_smtp_port: 25,
# user_config.se_notifications_email_from: "",
# user_config.se_notifications_email_to_other_mail_id: "",
# user_config.se_notifications_email_subject: "spark expectations - data quality - notifications",
# user_config.se_notifications_email_custom_body: "Custom statistics: 'product_id': {}",
user_config.se_notifications_enable_slack: False,
# user_config.se_notifications_slack_webhook_url: "",
# user_config.se_notifications_on_start: True,
Expand Down
4 changes: 4 additions & 0 deletions prospector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,15 @@ pylint:
- invalid-name
- no-else-return
- no-self-use
- possibly-used-before-assignment
- protected-access
- too-many-arguments
- too-many-locals
- too-many-positional-arguments
- inconsistent-return-statements
- import-outside-toplevel
- consider-using-set-comprehension
- used-before-assignment
- useless-object-inheritance
- unnecessary-pass
- raise-missing-from
Expand All @@ -53,3 +56,4 @@ mccabe:
pyflakes:
disable:
- F999

6 changes: 6 additions & 0 deletions spark_expectations/config/user_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
class Constants:
# declare const user config variables for email notification
se_notifications_enable_email = "spark.expectations.notifications.email.enabled"
se_notifications_enable_custom_email_body = (
"spark.expectations.notifications.enable.custom.email.body"
)
se_notifications_email_smtp_host = (
"spark.expectations.notifications.email.smtp_host"
)
Expand All @@ -16,6 +19,9 @@ class Constants:
"spark.expectations.notifications.email.to.other.mail.com"
)
se_notifications_email_subject = "spark.expectations.notifications.email.subject"
se_notifications_email_custom_body = (
"spark.expectations.notifications.email.custom.body"
)

# declare const user config variables for slack notification
se_notifications_enable_slack = "spark.expectations.notifications.slack.enabled"
Expand Down
57 changes: 56 additions & 1 deletion spark_expectations/core/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from datetime import datetime
from dataclasses import dataclass
from uuid import uuid1
from typing import Dict, Optional, List, Tuple
from typing import Dict, Optional, List, Tuple, Any
from pyspark.sql import DataFrame, SparkSession
from spark_expectations.config.user_config import Constants as user_config
from spark_expectations.core.exceptions import SparkExpectationsMiscException
Expand Down Expand Up @@ -47,11 +47,13 @@ def __post_init__(self) -> None:
self._dq_config_abs_path: Optional[str] = None

self._enable_mail: bool = False
self._enable_custom_email_body: bool = False
self._to_mail: Optional[str] = None
self._mail_subject: Optional[str] = None
self._mail_from: Optional[str] = None
self._mail_smtp_server: str
self._mail_smtp_port: int
self._email_custom_body: Optional[str] = None

self._enable_slack: bool = False
self._slack_webhook_url: Optional[str] = None
Expand Down Expand Up @@ -154,6 +156,8 @@ def __post_init__(self) -> None:

self._query_dq_output_custom_table_name: str

self._stats_dict: List[dict] = []

@property
def get_run_id(self) -> str:
"""
Expand Down Expand Up @@ -495,6 +499,19 @@ def get_enable_mail(self) -> bool:
def set_to_mail(self, to_mail: str) -> None:
self._to_mail = to_mail

def set_enable_custom_email_body(self, enable_custom_email_body: bool) -> None:
self._enable_custom_email_body = bool(enable_custom_email_body)

@property
def get_enable_custom_email_body(self) -> bool:
"""
This function return whether to enable custom email body or not
Returns:
str: Returns _enable_custom_email_body(bool)
"""
return self._enable_custom_email_body

@property
def get_to_mail(self) -> str:
"""
Expand Down Expand Up @@ -548,6 +565,24 @@ def get_mail_subject(self) -> str:
accessing it"""
)

def set_email_custom_body(self, email_custom_body: str) -> None:
self._email_custom_body = email_custom_body

@property
def get_email_custom_body(self) -> str:
"""
This function returns email custom body
Returns:
str: Returns _email_custom_body(str)
"""
if self._email_custom_body:
return self._email_custom_body
raise SparkExpectationsMiscException(
"""The spark expectations context is not set completely, please assign '_email_custom_body' before
accessing it"""
)

def set_enable_slack(self, enable_slack: bool) -> None:
"""
Expand Down Expand Up @@ -2007,3 +2042,23 @@ def get_job_metadata(self) -> Optional[str]:
if self._job_metadata is not None:
return str(self._job_metadata)
return None

def set_stats_dict(self, df: DataFrame) -> None:
"""
This function is used to set the stats_dict
Returns:
dictionary of statistics
"""
self._stats_dict = [row.asDict() for row in df.collect()]

@property
def get_stats_dict(self) -> Optional[List[Dict[str, Any]]]:
"""
This function is used to get the stats_dict
Returns:
Optional[List[Dict[str, Any]]]: Returns the stats_dict if it exists, otherwise None
"""
return self._stats_dict if hasattr(self, "_stats_dict") else None
8 changes: 7 additions & 1 deletion spark_expectations/examples/sample_dq_delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,18 @@
)

user_conf = {
user_config.se_notifications_enable_email: False,
user_config.se_notifications_enable_email: True,
user_config.se_notifications_enable_custom_email_body: True,
user_config.se_notifications_email_smtp_host: "mailhost.com",
user_config.se_notifications_email_smtp_port: 25,
user_config.se_notifications_email_from: "",
user_config.se_notifications_email_to_other_mail_id: "",
user_config.se_notifications_email_subject: "spark expectations - data quality - notifications",
user_config.se_notifications_email_custom_body: """Spark Expectations Statistics for this dq run:
'product_id': {},
'table_name': {},
'source_agg_dq_results': {}',
'dq_status': {}""",
user_config.se_notifications_enable_slack: False,
user_config.se_notifications_slack_webhook_url: "",
user_config.se_notifications_on_start: True,
Expand Down
1 change: 0 additions & 1 deletion spark_expectations/notifications/plugins/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def send_notification(
msg["Subject"] = _context.get_mail_subject

# body = _config_args.get('mail_body')

mail_content = f"""{_config_args.get("message")}"""
msg.attach(MIMEText(mail_content, "plain"))

Expand Down
Loading

0 comments on commit 73e755a

Please sign in to comment.