Skip to content

Commit

Permalink
Bug 1923976 - Change fxci_derived queries to run at 1800 (#6335)
Browse files Browse the repository at this point in the history
These queries depend on data from the billing table, which often doesn't
get finalized until the afternoon of the following day.

The billing table can sometimes receive records much later than that (up to
a month later in the worst case I've seen), but that's a problem for another
time. This patch should at least fix the common case.
  • Loading branch information
ahal authored Oct 11, 2024
1 parent 102669a commit 6f53a9a
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 6 deletions.
26 changes: 24 additions & 2 deletions dags.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,8 @@ bqetl_desktop_platform:

bqetl_internal_tooling:
description: |
This DAG schedules queries for populating queries related to Mozilla's
internal developer tooling (e.g. mozregression and Firefox-CI).
This DAG schedules queries for populating tables related to Mozilla's
internal developer tooling (e.g. mozregression).
default_args:
depends_on_past: false
email:
Expand Down Expand Up @@ -1830,3 +1830,25 @@ bqetl_shredder_monitoring:
- repo/bigquery-etl
- impact/tier_3
- triage/no_triage

bqetl_fxci:
description: |
This DAG schedules queries for populating tables related to the
Firefox-CI Taskcluster instance.
default_args:
depends_on_past: false
email:
- ahalberstadt@mozilla.com
- telemetry-alerts@mozilla.com
email_on_failure: true
email_on_retry: true
end_date: null
owner: ahalberstadt@mozilla.com
retries: 2
retry_delay: 30m
start_date: "2020-10-11"
# This DAG needs to run late as it depends on the GCP billing export which
# often isn't finalized until the afternoon of the following day.
schedule_interval: 0 18 * * *
tags:
- impact/tier_3
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ owners:
labels:
incremental: true
owner1: ahalberstadt
dag: bqetl_internal_tooling
dag: bqetl_fxci
scheduling:
dag_name: bqetl_internal_tooling
dag_name: bqetl_fxci
bigquery:
time_partitioning:
type: day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ owners:
labels:
incremental: true
owner1: ahalberstadt
dag: bqetl_internal_tooling
dag: bqetl_fxci
scheduling:
dag_name: bqetl_internal_tooling
dag_name: bqetl_fxci
task_name: fxci_worker_cost__v1
bigquery:
time_partitioning:
Expand Down

1 comment on commit 6f53a9a

@dataops-ci-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Integration report for "Bug 1923976 - Change fxci_derived queries to run at 1800 (#6335)"

sql.diff

Click to expand!
Only in /tmp/workspace/generated-sql/dags/: bqetl_fxci.py
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_fxci.py /tmp/workspace/generated-sql/dags/bqetl_fxci.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_fxci.py	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_fxci.py	2024-10-11 16:16:02.000000000 +0000
@@ -0,0 +1,78 @@
+# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py
+
+from airflow import DAG
+from airflow.sensors.external_task import ExternalTaskMarker
+from airflow.sensors.external_task import ExternalTaskSensor
+from airflow.utils.task_group import TaskGroup
+import datetime
+from operators.gcp_container_operator import GKEPodOperator
+from utils.constants import ALLOWED_STATES, FAILED_STATES
+from utils.gcp import bigquery_etl_query, bigquery_dq_check
+from bigeye_airflow.operators.run_metrics_operator import RunMetricsOperator
+
+docs = """
+### bqetl_fxci
+
+Built from bigquery-etl repo, [`dags/bqetl_fxci.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_fxci.py)
+
+#### Description
+
+This DAG schedules queries for populating tables related to the
+Firefox-CI Taskcluster instance.
+
+#### Owner
+
+ahalberstadt@mozilla.com
+
+#### Tags
+
+* impact/tier_3
+* repo/bigquery-etl
+"""
+
+
+default_args = {
+    "owner": "ahalberstadt@mozilla.com",
+    "start_date": datetime.datetime(2020, 10, 11, 0, 0),
+    "end_date": None,
+    "email": ["ahalberstadt@mozilla.com", "telemetry-alerts@mozilla.com"],
+    "depends_on_past": False,
+    "retry_delay": datetime.timedelta(seconds=1800),
+    "email_on_failure": True,
+    "email_on_retry": True,
+    "retries": 2,
+}
+
+tags = ["impact/tier_3", "repo/bigquery-etl"]
+
+with DAG(
+    "bqetl_fxci",
+    default_args=default_args,
+    schedule_interval="0 18 * * *",
+    doc_md=docs,
+    tags=tags,
+) as dag:
+
+    fxci_derived__task_run_costs__v1 = bigquery_etl_query(
+        task_id="fxci_derived__task_run_costs__v1",
+        destination_table="task_run_costs_v1",
+        dataset_id="fxci_derived",
+        project_id="moz-fx-data-shared-prod",
+        owner="ahalberstadt@mozilla.com",
+        email=["ahalberstadt@mozilla.com", "telemetry-alerts@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+    )
+
+    fxci_worker_cost__v1 = bigquery_etl_query(
+        task_id="fxci_worker_cost__v1",
+        destination_table="worker_costs_v1",
+        dataset_id="fxci_derived",
+        project_id="moz-fx-data-shared-prod",
+        owner="ahalberstadt@mozilla.com",
+        email=["ahalberstadt@mozilla.com", "telemetry-alerts@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+    )
+
+    fxci_derived__task_run_costs__v1.set_upstream(fxci_worker_cost__v1)
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_internal_tooling.py /tmp/workspace/generated-sql/dags/bqetl_internal_tooling.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_internal_tooling.py	2024-10-11 16:15:36.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_internal_tooling.py	2024-10-11 16:15:58.000000000 +0000
@@ -17,8 +17,8 @@
 
 #### Description
 
-This DAG schedules queries for populating queries related to Mozilla's
-internal developer tooling (e.g. mozregression and Firefox-CI).
+This DAG schedules queries for populating tables related to Mozilla's
+internal developer tooling (e.g. mozregression).
 
 #### Owner
 
@@ -65,28 +65,6 @@
         pool="DATA_ENG_EXTERNALTASKSENSOR",
     )
 
-    fxci_derived__task_run_costs__v1 = bigquery_etl_query(
-        task_id="fxci_derived__task_run_costs__v1",
-        destination_table="task_run_costs_v1",
-        dataset_id="fxci_derived",
-        project_id="moz-fx-data-shared-prod",
-        owner="ahalberstadt@mozilla.com",
-        email=["ahalberstadt@mozilla.com", "telemetry-alerts@mozilla.com"],
-        date_partition_parameter="submission_date",
-        depends_on_past=False,
-    )
-
-    fxci_worker_cost__v1 = bigquery_etl_query(
-        task_id="fxci_worker_cost__v1",
-        destination_table="worker_costs_v1",
-        dataset_id="fxci_derived",
-        project_id="moz-fx-data-shared-prod",
-        owner="ahalberstadt@mozilla.com",
-        email=["ahalberstadt@mozilla.com", "telemetry-alerts@mozilla.com"],
-        date_partition_parameter="submission_date",
-        depends_on_past=False,
-    )
-
     mozregression_aggregates__v1 = bigquery_etl_query(
         task_id="mozregression_aggregates__v1",
         destination_table="mozregression_aggregates_v1",
@@ -102,6 +80,4 @@
         depends_on_past=False,
     )
 
-    fxci_derived__task_run_costs__v1.set_upstream(fxci_worker_cost__v1)
-
     mozregression_aggregates__v1.set_upstream(wait_for_copy_deduplicate_all)
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/task_run_costs_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/task_run_costs_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/task_run_costs_v1/metadata.yaml	2024-10-11 16:11:32.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/task_run_costs_v1/metadata.yaml	2024-10-11 16:11:31.000000000 +0000
@@ -6,9 +6,9 @@
 labels:
   incremental: true
   owner1: ahalberstadt
-  dag: bqetl_internal_tooling
+  dag: bqetl_fxci
 scheduling:
-  dag_name: bqetl_internal_tooling
+  dag_name: bqetl_fxci
 bigquery:
   time_partitioning:
     type: day
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/worker_costs_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/worker_costs_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/worker_costs_v1/metadata.yaml	2024-10-11 16:11:32.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/worker_costs_v1/metadata.yaml	2024-10-11 16:11:31.000000000 +0000
@@ -6,9 +6,9 @@
 labels:
   incremental: true
   owner1: ahalberstadt
-  dag: bqetl_internal_tooling
+  dag: bqetl_fxci
 scheduling:
-  dag_name: bqetl_internal_tooling
+  dag_name: bqetl_fxci
   task_name: fxci_worker_cost__v1
 bigquery:
   time_partitioning:

Link to full diff

Please sign in to comment.