Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create shredder_targets_joined table with additional table metadata #6331

Merged
merged 1 commit into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
friendly_name: Shredder Targets
description: |-
Daily list of shredder deletion targets comparing the configured targets with
the lineage of found id tables in bigquery.
owners:
- bewu@mozilla.com
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.monitoring.shredder_targets`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.monitoring_derived.shredder_targets_joined_v1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
friendly_name: Shredder Targets Joined
description: |-
Daily list of shredder deletion targets comparing the configured targets with
the lineage of found id tables in bigquery.
Augmented version of monitoring_derived.shredder_targets_v1 with additional table metadata.
owners:
- bewu@mozilla.com
labels:
incremental: true
owner1: benwu
schedule: daily
scheduling:
dag_name: bqetl_shredder_monitoring
bigquery:
time_partitioning:
type: day
field: run_date
require_partition_filter: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
WITH query_counts AS (
SELECT
COUNT(*) AS query_count,
targets.project_id,
targets.dataset_id,
targets.table_id,
FROM
`moz-fx-data-shared-prod.monitoring_derived.jobs_by_organization_v1`
CROSS JOIN
UNNEST(referenced_tables) AS ref_table
RIGHT JOIN
`moz-fx-data-shared-prod.monitoring_derived.shredder_targets_v1` AS targets
ON targets.project_id = ref_table.project_id
AND targets.dataset_id = ref_table.dataset_id
AND STARTS_WITH(ref_table.table_id, targets.table_id)
-- filter out shredder reads
AND (
NOT ENDS_WITH(reservation_id, '.shredder-main-v4')
OR NOT ENDS_WITH(reservation_id, '.batch')
)
WHERE
DATE(creation_time)
BETWEEN DATE_SUB(@submission_date, INTERVAL 29 DAY)
AND DATE(@submission_date)
AND targets.run_date = @submission_date
GROUP BY
targets.project_id,
targets.dataset_id,
targets.table_id
),
write_counts AS (
SELECT
COUNT(*) AS write_count,
targets.project_id,
targets.dataset_id,
targets.table_id,
FROM
`moz-fx-data-shared-prod.monitoring_derived.shredder_targets_v1` AS targets
LEFT JOIN
`moz-fx-data-shared-prod.monitoring_derived.jobs_by_organization_v1`
ON targets.project_id = destination_table.project_id
AND targets.dataset_id = destination_table.dataset_id
AND STARTS_WITH(destination_table.table_id, targets.table_id)
-- filter out shredder writes
AND (
NOT ENDS_WITH(reservation_id, '.shredder-main-v4')
OR NOT ENDS_WITH(reservation_id, '.batch')
)
WHERE
DATE(creation_time)
BETWEEN DATE_SUB(@submission_date, INTERVAL 29 DAY)
AND DATE(@submission_date)
AND targets.run_date = @submission_date
GROUP BY
targets.project_id,
targets.dataset_id,
targets.table_id
)
SELECT
targets.*,
owners,
COALESCE(query_count, 0) AS query_count_last_30d,
COALESCE(write_count, 0) AS write_count_last_30d,
ROUND(total_logical_bytes / 1024 / 1024 / 1024 / 1024, 2) AS table_size_tib,
table_inventory.creation_date AS table_creation_date,
IFNULL(table_inventory.deprecated, FALSE) AS deprecated,
FROM
`moz-fx-data-shared-prod.monitoring_derived.shredder_targets_v1` AS targets
LEFT JOIN
`moz-fx-data-shared-prod.monitoring_derived.bigquery_tables_inventory_v1` AS table_inventory
USING (project_id, dataset_id, table_id)
LEFT JOIN
`moz-fx-data-shared-prod.monitoring_derived.bigquery_table_storage_v1` AS table_storage
USING (project_id, dataset_id, table_id)
LEFT JOIN
query_counts
USING (project_id, dataset_id, table_id)
LEFT JOIN
write_counts
USING (project_id, dataset_id, table_id)
WHERE
targets.run_date = @submission_date
AND table_inventory.submission_date = @submission_date
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
fields:
- name: run_date
type: DATE
mode: NULLABLE
- name: project_id
type: STRING
mode: NULLABLE
- name: dataset_id
type: STRING
mode: NULLABLE
- name: table_id
type: STRING
mode: NULLABLE
- name: current_sources
type: RECORD
mode: REPEATED
fields:
- name: table
type: STRING
mode: NULLABLE
- name: field
type: STRING
mode: NULLABLE
- name: project
type: STRING
mode: NULLABLE
- name: detected_sources
type: RECORD
mode: REPEATED
fields:
- name: table
type: STRING
mode: NULLABLE
- name: field
type: STRING
mode: NULLABLE
- name: project
type: STRING
mode: NULLABLE
- name: matching_sources
type: BOOLEAN
mode: NULLABLE
- name: owners
type: STRING
mode: REPEATED
- name: query_count_last_30d
type: INTEGER
mode: NULLABLE
- name: write_count_last_30d
type: INTEGER
mode: NULLABLE
- name: table_size_tib
type: FLOAT
mode: NULLABLE
- name: table_creation_date
type: DATE
mode: NULLABLE
- name: deprecated
type: BOOLEAN
mode: NULLABLE