From 5ea186c3bc9f5f898d31f66185d502e9e08b0878 Mon Sep 17 00:00:00 2001 From: Jack Forgash <58153492+forgxyz@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:16:48 -0700 Subject: [PATCH 1/2] upd tags core vs non core and workflow schedule csv --- .github/workflows/dbt_run_scheduled.yml | 2 +- .../workflows/dbt_run_scheduled_non_core.yml | 34 +++++++++++++++++++ data/github_actions__workflows.csv | 9 ++--- models/bronze/core/bronze__blocks.sql | 2 +- models/bronze/core/bronze__transactions.sql | 2 +- models/bronze/labels/bronze__labels.sql | 2 +- models/silver/core/silver__blocks.sql | 2 +- models/silver/core/silver__inputs.sql | 2 +- models/silver/core/silver__inputs_final.sql | 2 +- models/silver/core/silver__outputs.sql | 2 +- .../silver/core/silver__transaction_index.sql | 2 +- models/silver/core/silver__transactions.sql | 2 +- .../core/silver__transactions_final.sql | 2 +- .../silver/ez/silver__block_miner_rewards.sql | 2 +- models/silver/labels/silver__labels.sql | 2 +- .../silver__price_all_providers_hourly.sql | 2 +- .../price/silver__price_coingecko_hourly.sql | 2 +- .../silver__price_coinmarketcap_hourly.sql | 2 +- .../silver__price_coinpaprika_hourly.sql | 2 +- 19 files changed, 56 insertions(+), 21 deletions(-) create mode 100644 .github/workflows/dbt_run_scheduled_non_core.yml diff --git a/.github/workflows/dbt_run_scheduled.yml b/.github/workflows/dbt_run_scheduled.yml index ef27d59f..500263c6 100644 --- a/.github/workflows/dbt_run_scheduled.yml +++ b/.github/workflows/dbt_run_scheduled.yml @@ -28,7 +28,7 @@ jobs: uses: FlipsideCrypto/analytics-workflow-templates/.github/workflows/dbt_run_template.yml@main with: dbt_command: > - dbt run -s "bitcoin_models,tag:load" "bitcoin_models,tag:core" + dbt run -s "bitcoin_models,tag:scheduled_core" environment: workflow_prod warehouse: ${{ vars.WAREHOUSE }} secrets: inherit diff --git a/.github/workflows/dbt_run_scheduled_non_core.yml b/.github/workflows/dbt_run_scheduled_non_core.yml new file mode 100644 
index 00000000..bdb0cc8a --- /dev/null +++ b/.github/workflows/dbt_run_scheduled_non_core.yml @@ -0,0 +1,34 @@ +name: dbt_run_scheduled_non_core +run-name: dbt_run_scheduled_non_core + +on: + workflow_dispatch: + # schedule: + # # Stale placeholder: this cron is the former dbt_run_scheduled entry; data/github_actions__workflows.csv lists this workflow as hourly (see https://crontab.guru) + # - cron: '8,23,38,53 * * * *' + +env: + USE_VARS: "${{ vars.USE_VARS }}" + DBT_PROFILES_DIR: "${{ vars.DBT_PROFILES_DIR }}" + DBT_VERSION: "${{ vars.DBT_VERSION }}" + ACCOUNT: "${{ vars.ACCOUNT }}" + ROLE: "${{ vars.ROLE }}" + USER: "${{ vars.USER }}" + PASSWORD: "${{ secrets.PASSWORD }}" + REGION: "${{ vars.REGION }}" + DATABASE: "${{ vars.DATABASE }}" + WAREHOUSE: "${{ vars.WAREHOUSE }}" + SCHEMA: "${{ vars.SCHEMA }}" + +concurrency: + group: ${{ github.workflow }} + +jobs: + called_workflow_template: + uses: FlipsideCrypto/analytics-workflow-templates/.github/workflows/dbt_run_template.yml@main + with: + dbt_command: > + dbt run -s "bitcoin_models,tag:scheduled_non_core" + environment: workflow_prod + warehouse: ${{ vars.WAREHOUSE }} + secrets: inherit diff --git a/data/github_actions__workflows.csv b/data/github_actions__workflows.csv index 06ef6ba6..66226b5e 100644 --- a/data/github_actions__workflows.csv +++ b/data/github_actions__workflows.csv @@ -1,5 +1,6 @@ workflow_name,workflow_schedule -dbt_run_scheduled,"8,23,38,53 * * * *" -dbt_run_streamline_blocks_hash_realtime,"*/10 * * * *" -dbt_run_streamline_realtime,"3,13,23,33,43,53 * * * *" -dbt_test_tasks,"0,30 * * * *" \ No newline at end of file +dbt_run_scheduled,"*/30 * * * *" +dbt_run_scheduled_non_core,"0 * * * *" +dbt_run_streamline_blocks_hash_realtime,"0,15,30,45 * * * *" +dbt_run_streamline_realtime,"5,20,35,50 * * * *" +dbt_test_tasks,"0,30 * * * *" diff --git a/models/bronze/core/bronze__blocks.sql b/models/bronze/core/bronze__blocks.sql index fbf7531a..580818ae 100644 --- a/models/bronze/core/bronze__blocks.sql +++ b/models/bronze/core/bronze__blocks.sql @@ -2,7 +2,7 @@ 
materialized = 'incremental', cluster_by = ["_inserted_timestamp::DATE"], unique_key = 'block_number', - tags = ["load"], + tags = ["load", "scheduled_core"], incremental_strategy = 'delete+insert' ) }} -- depends on {{ref('bronze__streamline_blocks')}} diff --git a/models/bronze/core/bronze__transactions.sql b/models/bronze/core/bronze__transactions.sql index 5c08201e..45e19a60 100644 --- a/models/bronze/core/bronze__transactions.sql +++ b/models/bronze/core/bronze__transactions.sql @@ -2,7 +2,7 @@ materialized = 'incremental', cluster_by = ["_inserted_timestamp::DATE"], unique_key = "tx_id", - tags = ["load"], + tags = ["load", "scheduled_core"], incremental_strategy = 'delete+insert', incremental_predicates = ['block_number >= (select min(block_number) from ' ~ generate_tmp_view_name(this) ~ ')'], ) }} diff --git a/models/bronze/labels/bronze__labels.sql b/models/bronze/labels/bronze__labels.sql index d8525ce0..9c3c1a1d 100644 --- a/models/bronze/labels/bronze__labels.sql +++ b/models/bronze/labels/bronze__labels.sql @@ -1,6 +1,6 @@ {{ config( materialized = 'view', - tags = ['core'] + tags = ['core', 'scheduled_non_core'] ) }} diff --git a/models/silver/core/silver__blocks.sql b/models/silver/core/silver__blocks.sql index 24e2f295..2154eeaf 100644 --- a/models/silver/core/silver__blocks.sql +++ b/models/silver/core/silver__blocks.sql @@ -3,7 +3,7 @@ incremental_strategy = 'delete+insert', unique_key = 'block_number', cluster_by = ["_inserted_timestamp::DATE", "block_number"], - tags = ["core"] + tags = ["core", "scheduled_core"] ) }} WITH bronze_blocks AS ( diff --git a/models/silver/core/silver__inputs.sql b/models/silver/core/silver__inputs.sql index 39a75285..7c6f893a 100644 --- a/models/silver/core/silver__inputs.sql +++ b/models/silver/core/silver__inputs.sql @@ -3,7 +3,7 @@ incremental_strategy = 'delete+insert', incremental_predicates = ['block_number >= (select min(block_number) from ' ~ generate_tmp_view_name(this) ~ ')'], unique_key = 'input_id', - 
tags = ["core"], + tags = ["core", "scheduled_core"], cluster_by = ["_inserted_timestamp"], post_hook = "ALTER TABLE {{ this }} ADD SEARCH OPTIMIZATION" ) }} diff --git a/models/silver/core/silver__inputs_final.sql b/models/silver/core/silver__inputs_final.sql index fe89b63e..14d1bbee 100644 --- a/models/silver/core/silver__inputs_final.sql +++ b/models/silver/core/silver__inputs_final.sql @@ -4,7 +4,7 @@ incremental_predicates = ['block_number >= (select min(block_number) from ' ~ generate_tmp_view_name(this) ~ ')'], unique_key = 'input_id', cluster_by = ["block_number", "tx_id"], - tags = ["core"], + tags = ["core", "scheduled_core"], post_hook = "ALTER TABLE {{ this }} ADD SEARCH OPTIMIZATION" ) }} diff --git a/models/silver/core/silver__outputs.sql b/models/silver/core/silver__outputs.sql index ef10de84..a4452fc0 100644 --- a/models/silver/core/silver__outputs.sql +++ b/models/silver/core/silver__outputs.sql @@ -3,7 +3,7 @@ incremental_strategy = 'delete+insert', incremental_predicates = ['block_number >= (select min(block_number) from ' ~ generate_tmp_view_name(this) ~ ')'], unique_key = 'output_id', - tags = ["core"], + tags = ["core", "scheduled_core"], cluster_by = ["_partition_by_block_id", "tx_id"], post_hook = "ALTER TABLE {{ this }} ADD SEARCH OPTIMIZATION" ) }} diff --git a/models/silver/core/silver__transaction_index.sql b/models/silver/core/silver__transaction_index.sql index 5b750af1..55da639a 100644 --- a/models/silver/core/silver__transaction_index.sql +++ b/models/silver/core/silver__transaction_index.sql @@ -3,7 +3,7 @@ unique_key = 'tx_id', incremental_strategy = 'delete+insert', cluster_by = ["_partition_by_block_id", "tx_id"], - tags = ["core"] + tags = ["core", "scheduled_core"] ) }} WITH blocks AS ( diff --git a/models/silver/core/silver__transactions.sql b/models/silver/core/silver__transactions.sql index a81e8950..ee0415d0 100644 --- a/models/silver/core/silver__transactions.sql +++ b/models/silver/core/silver__transactions.sql @@ -4,7 
+4,7 @@ incremental_predicates = ['block_number >= (select min(block_number) from ' ~ generate_tmp_view_name(this) ~ ')'], unique_key = 'tx_id', cluster_by = ["_inserted_timestamp::DATE", "block_number"], - tags = ["core"] + tags = ["core", "scheduled_core"] ) }} -- depends_on: {{ ref('silver__blocks') }} WITH bronze_transactions AS ( diff --git a/models/silver/core/silver__transactions_final.sql b/models/silver/core/silver__transactions_final.sql index 2c52adfd..51c739e0 100644 --- a/models/silver/core/silver__transactions_final.sql +++ b/models/silver/core/silver__transactions_final.sql @@ -4,7 +4,7 @@ incremental_predicates = ['block_number >= (select min(block_number) from ' ~ generate_tmp_view_name(this) ~ ')'], unique_key = 'tx_id', cluster_by = ["block_number", "tx_id"], - tags = ["core"], + tags = ["core", "scheduled_core"], post_hook = "ALTER TABLE {{ this }} ADD SEARCH OPTIMIZATION" ) }} diff --git a/models/silver/ez/silver__block_miner_rewards.sql b/models/silver/ez/silver__block_miner_rewards.sql index 6d474ba7..098f04a6 100644 --- a/models/silver/ez/silver__block_miner_rewards.sql +++ b/models/silver/ez/silver__block_miner_rewards.sql @@ -3,7 +3,7 @@ incremental_strategy = 'delete+insert', unique_key = 'block_number', cluster_by = ["block_number", "block_timestamp::DATE"], - tags = ["core", "ez"], + tags = ["core", "ez", "scheduled_non_core" ], post_hook = "ALTER TABLE {{ this }} ADD SEARCH OPTIMIZATION" ) }} diff --git a/models/silver/labels/silver__labels.sql b/models/silver/labels/silver__labels.sql index db5725a8..b276c882 100644 --- a/models/silver/labels/silver__labels.sql +++ b/models/silver/labels/silver__labels.sql @@ -1,7 +1,7 @@ {{ config( materialized = 'incremental', unique_key = 'address', - tags = ['core'] + tags = ["core", "scheduled_non_core"] ) }} SELECT diff --git a/models/silver/price/silver__price_all_providers_hourly.sql b/models/silver/price/silver__price_all_providers_hourly.sql index 19719513..51741e69 100644 --- 
a/models/silver/price/silver__price_all_providers_hourly.sql +++ b/models/silver/price/silver__price_all_providers_hourly.sql @@ -2,7 +2,7 @@ materialized = 'incremental', unique_key = 'id', cluster_by = ['hour::DATE'], - tags = ['prices', 'core'] + tags = ["prices", "core", "scheduled_non_core"] ) }} WITH coinmarketcap AS ( diff --git a/models/silver/price/silver__price_coingecko_hourly.sql b/models/silver/price/silver__price_coingecko_hourly.sql index 291515f7..f8ce2632 100644 --- a/models/silver/price/silver__price_coingecko_hourly.sql +++ b/models/silver/price/silver__price_coingecko_hourly.sql @@ -2,7 +2,7 @@ materialized = 'incremental', unique_key = 'recorded_hour', cluster_by = ['recorded_hour ::DATE'], - tags = ['prices', 'core'] + tags = ["prices", "core", "scheduled_non_core"] ) }} WITH prices AS ( diff --git a/models/silver/price/silver__price_coinmarketcap_hourly.sql b/models/silver/price/silver__price_coinmarketcap_hourly.sql index 9864ce81..4dbb8a83 100644 --- a/models/silver/price/silver__price_coinmarketcap_hourly.sql +++ b/models/silver/price/silver__price_coinmarketcap_hourly.sql @@ -2,7 +2,7 @@ materialized = 'incremental', unique_key = 'recorded_hour', cluster_by = ['recorded_hour ::DATE'], - tags = ['prices', 'core'] + tags = ["prices", "core", "scheduled_non_core"] ) }} WITH prices AS ( diff --git a/models/silver/price/silver__price_coinpaprika_hourly.sql b/models/silver/price/silver__price_coinpaprika_hourly.sql index 024f6d72..5cb52a67 100644 --- a/models/silver/price/silver__price_coinpaprika_hourly.sql +++ b/models/silver/price/silver__price_coinpaprika_hourly.sql @@ -3,7 +3,7 @@ unique_key = 'recorded_hour', incremental_strategy='delete+insert', cluster_by = ['recorded_hour ::DATE'], - tags = ['prices', 'core'] + tags = ["prices", "core", "scheduled_non_core"] ) }} WITH prices AS ( From 43062ece2302b8f278c23504ad0bd05e8f2135c7 Mon Sep 17 00:00:00 2001 From: Jack Forgash <58153492+forgxyz@users.noreply.github.com> Date: Fri, 10 Nov 2023 
12:24:02 -0700 Subject: [PATCH 2/2] stagger non-core to start after core finishes --- data/github_actions__workflows.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/github_actions__workflows.csv b/data/github_actions__workflows.csv index 66226b5e..2c9a084d 100644 --- a/data/github_actions__workflows.csv +++ b/data/github_actions__workflows.csv @@ -1,6 +1,6 @@ workflow_name,workflow_schedule dbt_run_scheduled,"*/30 * * * *" -dbt_run_scheduled_non_core,"0 * * * *" +dbt_run_scheduled_non_core,"7 * * * *" dbt_run_streamline_blocks_hash_realtime,"0,15,30,45 * * * *" dbt_run_streamline_realtime,"5,20,35,50 * * * *" dbt_test_tasks,"0,30 * * * *"