From 45daec72f4953be4c157e6e2ab5671455a569396 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Wed, 27 Nov 2024 09:41:57 -0600 Subject: [PATCH] Ensure MicrobatchModelRunner doesn't double compile batches We were compiling the node for each batch _twice_. Besides making microbatch models more expensive than they needed to be, double compiling wasn't causing any issue. However, the first compilation was happening _before_ we had added the batch context information to the model node for the batch. This was causing models which try to access the `batch_context` information on the model to blow up, which was undesirable. As such, we've now gone and skipped the first compilation. We've done this similarly to how SavedQuery nodes skip compilation. --- core/dbt/task/run.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index d6b198d668d..7c72f17bfd3 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -341,6 +341,13 @@ def __init__(self, config, adapter, node, node_index: int, num_nodes: int): self.batches: Dict[int, BatchType] = {} self.relation_exists: bool = False + def compile(self, manifest: Manifest): + # The default compile function is _always_ called. However, we do our + # compilation _later_ in `_execute_microbatch_materialization`. This + # meant the node was being compiled _twice_ for each batch. To get around + # this, we've overridden the default compile method to do nothing + return self.node + def set_batch_idx(self, batch_idx: int) -> None: self.batch_idx = batch_idx