Skip to content

Commit

Permalink
Merge main into feature/decouple-adapters-from-core (#9240)
Browse files Browse the repository at this point in the history
* moving types_pb2.py to common/events

* Restore warning on unpinned git packages (#9157)

* Support --empty flag for schema-only dry runs (#8971)

* Fix ensuring we produce valid jsonschema artifacts for manifest, catalog, sources, and run-results (#9155)

* Drop `all_refs=True` from jsonschema-ization build process

Passing `all_refs=True` makes it so that Everything is a ref, even
the top level schema. In jsonschema land, this essentially makes the
produced artifact not a full schema, but a fractal object to be included
in a schema. Thus when `$id` is passed in, jsonschema tools blow up
because `$id` is for identifying a schema, which we explicitly weren't
creating. The alternative was to drop the inclusion of `$id`. Howver, we're
intending to create a schema, and having an `$id` is recommended best
practice. Additionally since we were intending to create a schema,
not a fractal, it seemed best to create to full schema.

* Explicity produce jsonschemas using DRAFT_2020_12 dialect

Previously were were implicitly using the `DRAFT_2020_12` dialect through
mashumaro. It felt wise to begin explicitly specifying this. First, it
is closest in available mashumaro provided dialects to what we produced
pre 1.7. Secondly, if mashumaro changes its default for whatever reason
(say a new dialect is added, and mashumaro moves to that), we don't want
to automatically inherit that.

* Bump manifest version to v12

Core 1.7 released with manifest v11, and we don't want to be overriding
that with 1.8. It'd be weird for 1.7 and 1.8 to both have v11 manifests,
but for them to be different, right?

* Begin including schema dialect specification in produced jsonschema

In jsonschema's documentation they state
> It's not always easy to tell which draft a JSON Schema is using.
> You can use the $schema keyword to declare which version of the JSON Schema specification the schema is written to.
> It's generally good practice to include it, though it is not required.

and

> For brevity, the $schema keyword isn't included in most of the examples in this book, but it should always be used in the real world.

Basically, to know how to parse a schema, it's important to include what
schema dialect is being used for the schema specification. The change in
this commit ensures we include that information.

* Create manifest v12 jsonschema specification

* Add change documentation for jsonschema schema production fix

* Bump run-results version to v6

* Generate new v6 run-results jsonschema

* Regenerate catalog v1 and sources v3 with fixed jsonschema production

* Update tests to handle bumped versions of manifest and run-results

---------

Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
Co-authored-by: Michelle Ark <MichelleArk@users.noreply.github.com>
Co-authored-by: Quigley Malcolm <QMalcolm@users.noreply.github.com>
  • Loading branch information
4 people authored Dec 6, 2023
1 parent eb96e3d commit 45b3570
Show file tree
Hide file tree
Showing 21 changed files with 20,456 additions and 464 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231116-234049.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Support --empty flag for schema-only dry runs
time: 2023-11-16T23:40:49.96651-05:00
custom:
Author: michelleark
Issue: "8971"
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20231127-165244.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Ensure we produce valid jsonschema schemas for manifest, catalog, run-results,
and sources
time: 2023-11-27T16:52:44.590313-08:00
custom:
Author: QMalcolm
Issue: "8991"
15 changes: 13 additions & 2 deletions core/dbt/adapters/base/relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class BaseRelation(FakeAPIObject, Hashable):
include_policy: Policy = field(default_factory=lambda: Policy())
quote_policy: Policy = field(default_factory=lambda: Policy())
dbt_created: bool = False
limit: Optional[int] = None

# register relation types that can be renamed for the purpose of replacing relations using stages and backups
# adding a relation type here also requires defining the associated rename macro
Expand Down Expand Up @@ -190,6 +191,15 @@ def render(self) -> str:
# if there is nothing set, this will return the empty string.
return ".".join(part for _, part in self._render_iterator() if part is not None)

def render_limited(self) -> str:
rendered = self.render()
if self.limit is None:
return rendered
elif self.limit == 0:
return f"(select * from {rendered} where false limit 0) _dbt_limit_subq"
else:
return f"(select * from {rendered} limit {self.limit}) _dbt_limit_subq"

def quoted(self, identifier):
return "{quote_char}{identifier}{quote_char}".format(
quote_char=self.quote_character,
Expand All @@ -204,12 +214,14 @@ def add_ephemeral_prefix(name: str):
def create_ephemeral_from(
cls: Type[Self],
relation_config: RelationConfig,
limit: Optional[int],
) -> Self:
# Note that ephemeral models are based on the name.
identifier = cls.add_ephemeral_prefix(relation_config.name)
return cls.create(
type=cls.CTE,
identifier=identifier,
limit=limit,
).quote(identifier=False)

@classmethod
Expand All @@ -223,7 +235,6 @@ def create_from(

config_quoting = relation_config.quoting_dict
config_quoting.pop("column", None)

# precedence: kwargs quoting > relation config quoting > base quoting > default quoting
quote_policy = deep_merge(
cls.get_default_quote_policy().to_dict(omit_none=True),
Expand Down Expand Up @@ -276,7 +287,7 @@ def __hash__(self) -> int:
return hash(self.render())

def __str__(self) -> str:
return self.render()
return self.render() if self.limit is None else self.render_limited()

@property
def database(self) -> Optional[str]:
Expand Down
2 changes: 2 additions & 0 deletions core/dbt/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ def docs_serve(ctx, **kwargs):
@p.profile
@p.profiles_dir
@p.project_dir
@p.empty
@p.select
@p.selector
@p.inline
Expand Down Expand Up @@ -599,6 +600,7 @@ def parse(ctx, **kwargs):
@p.profile
@p.profiles_dir
@p.project_dir
@p.empty
@p.select
@p.selector
@p.state
Expand Down
6 changes: 6 additions & 0 deletions core/dbt/cli/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@
is_flag=True,
)

empty = click.option(
"--empty/--no-empty",
envvar="DBT_EMPTY",
help="If specified, limit input refs and sources to zero rows.",
is_flag=True,
)

enable_legacy_logger = click.option(
"--enable-legacy-logger/--no-enable-legacy-logger",
Expand Down
18 changes: 13 additions & 5 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@
from dbt.contracts.graph.metrics import MetricReference, ResolvedMetricReference
from dbt.contracts.graph.unparsed import NodeVersion
from dbt.common.events.functions import get_metadata_vars
from dbt.common.exceptions import DbtInternalError, DbtRuntimeError, DbtValidationError
from dbt.common.exceptions import (
DbtInternalError,
DbtRuntimeError,
DbtValidationError,
MacrosSourcesUnWriteableError,
)
from dbt.adapters.exceptions import MissingConfigError
from dbt.exceptions import (
CompilationError,
Expand All @@ -66,7 +71,6 @@
TargetNotFoundError,
DbtReferenceError,
)
from dbt.common.exceptions.macros import MacrosSourcesUnWriteableError
from dbt.config import IsFQNResource
from dbt.node_types import NodeType, ModelLanguage

Expand Down Expand Up @@ -209,6 +213,10 @@ def current_project(self):
def Relation(self):
return self.db_wrapper.Relation

@property
def resolve_limit(self) -> Optional[int]:
return 0 if getattr(self.config.args, "EMPTY", False) else None

@abc.abstractmethod
def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
pass
Expand Down Expand Up @@ -524,9 +532,9 @@ def resolve(
def create_relation(self, target_model: ManifestNode) -> RelationProxy:
if target_model.is_ephemeral_model:
self.model.set_cte(target_model.unique_id, None)
return self.Relation.create_ephemeral_from(target_model)
return self.Relation.create_ephemeral_from(target_model, limit=self.resolve_limit)
else:
return self.Relation.create_from(self.config, target_model)
return self.Relation.create_from(self.config, target_model, limit=self.resolve_limit)

def validate(
self,
Expand Down Expand Up @@ -583,7 +591,7 @@ def resolve(self, source_name: str, table_name: str):
target_kind="source",
disabled=(isinstance(target_source, Disabled)),
)
return self.Relation.create_from(self.config, target_source)
return self.Relation.create_from(self.config, target_source, limit=self.resolve_limit)


# metric` implementations
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1551,7 +1551,7 @@ def __init__(self, macros) -> None:


@dataclass
@schema_version("manifest", 11)
@schema_version("manifest", 12)
class WritableManifest(ArtifactMixin):
nodes: Mapping[UniqueID, ManifestNode] = field(
metadata=dict(description=("The nodes defined in the dbt project and its dependencies"))
Expand Down Expand Up @@ -1619,6 +1619,7 @@ def compatible_previous_versions(cls) -> Iterable[Tuple[str, int]]:
("manifest", 8),
("manifest", 9),
("manifest", 10),
("manifest", 11),
]

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/contracts/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def write(self, path: str):


@dataclass
@schema_version("run-results", 5)
@schema_version("run-results", 6)
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
results: Sequence[RunResultOutput]
args: Dict[str, Any] = field(default_factory=dict)
Expand All @@ -276,6 +276,7 @@ def from_execution_results(
def compatible_previous_versions(cls) -> Iterable[Tuple[str, int]]:
return [
("run-results", 4),
("run-results", 5),
]

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/contracts/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
ValidationError,
)
from mashumaro.jsonschema import build_json_schema
from mashumaro.jsonschema.dialects import DRAFT_2020_12
import functools


Expand Down Expand Up @@ -194,7 +195,7 @@ class VersionedSchema(dbtClassMixin):
@classmethod
@functools.lru_cache
def json_schema(cls) -> Dict[str, Any]:
json_schema_obj = build_json_schema(cls, all_refs=True)
json_schema_obj = build_json_schema(cls, dialect=DRAFT_2020_12, with_dialect_uri=True)
json_schema = json_schema_obj.to_dict()
json_schema["$id"] = str(cls.dbt_schema_version)
return json_schema
Expand Down
9 changes: 6 additions & 3 deletions core/dbt/deps/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,14 @@ def _fetch_metadata(
self, project: Project, renderer: PackageRenderer
) -> ProjectPackageMetadata:
path = self._checkout()
# overwrite 'revision' with actual commit SHA
self.revision = git.get_current_sha(path)

# raise warning (or error) if this package is not pinned
if (self.revision == "HEAD" or self.revision in ("main", "master")) and self.warn_unpinned:
warn_or_error(DepsUnpinned(git=self.git))
warn_or_error(DepsUnpinned(revision=self.revision, git=self.git))

# now overwrite 'revision' with actual commit SHA
self.revision = git.get_current_sha(path)

partial = PartialProject.from_project_root(path)
return partial.render_package_metadata(renderer)

Expand Down
Loading

0 comments on commit 45b3570

Please sign in to comment.