From 96dc53552142e3c80c055ff003e7439fca864193 Mon Sep 17 00:00:00 2001 From: Chris Riccomini Date: Wed, 1 Feb 2023 08:48:45 -0800 Subject: [PATCH 1/2] Add README link to Python API Very minor update to point README and docs/index.md to the Python API. Since there's no index, I used the analyzers page. --- README.md | 2 +- docs/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d7421346..5d28bdeb 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Recap makes it easy for engineers to build infrastructure and tools that need me * Supports major cloud data warehouses and Postgres * No external system dependencies required * Designed for the [CLI](https://docs.recap.cloud/latest/cli/) -* Runs as a Python library or [REST API](https://docs.recap.cloud/latest/rest/) +* Runs as a [Python API](https://docs.recap.cloud/latest/api/recap.analyzers/) or [REST API](https://docs.recap.cloud/latest/rest/) * Fully [pluggable](https://docs.recap.cloud/latest/guides/plugins/) ## Installation diff --git a/docs/index.md b/docs/index.md index 27766430..82b7999d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,7 +7,7 @@ Recap makes it easy for engineers to build infrastructure and tools that need me * Supports major cloud data warehouses and PostgreSQL * No external system dependencies required * Designed for the [CLI](cli.md) -* Runs as a Python library or [REST API](rest.md) +* Runs as a [Python API](api/recap.analyzers.md) or [REST API](rest.md) * Fully [pluggable](guides/plugins.md) ## Installation From 8baf83b032f64b7ad59a4aef0abdf41d706cc577 Mon Sep 17 00:00:00 2001 From: Chris Riccomini Date: Wed, 1 Feb 2023 12:02:52 -0800 Subject: [PATCH 2/2] Require fields in SQLAlchemy profile analyzer Fields in the SQLAlchemy profile analyzer were optional. This caused some columns to be matched to `BinaryColumnProfile` when they should have been numeric. 
I fixed the issue by requiring every field to be set explicitly (even if only to None). --- recap/analyzers/sqlalchemy/profile.py | 44 +++++++++++++-------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/recap/analyzers/sqlalchemy/profile.py b/recap/analyzers/sqlalchemy/profile.py index 9f587a50..0874213e 100644 --- a/recap/analyzers/sqlalchemy/profile.py +++ b/recap/analyzers/sqlalchemy/profile.py @@ -2,7 +2,7 @@ import sqlalchemy from .columns import TableColumnAnalyzer, Columns from contextlib import contextmanager -from pydantic import BaseModel +from pydantic import BaseModel, Field from recap.analyzers.abstract import AbstractAnalyzer, BaseMetadataModel from recap.browsers.db import create_browser, TablePath, ViewPath from typing import Generator @@ -16,36 +16,36 @@ class BaseColumnProfile(BaseModel): class BinaryColumnProfile(BaseColumnProfile): - min_length: int | None - max_length: int | None - distinct: int | None - nulls: int | None + min_length: int | None = Field(...) + max_length: int | None = Field(...) + distinct: int | None = Field(...) + nulls: int | None = Field(...) class DateColumnProfile(BaseColumnProfile): - min: str | None - max: str | None - distinct: int | None - nulls: int | None - unix_epochs: int | None + min: str | None = Field(...) + max: str | None = Field(...) + distinct: int | None = Field(...) + nulls: int | None = Field(...) + unix_epochs: int | None = Field(...) class NumericColumnProfile(BaseColumnProfile): - min: int | float | None - max: int | float | None - average: int | float | None - sum: int | float | None - nulls: int | None - zeros: int | None - negatives: int | None + min: int | float | None = Field(...) + max: int | float | None = Field(...) + average: int | float | None = Field(...) + sum: int | float | None = Field(...) + nulls: int | None = Field(...) + zeros: int | None = Field(...) + negatives: int | None = Field(...) 
class StringColumnProfile(BaseColumnProfile): - min_length: int | None - max_length: int | None - distinct: int | None - nulls: int | None - empty_strings: int | None + min_length: int | None = Field(...) + max_length: int | None = Field(...) + distinct: int | None = Field(...) + nulls: int | None = Field(...) + empty_strings: int | None = Field(...) ColumnProfile = (