Skip to content
This repository has been archived by the owner on May 17, 2024. It is now read-only.

Commit

Permalink
Cleanup and better docs
Browse files Browse the repository at this point in the history
  • Loading branch information
erezsh committed Nov 10, 2022
1 parent e509ed9 commit a75b8a1
Show file tree
Hide file tree
Showing 13 changed files with 55 additions and 35 deletions.
2 changes: 1 addition & 1 deletion data_diff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from .tracking import disable_tracking
from .databases import connect
from .sqeleton.databases.database_types import DbKey, DbTime, DbPath
from .sqeleton.databases import DbKey, DbTime, DbPath
from .diff_tables import Algorithm
from .hashdiff_tables import HashDiffer, DEFAULT_BISECTION_THRESHOLD, DEFAULT_BISECTION_FACTOR
from .joindiff_tables import JoinDiffer
Expand Down
2 changes: 1 addition & 1 deletion data_diff/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .hashdiff_tables import HashDiffer, DEFAULT_BISECTION_THRESHOLD, DEFAULT_BISECTION_FACTOR
from .joindiff_tables import TABLE_WRITE_LIMIT, JoinDiffer
from .table_segment import TableSegment
from .sqeleton.databases.database_types import create_schema
from .sqeleton.databases import create_schema
from .databases import connect
from .parse_time import parse_time_before_now, UNITS_STR, ParseError
from .config import apply_config_from_file
Expand Down
2 changes: 1 addition & 1 deletion data_diff/databases/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from data_diff.sqeleton.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, QueryError, ConnectError
from data_diff.sqeleton.databases import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, QueryError, ConnectError

from .postgresql import PostgreSQL
from .mysql import MySQL
Expand Down
2 changes: 1 addition & 1 deletion data_diff/diff_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from .thread_utils import ThreadedYielder
from .table_segment import TableSegment
from .tracking import create_end_event_json, create_start_event_json, send_event_json, is_tracking_enabled
from .sqeleton.databases.database_types import IKey
from .sqeleton.databases import IKey

logger = getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion data_diff/hashdiff_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from .utils import safezip
from .thread_utils import ThreadedYielder
from .sqeleton.databases.database_types import ColType_UUID, NumericType, PrecisionType, StringType
from .sqeleton.databases import ColType_UUID, NumericType, PrecisionType, StringType
from .table_segment import TableSegment

from .diff_tables import TableDiffer
Expand Down
4 changes: 1 addition & 3 deletions data_diff/joindiff_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@

from runtype import dataclass

from .sqeleton.databases.database_types import DbPath, NumericType
from .sqeleton.databases import MySQL, BigQuery, Presto, Oracle, Snowflake
from .sqeleton.databases.base import Database
from .sqeleton.databases import Database, DbPath, NumericType, MySQL, BigQuery, Presto, Oracle, Snowflake
from .sqeleton.queries import table, sum_, min_, max_, avg
from .sqeleton.queries.api import and_, if_, or_, outerjoin, leftjoin, rightjoin, this, ITable
from .sqeleton.queries.ast_classes import Concat, Count, Expr, Random, TablePath
Expand Down
4 changes: 1 addition & 3 deletions data_diff/query_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

from contextlib import suppress

from .sqeleton.databases.database_types import DbPath
from .sqeleton.databases.base import QueryError
from .sqeleton.databases import Oracle
from .sqeleton.databases import DbPath, QueryError, Oracle
from .sqeleton.queries import table, commit, Expr


Expand Down
21 changes: 19 additions & 2 deletions data_diff/sqeleton/databases/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
from .database_types import AbstractDatabase, AbstractDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue
from .base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, QueryError, ConnectError, BaseDialect
from .database_types import (
AbstractDatabase,
AbstractDialect,
AbstractMixin_MD5,
AbstractMixin_NormalizeValue,
DbKey,
DbTime,
DbPath,
create_schema,
IKey,
ColType_UUID,
NumericType,
PrecisionType,
StringType,
ColType,
Native_UUID,
Schema,
)
from .base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, QueryError, ConnectError, BaseDialect, Database

from .postgresql import PostgreSQL
from .mysql import MySQL
Expand Down
24 changes: 9 additions & 15 deletions data_diff/sqeleton/databases/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _one(seq):


class ThreadLocalInterpreter:
"""An interpeter used to execute a sequence of queries within the same thread.
"""An interpeter used to execute a sequence of queries within the same thread and cursor.
Useful for cursor-sensitive operations, such as creating a temporary table.
"""
Expand Down Expand Up @@ -217,21 +217,9 @@ class Database(AbstractDatabase):
"""

default_schema: str = None
dialect: AbstractDialect = None

SUPPORTS_ALPHANUMS = True
SUPPORTS_UNIQUE_CONSTAINT = False

@property
@abstractmethod
def CONNECT_URI_HELP(self) -> str:
"Example URI to show the user in help and error messages"

@property
@abstractmethod
def CONNECT_URI_PARAMS(self) -> List[str]:
"List of parameters given in the path of the URI"

CONNECT_URI_KWPARAMS = []

_interactive = False
Expand All @@ -241,7 +229,12 @@ def name(self):
return type(self).__name__

def query(self, sql_ast: Union[Expr, Generator], res_type: type = list):
"Query the given SQL code/AST, and attempt to convert the result to type 'res_type'"
"""Query the given SQL code/AST, and attempt to convert the result to type 'res_type'
If given a generator, it will execute all the yielded sql queries with the same thread and cursor.
The results of the queries a returned by the `yield` stmt (using the .send() mechanism).
It's a cleaner approach than exposing cursors, but may not be enough in all cases.
"""

compiler = Compiler(self)
if isinstance(sql_ast, Generator):
Expand Down Expand Up @@ -445,6 +438,7 @@ def _query_in_worker(self, sql_code: Union[str, ThreadLocalInterpreter]):

@abstractmethod
def create_connection(self):
"Return a connection instance, that supports the .cursor() method."
...

def close(self):
Expand All @@ -455,7 +449,7 @@ def is_autocommit(self) -> bool:
return False


CHECKSUM_HEXDIGITS = 15 # Must be 15 or lower
CHECKSUM_HEXDIGITS = 15 # Must be 15 or lower, otherwise SUM() overflows
MD5_HEXDIGITS = 32

_CHECKSUM_BITSIZE = CHECKSUM_HEXDIGITS << 2
Expand Down
20 changes: 17 additions & 3 deletions data_diff/sqeleton/databases/database_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,6 @@ class UnknownColType(ColType):
class AbstractDialect(ABC):
"""Dialect-dependent query expressions"""

name: str

@property
@abstractmethod
def name(self) -> str:
Expand Down Expand Up @@ -259,6 +257,7 @@ def normalize_boolean(self, value: str, coltype: Boolean) -> str:
return self.to_string(value)

def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:
"""Creates an SQL expression, that strips uuids of artifacts like whitespace."""
if isinstance(coltype, String_UUID):
return f"TRIM({value})"
return self.to_string(value)
Expand Down Expand Up @@ -300,6 +299,21 @@ def md5_as_int(self, s: str) -> str:


class AbstractDatabase:
@property
@abstractmethod
def dialect(self) -> AbstractDialect:
"The dialect of the database. Used internally by Database, and also available publicly."

@property
@abstractmethod
def CONNECT_URI_HELP(self) -> str:
"Example URI to show the user in help and error messages"

@property
@abstractmethod
def CONNECT_URI_PARAMS(self) -> List[str]:
"List of parameters given in the path of the URI"

@abstractmethod
def _query(self, sql_code: str) -> list:
"Send query to database and return result"
Expand Down Expand Up @@ -357,7 +371,7 @@ def _normalize_table_path(self, path: DbPath) -> DbPath:
@property
@abstractmethod
def is_autocommit(self) -> bool:
...
"Return whether the database autocommits changes. When false, COMMIT statements are skipped."


Schema = CaseAwareMapping
Expand Down
2 changes: 1 addition & 1 deletion data_diff/sqeleton/queries/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from runtype import dataclass

from data_diff.utils import ArithString
from data_diff.sqeleton.databases.database_types import AbstractDatabase, AbstractDialect, DbPath
from data_diff.sqeleton.databases import AbstractDatabase, AbstractDialect, DbPath

import contextvars

Expand Down
2 changes: 1 addition & 1 deletion data_diff/sqeleton/queries/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Callable, Sequence
from runtype import dataclass

from data_diff.sqeleton.databases.database_types import ColType, Native_UUID
from data_diff.sqeleton.databases import ColType, Native_UUID

from .compiler import Compiler
from .ast_classes import Expr, ExprNode, Concat
Expand Down
3 changes: 1 addition & 2 deletions data_diff/table_segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from runtype import dataclass

from .utils import ArithString, split_space
from .sqeleton.databases.base import Database
from .sqeleton.databases.database_types import DbPath, DbKey, DbTime, Schema, create_schema
from .sqeleton.databases import Database, DbPath, DbKey, DbTime, Schema, create_schema
from .sqeleton.queries import Count, Checksum, SKIP, table, this, Expr, min_, max_
from .sqeleton.queries.extras import ApplyFuncAndNormalizeAsString, NormalizeAsString

Expand Down

0 comments on commit a75b8a1

Please sign in to comment.