diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65828fed5..97888f9ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -74,6 +74,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Simple interactive shell
 - Add pdf rag template
 - Updated llm_finetuning template
+- Raise an error when a SQL table or column name exceeds the database length limit, and truncate UUIDs.
 
 #### Bug Fixes
 
diff --git a/plugins/ibis/superduper_ibis/db_helper.py b/plugins/ibis/superduper_ibis/db_helper.py
index 0a600daaf..21e7867f3 100644
--- a/plugins/ibis/superduper_ibis/db_helper.py
+++ b/plugins/ibis/superduper_ibis/db_helper.py
@@ -58,8 +58,7 @@ class DBHelper:
     """
 
     match_dialect = "base"
-    table_truncate = {'postgres': 63}
-    table_truncate_map = _KeyEqualDefaultDict()
+    truncates = {"postgres": {"column": 63, "table": 63}}
 
     def __init__(self, dialect):
         self.dialect = dialect
@@ -77,13 +76,21 @@ def process_before_insert(self, table_name, datas, conn):
 
         columns = conn.table(table_name).columns
         for column in datas.columns:
-            if conn.name in self.table_truncate:
-                n = self.table_truncate[conn.name]
+            if conn.name in self.truncates:
+                n = self.truncates[conn.name]["column"]
                 if len(column) > n:
-                    self.table_truncate_map[column[:n]] = column
-
-        columns = list(map(lambda x: self.table_truncate_map[x], columns))
+                    raise Exception(
+                        f"{conn.name} database has limit of {n} for column name."
+                    )
         datas = datas[columns]
+        # Table names have their own limit, independent of the column loop.
+        if conn.name in self.truncates:
+            n = self.truncates[conn.name]["table"]
+            if len(table_name) > n:
+                raise Exception(
+                    f"{conn.name} database has limit of {n} for table name."
+                )
+
         return table_name, pd.DataFrame(datas)
 
     def process_schema_types(self, schema_mapping):
diff --git a/plugins/ibis/superduper_ibis/query.py b/plugins/ibis/superduper_ibis/query.py
index 1c7b0a4b3..ca137cf79 100644
--- a/plugins/ibis/superduper_ibis/query.py
+++ b/plugins/ibis/superduper_ibis/query.py
@@ -215,12 +215,6 @@ def _execute(self, parent, method="encode"):
             ) from e
 
         assert isinstance(output, pandas.DataFrame)
-        table_truncate_map = self.db.databackend.db_helper.table_truncate_map
-        columns = {}
-        for c in output.columns:
-            columns[c] = table_truncate_map[c]
-        output = output.rename(columns=columns)
-
         output = output.to_dict(orient="records")
         component_table = self.db.load('table', self.table)
         return SuperDuperCursor(
diff --git a/superduper/base/document.py b/superduper/base/document.py
index ce7815c8d..67f5b5d90 100644
--- a/superduper/base/document.py
+++ b/superduper/base/document.py
@@ -34,6 +34,7 @@
 ContentType = t.Union[t.Dict, Encodable]
 LeafMetaType = t.Type['Leaf']
 
+_VERSION_LIMIT = 1000
 # TODO is this used for anything?
 _LEAF_TYPES = {
     'component': Component,
@@ -640,6 +641,12 @@ def _deep_flat_decode(r, builds, getters: _Getters, db: t.Optional['Datalayer']
     return r
 
 
+def _check_if_version(x):
+    # A version is a small decimal integer; anything else is treated as a uuid.
+    # ``isdecimal`` (unlike ``isnumeric``) guarantees that ``int(x)`` cannot raise.
+    return x.isdecimal() and int(x) < _VERSION_LIMIT
+
+
 def _get_component(db, path):
     parts = path.split(':')
     if len(parts) == 1:
@@ -647,7 +654,7 @@ def _get_component(db, path):
     if len(parts) == 2:
         return db.load(type_id=parts[0], identifier=parts[1])
     if len(parts) == 3:
-        if not parts[2].isnumeric():
+        if not _check_if_version(parts[2]):
             return db.load(uuid=parts[2])
         return db.load(type_id=parts[0], identifier=parts[1], version=parts[2])
     raise ValueError(f'Invalid component reference: {path}')
diff --git a/superduper/base/leaf.py b/superduper/base/leaf.py
index 91d54ce80..958b587bb 100644
--- a/superduper/base/leaf.py
+++ b/superduper/base/leaf.py
@@ -119,7 +119,7 @@ def __new__(mcs, name, bases, namespace):
 
 def build_uuid():
     """Build UUID."""
-    return str(uuid.uuid4()).replace('-', '')
+    return uuid.uuid4().hex[:16]
 
 
 class Leaf(metaclass=LeafMeta):
diff --git a/test/configs/default.yaml b/test/configs/default.yaml
index 6551721ca..bebac24e7 100644
--- a/test/configs/default.yaml
+++ b/test/configs/default.yaml
@@ -1,4 +1,3 @@
-artifact_store: null
 data_backend: mongomock://test_db
 auto_schema: false
 force_apply: true