From c6352756867d13c8f150761a1f206f7d7590f327 Mon Sep 17 00:00:00 2001 From: Rich Piazza Date: Sun, 24 Nov 2024 20:26:38 -0500 Subject: [PATCH] fix dictionary list, add some doc --- .../database_backend_base.py | 56 ++++++---- .../database_backends/postgres_backend.py | 9 ++ .../datastore/relational_db/input_creation.py | 45 +++++++- .../relational_db/relational_db_testing.py | 21 ++++ .../datastore/relational_db/table_creation.py | 103 +++++++++++++----- 5 files changed, 185 insertions(+), 49 deletions(-) diff --git a/stix2/datastore/relational_db/database_backends/database_backend_base.py b/stix2/datastore/relational_db/database_backends/database_backend_base.py index bb03f55f..e5082451 100644 --- a/stix2/datastore/relational_db/database_backends/database_backend_base.py +++ b/stix2/datastore/relational_db/database_backends/database_backend_base.py @@ -14,13 +14,22 @@ def __init__(self, database_connection_url, force_recreate=False, **kwargs: Any) self.database_exists = database_exists(database_connection_url) if force_recreate: - if self.database_exists: - drop_database(database_connection_url) - create_database(database_connection_url) - self.database_exists = database_exists(database_connection_url) + self._create_database() self.database_connection = create_engine(database_connection_url) + def _create_database(self): + if self.database_exists: + drop_database(self.database_connection_url) + create_database(self.database_connection_url) + self.database_exists = database_exists(self.database_connection_url) + + # ========================================================================= + # schema methods + + # the base methods assume schemas are not supported for the database + # --------------------------------------------------------------------------- + def _create_schemas(self): pass @@ -29,22 +38,6 @@ def determine_schema_name(stix_object): return "" @staticmethod - def determine_stix_type(stix_object): - if isinstance(stix_object, _DomainObject): - 
return "sdo" - elif isinstance(stix_object, _Observable): - return "sco" - elif isinstance(stix_object, _RelationshipObject): - return "sro" - elif isinstance(stix_object, _MetaObject): - return "common" - - def _create_database(self): - if self.database_exists: - drop_database(self.database_connection.url) - create_database(self.database_connection.url) - self.database_exists = database_exists(self.database_connection.url) - def schema_for(stix_class): return "" @@ -52,6 +45,10 @@ def schema_for(stix_class): def schema_for_core(): return "" + # ========================================================================= + # sql type methods + + # Database specific SQL types for STIX property classes # you must implement the next 4 methods in the subclass @staticmethod @@ -70,6 +67,9 @@ def determine_sql_type_for_hex_property(): # noqa: F811 def determine_sql_type_for_timestamp_property(): # noqa: F811 pass + # ------------------------------------------------------------------ + # Common SQL types for STIX property classes + @staticmethod def determine_sql_type_for_kill_chain_phase(): # noqa: F811 return None @@ -102,11 +102,25 @@ def determine_sql_type_for_key_as_int(): # noqa: F811 def determine_sql_type_for_key_as_id(): # noqa: F811 return Text + # ========================================================================= + # Other methods + + @staticmethod + def determine_stix_type(stix_object): + if isinstance(stix_object, _DomainObject): + return "sdo" + elif isinstance(stix_object, _Observable): + return "sco" + elif isinstance(stix_object, _RelationshipObject): + return "sro" + elif isinstance(stix_object, _MetaObject): + return "common" + @staticmethod def array_allowed(): return False - def generate_value(self, stix_type, value): + def process_value_for_insert(self, stix_type, value): sql_type = stix_type.determine_sql_type(self) if sql_type == self.determine_sql_type_for_string_property(): return value diff --git 
a/stix2/datastore/relational_db/database_backends/postgres_backend.py b/stix2/datastore/relational_db/database_backends/postgres_backend.py index ca501dfb..931636e2 100644 --- a/stix2/datastore/relational_db/database_backends/postgres_backend.py +++ b/stix2/datastore/relational_db/database_backends/postgres_backend.py @@ -22,6 +22,9 @@ class PostgresBackend(DatabaseBackend): def __init__(self, database_connection_url=default_database_connection_url, force_recreate=False, **kwargs: Any): super().__init__(database_connection_url, force_recreate=force_recreate, **kwargs) + # ========================================================================= + # schema methods + def _create_schemas(self): with self.database_connection.begin() as trans: trans.execute(CreateSchema("common", if_not_exists=True)) @@ -48,6 +51,9 @@ def schema_for(stix_class): def schema_for_core(): return "common" + # ========================================================================= + # sql type methods (overrides) + @staticmethod def determine_sql_type_for_binary_property(): # noqa: F811 return PostgresBackend.determine_sql_type_for_string_property() @@ -61,6 +67,9 @@ def determine_sql_type_for_hex_property(): # noqa: F811 def determine_sql_type_for_timestamp_property(): # noqa: F811 return TIMESTAMP(timezone=True) + # ========================================================================= + # Other methods + @staticmethod def array_allowed(): return True diff --git a/stix2/datastore/relational_db/input_creation.py b/stix2/datastore/relational_db/input_creation.py index 6bc04aa2..78c491e6 100644 --- a/stix2/datastore/relational_db/input_creation.py +++ b/stix2/datastore/relational_db/input_creation.py @@ -14,6 +14,22 @@ from stix2.utils import STIXdatetime from stix2.v21.common import KillChainPhase +# ========================================================================= +# generate_insert_information methods + +# positional arguments +# +# name: property name +# stix_object: STIX 
object data to be inserted in the table + +# optional arguments +# +# data_sink: STIX data sink object +# table_name: name of the related table +# schema_name: name of the schema for the related table, if it exists +# parent_table_name: the name of the parent table, if called for a child table +# level: what "level" of child table is involved +# foreign_key_value: value for the "id" column when stix_object has no "id" of its own @add_method(Property) def generate_insert_information(self, name, stix_object, **kwargs): # noqa: F811 @@ -41,6 +57,16 @@ def is_valid_type(cls, valid_types): return cls in valid_types or instance_in_valid_types(cls, valid_types) +def generate_insert_for_dictionary_list(table, next_id, value): + insert_stmts = list() + for v in value: + bindings = dict() + bindings["id"] = next_id + bindings["value"] = v + insert_stmts.append(insert(table).values(bindings)) + return insert_stmts + + @add_method(DictionaryProperty) def generate_insert_information(self, dictionary_name, stix_object, **kwargs): # noqa: F811 bindings = dict() @@ -60,6 +86,7 @@ def generate_insert_information(self, dictionary_name, stix_object, **kwargs): # binary, boolean, float, hex, # integer, string, timestamp valid_types = stix_object._properties[dictionary_name].valid_types + child_table_inserts = list() for name, value in stix_object[dictionary_name].items(): bindings = dict() if "id" in stix_object: @@ -67,7 +94,16 @@ def generate_insert_information(self, dictionary_name, stix_object, **kwargs): elif foreign_key_value: bindings["id"] = foreign_key_value if not valid_types or len(self.valid_types) == 1: - value_binding = "value" + if is_valid_type(ListProperty, valid_types): + value_binding = "values" + if not data_sink.db_backend.array_allowed(): + next_id = data_sink.next_id() + table_child = data_sink.tables_dictionary[ + canonicalize_table_name(table_name + "_" + dictionary_name + "_" + "values", schema_name)] + child_table_inserts = generate_insert_for_dictionary_list(table_child, next_id, value) + value = next_id + else: +
value_binding = "value" elif isinstance(value, int) and is_valid_type(IntegerProperty, valid_types): value_binding = "integer_value" elif isinstance(value, str) and is_valid_type(StringProperty, valid_types): @@ -86,6 +122,7 @@ def generate_insert_information(self, dictionary_name, stix_object, **kwargs): insert_statements.append(insert(table).values(bindings)) + insert_statements.extend(child_table_inserts) return insert_statements @@ -141,7 +178,7 @@ def generate_insert_information(self, name, stix_object, **kwargs): # noqa: F81 @add_method(HexProperty) def generate_insert_information(self, name, stix_object, data_sink, **kwargs): # noqa: F811 - return {name: data_sink.db_backend.generate_value(self, stix_object[name])} + return {name: data_sink.db_backend.process_value_for_insert(self, stix_object[name])} def generate_insert_for_hashes( @@ -248,7 +285,7 @@ def generate_insert_information( # noqa: F811 else: if db_backend.array_allowed(): if isinstance(self.contained, HexProperty): - return {name: [data_sink.db_backend.generate_value(self.contained, x) for x in stix_object[name]]} + return {name: [data_sink.db_backend.process_value_for_insert(self.contained, x) for x in stix_object[name]]} else: return {name: stix_object[name]} @@ -283,6 +320,8 @@ def generate_insert_information(self, name, stix_object, **kwargs): # noqa: F81 def generate_insert_information(self, name, stix_object, **kwargs): # noqa: F811 return {name: stix_object[name]} +# ========================================================================= + def derive_column_name(prop): contained_property = prop.contained diff --git a/stix2/datastore/relational_db/relational_db_testing.py b/stix2/datastore/relational_db/relational_db_testing.py index a6376fd2..54bf0aae 100644 --- a/stix2/datastore/relational_db/relational_db_testing.py +++ b/stix2/datastore/relational_db/relational_db_testing.py @@ -7,6 +7,23 @@ from stix2.datastore.relational_db.relational_db import RelationalDBStore import 
stix2.properties +email_message = stix2.EmailMessage( + type="email-message", + spec_version="2.1", + id="email-message--0c57a381-2a17-5e61-8754-5ef96efb286c", + from_ref="email-addr--9b7e29b3-fd8d-562e-b3f0-8fc8134f5dda", + to_refs=["email-addr--d1b3bf0c-f02a-51a1-8102-11aba7959868"], + is_multipart=False, + date="2004-04-19T12:22:23.000Z", + subject="Did you see this?", + additional_header_fields={ + "Reply-To": [ + "steve@example.com", + "jane@example.com" + ] + } +) + directory_stix_object = stix2.Directory( path="/foo/bar/a", path_enc="latin1", @@ -279,6 +296,10 @@ def main(): if store.sink.db_backend.database_exists: + x=email_message + + store.add(x) + td = test_dictionary() store.add(td) diff --git a/stix2/datastore/relational_db/table_creation.py b/stix2/datastore/relational_db/table_creation.py index 32965f02..f57ff856 100644 --- a/stix2/datastore/relational_db/table_creation.py +++ b/stix2/datastore/relational_db/table_creation.py @@ -31,14 +31,14 @@ def create_array_column(property_name, contained_sql_type, optional): ) -def create_array_child_table(metadata, db_backend, parent_table_name, table_name_suffix, property_name, contained_sql_type): - schema_name = db_backend.schema_for_core() +def create_array_child_table(metadata, db_backend, parent_table_name, schema_name, table_name_suffix, property_name, + contained_sql_type, foreign_key_property="id"): columns = [ Column( - "id", + foreign_key_property, db_backend.determine_sql_type_for_key_as_id(), ForeignKey( - canonicalize_table_name(parent_table_name, schema_name) + ".id", + canonicalize_table_name(parent_table_name, schema_name) + "." 
+ foreign_key_property, ondelete="CASCADE", ), nullable=False, @@ -49,7 +49,7 @@ def create_array_child_table(metadata, db_backend, parent_table_name, table_name nullable=False, ), ] - return Table(parent_table_name + table_name_suffix, metadata, *columns, schema=schema_name) + return Table(canonicalize_table_name(parent_table_name + table_name_suffix), metadata, *columns, schema=schema_name) def derive_column_name(prop): @@ -290,6 +290,7 @@ def create_core_table(metadata, db_backend, stix_type_name): metadata, db_backend, table_name, + db_backend.schema_for_core(), "_labels", "label", db_backend.determine_sql_type_for_string_property(), @@ -308,12 +309,15 @@ def create_core_table(metadata, db_backend, stix_type_name): ) return tables +# ========================================================================= +# sql type methods + +# STIX classes defer to the DB backend @add_method(Property) def determine_sql_type(self, db_backend): # noqa: F811 pass - @add_method(KillChainPhase) def determine_sql_type(self, db_backend): # noqa: F811 return db_backend.determine_sql_type_for_kill_chain_phase() @@ -358,8 +362,26 @@ def determine_sql_type(self, db_backend): # noqa: F811 def determine_sql_type(self, db_backend): # noqa: F811 return db_backend.determine_sql_type_for_timestamp_property() - -# ----------------------------- generate_table_information methods ---------------------------- +# ========================================================================= +# generate_table_information methods + +# positional arguments +# +# +# name property name +# db_backend Class instance related to the database backend + +# optional arguments +# +# metadata: SQLAlchemy metadata +# schema_name: name of the schema for the related table, if it exists +# table_name: name of the related table +# is_extension: is this related to a table for an extension +# is_embedded_object: is this related to a table for an embedded object +# is_list: is this property a list?
+# level: what "level" of child table is involved +# parent_table_name: the name of the parent table, if called for a child table +# core_table: name of the related core table @add_method(KillChainPhase) def generate_table_information( # noqa: F811 @@ -404,7 +426,7 @@ def generate_table_information(self, name, db_backend, **kwargs): # noqa: F811 @add_method(DictionaryProperty) def generate_table_information(self, name, db_backend, metadata, schema_name, table_name, is_extension=False, **kwargs): # noqa: F811 columns = list() - + tables = list() columns.append( Column( "id", @@ -428,17 +450,49 @@ def generate_table_information(self, name, db_backend, metadata, schema_name, ta # its a class determine_sql_type_from_stix(self.valid_types[0], db_backend), nullable=False, + ), ) else: contained_class = self.valid_types[0].contained - columns.append( - create_array_column( - "value", - contained_class.determine_sql_type(db_backend), - False, - ), - ) + if db_backend.array_allowed(): + columns.append( + create_array_column( + "values", + contained_class.determine_sql_type(db_backend), + False, + ), + ) + else: + columns.append( + Column( + "values", + db_backend.determine_sql_type_for_key_as_int(), + unique = True, + ), + ) + child_columns = [ + Column( + "id", + db_backend.determine_sql_type_for_key_as_int(), + ForeignKey( + canonicalize_table_name(table_name + "_" + name, schema_name) + ".values", + ondelete="CASCADE", + ), + nullable=False, + ), + Column( + "value", + db_backend.determine_sql_type_for_string_property(), + nullable=False, + ), + ] + tables.append( + Table( + canonicalize_table_name(table_name + "_" + name + "_" + "values"), + metadata, *child_columns, schema=schema_name, + ), + ) else: for column_type in self.valid_types: sql_type = determine_sql_type_from_stix(column_type, db_backend) @@ -456,15 +510,13 @@ def generate_table_information(self, name, db_backend, metadata, schema_name, ta nullable=False, ), ) - return [ - Table( - 
canonicalize_table_name(table_name + "_" + name), - metadata, - *columns, - UniqueConstraint("id", "name"), - schema=schema_name, - ), - ] + + tables.append(Table(canonicalize_table_name(table_name + "_" + name), + metadata, + *columns, + UniqueConstraint("id", "name"), + schema=schema_name)) + return tables @add_method(EmbeddedObjectProperty) @@ -753,6 +805,7 @@ def generate_table_information(self, name, db_backend, **kwargs): # noqa: F811 default=self._fixed_value if hasattr(self, "_fixed_value") else None, ) +# ========================================================================= def generate_object_table( stix_object_class, db_backend, metadata, schema_name, foreign_key_name=None,