From 4ca557b738a5157d5f7ff71ee226a36031bbaef8 Mon Sep 17 00:00:00 2001
From: Cristina
Date: Thu, 19 Dec 2019 11:26:55 -0800
Subject: [PATCH] Add new models for malware detection. (#7118)

* Add new models for malware detection.

Fixes #7090 and #7092.

* Code review changes.

- FK on release_file.id field instead of md5
- Change message type from String to Text
- Change Enum class in model to singular form
---
 warehouse/malware/__init__.py                 |  11 ++
 warehouse/malware/models.py                   | 125 ++++++++++++++++++
 ...1ff3d24c22_add_malware_detection_tables.py | 115 ++++++++++++++++
 3 files changed, 251 insertions(+)
 create mode 100644 warehouse/malware/__init__.py
 create mode 100644 warehouse/malware/models.py
 create mode 100644 warehouse/migrations/versions/061ff3d24c22_add_malware_detection_tables.py

diff --git a/warehouse/malware/__init__.py b/warehouse/malware/__init__.py
new file mode 100644
index 000000000000..164f68b09175
--- /dev/null
+++ b/warehouse/malware/__init__.py
@@ -0,0 +1,11 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/warehouse/malware/models.py b/warehouse/malware/models.py
new file mode 100644
index 000000000000..fc6576c7dfcd
--- /dev/null
+++ b/warehouse/malware/models.py
@@ -0,0 +1,125 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import enum
+
+from citext import CIText
+from sqlalchemy import (
+    Boolean,
+    Column,
+    DateTime,
+    Enum,
+    ForeignKey,
+    Integer,
+    String,
+    Text,
+    UniqueConstraint,
+    orm,
+    sql,
+)
+from sqlalchemy.dialects.postgresql import JSONB
+
+from warehouse import db
+from warehouse.utils.attrs import make_repr
+
+
+class MalwareCheckType(enum.Enum):
+    """Values accepted by the ``malware_checks.check_type`` column."""
+
+    EventHook = "event_hook"
+    Scheduled = "scheduled"
+
+
+class MalwareCheckState(enum.Enum):
+    """Values accepted by the ``malware_checks.state`` column."""
+
+    Enabled = "enabled"
+    Evaluation = "evaluation"
+    Disabled = "disabled"
+    WipedOut = "wiped_out"
+
+
+class VerdictClassification(enum.Enum):
+    """Values shared by a verdict's classification and administrator verdict."""
+
+    Threat = "threat"
+    Indeterminate = "indeterminate"
+    Benign = "benign"
+
+
+class VerdictConfidence(enum.Enum):
+    """Values accepted by the ``malware_verdicts.confidence`` column."""
+
+    Low = "low"
+    Medium = "medium"
+    High = "high"
+
+
+class MalwareCheck(db.Model):
+    """A malware check, identified by a unique ``(name, version)`` pair."""
+
+    __tablename__ = "malware_checks"
+    __table_args__ = (UniqueConstraint("name", "version"),)
+    __repr__ = make_repr("name", "version")
+
+    name = Column(CIText, nullable=False)
+    version = Column(Integer, default=0, nullable=False)
+    short_description = Column(String(length=128), nullable=False)
+    long_description = Column(Text, nullable=False)
+    # values_callable makes the column store each member's value (for
+    # example "event_hook") instead of the member's name.
+    check_type = Column(
+        Enum(MalwareCheckType, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
+    # This field contains the same content as the ProjectEvent and UserEvent "tag"
+    # fields.
+    hook_name = Column(String, nullable=True)
+    state = Column(
+        Enum(MalwareCheckState, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+        server_default="disabled",
+    )
+    created = Column(DateTime, nullable=False, server_default=sql.func.now())
+
+
+class MalwareVerdict(db.Model):
+    """A verdict linking one malware check to one release file."""
+
+    __tablename__ = "malware_verdicts"
+
+    run_date = Column(DateTime, nullable=False, server_default=sql.func.now())
+    check_id = Column(
+        ForeignKey("malware_checks.id", onupdate="CASCADE", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    file_id = Column(ForeignKey("release_files.id"), nullable=False)
+    classification = Column(
+        Enum(VerdictClassification, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
+    confidence = Column(
+        Enum(VerdictConfidence, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
+    message = Column(Text, nullable=True)
+    details = Column(JSONB, nullable=True)
+    manually_reviewed = Column(Boolean, nullable=False, server_default=sql.false())
+    administrator_verdict = Column(
+        Enum(VerdictClassification, values_callable=lambda x: [e.value for e in x]),
+        nullable=True,
+    )
+    full_report_link = Column(String, nullable=True)
+
+    check = orm.relationship("MalwareCheck", foreign_keys=[check_id], lazy=True)
+    release_file = orm.relationship("File", foreign_keys=[file_id], lazy=True)
diff --git a/warehouse/migrations/versions/061ff3d24c22_add_malware_detection_tables.py b/warehouse/migrations/versions/061ff3d24c22_add_malware_detection_tables.py
new file mode 100644
index 000000000000..899cc51e0f57
--- /dev/null
+++ b/warehouse/migrations/versions/061ff3d24c22_add_malware_detection_tables.py
@@ -0,0 +1,115 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Add malware detection tables
+
+Revision ID: 061ff3d24c22
+Revises: b5bb5d08543d
+Create Date: 2019-12-18 17:27:00.183542
+"""
+import citext
+import sqlalchemy as sa
+
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+revision = "061ff3d24c22"
+down_revision = "b5bb5d08543d"
+
+# The type names below match the names SQLAlchemy derives from the model enum
+# classes (the lower-cased class name), so the models and the database agree.
+MalwareCheckTypes = sa.Enum("event_hook", "scheduled", name="malwarechecktype")
+
+MalwareCheckStates = sa.Enum(
+    "enabled", "evaluation", "disabled", "wiped_out", name="malwarecheckstate"
+)
+
+VerdictClassifications = sa.Enum(
+    "threat", "indeterminate", "benign", name="verdictclassification"
+)
+VerdictConfidences = sa.Enum("low", "medium", "high", name="verdictconfidence")
+
+
+def upgrade():
+    """Create the malware_checks and malware_verdicts tables and their index."""
+    op.create_table(
+        "malware_checks",
+        sa.Column(
+            "id",
+            postgresql.UUID(as_uuid=True),
+            server_default=sa.text("gen_random_uuid()"),
+            nullable=False,
+        ),
+        sa.Column("name", citext.CIText(), nullable=False),
+        sa.Column("version", sa.Integer(), nullable=False),
+        sa.Column("short_description", sa.String(length=128), nullable=False),
+        sa.Column("long_description", sa.Text(), nullable=False),
+        sa.Column("check_type", MalwareCheckTypes, nullable=False),
+        sa.Column("hook_name", sa.String(), nullable=True),
+        sa.Column(
+            "state", MalwareCheckStates, server_default="disabled", nullable=False,
+        ),
+        sa.Column(
+            "created", sa.DateTime(), server_default=sa.text("now()"), nullable=False
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("name", "version"),
+    )
+    op.create_table(
+        "malware_verdicts",
+        sa.Column(
+            "id",
+            postgresql.UUID(as_uuid=True),
+            server_default=sa.text("gen_random_uuid()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "run_date", sa.DateTime(), server_default=sa.text("now()"), nullable=False
+        ),
+        sa.Column("check_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("file_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("classification", VerdictClassifications, nullable=False,),
+        sa.Column("confidence", VerdictConfidences, nullable=False,),
+        sa.Column("message", sa.Text(), nullable=True),
+        sa.Column("details", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column(
+            "manually_reviewed",
+            sa.Boolean(),
+            server_default=sa.text("false"),
+            nullable=False,
+        ),
+        sa.Column("administrator_verdict", VerdictClassifications, nullable=True,),
+        sa.Column("full_report_link", sa.String(), nullable=True),
+        sa.ForeignKeyConstraint(
+            ["check_id"], ["malware_checks.id"], onupdate="CASCADE", ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(["file_id"], ["release_files.id"]),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_malware_verdicts_check_id"),
+        "malware_verdicts",
+        ["check_id"],
+        unique=False,
+    )
+
+
+def downgrade():
+    """Drop the index, both tables and the four enum types made by upgrade()."""
+    op.drop_index(op.f("ix_malware_verdicts_check_id"), table_name="malware_verdicts")
+    op.drop_table("malware_verdicts")
+    op.drop_table("malware_checks")
+    # The enum types are dropped explicitly, after the tables that use them.
+    MalwareCheckTypes.drop(op.get_bind())
+    MalwareCheckStates.drop(op.get_bind())
+    VerdictClassifications.drop(op.get_bind())
+    VerdictConfidences.drop(op.get_bind())