Skip to content

Commit

Permalink
Add new models for malware detection. (#7118)
Browse files Browse the repository at this point in the history
* Add new models for malware detection.

Fixes #7090 and #7092.

* Code review changes.

- FK on release_file.id field instead of md5
- Change message type from String to Text
- Change Enum class in model to singular form
  • Loading branch information
xmunoz authored and ewdurbin committed Feb 11, 2020
1 parent c4cb589 commit 1553052
Show file tree
Hide file tree
Showing 3 changed files with 237 additions and 0 deletions.
11 changes: 11 additions & 0 deletions warehouse/malware/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
116 changes: 116 additions & 0 deletions warehouse/malware/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum

from citext import CIText
from sqlalchemy import (
Boolean,
Column,
DateTime,
Enum,
ForeignKey,
Integer,
String,
Text,
UniqueConstraint,
orm,
sql,
)
from sqlalchemy.dialects.postgresql import JSONB

from warehouse import db
from warehouse.utils.attrs import make_repr


class MalwareCheckType(enum.Enum):
    """Kinds of malware check.

    Member values are the lowercase strings that the ``check_type`` column
    persists (the column is declared with a ``values_callable`` that maps
    members to their ``.value``).
    """

    EventHook = "event_hook"
    Scheduled = "scheduled"


class MalwareCheckState(enum.Enum):
    """States recorded for a malware check.

    Member values are the lowercase strings stored in the ``state`` column,
    whose server-side default is ``"disabled"``.
    """

    Enabled = "enabled"
    Evaluation = "evaluation"
    Disabled = "disabled"
    WipedOut = "wiped_out"


class VerdictClassification(enum.Enum):
    """Classification attached to a malware verdict.

    Used for both the ``classification`` and ``administrator_verdict``
    columns; member values are the lowercase strings that get stored.
    """

    Threat = "threat"
    Indeterminate = "indeterminate"
    Benign = "benign"


class VerdictConfidence(enum.Enum):
    """Confidence level attached to a malware verdict.

    Member values are the lowercase strings stored in the ``confidence``
    column.
    """

    Low = "low"
    Medium = "medium"
    High = "high"


class MalwareCheck(db.Model):
    """A versioned malware check definition, stored in ``malware_checks``.

    Each ``(name, version)`` pair is unique. Enum columns persist the
    lowercase ``.value`` of their members rather than the member names.
    """

    __tablename__ = "malware_checks"
    __table_args__ = (UniqueConstraint("name", "version"),)
    __repr__ = make_repr("name", "version")

    name = Column(CIText, nullable=False)
    # ``default=0`` is applied by SQLAlchemy at insert time; it is not a
    # database-level default.
    version = Column(Integer, default=0, nullable=False)
    short_description = Column(String(length=128), nullable=False)
    long_description = Column(Text, nullable=False)
    check_type = Column(
        Enum(
            MalwareCheckType,
            # Migration 061ff3d24c22 creates this PostgreSQL type as
            # "malwarechecktypes" (plural). Without an explicit name
            # SQLAlchemy derives "malwarechecktype" from the class name,
            # which is not the type the migration creates.
            name="malwarechecktypes",
            values_callable=lambda x: [e.value for e in x],
        ),
        nullable=False,
    )
    # This field contains the same content as the ProjectEvent and UserEvent "tag"
    # fields.
    hook_name = Column(String, nullable=True)
    state = Column(
        Enum(MalwareCheckState, values_callable=lambda x: [e.value for e in x]),
        nullable=False,
        server_default="disabled",
    )
    created = Column(DateTime, nullable=False, server_default=sql.func.now())


class MalwareVerdict(db.Model):
    """Result of one malware check for one release file (``malware_verdicts``).

    Enum columns persist the lowercase ``.value`` of their members.
    """

    __tablename__ = "malware_verdicts"

    run_date = Column(DateTime, server_default=sql.func.now(), nullable=False)

    # Verdicts follow their check: updates and deletes on the referenced
    # malware_checks row cascade to this table.
    check_id = Column(
        ForeignKey("malware_checks.id", onupdate="CASCADE", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    file_id = Column(ForeignKey("release_files.id"), nullable=False)

    classification = Column(
        Enum(
            VerdictClassification,
            values_callable=lambda members: [member.value for member in members],
        ),
        nullable=False,
    )
    confidence = Column(
        Enum(
            VerdictConfidence,
            values_callable=lambda members: [member.value for member in members],
        ),
        nullable=False,
    )
    message = Column(Text, nullable=True)
    details = Column(JSONB, nullable=True)

    # Review fields: the flag defaults to false in the database, and the
    # administrator's classification stays NULL until one is recorded.
    manually_reviewed = Column(Boolean, server_default=sql.false(), nullable=False)
    administrator_verdict = Column(
        Enum(
            VerdictClassification,
            values_callable=lambda members: [member.value for member in members],
        ),
        nullable=True,
    )
    full_report_link = Column(String, nullable=True)

    check = orm.relationship("MalwareCheck", lazy=True, foreign_keys=[check_id])
    release_file = orm.relationship("File", lazy=True, foreign_keys=[file_id])
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Add malware detection tables
Revision ID: 061ff3d24c22
Revises: b5bb5d08543d
Create Date: 2019-12-18 17:27:00.183542
"""
import citext
import sqlalchemy as sa

from alembic import op
from sqlalchemy.dialects import postgresql

revision = "061ff3d24c22"
down_revision = "b5bb5d08543d"

# PostgreSQL enum types used by the tables below. They are module-level so
# that upgrade() can attach them to columns and downgrade() can drop them
# explicitly after the tables are gone.

# NOTE(review): this type name is plural while the other three are singular.
# SQLAlchemy's default name for an Enum built from a class is the lowercased
# class name, so confirm the model's Enum for this column resolves to
# "malwarechecktypes" as well.
MalwareCheckTypes = sa.Enum(
    "event_hook",
    "scheduled",
    name="malwarechecktypes",
)

MalwareCheckStates = sa.Enum(
    "enabled",
    "evaluation",
    "disabled",
    "wiped_out",
    name="malwarecheckstate",
)

VerdictClassifications = sa.Enum(
    "threat",
    "indeterminate",
    "benign",
    name="verdictclassification",
)

VerdictConfidences = sa.Enum(
    "low",
    "medium",
    "high",
    name="verdictconfidence",
)


def upgrade():
    """Create ``malware_checks`` and ``malware_verdicts`` plus the check_id index."""

    def uuid_id_column():
        # Build a fresh Column per table; both tables use the same UUID
        # primary-key definition with a database-generated value.
        return sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            nullable=False,
        )

    def now_timestamp_column(column_name):
        # Non-null timestamp that the database fills in with now().
        return sa.Column(
            column_name,
            sa.DateTime(),
            server_default=sa.text("now()"),
            nullable=False,
        )

    # malware_checks must exist first: malware_verdicts references it.
    op.create_table(
        "malware_checks",
        uuid_id_column(),
        sa.Column("name", citext.CIText(), nullable=False),
        sa.Column("version", sa.Integer(), nullable=False),
        sa.Column("short_description", sa.String(length=128), nullable=False),
        sa.Column("long_description", sa.Text(), nullable=False),
        sa.Column("check_type", MalwareCheckTypes, nullable=False),
        sa.Column("hook_name", sa.String(), nullable=True),
        sa.Column(
            "state",
            MalwareCheckStates,
            server_default="disabled",
            nullable=False,
        ),
        now_timestamp_column("created"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name", "version"),
    )

    op.create_table(
        "malware_verdicts",
        uuid_id_column(),
        now_timestamp_column("run_date"),
        sa.Column("check_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("file_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("classification", VerdictClassifications, nullable=False),
        sa.Column("confidence", VerdictConfidences, nullable=False),
        sa.Column("message", sa.Text(), nullable=True),
        sa.Column(
            "details",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        sa.Column(
            "manually_reviewed",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=False,
        ),
        sa.Column("administrator_verdict", VerdictClassifications, nullable=True),
        sa.Column("full_report_link", sa.String(), nullable=True),
        sa.ForeignKeyConstraint(
            ["check_id"],
            ["malware_checks.id"],
            onupdate="CASCADE",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(["file_id"], ["release_files.id"]),
        sa.PrimaryKeyConstraint("id"),
    )

    # Non-unique index on the foreign key to malware_checks.
    check_id_index = op.f("ix_malware_verdicts_check_id")
    op.create_index(
        check_id_index,
        "malware_verdicts",
        ["check_id"],
        unique=False,
    )


def downgrade():
    """Undo ``upgrade()``: drop the index, both tables, then the enum types."""
    op.drop_index(
        op.f("ix_malware_verdicts_check_id"),
        table_name="malware_verdicts",
    )

    # malware_verdicts goes first because it references malware_checks.
    for table_name in ("malware_verdicts", "malware_checks"):
        op.drop_table(table_name)

    # The enum types are dropped explicitly once no table uses them.
    for enum_type in (
        MalwareCheckTypes,
        MalwareCheckStates,
        VerdictClassifications,
        VerdictConfidences,
    ):
        enum_type.drop(op.get_bind())

0 comments on commit 1553052

Please sign in to comment.