Add test data generation tool. (#217)
mcantelon committed Oct 4, 2023
1 parent 40b01a3 commit 78601e7
Showing 8 changed files with 250 additions and 0 deletions.
1 change: 1 addition & 0 deletions requirements/test.txt
@@ -1,5 +1,6 @@
-r base.txt

faker
flake8==5.0.4
pytest==5.4.3
pytest_cov==2.11.1
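The faker line is the addition in this hunk: it pulls in the Faker library that the new tools/ modules below use to generate fake names, URLs, UUIDs, file paths, and checksums.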
Empty file added tools/__init__.py
Empty file.
Empty file added tools/app/__init__.py
Empty file.
5 changes: 5 additions & 0 deletions tools/app/tool.py
@@ -0,0 +1,5 @@
import sys

# Make the AIPscan application package importable; the relative path appears to
# assume the tools are run from the root of an AIPscan checkout.
sys.path.append("../AIPscan")

# Name of the AIPscan configuration (a key in config.CONFIGS) that the tools load.
config_name = "default"
61 changes: 61 additions & 0 deletions tools/generate-test-data.py
@@ -0,0 +1,61 @@
from app import tool
from faker import Faker
from flask import Flask
from helpers import data

from AIPscan import db
from config import CONFIGS

app = Flask(__name__)
app.config.from_object(CONFIGS[tool.config_name])

db.init_app(app)

fake = Faker()
randint = fake.random.randint

with app.app_context():
    # Add example storage services
    ss_to_create = 2

    print(f"Creating pipeline and {ss_to_create} storage services...")
    pipeline = data.create_pipeline()

    ss_ids = []
    fetch_jobs = {}

    default_created = False
    for _ in range(ss_to_create):
        is_default = len(ss_ids) == 0

        ss = data.create_storage_service(is_default)
        ss_ids.append(ss.id)

        fetch_job = data.create_fetch_job(ss)
        fetch_jobs[ss.id] = fetch_job.id

    # Add example storage locations
    storage_locations_per_ss = 2
    ss_locations_to_create = ss_to_create * storage_locations_per_ss

    print(f"Creating {ss_locations_to_create} storage service locations...")

    aip_batches_created = 0
    total_aip_batches = len(ss_ids) * storage_locations_per_ss
    for ss_id in ss_ids:
        for _ in range(storage_locations_per_ss):
            sl = data.create_location(ss_id)

            db.session.add(sl)
            db.session.commit()

            # Add AIPs
            aip_batches_created += 1

            print(f"Creating AIPs ({aip_batches_created}/{total_aip_batches})...")

            for _ in range(1, randint(100, 300)):
                aip = data.create_aip(pipeline.id, ss_id, sl.id, fetch_jobs[ss.id])
                data.create_aip_files(100, 300, aip)

    print("Done.")
Empty file added tools/helpers/__init__.py
Empty file.
95 changes: 95 additions & 0 deletions tools/helpers/data.py
@@ -0,0 +1,95 @@
from datetime import date

from faker import Faker

from AIPscan import db
from AIPscan.models import AIP, FetchJob, File, Pipeline, StorageLocation, StorageService

fake = Faker()
randint = fake.random.randint


def create_pipeline():
    return Pipeline(
        origin_pipeline=fake.uuid4(),
        dashboard_url=fake.url()
    )


def create_storage_service(default):
    ss = StorageService(
        name=fake.text(20)[:-1],
        url=fake.url(),
        user_name=fake.profile()["username"],
        api_key=fake.password(),
        download_limit=0,
        download_offset=0,
        default=default,
    )
    db.session.add(ss)
    db.session.commit()

    return ss


def create_fetch_job(storage_service):
    fetch_job = FetchJob(
        total_packages=0,
        total_aips=0,
        total_deleted_aips=0,
        download_start=date.today(),
        download_end=date.today(),
        download_directory=fake.file_path(),
        storage_service_id=storage_service.id,
    )
    db.session.add(fetch_job)
    db.session.commit()

    return fetch_job


def create_location(storage_service_id):
    return StorageLocation(
        current_location=fake.file_path(),
        description=fake.text(20)[:-1],
        storage_service_id=storage_service_id,
    )


def create_aip(pipeline_id, storage_service_id, storage_location_id, fetch_job_id):
    aip = AIP(
        uuid=fake.uuid4(),
        transfer_name=fake.text(20)[:-1],
        create_date=date.today(),
        mets_sha256=fake.sha256(),
        size=randint(10000, 100_000_000),
        storage_service_id=storage_service_id,
        storage_location_id=storage_location_id,
        fetch_job_id=fetch_job_id,
        origin_pipeline_id=pipeline_id,
    )
    db.session.add(aip)
    db.session.commit()

    return aip


def create_aip_files(min, max, aip):
    for _ in range(1, randint(min, max)):
        aipfile = File(
            aip_id=aip.id,
            name=fake.text(20)[:-1],
            filepath=fake.file_path(),
            uuid=fake.uuid4(),
            file_type="original",
            size=randint(1000, 1_000_000),
            date_created=date.today(),
            puid=fake.text(20)[:-1],
            file_format=fake.text(20)[:-1],
            format_version=fake.text(20)[:-1],
            checksum_type=fake.text(20)[:-1],
            checksum_value=fake.text(20)[:-1],
            premis_object="",
        )
        db.session.add(aipfile)
        db.session.commit()
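Note that create_storage_service, create_fetch_job, and create_aip commit their objects immediately, while create_pipeline and create_location return unsaved model instances that the caller must add to the session. A minimal sketch of composing the helpers outside the generator script, assuming it is saved alongside tools/generate-test-data.py and run the same way so the same imports and configuration resolve:

from app import tool
from flask import Flask
from helpers import data

from AIPscan import db
from config import CONFIGS

app = Flask(__name__)
app.config.from_object(CONFIGS[tool.config_name])

db.init_app(app)

with app.app_context():
    # These helpers commit and return objects with database ids.
    ss = data.create_storage_service(True)
    fetch_job = data.create_fetch_job(ss)

    # These helpers return unsaved instances; persist them before using their ids.
    pipeline = data.create_pipeline()
    location = data.create_location(ss.id)
    db.session.add(pipeline)
    db.session.add(location)
    db.session.commit()

    # One fake AIP with a handful of fake files attached.
    aip = data.create_aip(pipeline.id, ss.id, location.id, fetch_job.id)
    data.create_aip_files(5, 10, aip)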
88 changes: 88 additions & 0 deletions tools/tests/test_data.py
@@ -0,0 +1,88 @@
import datetime

import pytest

from tools.helpers import data


@pytest.fixture
def mock_db_add(mocker):
    mocker.patch("AIPscan.db.session.add")
    mocker.patch("AIPscan.db.session.commit")


def test_create_storage_service(mock_db_add):
    ss = data.create_storage_service(True)

    assert ss.name
    assert type(ss.name) == str

    assert ss.url
    assert type(ss.url) == str

    assert ss.user_name
    assert type(ss.user_name) == str

    assert ss.api_key
    assert type(ss.api_key) == str

    assert ss.default
    assert type(ss.default) == bool

    ss = data.create_storage_service(False)
    assert not ss.default


def test_create_fetch_job(mock_db_add):
    ss = data.create_storage_service(True)
    ss.id = 1

    fetch_job = data.create_fetch_job(ss)

    assert fetch_job.download_start
    assert type(fetch_job.download_start) == datetime.date

    assert fetch_job.download_end
    assert type(fetch_job.download_end) == datetime.date

    assert fetch_job.download_directory
    assert type(fetch_job.download_directory) == str

    assert fetch_job.storage_service_id == ss.id


def test_create_location(mock_db_add):
    location = data.create_location(1)

    assert location.current_location
    assert type(location.current_location) == str

    assert location.description
    assert type(location.description) == str

    assert location.storage_service_id == 1


def test_create_aip(mock_db_add):
    aip = data.create_aip(1, 2, 3, 4)

    assert aip.uuid
    assert type(aip.uuid) == str

    assert aip.transfer_name
    assert type(aip.transfer_name) == str

    assert aip.create_date
    assert type(aip.create_date) == datetime.date

    assert aip.mets_sha256
    assert type(aip.mets_sha256) == str

    assert aip.size
    assert type(aip.size) == int

    assert aip.origin_pipeline_id == 1
    assert aip.storage_service_id == 2
    assert aip.storage_location_id == 3
    assert aip.fetch_job_id == 4
    assert aip.origin_pipeline_id == 1
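
The create_pipeline and create_aip_files helpers are not exercised above. A test for create_pipeline in the same style might look like the following sketch (it reuses the mock_db_add fixture, whose mocker argument comes from the pytest-mock plugin):

def test_create_pipeline(mock_db_add):
    pipeline = data.create_pipeline()

    assert pipeline.origin_pipeline
    assert type(pipeline.origin_pipeline) == str

    assert pipeline.dashboard_url
    assert type(pipeline.dashboard_url) == str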
