Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sortable size CSV field (#149) #261

Merged
merged 1 commit into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AIPscan/Data/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
FIELD_RELATED_PAIRING = "RelatedPairing"

FIELD_SIZE = "Size"
FIELD_SIZE_BYTES = "SizeBytes"
FIELD_STORAGE_LOCATION = "StorageLocation"
FIELD_STORAGE_NAME = "StorageName"

Expand Down
41 changes: 36 additions & 5 deletions AIPscan/Reporter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def sort_puids(puids):
return natsorted(puids)


def translate_headers(headers):
def translate_headers(headers, add_bytes_column=False):
"""Translate headers from something machine readable to something
more user friendly and translatable.
"""
Expand Down Expand Up @@ -69,6 +69,22 @@ def translate_headers(headers):
fields.FIELD_USER: "User",
fields.FIELD_VERSION: "Version",
}

# Attempt to add an additional header representing a column containing size
# expressed as a number of bytes, rather than in human-readable form, so
# rows can more easily be sorted by size
if add_bytes_column:
headers = (
headers.copy()
) # So we don't change the list object passed to this function

# Handle the two standard size columns
for header in [fields.FIELD_AIP_SIZE, fields.FIELD_SIZE]:
# If size header is found then insert another for the size in bytes after it
if header in headers:
bytes_header = field_lookup[header] + " (bytes)"
headers.insert(headers.index(header) + 1, bytes_header)

return [field_lookup.get(header, header) for header in headers]


Expand All @@ -89,12 +105,27 @@ def format_size_for_csv(rows):

:returns: rows with formatted size field (list of dicts)
"""
edited_rows = []

for row in rows:
try:
# Add size in bytes after original size column
row_key_list = list(row.keys())

if fields.FIELD_SIZE in row_key_list:
size_position = row_key_list.index(fields.FIELD_SIZE) + 1
row_items = list(row.items())

row_items.insert(
size_position, (fields.FIELD_SIZE_BYTES, row[fields.FIELD_SIZE])
)
row = dict(row_items)

# Format original size column
row[fields.FIELD_SIZE] = filesizeformat(row[fields.FIELD_SIZE])
except KeyError:
pass
return rows

edited_rows.append(row)

return edited_rows


def download_csv(headers, rows, filename="report.csv"):
Expand Down
9 changes: 6 additions & 3 deletions AIPscan/Reporter/report_aip_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
fields.FIELD_FORMATS,
]

HEADERS = [
TABLE_HEADERS = [
fields.FIELD_AIP_NAME,
fields.FIELD_CREATED_DATE,
fields.FIELD_SIZE,
Expand Down Expand Up @@ -110,8 +110,9 @@ def aip_contents():
)

if csv:
headers = translate_headers(CSV_HEADERS, True)

filename = "aip_contents.csv"
headers = translate_headers(CSV_HEADERS)
aips = _create_aip_formats_string_representation(
aip_data.get(fields.FIELD_AIPS), separator="|"
)
Expand All @@ -120,12 +121,14 @@ def aip_contents():

aips = _create_aip_formats_string_representation(aip_data.get(fields.FIELD_AIPS))

headers = translate_headers(TABLE_HEADERS)

return render_template(
"report_aip_contents.html",
storage_service=storage_service_id,
storage_service_name=aip_data.get(fields.FIELD_STORAGE_NAME),
storage_location_description=aip_data.get(fields.FIELD_STORAGE_LOCATION),
columns=translate_headers(HEADERS),
columns=headers,
aips=aip_data.get(fields.FIELD_AIPS),
start_date=start_date,
end_date=get_display_end_date(end_date),
Expand Down
6 changes: 4 additions & 2 deletions AIPscan/Reporter/report_aips_by_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ def aips_by_format():
original_files = parse_bool(request.args.get(request_params.ORIGINAL_FILES, True))
csv = parse_bool(request.args.get(request_params.CSV), default=False)

headers = translate_headers(HEADERS)

aip_data = report_data.aips_by_file_format(
storage_service_id=storage_service_id,
file_format=file_format,
Expand All @@ -38,10 +36,14 @@ def aips_by_format():
)

if csv:
headers = translate_headers(HEADERS, True)

filename = "aips_by_file_format_{}.csv".format(file_format)
csv_data = format_size_for_csv(aip_data[fields.FIELD_AIPS])
return download_csv(headers, csv_data, filename)

headers = translate_headers(HEADERS)

return render_template(
"report_aips_by_format.html",
storage_service_id=storage_service_id,
Expand Down
6 changes: 4 additions & 2 deletions AIPscan/Reporter/report_aips_by_puid.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ def aips_by_puid():
original_files = parse_bool(request.args.get(request_params.ORIGINAL_FILES, True))
csv = parse_bool(request.args.get(request_params.CSV), default=False)

headers = translate_headers(HEADERS)

aip_data = report_data.aips_by_puid(
storage_service_id=storage_service_id,
puid=puid,
Expand All @@ -63,10 +61,14 @@ def aips_by_puid():
)

if csv:
headers = translate_headers(HEADERS, True)

filename = "aips_by_puid_{}.csv".format(puid)
csv_data = format_size_for_csv(aip_data[fields.FIELD_AIPS])
return download_csv(headers, csv_data, filename)

headers = translate_headers(HEADERS)

return render_template(
"report_aips_by_puid.html",
storage_service_id=storage_service_id,
Expand Down
6 changes: 4 additions & 2 deletions AIPscan/Reporter/report_format_versions_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,15 @@ def report_format_versions_count():
)
versions = version_data.get(fields.FIELD_FORMAT_VERSIONS)

headers = translate_headers(HEADERS)

if csv:
headers = translate_headers(HEADERS, True)

filename = "format_versions.csv"
csv_data = format_size_for_csv(versions)
return download_csv(headers, csv_data, filename)

headers = translate_headers(HEADERS)

return render_template(
"report_format_versions_count.html",
storage_service_id=storage_service_id,
Expand Down
6 changes: 4 additions & 2 deletions AIPscan/Reporter/report_formats_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ def report_formats_count():
)
formats = formats_data.get(fields.FIELD_FORMATS)

headers = translate_headers(HEADERS)

if csv:
headers = translate_headers(HEADERS, True)

filename = "file_formats.csv"
csv_data = format_size_for_csv(formats)
return download_csv(headers, csv_data, filename)

headers = translate_headers(HEADERS)

return render_template(
"report_formats_count.html",
storage_service_id=storage_service_id,
Expand Down
7 changes: 4 additions & 3 deletions AIPscan/Reporter/report_largest_aips.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ def largest_aips():
pass
csv = parse_bool(request.args.get(request_params.CSV), default=False)

headers = translate_headers(HEADERS)

aip_data = report_data.largest_aips(
storage_service_id=storage_service_id,
start_date=start_date,
Expand All @@ -46,11 +44,14 @@ def largest_aips():
)

if csv:
headers = translate_headers(HEADERS, True)

filename = "largest_aips.csv"
headers = translate_headers(HEADERS)
csv_data = format_size_for_csv(aip_data[fields.FIELD_AIPS])
return download_csv(headers, csv_data, filename)

headers = translate_headers(HEADERS)

return render_template(
"report_largest_aips.html",
storage_service_id=storage_service_id,
Expand Down
9 changes: 5 additions & 4 deletions AIPscan/Reporter/report_largest_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
translate_headers,
)

HEADERS = [
TABLE_HEADERS = [
fields.FIELD_FILENAME,
fields.FIELD_SIZE,
fields.FIELD_FORMAT,
Expand Down Expand Up @@ -52,8 +52,6 @@ def largest_files():
pass
csv = parse_bool(request.args.get(request_params.CSV), default=False)

headers = translate_headers(HEADERS)

file_data = report_data.largest_files(
storage_service_id=storage_service_id,
start_date=start_date,
Expand All @@ -64,11 +62,14 @@ def largest_files():
)

if csv:
headers = translate_headers(CSV_HEADERS, True)

filename = "largest_files.csv"
headers = translate_headers(CSV_HEADERS)
csv_data = format_size_for_csv(file_data[fields.FIELD_FILES])
return download_csv(headers, csv_data, filename)

headers = translate_headers(TABLE_HEADERS)

return render_template(
"report_largest_files.html",
storage_service_id=storage_service_id,
Expand Down
6 changes: 4 additions & 2 deletions AIPscan/Reporter/report_storage_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,20 @@ def storage_locations():
)
csv = parse_bool(request.args.get(request_params.CSV), default=False)

headers = translate_headers(HEADERS)

locations_data = report_data.storage_locations(
storage_service_id=storage_service_id, start_date=start_date, end_date=end_date
)
locations = locations_data.get(fields.FIELD_LOCATIONS)

if csv:
headers = translate_headers(HEADERS, True)

filename = "storage_locations.csv"
csv_data = format_size_for_csv(locations)
return download_csv(headers, csv_data, filename)

headers = translate_headers(HEADERS)

return render_template(
"report_storage_locations.html",
storage_service_id=storage_service_id,
Expand Down
2 changes: 1 addition & 1 deletion AIPscan/Reporter/tests/test_aip_contents.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from flask import current_app

EXPECTED_CSV_CONTENTS = b"UUID,AIP Name,Created Date,Size,Formats\r\n111111111111-1111-1111-11111111,Test AIP,2020-01-01 00:00:00,0 Bytes,fmt/43 (ACME File Format 0.0.0): 1 file|fmt/61 (ACME File Format 0.0.0): 1 file\r\n222222222222-2222-2222-22222222,Test AIP,2020-06-01 00:00:00,0 Bytes,x-fmt/111 (ACME File Format 0.0.0): 3 files|fmt/61 (ACME File Format 0.0.0): 2 files\r\n"
EXPECTED_CSV_CONTENTS = b"UUID,AIP Name,Created Date,Size,Size (bytes),Formats\r\n111111111111-1111-1111-11111111,Test AIP,2020-01-01 00:00:00,0 Bytes,0,fmt/43 (ACME File Format 0.0.0): 1 file|fmt/61 (ACME File Format 0.0.0): 1 file\r\n222222222222-2222-2222-22222222,Test AIP,2020-06-01 00:00:00,0 Bytes,0,x-fmt/111 (ACME File Format 0.0.0): 3 files|fmt/61 (ACME File Format 0.0.0): 2 files\r\n"


def test_aip_contents(aip_contents):
Expand Down
8 changes: 2 additions & 6 deletions AIPscan/Reporter/tests/test_aips_by_file_format.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
import pytest
from flask import current_app

EXPECTED_CSV_ORIGINAL = (
b"AIP Name,UUID,Count,Size\r\nTest AIP,111111111111-1111-1111-11111111,1,1.0 kB\r\n"
)
EXPECTED_CSV_PRESERVATION = (
b"AIP Name,UUID,Count,Size\r\nTest AIP,111111111111-1111-1111-11111111,1,2.0 kB\r\n"
)
EXPECTED_CSV_ORIGINAL = b"AIP Name,UUID,Count,Size,Size (bytes)\r\nTest AIP,111111111111-1111-1111-11111111,1,1.0 kB,1000\r\n"
EXPECTED_CSV_PRESERVATION = b"AIP Name,UUID,Count,Size,Size (bytes)\r\nTest AIP,111111111111-1111-1111-11111111,1,2.0 kB,2000\r\n"


@pytest.mark.parametrize(
Expand Down
8 changes: 2 additions & 6 deletions AIPscan/Reporter/tests/test_aips_by_puid.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@
from AIPscan.models import File, FileType
from AIPscan.Reporter.report_aips_by_puid import get_format_string_from_puid

EXPECTED_CSV_ORIGINAL = (
b"AIP Name,UUID,Count,Size\r\nTest AIP,111111111111-1111-1111-11111111,1,1.0 kB\r\n"
)
EXPECTED_CSV_PRESERVATION = (
b"AIP Name,UUID,Count,Size\r\nTest AIP,111111111111-1111-1111-11111111,1,2.0 kB\r\n"
)
EXPECTED_CSV_ORIGINAL = b"AIP Name,UUID,Count,Size,Size (bytes)\r\nTest AIP,111111111111-1111-1111-11111111,1,1.0 kB,1000\r\n"
EXPECTED_CSV_PRESERVATION = b"AIP Name,UUID,Count,Size,Size (bytes)\r\nTest AIP,111111111111-1111-1111-11111111,1,2.0 kB,2000\r\n"

FILE_WITH_FORMAT_ONLY = File(
uuid=uuid.uuid4(),
Expand Down
2 changes: 1 addition & 1 deletion AIPscan/Reporter/tests/test_format_versions_count.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from flask import current_app

EXPECTED_CSV_CONTENTS = b"PUID,Format,Version,Count,Size\r\nfmt/44,JPEG,1.02,1,2.0 kB\r\nfmt/43,JPEG,1.01,1,1.0 kB\r\nfmt/468,ISO Disk Image File,,1,0 Bytes\r\n"
EXPECTED_CSV_CONTENTS = b"PUID,Format,Version,Count,Size,Size (bytes)\r\nfmt/44,JPEG,1.02,1,2.0 kB,2000\r\nfmt/43,JPEG,1.01,1,1.0 kB,1000\r\nfmt/468,ISO Disk Image File,,1,0 Bytes,0\r\n"


def test_format_versions_count(app_with_populated_format_versions):
Expand Down
4 changes: 1 addition & 3 deletions AIPscan/Reporter/tests/test_formats_count.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from flask import current_app

EXPECTED_CSV_CONTENTS = (
b"Format,Count,Size\r\nJPEG,2,3.0 kB\r\nISO Disk Image File,1,0 Bytes\r\n"
)
EXPECTED_CSV_CONTENTS = b"Format,Count,Size,Size (bytes)\r\nJPEG,2,3.0 kB,3000\r\nISO Disk Image File,1,0 Bytes,0\r\n"


def test_formats_count(app_with_populated_format_versions):
Expand Down
33 changes: 31 additions & 2 deletions AIPscan/Reporter/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from AIPscan.Data.tests import MOCK_STORAGE_SERVICE_ID as STORAGE_SERVICE_ID
from AIPscan.models import File, FileType
from AIPscan.Reporter import helpers
from AIPscan.Reporter.report_aips_by_format import HEADERS
from AIPscan.Reporter.report_aip_contents import CSV_HEADERS as AIP_CONTENTS_HEADERS
from AIPscan.Reporter.report_aips_by_format import HEADERS as AIPS_BY_FORMAT_HEADERS

ROWS_WITH_SIZE = [
{
Expand All @@ -33,11 +34,13 @@
fields.FIELD_AIP_UUID: "test uuid",
fields.FIELD_AIP_NAME: "test name",
fields.FIELD_SIZE: "1.6 MB",
fields.FIELD_SIZE_BYTES: 1560321,
},
{
fields.FIELD_AIP_UUID: "test uuid2",
fields.FIELD_AIP_NAME: "test name2",
fields.FIELD_SIZE: "123.4 kB",
fields.FIELD_SIZE_BYTES: 123423,
},
]

Expand All @@ -62,7 +65,7 @@ def test_download_csv(app_instance, mocker):
mock_get_ss_name = mocker.patch("AIPscan.Data._get_storage_service")
mock_get_ss_name.return_value = STORAGE_SERVICE

headers = helpers.translate_headers(HEADERS)
headers = helpers.translate_headers(AIPS_BY_FORMAT_HEADERS)

report_data = aips_by_file_format(STORAGE_SERVICE_ID, "test")
response = helpers.download_csv(headers, report_data[fields.FIELD_AIPS], CSV_FILE)
Expand Down Expand Up @@ -93,6 +96,32 @@ def test_download_csv(app_instance, mocker):
assert line_count == len(query_results) + 1


@pytest.mark.parametrize(
"data,expected_output",
[
# No adding of header for size in bytes
(
{"headers": AIPS_BY_FORMAT_HEADERS, "add_bytes_column": False},
["AIP Name", "UUID", "Count", "Size"],
),
# Adding of header for size in bytes at end of header list
(
{"headers": AIPS_BY_FORMAT_HEADERS, "add_bytes_column": True},
["AIP Name", "UUID", "Count", "Size", "Size (bytes)"],
),
# Adding of header for size in bytes not at end of header list
(
{"headers": AIP_CONTENTS_HEADERS, "add_bytes_column": True},
["UUID", "AIP Name", "Created Date", "Size", "Size (bytes)", "Formats"],
),
],
)
def test_translate_headers(data, expected_output):
headers = helpers.translate_headers(data["headers"], data["add_bytes_column"])

assert headers == expected_output


@pytest.mark.parametrize(
"data,expected_output",
[
Expand Down
Loading
Loading