diff --git a/AIPscan/Data/fields.py b/AIPscan/Data/fields.py
index 7222ce54..00a9e346 100644
--- a/AIPscan/Data/fields.py
+++ b/AIPscan/Data/fields.py
@@ -53,6 +53,7 @@
 FIELD_RELATED_PAIRING = "RelatedPairing"
 
 FIELD_SIZE = "Size"
+FIELD_SIZE_BYTES = "SizeBytes"
 FIELD_STORAGE_LOCATION = "StorageLocation"
 FIELD_STORAGE_NAME = "StorageName"
 
diff --git a/AIPscan/Reporter/helpers.py b/AIPscan/Reporter/helpers.py
index 8de8ea3a..39aabd79 100644
--- a/AIPscan/Reporter/helpers.py
+++ b/AIPscan/Reporter/helpers.py
@@ -28,7 +28,7 @@ def sort_puids(puids):
     return natsorted(puids)
 
 
-def translate_headers(headers):
+def translate_headers(headers, add_bytes_column=False):
     """Translate headers from something machine readable to something more
     user friendly and translatable.
     """
@@ -69,6 +69,22 @@ def translate_headers(headers):
         fields.FIELD_USER: "User",
         fields.FIELD_VERSION: "Version",
     }
+
+    # Attempt to add an additional header representing a column containing size
+    # expressed as a number of bytes, rather than in human-readable form, so
+    # rows can more easily be sorted by size
+    if add_bytes_column:
+        headers = (
+            headers.copy()
+        )  # So we don't change the list object passed to this function
+
+        # Handle the two standard size columns
+        for header in [fields.FIELD_AIP_SIZE, fields.FIELD_SIZE]:
+            # If size header is found then insert another for the size in bytes after it
+            if header in headers:
+                bytes_header = field_lookup[header] + " (bytes)"
+                headers.insert(headers.index(header) + 1, bytes_header)
+
     return [field_lookup.get(header, header) for header in headers]
 
 
@@ -89,12 +105,27 @@ def format_size_for_csv(rows):
 
     :returns: rows with formatted size field (list of dicts)
    """
+    edited_rows = []
+
     for row in rows:
-        try:
+        # Add size in bytes after original size column
+        row_key_list = list(row.keys())
+
+        if fields.FIELD_SIZE in row_key_list:
+            size_position = row_key_list.index(fields.FIELD_SIZE) + 1
+            row_items = list(row.items())
+
+            row_items.insert(
+                size_position, (fields.FIELD_SIZE_BYTES, row[fields.FIELD_SIZE])
+            )
+            row = dict(row_items)
+
+            # Format original size column
             row[fields.FIELD_SIZE] = filesizeformat(row[fields.FIELD_SIZE])
-        except KeyError:
-            pass
-    return rows
+
+        edited_rows.append(row)
+
+    return edited_rows
 
 
 def download_csv(headers, rows, filename="report.csv"):
diff --git a/AIPscan/Reporter/report_aip_contents.py b/AIPscan/Reporter/report_aip_contents.py
index b911b9ac..c8a515a5 100644
--- a/AIPscan/Reporter/report_aip_contents.py
+++ b/AIPscan/Reporter/report_aip_contents.py
@@ -22,7 +22,7 @@
     fields.FIELD_FORMATS,
 ]
 
-HEADERS = [
+TABLE_HEADERS = [
     fields.FIELD_AIP_NAME,
     fields.FIELD_CREATED_DATE,
     fields.FIELD_SIZE,
@@ -110,8 +110,9 @@ def aip_contents():
     )
 
     if csv:
+        headers = translate_headers(CSV_HEADERS, True)
+
         filename = "aip_contents.csv"
-        headers = translate_headers(CSV_HEADERS)
         aips = _create_aip_formats_string_representation(
             aip_data.get(fields.FIELD_AIPS), separator="|"
         )
@@ -120,12 +121,14 @@ def aip_contents():
 
     aips = _create_aip_formats_string_representation(aip_data.get(fields.FIELD_AIPS))
 
+    headers = translate_headers(TABLE_HEADERS)
+
     return render_template(
         "report_aip_contents.html",
         storage_service_id=storage_service_id,
         storage_service_name=aip_data.get(fields.FIELD_STORAGE_NAME),
         storage_location_description=aip_data.get(fields.FIELD_STORAGE_LOCATION),
-        columns=translate_headers(HEADERS),
+        columns=headers,
         aips=aip_data.get(fields.FIELD_AIPS),
         start_date=start_date,
         end_date=get_display_end_date(end_date),
diff --git a/AIPscan/Reporter/report_aips_by_format.py b/AIPscan/Reporter/report_aips_by_format.py
index e923a797..a03b897c 100644
--- a/AIPscan/Reporter/report_aips_by_format.py
+++ b/AIPscan/Reporter/report_aips_by_format.py
@@ -28,8 +28,6 @@ def aips_by_format():
     original_files = parse_bool(request.args.get(request_params.ORIGINAL_FILES, True))
     csv = parse_bool(request.args.get(request_params.CSV), default=False)
 
-    headers = translate_headers(HEADERS)
-
     aip_data = report_data.aips_by_file_format(
         storage_service_id=storage_service_id,
         file_format=file_format,
@@ -38,10 +36,14 @@
     )
 
     if csv:
+        headers = translate_headers(HEADERS, True)
+
         filename = "aips_by_file_format_{}.csv".format(file_format)
         csv_data = format_size_for_csv(aip_data[fields.FIELD_AIPS])
         return download_csv(headers, csv_data, filename)
 
+    headers = translate_headers(HEADERS)
+
     return render_template(
         "report_aips_by_format.html",
         storage_service_id=storage_service_id,
diff --git a/AIPscan/Reporter/report_aips_by_puid.py b/AIPscan/Reporter/report_aips_by_puid.py
index 2da5a22b..14bf14cd 100644
--- a/AIPscan/Reporter/report_aips_by_puid.py
+++ b/AIPscan/Reporter/report_aips_by_puid.py
@@ -53,8 +53,6 @@ def aips_by_puid():
     original_files = parse_bool(request.args.get(request_params.ORIGINAL_FILES, True))
     csv = parse_bool(request.args.get(request_params.CSV), default=False)
 
-    headers = translate_headers(HEADERS)
-
     aip_data = report_data.aips_by_puid(
         storage_service_id=storage_service_id,
         puid=puid,
@@ -63,10 +61,14 @@
     )
 
     if csv:
+        headers = translate_headers(HEADERS, True)
+
         filename = "aips_by_puid_{}.csv".format(puid)
         csv_data = format_size_for_csv(aip_data[fields.FIELD_AIPS])
         return download_csv(headers, csv_data, filename)
 
+    headers = translate_headers(HEADERS)
+
     return render_template(
         "report_aips_by_puid.html",
         storage_service_id=storage_service_id,
diff --git a/AIPscan/Reporter/report_format_versions_count.py b/AIPscan/Reporter/report_format_versions_count.py
index f0642979..f51645e1 100644
--- a/AIPscan/Reporter/report_format_versions_count.py
+++ b/AIPscan/Reporter/report_format_versions_count.py
@@ -41,13 +41,15 @@ def report_format_versions_count():
     )
     versions = version_data.get(fields.FIELD_FORMAT_VERSIONS)
 
-    headers = translate_headers(HEADERS)
-
     if csv:
+        headers = translate_headers(HEADERS, True)
+
         filename = "format_versions.csv"
         csv_data = format_size_for_csv(versions)
         return download_csv(headers, csv_data, filename)
 
+    headers = translate_headers(HEADERS)
+
     return render_template(
         "report_format_versions_count.html",
         storage_service_id=storage_service_id,
diff --git a/AIPscan/Reporter/report_formats_count.py b/AIPscan/Reporter/report_formats_count.py
index bd65c87c..d884647a 100644
--- a/AIPscan/Reporter/report_formats_count.py
+++ b/AIPscan/Reporter/report_formats_count.py
@@ -51,13 +51,15 @@ def report_formats_count():
     )
     formats = formats_data.get(fields.FIELD_FORMATS)
 
-    headers = translate_headers(HEADERS)
-
     if csv:
+        headers = translate_headers(HEADERS, True)
+
         filename = "file_formats.csv"
         csv_data = format_size_for_csv(formats)
         return download_csv(headers, csv_data, filename)
 
+    headers = translate_headers(HEADERS)
+
     return render_template(
         "report_formats_count.html",
         storage_service_id=storage_service_id,
diff --git a/AIPscan/Reporter/report_largest_aips.py b/AIPscan/Reporter/report_largest_aips.py
index 614f60d6..8337a7f4 100644
--- a/AIPscan/Reporter/report_largest_aips.py
+++ b/AIPscan/Reporter/report_largest_aips.py
@@ -35,8 +35,6 @@ def largest_aips():
         pass
     csv = parse_bool(request.args.get(request_params.CSV), default=False)
 
-    headers = translate_headers(HEADERS)
-
     aip_data = report_data.largest_aips(
         storage_service_id=storage_service_id,
         start_date=start_date,
@@ -46,11 +44,14 @@
     )
 
     if csv:
+        headers = translate_headers(HEADERS, True)
+
         filename = "largest_aips.csv"
-        headers = translate_headers(HEADERS)
         csv_data = format_size_for_csv(aip_data[fields.FIELD_AIPS])
         return download_csv(headers, csv_data, filename)
 
+    headers = translate_headers(HEADERS)
+
     return render_template(
         "report_largest_aips.html",
         storage_service_id=storage_service_id,
diff --git a/AIPscan/Reporter/report_largest_files.py b/AIPscan/Reporter/report_largest_files.py
index 87e138f0..1dc9178a 100644
--- a/AIPscan/Reporter/report_largest_files.py
+++ b/AIPscan/Reporter/report_largest_files.py
@@ -13,7 +13,7 @@
     translate_headers,
 )
 
-HEADERS = [
+TABLE_HEADERS = [
     fields.FIELD_FILENAME,
     fields.FIELD_SIZE,
     fields.FIELD_FORMAT,
@@ -52,8 +52,6 @@ def largest_files():
         pass
     csv = parse_bool(request.args.get(request_params.CSV), default=False)
 
-    headers = translate_headers(HEADERS)
-
     file_data = report_data.largest_files(
         storage_service_id=storage_service_id,
         start_date=start_date,
@@ -64,11 +62,14 @@
     )
 
     if csv:
+        headers = translate_headers(CSV_HEADERS, True)
+
         filename = "largest_files.csv"
-        headers = translate_headers(CSV_HEADERS)
         csv_data = format_size_for_csv(file_data[fields.FIELD_FILES])
         return download_csv(headers, csv_data, filename)
 
+    headers = translate_headers(TABLE_HEADERS)
+
     return render_template(
         "report_largest_files.html",
         storage_service_id=storage_service_id,
diff --git a/AIPscan/Reporter/report_storage_locations.py b/AIPscan/Reporter/report_storage_locations.py
index 654ffbe3..73d14793 100644
--- a/AIPscan/Reporter/report_storage_locations.py
+++ b/AIPscan/Reporter/report_storage_locations.py
@@ -38,18 +38,20 @@ def storage_locations():
     )
     csv = parse_bool(request.args.get(request_params.CSV), default=False)
 
-    headers = translate_headers(HEADERS)
-
     locations_data = report_data.storage_locations(
         storage_service_id=storage_service_id, start_date=start_date, end_date=end_date
     )
     locations = locations_data.get(fields.FIELD_LOCATIONS)
 
     if csv:
+        headers = translate_headers(HEADERS, True)
+
         filename = "storage_locations.csv"
         csv_data = format_size_for_csv(locations)
         return download_csv(headers, csv_data, filename)
 
+    headers = translate_headers(HEADERS)
+
     return render_template(
         "report_storage_locations.html",
         storage_service_id=storage_service_id,
diff --git a/AIPscan/Reporter/tests/test_aip_contents.py b/AIPscan/Reporter/tests/test_aip_contents.py
index e7229b28..2cacef99 100644
--- a/AIPscan/Reporter/tests/test_aip_contents.py
+++ b/AIPscan/Reporter/tests/test_aip_contents.py
@@ -1,6 +1,6 @@
 from flask import current_app
 
-EXPECTED_CSV_CONTENTS = b"UUID,AIP Name,Created Date,Size,Formats\r\n111111111111-1111-1111-11111111,Test AIP,2020-01-01 00:00:00,0 Bytes,fmt/43 (ACME File Format 0.0.0): 1 file|fmt/61 (ACME File Format 0.0.0): 1 file\r\n222222222222-2222-2222-22222222,Test AIP,2020-06-01 00:00:00,0 Bytes,x-fmt/111 (ACME File Format 0.0.0): 3 files|fmt/61 (ACME File Format 0.0.0): 2 files\r\n"
+EXPECTED_CSV_CONTENTS = b"UUID,AIP Name,Created Date,Size,Size (bytes),Formats\r\n111111111111-1111-1111-11111111,Test AIP,2020-01-01 00:00:00,0 Bytes,0,fmt/43 (ACME File Format 0.0.0): 1 file|fmt/61 (ACME File Format 0.0.0): 1 file\r\n222222222222-2222-2222-22222222,Test AIP,2020-06-01 00:00:00,0 Bytes,0,x-fmt/111 (ACME File Format 0.0.0): 3 files|fmt/61 (ACME File Format 0.0.0): 2 files\r\n"
 
 
 def test_aip_contents(aip_contents):
diff --git a/AIPscan/Reporter/tests/test_aips_by_file_format.py b/AIPscan/Reporter/tests/test_aips_by_file_format.py
index 8e6371a7..dc23946e 100644
--- a/AIPscan/Reporter/tests/test_aips_by_file_format.py
+++ b/AIPscan/Reporter/tests/test_aips_by_file_format.py
@@ -1,12 +1,8 @@
 import pytest
 from flask import current_app
 
-EXPECTED_CSV_ORIGINAL = (
-    b"AIP Name,UUID,Count,Size\r\nTest AIP,111111111111-1111-1111-11111111,1,1.0 kB\r\n"
-)
-EXPECTED_CSV_PRESERVATION = (
-    b"AIP Name,UUID,Count,Size\r\nTest AIP,111111111111-1111-1111-11111111,1,2.0 kB\r\n"
-)
+EXPECTED_CSV_ORIGINAL = b"AIP Name,UUID,Count,Size,Size (bytes)\r\nTest AIP,111111111111-1111-1111-11111111,1,1.0 kB,1000\r\n"
+EXPECTED_CSV_PRESERVATION = b"AIP Name,UUID,Count,Size,Size (bytes)\r\nTest AIP,111111111111-1111-1111-11111111,1,2.0 kB,2000\r\n"
 
 
 @pytest.mark.parametrize(
diff --git a/AIPscan/Reporter/tests/test_aips_by_puid.py b/AIPscan/Reporter/tests/test_aips_by_puid.py
index c11a2d57..f3489dcd 100644
--- a/AIPscan/Reporter/tests/test_aips_by_puid.py
+++ b/AIPscan/Reporter/tests/test_aips_by_puid.py
@@ -7,12 +7,8 @@
 from AIPscan.models import File, FileType
 from AIPscan.Reporter.report_aips_by_puid import get_format_string_from_puid
 
-EXPECTED_CSV_ORIGINAL = (
-    b"AIP Name,UUID,Count,Size\r\nTest AIP,111111111111-1111-1111-11111111,1,1.0 kB\r\n"
-)
-EXPECTED_CSV_PRESERVATION = (
-    b"AIP Name,UUID,Count,Size\r\nTest AIP,111111111111-1111-1111-11111111,1,2.0 kB\r\n"
-)
+EXPECTED_CSV_ORIGINAL = b"AIP Name,UUID,Count,Size,Size (bytes)\r\nTest AIP,111111111111-1111-1111-11111111,1,1.0 kB,1000\r\n"
+EXPECTED_CSV_PRESERVATION = b"AIP Name,UUID,Count,Size,Size (bytes)\r\nTest AIP,111111111111-1111-1111-11111111,1,2.0 kB,2000\r\n"
 
 FILE_WITH_FORMAT_ONLY = File(
     uuid=uuid.uuid4(),
diff --git a/AIPscan/Reporter/tests/test_format_versions_count.py b/AIPscan/Reporter/tests/test_format_versions_count.py
index 8f627dcc..b4c9ae57 100644
--- a/AIPscan/Reporter/tests/test_format_versions_count.py
+++ b/AIPscan/Reporter/tests/test_format_versions_count.py
@@ -1,6 +1,6 @@
 from flask import current_app
 
-EXPECTED_CSV_CONTENTS = b"PUID,Format,Version,Count,Size\r\nfmt/44,JPEG,1.02,1,2.0 kB\r\nfmt/43,JPEG,1.01,1,1.0 kB\r\nfmt/468,ISO Disk Image File,,1,0 Bytes\r\n"
+EXPECTED_CSV_CONTENTS = b"PUID,Format,Version,Count,Size,Size (bytes)\r\nfmt/44,JPEG,1.02,1,2.0 kB,2000\r\nfmt/43,JPEG,1.01,1,1.0 kB,1000\r\nfmt/468,ISO Disk Image File,,1,0 Bytes,0\r\n"
 
 
 def test_format_versions_count(app_with_populated_format_versions):
diff --git a/AIPscan/Reporter/tests/test_formats_count.py b/AIPscan/Reporter/tests/test_formats_count.py
index 68956a14..dc44045f 100644
--- a/AIPscan/Reporter/tests/test_formats_count.py
+++ b/AIPscan/Reporter/tests/test_formats_count.py
@@ -1,8 +1,6 @@
 from flask import current_app
 
-EXPECTED_CSV_CONTENTS = (
-    b"Format,Count,Size\r\nJPEG,2,3.0 kB\r\nISO Disk Image File,1,0 Bytes\r\n"
-)
+EXPECTED_CSV_CONTENTS = b"Format,Count,Size,Size (bytes)\r\nJPEG,2,3.0 kB,3000\r\nISO Disk Image File,1,0 Bytes,0\r\n"
 
 
 def test_formats_count(app_with_populated_format_versions):
diff --git a/AIPscan/Reporter/tests/test_helpers.py b/AIPscan/Reporter/tests/test_helpers.py
index 3b7bfb6d..c177412b 100644
--- a/AIPscan/Reporter/tests/test_helpers.py
+++ b/AIPscan/Reporter/tests/test_helpers.py
@@ -13,7 +13,8 @@
 from AIPscan.Data.tests import MOCK_STORAGE_SERVICE_ID as STORAGE_SERVICE_ID
 from AIPscan.models import File, FileType
 from AIPscan.Reporter import helpers
-from AIPscan.Reporter.report_aips_by_format import HEADERS
+from AIPscan.Reporter.report_aip_contents import CSV_HEADERS as AIP_CONTENTS_HEADERS
+from AIPscan.Reporter.report_aips_by_format import HEADERS as AIPS_BY_FORMAT_HEADERS
 
 ROWS_WITH_SIZE = [
     {
@@ -33,11 +34,13 @@
         fields.FIELD_AIP_UUID: "test uuid",
         fields.FIELD_AIP_NAME: "test name",
         fields.FIELD_SIZE: "1.6 MB",
+        fields.FIELD_SIZE_BYTES: 1560321,
     },
     {
         fields.FIELD_AIP_UUID: "test uuid2",
         fields.FIELD_AIP_NAME: "test name2",
         fields.FIELD_SIZE: "123.4 kB",
+        fields.FIELD_SIZE_BYTES: 123423,
     },
 ]
 
@@ -62,7 +65,7 @@ def test_download_csv(app_instance, mocker):
     mock_get_ss_name = mocker.patch("AIPscan.Data._get_storage_service")
     mock_get_ss_name.return_value = STORAGE_SERVICE
 
-    headers = helpers.translate_headers(HEADERS)
+    headers = helpers.translate_headers(AIPS_BY_FORMAT_HEADERS)
     report_data = aips_by_file_format(STORAGE_SERVICE_ID, "test")
     response = helpers.download_csv(headers, report_data[fields.FIELD_AIPS], CSV_FILE)
 
@@ -93,6 +96,32 @@ def test_download_csv(app_instance, mocker):
     assert line_count == len(query_results) + 1
 
 
+@pytest.mark.parametrize(
+    "data,expected_output",
+    [
+        # No adding of header for size in bytes
+        (
+            {"headers": AIPS_BY_FORMAT_HEADERS, "add_bytes_column": False},
+            ["AIP Name", "UUID", "Count", "Size"],
+        ),
+        # Adding of header for size in bytes at end of header list
+        (
+            {"headers": AIPS_BY_FORMAT_HEADERS, "add_bytes_column": True},
+            ["AIP Name", "UUID", "Count", "Size", "Size (bytes)"],
+        ),
+        # Adding of header for size in bytes not at end of header list
+        (
+            {"headers": AIP_CONTENTS_HEADERS, "add_bytes_column": True},
+            ["UUID", "AIP Name", "Created Date", "Size", "Size (bytes)", "Formats"],
+        ),
+    ],
+)
+def test_translate_headers(data, expected_output):
+    headers = helpers.translate_headers(data["headers"], data["add_bytes_column"])
+
+    assert headers == expected_output
+
+
 @pytest.mark.parametrize(
     "data,expected_output",
     [
diff --git a/AIPscan/Reporter/tests/test_largest_aips.py b/AIPscan/Reporter/tests/test_largest_aips.py
index 82232597..0557572c 100644
--- a/AIPscan/Reporter/tests/test_largest_aips.py
+++ b/AIPscan/Reporter/tests/test_largest_aips.py
@@ -1,6 +1,6 @@
 from flask import current_app
 
-EXPECTED_CSV_CONTENTS = b"AIP Name,UUID,AIP Size,File Count\r\nTest AIP,111111111111-1111-1111-11111111,100 Bytes,1\r\nTest AIP,222222222222-2222-2222-22222222,100 Bytes,2\r\n"
+EXPECTED_CSV_CONTENTS = b"AIP Name,UUID,AIP Size,AIP Size (bytes),File Count\r\nTest AIP,111111111111-1111-1111-11111111,100 Bytes,100,1\r\nTest AIP,222222222222-2222-2222-22222222,100 Bytes,100,2\r\n"
 
 
 def test_largest_aips(app_with_populated_format_versions):
diff --git a/AIPscan/Reporter/tests/test_largest_files.py b/AIPscan/Reporter/tests/test_largest_files.py
index ed068034..2fbae978 100644
--- a/AIPscan/Reporter/tests/test_largest_files.py
+++ b/AIPscan/Reporter/tests/test_largest_files.py
@@ -1,6 +1,6 @@
 from flask import current_app
 
-EXPECTED_CSV_CONTENTS = b"UUID,Filename,Size,Type,Format,Version,PUID,AIP Name,AIP UUID\r\n555555555555-5555-5555-55555555,preservation.jpg,2.0 kB,original,JPEG,1.02,fmt/44,Test AIP,222222222222-2222-2222-22222222\r\n333333333333-3333-3333-33333333,original.jpg,1.0 kB,original,JPEG,1.01,fmt/43,Test AIP,111111111111-1111-1111-11111111\r\n444444444444-4444-4444-44444444,original.iso,0 Bytes,original,ISO Disk Image File,,fmt/468,Test AIP,222222222222-2222-2222-22222222\r\n"
+EXPECTED_CSV_CONTENTS = b"UUID,Filename,Size,Size (bytes),Type,Format,Version,PUID,AIP Name,AIP UUID\r\n555555555555-5555-5555-55555555,preservation.jpg,2.0 kB,2000,original,JPEG,1.02,fmt/44,Test AIP,222222222222-2222-2222-22222222\r\n333333333333-3333-3333-33333333,original.jpg,1.0 kB,1000,original,JPEG,1.01,fmt/43,Test AIP,111111111111-1111-1111-11111111\r\n444444444444-4444-4444-44444444,original.iso,0 Bytes,0,original,ISO Disk Image File,,fmt/468,Test AIP,222222222222-2222-2222-22222222\r\n"
 
 
 def test_largest_files(app_with_populated_format_versions):
diff --git a/AIPscan/Reporter/tests/test_storage_locations.py b/AIPscan/Reporter/tests/test_storage_locations.py
index 5c710b4f..83abcacc 100644
--- a/AIPscan/Reporter/tests/test_storage_locations.py
+++ b/AIPscan/Reporter/tests/test_storage_locations.py
@@ -6,7 +6,7 @@
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 FIXTURES_DIR = os.path.join(SCRIPT_DIR, "fixtures")
 
-EXPECTED_CSV_CONTENTS = b"UUID,Location,AIPs,Size,File Count\r\n2bbcea40-eb4d-4076-a81d-1ab046e34f6a,AIP Store Location 1,2,1.6 kB,3\r\ne69beb57-0e32-4c45-8db7-9b7723724a05,AIP Store Location 2,1,5.0 kB,2\r\n"
+EXPECTED_CSV_CONTENTS = b"UUID,Location,AIPs,Size,Size (bytes),File Count\r\n2bbcea40-eb4d-4076-a81d-1ab046e34f6a,AIP Store Location 1,2,1.6 kB,1600,3\r\ne69beb57-0e32-4c45-8db7-9b7723724a05,AIP Store Location 2,1,5.0 kB,5000,2\r\n"
 
 
 def test_storage_locations(storage_locations):
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 76ca719d..9805ece3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -280,13 +280,19 @@ def report_format_versions_count():
     )
     versions = version_data.get(fields.FIELD_FORMAT_VERSIONS)
 
-    headers = translate_headers(HEADERS)
-
     if csv:
+        # Passing True as the translate_headers function's second argument
+        # makes it add an additional size column with the size data left as
+        # the number of bytes, rather than a more human-readable description
+        # of the size, so that CSV rows can more easily be sorted by size
+        headers = translate_headers(HEADERS, True)
+
         filename = "format_versions.csv"
         csv_data = format_size_for_csv(versions)
         return download_csv(headers, csv_data, filename)
 
+    headers = translate_headers(HEADERS)
+
     return render_template(
         "report_format_versions_count.html",
         storage_service_id=storage_service_id,