From 8d3113335dd61f98cb5dc8a903c56f8f241fa511 Mon Sep 17 00:00:00 2001 From: bwatson78 Date: Mon, 26 Aug 2024 15:42:25 -0500 Subject: [PATCH 1/2] Adds restrictive tests to ingest binaries with correct mime types. --- .../fedora_three_objects_migration_methods.rb | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/fedora/fedora_three_objects_migration_methods.rb b/lib/fedora/fedora_three_objects_migration_methods.rb index 6c225b49..9c4171bb 100644 --- a/lib/fedora/fedora_three_objects_migration_methods.rb +++ b/lib/fedora/fedora_three_objects_migration_methods.rb @@ -27,7 +27,7 @@ def copy_files_to_folder pull_audit_object(datastream:) elsif test_for_xmls(datastream:) pull_xml_object(datastream:) - else + elsif test_for_license(datastream:) || test_for_allowed_mime_type(datastream:) pull_binary_object(datastream:) end end @@ -41,8 +41,19 @@ def test_for_audit(datastream:) datastream['ID'] == 'AUDIT' end + def test_for_license(datastream:) + datastream['ID'] == 'SYMPLECTIC-LICENCE' + end + + def test_for_allowed_mime_type(datastream:) + ALLOWED_TYPES.any? { |k, _v| datastream.elements.first['MIMETYPE'].include?(k.to_s) } + end + def pid_lacks_binaries(datastreams) - datastreams.all? { |ds| test_for_xmls(datastream: ds) || test_for_audit(datastream: ds) } + tested_datastreams = datastreams.reject do |ds| + test_for_xmls(datastream: ds) || test_for_audit(datastream: ds) || !test_for_allowed_mime_type(datastream: ds) || test_for_license(datastream: ds) + end + tested_datastreams.empty? end def pull_audit_object(datastream:) @@ -80,6 +91,7 @@ def file_end_reports # PIDs with no binaries report File.write("./pids_with_no_binaries_#{@date_time_started}.txt", "List of PIDs with no binary files: #{@pids_with_no_binaries.join(', ')}") unless @pids_with_no_binaries.empty? + return if @pids_with_filenames.empty? 
# PIDs with binaries CSV ::CSV.open("./pids_with_binaries_#{@date_time_started}.csv", 'wb') do |csv| csv << ['pid', 'filenames'] @@ -99,8 +111,8 @@ def truncate_long_filenames(filename) def process_binary_filename(datastream:) @binary_id = datastream['ID'] binary_filename = datastream.elements.first['LABEL'] - blank_filename_test = binary_filename.empty? || binary_filename.include?('/') - binary_ext = ALLOWED_TYPES[:"#{datastream.elements.first['MIMETYPE']}"] + blank_filename_test = binary_filename.empty? || binary_filename.include?('/') || (!test_for_license(datastream:) && !ALLOWED_TYPES.values.any? { |t| binary_filename.include?(".#{t}") }) + binary_ext = ALLOWED_TYPES.find { |k, _v| datastream.elements.first['MIMETYPE'].include?(k.to_s) }[1] unless test_for_license(datastream:) blank_filename_test ? ["content", binary_ext].join('.') : truncate_long_filenames(binary_filename.tr(' ', '_')) end end From 65c6f248ef67f924f77a7ff1d9debe170ec36420 Mon Sep 17 00:00:00 2001 From: bwatson78 Date: Tue, 27 Aug 2024 14:17:17 -0500 Subject: [PATCH 2/2] Corrects some logic and rspec. --- lib/fedora/fedora_three_objects_migration_methods.rb | 9 +++++++-- spec/lib/fedora/migrate_fedora_three_objects_spec.rb | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/fedora/fedora_three_objects_migration_methods.rb b/lib/fedora/fedora_three_objects_migration_methods.rb index 9c4171bb..0556ee03 100644 --- a/lib/fedora/fedora_three_objects_migration_methods.rb +++ b/lib/fedora/fedora_three_objects_migration_methods.rb @@ -53,6 +53,7 @@ def pid_lacks_binaries(datastreams) tested_datastreams = datastreams.reject do |ds| test_for_xmls(datastream: ds) || test_for_audit(datastream: ds) || !test_for_allowed_mime_type(datastream: ds) || test_for_license(datastream: ds) end + @number_of_binary_datastreams = tested_datastreams.size tested_datastreams.empty? 
end @@ -111,8 +112,12 @@ def truncate_long_filenames(filename) def process_binary_filename(datastream:) @binary_id = datastream['ID'] binary_filename = datastream.elements.first['LABEL'] - blank_filename_test = binary_filename.empty? || binary_filename.include?('/') || (!test_for_license(datastream:) && !ALLOWED_TYPES.values.any? { |t| binary_filename.include?(".#{t}") }) binary_ext = ALLOWED_TYPES.find { |k, _v| datastream.elements.first['MIMETYPE'].include?(k.to_s) }[1] unless test_for_license(datastream:) - blank_filename_test ? ["content", binary_ext].join('.') : truncate_long_filenames(binary_filename.tr(' ', '_')) + blank_filename_test(datastream:, binary_filename:) ? ["content", binary_ext].join('.') : truncate_long_filenames(binary_filename.tr(' ', '_')) + end + + def blank_filename_test(datastream:, binary_filename:) + binary_filename.empty? || binary_filename.include?('/') || (!test_for_license(datastream:) && !ALLOWED_TYPES.values.any? { |t| binary_filename.include?(".#{t}") }) || + (@number_of_binary_datastreams == 1 && !test_for_license(datastream:)) end end diff --git a/spec/lib/fedora/migrate_fedora_three_objects_spec.rb b/spec/lib/fedora/migrate_fedora_three_objects_spec.rb index 6c5f0772..c6762a61 100644 --- a/spec/lib/fedora/migrate_fedora_three_objects_spec.rb +++ b/spec/lib/fedora/migrate_fedora_three_objects_spec.rb @@ -12,8 +12,8 @@ shared_examples 'tests for xml presence method calling' do it 'calls #test_for_xmls and #test_for_audit' do - expect(migrator).to receive(:test_for_xmls) - expect(migrator).to receive(:test_for_audit) + expect(migrator).to receive(:test_for_xmls).at_least(:once) + expect(migrator).to receive(:test_for_audit).at_least(:once) migrator.send(:pid_lacks_binaries, datastreams) end