Skip to content

Commit

Permalink
Merge pull request #482 from emory-libraries/adds_testing_to_ensure_b…
Browse files Browse the repository at this point in the history
…inaries_with_correct_mime_types

Adds restrictive tests to ingest binaries with correct mime types.
  • Loading branch information
alexBLR authored Aug 29, 2024
2 parents aa6b8e7 + 65c6f24 commit 5645c33
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 7 deletions.
27 changes: 22 additions & 5 deletions lib/fedora/fedora_three_objects_migration_methods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def copy_files_to_folder
pull_audit_object(datastream:)
elsif test_for_xmls(datastream:)
pull_xml_object(datastream:)
else
elsif test_for_license(datastream:) || test_for_allowed_mime_type(datastream:)
pull_binary_object(datastream:)
end
end
Expand All @@ -41,8 +41,20 @@ def test_for_audit(datastream:)
datastream['ID'] == 'AUDIT'
end

# True when the datastream's ID attribute marks it as the Symplectic licence datastream.
def test_for_license(datastream:)
  datastream_id = datastream['ID']
  datastream_id == 'SYMPLECTIC-LICENCE'
end

# True when the MIMETYPE attribute of the datastream's first child element contains
# one of the ALLOWED_TYPES keys as a substring (so a value with a trailing parameter,
# e.g. "application/pdf; charset=binary", still matches the "application/pdf" key).
#
# Returns false, instead of raising NoMethodError, when the datastream has no child
# elements or the first one carries no MIMETYPE attribute: such a datastream cannot
# be matched against the allow-list and is therefore not an allowed binary.
def test_for_allowed_mime_type(datastream:)
  first_element = datastream.elements.first
  # Read the attribute once; it does not change between allow-list comparisons.
  mime_type = first_element && first_element['MIMETYPE']
  return false unless mime_type

  ALLOWED_TYPES.keys.any? { |allowed| mime_type.include?(allowed.to_s) }
end

# Reports whether none of the given datastreams counts as a binary, and records in
# @number_of_binary_datastreams how many do.
# A datastream counts as a binary here when it is not XML, not the AUDIT datastream,
# has an allowed MIME type, and is not the licence datastream.
def pid_lacks_binaries(datastreams)
# NOTE(review): the next line is the removed side of this diff (hunk -41,8 +41,20);
# the statements after it are its replacement.
datastreams.all? { |ds| test_for_xmls(datastream: ds) || test_for_audit(datastream: ds) }
# Drop everything that is not a binary in the sense described above.
tested_datastreams = datastreams.reject do |ds|
test_for_xmls(datastream: ds) || test_for_audit(datastream: ds) || !test_for_allowed_mime_type(datastream: ds) || test_for_license(datastream: ds)
end
# Kept on the instance because blank_filename_test compares this count to 1.
@number_of_binary_datastreams = tested_datastreams.size
tested_datastreams.empty?
end

def pull_audit_object(datastream:)
Expand Down Expand Up @@ -80,6 +92,7 @@ def file_end_reports
# PIDs with no binaries report
File.write("./pids_with_no_binaries_#{@date_time_started}.txt", "List of PIDs with no binary files: #{@pids_with_no_binaries.join(', ')}") unless @pids_with_no_binaries.empty?

return if @pids_with_filenames.empty?
# PIDs with binaries CSV
::CSV.open("./pids_with_binaries_#{@date_time_started}.csv", 'wb') do |csv|
csv << ['pid', 'filenames']
Expand All @@ -99,8 +112,12 @@ def truncate_long_filenames(filename)
# Builds the filename for a binary datastream and stores the datastream's ID in @binary_id.
# The LABEL of the datastream's first child element is used (spaces replaced with
# underscores, then passed through truncate_long_filenames) unless it fails
# blank_filename_test, in which case the name is "content.<ext>" with <ext> taken
# from ALLOWED_TYPES according to the element's MIMETYPE.
def process_binary_filename(datastream:)
@binary_id = datastream['ID']
binary_filename = datastream.elements.first['LABEL']
# NOTE(review): the next three lines are the removed side of this diff (hunk -99,8 +112,12);
# the two statements after them are their replacement.
blank_filename_test = binary_filename.empty? || binary_filename.include?('/')
binary_ext = ALLOWED_TYPES[:"#{datastream.elements.first['MIMETYPE']}"]
blank_filename_test ? ["content", binary_ext].join('.') : truncate_long_filenames(binary_filename.tr(' ', '_'))
# The extension is the value of the first ALLOWED_TYPES entry whose key is a substring of the MIME type.
# NOTE(review): `find` returns nil when no key matches, so `[1]` would raise NoMethodError.
# NOTE(review): for the licence datastream binary_ext is left nil, so an empty or
# slash-containing label yields "content." with no extension. Confirm both are acceptable.
binary_ext = ALLOWED_TYPES.find { |k, _v| datastream.elements.first['MIMETYPE'].include?(k.to_s) }[1] unless test_for_license(datastream:)
blank_filename_test(datastream:, binary_filename:) ? ["content", binary_ext].join('.') : truncate_long_filenames(binary_filename.tr(' ', '_'))
end

# Decides whether the datastream's label is unusable as a filename, so the caller
# should fall back to a generated name.
#
# An empty label or one containing '/' is always unusable. Otherwise the licence
# datastream's label is always kept. For any other datastream the label is unusable
# when it contains none of the ALLOWED_TYPES extensions (as ".<ext>"), or when
# @number_of_binary_datastreams is exactly 1.
def blank_filename_test(datastream:, binary_filename:)
  return true if binary_filename.empty? || binary_filename.include?('/')
  return false if test_for_license(datastream:)

  lacks_known_extension = ALLOWED_TYPES.values.none? { |ext| binary_filename.include?(".#{ext}") }
  lacks_known_extension || @number_of_binary_datastreams == 1
end
end
4 changes: 2 additions & 2 deletions spec/lib/fedora/migrate_fedora_three_objects_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

shared_examples 'tests for xml presence method calling' do
it 'calls #test_for_xmls and #test_for_audit' do
expect(migrator).to receive(:test_for_xmls)
expect(migrator).to receive(:test_for_audit)
expect(migrator).to receive(:test_for_xmls).at_least(:once)
expect(migrator).to receive(:test_for_audit).at_least(:once)

migrator.send(:pid_lacks_binaries, datastreams)
end
Expand Down

0 comments on commit 5645c33

Please sign in to comment.