From 73aa943d4a325f24102722798d7d0ce15857c4df Mon Sep 17 00:00:00 2001 From: Longshou Situ Date: Wed, 23 Aug 2023 12:36:36 -0700 Subject: [PATCH 1/7] Valkyrie publish file.characterized event for derivative creation. --- app/jobs/valkyrie_ingest_job.rb | 4 +--- .../valkyrie_characterization_service.rb | 5 +++++ .../hyrax/listeners/file_metadata_listener.rb | 16 ++++++++++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/app/jobs/valkyrie_ingest_job.rb b/app/jobs/valkyrie_ingest_job.rb index 590344ec94..6a5f393dc5 100644 --- a/app/jobs/valkyrie_ingest_job.rb +++ b/app/jobs/valkyrie_ingest_job.rb @@ -25,14 +25,12 @@ def perform(file, pcdm_use: Hyrax::FileMetadata::Use::ORIGINAL_FILE) def ingest(file:, pcdm_use:) file_set_uri = Valkyrie::ID.new(file.file_set_uri) file_set = Hyrax.query_service.find_by(id: file_set_uri) - updated_metadata = upload_file( + upload_file( file: file, file_set: file_set, pcdm_use: pcdm_use, user: file.user ) - - ValkyrieCreateDerivativesJob.perform_later(file_set.id.to_s, updated_metadata.id.to_s) end ## diff --git a/app/services/hyrax/characterization/valkyrie_characterization_service.rb b/app/services/hyrax/characterization/valkyrie_characterization_service.rb index 66bec8d72c..d8e5a5ebf6 100644 --- a/app/services/hyrax/characterization/valkyrie_characterization_service.rb +++ b/app/services/hyrax/characterization/valkyrie_characterization_service.rb @@ -15,6 +15,11 @@ def self.run(metadata:, file:, user: ::User.system_user, **options) new(metadata: metadata, file: file, **options).characterize saved = Hyrax.persister.save(resource: metadata) Hyrax.publisher.publish('file.metadata.updated', metadata: saved, user: user) + + Hyrax.publisher.publish('file.characterized', + file_set: Hyrax.query_service.find_by(id: saved.file_set_id), + file_id: saved.id.to_s, + path_hint: saved.file_identifier.to_s) end ## diff --git a/app/services/hyrax/listeners/file_metadata_listener.rb b/app/services/hyrax/listeners/file_metadata_listener.rb index 6ef1538faa..2a223b763f 100644 --- a/app/services/hyrax/listeners/file_metadata_listener.rb +++ b/app/services/hyrax/listeners/file_metadata_listener.rb @@ -12,8 +12,20 @@ class FileMetadataListener # @param [Dry::Events::Event] event # @return [void] def on_file_characterized(event) - CreateDerivativesJob - .perform_later(event[:file_set], event[:file_id], event[:path_hint]) + file_set = event[:file_set] + + case file_set + when ActiveFedora::Base # ActiveFedora + CreateDerivativesJob + .perform_later(file_set, event[:file_id], event[:path_hint]) + else + # With valkyrie, file.characterized event is also published for derivatives. + # We only need to create derivative for the original file. + file_id = event[:file_id] + file_metadata = Hyrax.custom_queries.find_file_metadata_by(id: file_id) + + ValkyrieCreateDerivativesJob.perform_later(file_set.id.to_s, file_id) if file_metadata&.original_file? + end end ## From ffe84194abbd28c441dc448cdb9f9bf94b8e5bc3 Mon Sep 17 00:00:00 2001 From: Longshou Situ Date: Wed, 23 Aug 2023 15:42:25 -0700 Subject: [PATCH 2/7] Fix failed tests for ValkyrieCharacterizationService. --- .../valkyrie_characterization_service_spec.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spec/services/hyrax/characterization/valkyrie_characterization_service_spec.rb b/spec/services/hyrax/characterization/valkyrie_characterization_service_spec.rb index 91de5f56ef..ec2d6f33d5 100644 --- a/spec/services/hyrax/characterization/valkyrie_characterization_service_spec.rb +++ b/spec/services/hyrax/characterization/valkyrie_characterization_service_spec.rb @@ -18,7 +18,10 @@ original_filename: 'test_world.png') end - before { Hyrax.publisher.subscribe(listener) } + before do + Hyrax.publisher.subscribe(listener) + metadata.file_set_id = file_set.id + end after { Hyrax.publisher.unsubscribe(listener) } describe '#run' do From 3e2dd8106b6fa7a2b448747c50159b8eabd7b8aa Mon Sep 17 00:00:00 2001 From: Longshou Situ Date: Mon, 28 Aug 2023 08:50:10 -0700 Subject: [PATCH 3/7] Don't run the file characterization job for derivatives. --- .../hyrax/listeners/file_metadata_listener.rb | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/app/services/hyrax/listeners/file_metadata_listener.rb b/app/services/hyrax/listeners/file_metadata_listener.rb index 2a223b763f..5a36f20979 100644 --- a/app/services/hyrax/listeners/file_metadata_listener.rb +++ b/app/services/hyrax/listeners/file_metadata_listener.rb @@ -19,12 +19,8 @@ def on_file_characterized(event) CreateDerivativesJob .perform_later(file_set, event[:file_id], event[:path_hint]) else - # With valkyrie, file.characterized event is also published for derivatives. - # We only need to create derivative for the original file. - file_id = event[:file_id] - file_metadata = Hyrax.custom_queries.find_file_metadata_by(id: file_id) - - ValkyrieCreateDerivativesJob.perform_later(file_set.id.to_s, file_id) if file_metadata&.original_file? + ValkyrieCreateDerivativesJob + .perform_later(file_set.id.to_s, event[:file_id]) end end @@ -51,7 +47,9 @@ def on_file_metadata_updated(event) # @param [Dry::Events::Event] event # @return [void] def on_file_uploaded(event) - # Run characterization + # Run characterization for original file only + return unless event[:metadata]&.original_file? + Hyrax.config .characterization_service .run(metadata: event[:metadata], file: event[:metadata].file) From f74bd3c6b47e03ebb2818b58685262a1ad767846 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 25 Aug 2023 15:25:59 -0700 Subject: [PATCH 4/7] Fix single use links for Valkyrie. Closes #6204 This also adds a shortcut to allow creating a work that has a file, including a FileSet, FileMetadata, and File Identifier. I'm not sure it's the best, but it works really well for this test! --- .../valkyrie_downloads_controller_behavior.rb | 2 +- .../single_use_links_viewer_controller.rb | 18 ++++++++++++++++-- .../single_use_links_viewer_controller_spec.rb | 14 +++++++++----- spec/factories/hyrax_file_metadata.rb | 1 + spec/factories/hyrax_file_set.rb | 3 ++- spec/factories/hyrax_work.rb | 10 ++++++++++ spec/spec_helper.rb | 2 ++ 7 files changed, 41 insertions(+), 9 deletions(-) diff --git a/app/controllers/concerns/hyrax/valkyrie_downloads_controller_behavior.rb b/app/controllers/concerns/hyrax/valkyrie_downloads_controller_behavior.rb index 121512bba9..feedd4a1ca 100644 --- a/app/controllers/concerns/hyrax/valkyrie_downloads_controller_behavior.rb +++ b/app/controllers/concerns/hyrax/valkyrie_downloads_controller_behavior.rb @@ -25,7 +25,7 @@ def send_file_contents_valkyrie(file_set) file.rewind send_data send_range_valkyrie(file: file), data_options(file_metadata) else - send_file file.disk_path + send_file file.disk_path, data_options(file_metadata).except(:status) end end diff --git a/app/controllers/hyrax/single_use_links_viewer_controller.rb b/app/controllers/hyrax/single_use_links_viewer_controller.rb index 55b9778add..3c295b8666 100644 --- a/app/controllers/hyrax/single_use_links_viewer_controller.rb +++ b/app/controllers/hyrax/single_use_links_viewer_controller.rb @@ -37,6 +37,14 @@ def show private + def send_content + if Hyrax.config.use_valkyrie? + send_file_contents_valkyrie(@asset) + else + super + end + end + def curation_concern response, _document_list = search_service.search_results response.documents.first @@ -52,6 +60,12 @@ def content_options end end + def data_options(file_metadata) + super.tap do |options| + options[:disposition] = 'attachment' if action_name == 'download' + end + end + # This is called in a before filter. It causes @asset to be set. def authorize_download! authorize! :read, asset @@ -66,7 +80,7 @@ def not_found_exception end def asset - @asset ||= Hyrax.query_service.find_by_alternate_identifier(alternate_identifier: single_use_link.item_id, use_valkyrie: false) + @asset ||= Hyrax.query_service.find_by(id: single_use_link.item_id) end def current_ability @@ -94,7 +108,7 @@ def initialize(user, single_use_link) return unless single_use_link @single_use_link = single_use_link - can :read, [ActiveFedora::Base, ::SolrDocument] do |obj| + can :read, [ActiveFedora::Base, ::SolrDocument, Hyrax::Resource] do |obj| single_use_link.valid? && single_use_link.item_id == obj.id && single_use_link.destroy! end end diff --git a/spec/controllers/hyrax/single_use_links_viewer_controller_spec.rb b/spec/controllers/hyrax/single_use_links_viewer_controller_spec.rb index 69e1880157..a7316ccc53 100644 --- a/spec/controllers/hyrax/single_use_links_viewer_controller_spec.rb +++ b/spec/controllers/hyrax/single_use_links_viewer_controller_spec.rb @@ -3,7 +3,12 @@ routes { Hyrax::Engine.routes } let(:user) { build(:user) } let(:file) do - create(:file_set, label: 'world.png', user: user) + Hyrax.query_service.find_by(id: work.member_ids.first) + end + let(:file_metadata) { Hyrax.custom_queries.find_file_metadata_by(id: file.original_file_id) } + let(:disk_file) { Hyrax.storage_adapter.find_by(id: file_metadata.file_identifier) } + let(:work) do + FactoryBot.valkyrie_create(:hyrax_work, uploaded_files: [FactoryBot.create(:uploaded_file)]) end describe "retrieval links" do @@ -12,17 +17,16 @@ end let :download_link do - Hydra::Works::AddFileToFileSet.call(file, File.open(fixture_path + '/world.png'), :original_file) - SingleUseLink.create item_id: file.id, path: Hyrax::Engine.routes.url_helpers.download_path(id: file, locale: 'en') + SingleUseLink.create item_id: file.id, path: Hyrax::Engine.routes.url_helpers.download_path(id: file.id.to_s, locale: 'en') end let(:show_link_hash) { show_link.download_key } let(:download_link_hash) { download_link.download_key } describe "GET 'download'" do - let(:expected_content) { Hyrax.query_service.find_by_alternate_identifier(alternate_identifier: file.id, use_valkyrie: false).original_file.content } + let(:expected_content) { disk_file.read } it "downloads the file and deletes the link from the database" do - expect(controller).to receive(:send_file_headers!).with({ filename: 'world.png', disposition: 'attachment', type: 'image/png' }) + expect(controller).to receive(:send_file_headers!).with({ filename: 'image.jp2', disposition: 'attachment', type: 'application/octet-stream' }) get :download, params: { id: download_link_hash } expect(response.body).to eq expected_content expect(response).to be_successful diff --git a/spec/factories/hyrax_file_metadata.rb b/spec/factories/hyrax_file_metadata.rb index b7ce189b06..df65ce7fd5 100644 --- a/spec/factories/hyrax_file_metadata.rb +++ b/spec/factories/hyrax_file_metadata.rb @@ -7,6 +7,7 @@ transient do use { nil } visibility_setting { nil } + file { nil } end after(:build) do |file_metadata, evaluator| diff --git a/spec/factories/hyrax_file_set.rb b/spec/factories/hyrax_file_set.rb index 52046801e8..d37f2af541 100644 --- a/spec/factories/hyrax_file_set.rb +++ b/spec/factories/hyrax_file_set.rb @@ -15,6 +15,7 @@ read_users { [] } read_groups { [] } with_index { true } + uploaded_files { [] } end after(:build) do |file_set, evaluator| @@ -24,7 +25,7 @@ .assign_access_for(visibility: evaluator.visibility_setting) end file_set.file_ids = evaluator.files.map(&:id) if evaluator.files - file_set.original_file_id = evaluator.original_file.id if evaluator.original_file + file_set.original_file_id = evaluator&.original_file&.id || evaluator.files&.first&.id file_set.extracted_text_id = evaluator.extracted_text.id if evaluator.extracted_text file_set.thumbnail_id = evaluator.thumbnail.id if evaluator.thumbnail diff --git a/spec/factories/hyrax_work.rb b/spec/factories/hyrax_work.rb index ab4689b80a..14771d8404 100644 --- a/spec/factories/hyrax_work.rb +++ b/spec/factories/hyrax_work.rb @@ -19,6 +19,7 @@ members { nil } visibility_setting { nil } with_index { true } + uploaded_files { [] } end after(:build) do |work, evaluator| @@ -41,6 +42,15 @@ .new(resource: work) .assign_access_for(visibility: evaluator.visibility_setting) end + if evaluator.uploaded_files.present? + Hyrax::WorkUploadsHandler.new(work: work).add(files: evaluator.uploaded_files).attach + evaluator.uploaded_files.each do |file| + allow(Hyrax.config.characterization_service).to receive(:run).and_return(true) + # I don't love this - we might want to just run background jobs so + # this is more real, but we'd have to stub some things. + ValkyrieIngestJob.perform_now(file) + end + end work.permission_manager.edit_groups = evaluator.edit_groups work.permission_manager.edit_users = evaluator.edit_users diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 19d8ab74e6..ba4116e5bc 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -86,6 +86,7 @@ def ci_build? FactoryBot.register_strategy(:json, JsonStrategy) FactoryBot.definition_file_paths = [File.expand_path("../factories", __FILE__)] FactoryBot.find_definitions +require 'rspec/mocks' require 'shoulda/matchers' require 'shoulda/callback/matchers' @@ -142,6 +143,7 @@ def clean_active_fedora_repository config.use_transactional_fixtures = false config.before :suite do + FactoryBot::SyntaxRunner.include RSpec::Mocks::ExampleMethods Hyrax::RedisEventStore.instance.then(&:flushdb) DatabaseCleaner.clean_with(:truncation) # Noid minting causes extra LDP requests which slow the test suite. From bc4e77df77e9c55896e1951b246dd67b084d9292 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 25 Aug 2023 15:29:49 -0700 Subject: [PATCH 5/7] Use valkyrie for download if it's a Hyrax::FileSet --- app/controllers/hyrax/single_use_links_viewer_controller.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/controllers/hyrax/single_use_links_viewer_controller.rb b/app/controllers/hyrax/single_use_links_viewer_controller.rb index 3c295b8666..e7ab099bd9 100644 --- a/app/controllers/hyrax/single_use_links_viewer_controller.rb +++ b/app/controllers/hyrax/single_use_links_viewer_controller.rb @@ -38,7 +38,7 @@ def show private def send_content - if Hyrax.config.use_valkyrie? + if @asset.is_a?(Hyrax::FileSet) send_file_contents_valkyrie(@asset) else super From 43547518bbd1111ae64d051a6d3b5c7b3529b80e Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 25 Aug 2023 15:35:49 -0700 Subject: [PATCH 6/7] Fix test. --- .../hyrax/single_use_links_viewer_controller_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/controllers/hyrax/single_use_links_viewer_controller_spec.rb b/spec/controllers/hyrax/single_use_links_viewer_controller_spec.rb index a7316ccc53..e9dca30c10 100644 --- a/spec/controllers/hyrax/single_use_links_viewer_controller_spec.rb +++ b/spec/controllers/hyrax/single_use_links_viewer_controller_spec.rb @@ -26,7 +26,7 @@ describe "GET 'download'" do let(:expected_content) { disk_file.read } it "downloads the file and deletes the link from the database" do - expect(controller).to receive(:send_file_headers!).with({ filename: 'image.jp2', disposition: 'attachment', type: 'application/octet-stream' }) + expect(controller).to receive(:send_file_headers!).with({ filename: 'image.jp2', disposition: 'attachment', type: file_metadata.mime_type }) get :download, params: { id: download_link_hash } expect(response.body).to eq expected_content expect(response).to be_successful From 5ba61e471c52830ea8ada57e0f4c1bb0ed29107c Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Mon, 28 Aug 2023 11:42:04 -0700 Subject: [PATCH 7/7] Remove cruft. --- spec/factories/hyrax_file_metadata.rb | 1 - spec/factories/hyrax_file_set.rb | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/spec/factories/hyrax_file_metadata.rb b/spec/factories/hyrax_file_metadata.rb index df65ce7fd5..b7ce189b06 100644 --- a/spec/factories/hyrax_file_metadata.rb +++ b/spec/factories/hyrax_file_metadata.rb @@ -7,7 +7,6 @@ transient do use { nil } visibility_setting { nil } - file { nil } end after(:build) do |file_metadata, evaluator| diff --git a/spec/factories/hyrax_file_set.rb b/spec/factories/hyrax_file_set.rb index d37f2af541..52046801e8 100644 --- a/spec/factories/hyrax_file_set.rb +++ b/spec/factories/hyrax_file_set.rb @@ -15,7 +15,6 @@ read_users { [] } read_groups { [] } with_index { true } - uploaded_files { [] } end after(:build) do |file_set, evaluator| @@ -25,7 +24,7 @@ .assign_access_for(visibility: evaluator.visibility_setting) end file_set.file_ids = evaluator.files.map(&:id) if evaluator.files - file_set.original_file_id = evaluator&.original_file&.id || evaluator.files&.first&.id + file_set.original_file_id = evaluator.original_file.id if evaluator.original_file file_set.extracted_text_id = evaluator.extracted_text.id if evaluator.extracted_text file_set.thumbnail_id = evaluator.thumbnail.id if evaluator.thumbnail