From d8b0b62f6cd83a63fd9c85ecdf615ab1391ca415 Mon Sep 17 00:00:00 2001 From: Kirk Wang Date: Mon, 19 Feb 2024 05:46:13 -0800 Subject: [PATCH] Add conditional to `.fileset_for_directives` (#6695) We are noticing that when ingesting a PDF, the `ValkyrieCreateDerivativesJob` is failing because the file set is not found by `ValkyriePersistDerivatives.fileset_for_directives` when `directives[:url]` is an id instead of an actual URI; see `FileSetDerivativesService#extract_full_text`. This change checks whether the path contains a "/"; if it does not, the value is assumed to already be an id and is used for the lookup directly. --- app/services/hyrax/valkyrie_persist_derivatives.rb | 13 ++++++++++--- .../hyrax/valkyrie_persist_derivatives_spec.rb | 12 ++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/app/services/hyrax/valkyrie_persist_derivatives.rb b/app/services/hyrax/valkyrie_persist_derivatives.rb index 910f5b24a3..78bccf322b 100644 --- a/app/services/hyrax/valkyrie_persist_derivatives.rb +++ b/app/services/hyrax/valkyrie_persist_derivatives.rb @@ -43,9 +43,16 @@ def self.call(stream, # @return [Hyrax::FileSet] def self.fileset_for_directives(directives) path = URI(directives.fetch(:url)).path - id = path.sub(Hyrax.config.derivatives_path.to_s, "") - .delete('/') - .match(/^(.*)-\w*(\.\w+)*$/) { |m| m[1] } + # checks if it's a file path, else assuming it is already an id + # Hyrax::FileSetDerivativesService#extract_full_text passes in the raw uri + # and not a derivative_url like the other derivative formats + id = if path.include?("/") + path.sub(Hyrax.config.derivatives_path.to_s, "") + .delete('/') + .match(/^(.*)-\w*(\.\w+)*$/) { |m| m[1] } + else + path + end raise "Could not extract fileset id from path #{path}" unless id Hyrax.metadata_adapter.query_service.find_by(id: id) diff --git a/spec/services/hyrax/valkyrie_persist_derivatives_spec.rb b/spec/services/hyrax/valkyrie_persist_derivatives_spec.rb index f91ff013c8..6973d5c868 100644 --- 
a/spec/services/hyrax/valkyrie_persist_derivatives_spec.rb +++ b/spec/services/hyrax/valkyrie_persist_derivatives_spec.rb @@ -90,5 +90,17 @@ .to raise_error(/Could not extract fileset id from path/) end end + + context 'with an id' do + let(:directives) do + { url: '123' } + end + + it 'extracts the id' do + expect(Hyrax.metadata_adapter.query_service) + .to receive(:find_by).with(id: '123') + described_class.fileset_for_directives(directives) + end + end end end