From 327dd5df82d7fcb454ee31defebbce0b54487c4c Mon Sep 17 00:00:00 2001 From: Daniel Pierce Date: Thu, 12 Oct 2023 13:48:36 -0400 Subject: [PATCH 1/2] Use a hashed version identifier in IIIF manifest Ensures the IIIF server can discern outdated image data in the event of a file revision. Summary of changes: - Add RiiifFileResolver and RiiifFile to more efficiently use riiif with valkyrie storage adapters. Uses a read/write lock when loading a local file copy from storage. - Rerun characterization/derivatives when reverting a file version. - Include version in solr indexed original_file_id. This is now generated by Hyrax::FileSet#iiif_id instead of in the indexer. - Patch valkyrie StreamFile length in same manner as ::File. - Set json content type for manifest response. - The :test_disk storage adapter for specs is now a VersionedDisk adapter. --- .dassie/config/initializers/riiif.rb | 16 ++---- .koppie/config/initializers/riiif.rb | 17 +++---- .../hyrax/works_controller_behavior.rb | 3 +- app/controllers/hyrax/file_sets_controller.rb | 2 + .../hyrax/valkyrie_file_set_indexer.rb | 7 +-- app/models/concerns/hyrax/riiif_file.rb | 30 +++++++++++ app/models/hyrax/file_set.rb | 9 ++++ app/services/hyrax/riiif_file_resolver.rb | 50 +++++++++++++++++++ config/initializers/file_length_patch.rb | 6 ++- hyrax.gemspec | 1 + .../templates/config/initializers/riiif.rb | 32 +++++------- lib/hyrax/specs/shared_specs/hydra_works.rb | 14 ++++++ spec/factories/hyrax_file_metadata.rb | 9 +++- .../hyrax/file_set_presenter_spec.rb | 2 +- .../hyrax/riiif_file_resolver_spec.rb | 25 ++++++++++ spec/spec_helper.rb | 2 +- 16 files changed, 172 insertions(+), 53 deletions(-) create mode 100644 app/models/concerns/hyrax/riiif_file.rb create mode 100644 app/services/hyrax/riiif_file_resolver.rb create mode 100644 spec/services/hyrax/riiif_file_resolver_spec.rb diff --git a/.dassie/config/initializers/riiif.rb b/.dassie/config/initializers/riiif.rb index df1e443a2d..3d32938c09 100644 --- a/.dassie/config/initializers/riiif.rb +++ b/.dassie/config/initializers/riiif.rb @@ -1,12 +1,10 @@ # frozen_string_literal: true -ActiveSupport::Reloader.to_prepare do - Riiif::Image.file_resolver = Riiif::HttpFileResolver.new +Rails.application.reloader.to_prepare do Riiif::Image.info_service = lambda do |id, _file| # id will look like a path to a pcdm:file # (e.g. rv042t299%2Ffiles%2F6d71677a-4f80-42f1-ae58-ed1063fd79c7) # but we just want the id for the FileSet it's attached to. - # Capture everything before the first slash fs_id = id.sub(/\A([^\/]*)\/.*/, '\1') resp = Hyrax::SolrService.get("id:#{fs_id}") doc = resp['response']['docs'].first @@ -15,14 +13,10 @@ end if Hyrax.config.use_valkyrie? - # Use Valkyrie adapter to make sure file is available locally. Riiif will just open it then - # id comes in with the format "FILE_SET_ID/files/FILE_ID" - Riiif::Image.file_resolver.id_to_uri = lambda do |id| - file_metadata = Hyrax.query_service.find_by(id: id.split('/').last) - file = Hyrax.storage_adapter.find_by(id: file_metadata.file_identifier) - file.disk_path.to_s - end + Riiif::Image.file_resolver = Hyrax::RiiifFileResolver.new else + Riiif::Image.file_resolver = Riiif::HttpFileResolver.new + Riiif::Image.file_resolver.id_to_uri = lambda do |id| Hyrax::Base.id_to_uri(CGI.unescape(id)).tap do |url| Rails.logger.info "Riiif resolved #{id} to #{url}" @@ -35,5 +29,5 @@ Riiif.not_found_image = Rails.root.join('app', 'assets', 'images', 'us_404.svg') Riiif.unauthorized_image = Rails.root.join('app', 'assets', 'images', 'us_404.svg') - Riiif::Engine.config.cache_duration = 365.days + Riiif::Engine.config.cache_duration = 1.day end diff --git a/.koppie/config/initializers/riiif.rb b/.koppie/config/initializers/riiif.rb index 14219a1376..3d32938c09 100644 --- a/.koppie/config/initializers/riiif.rb +++ b/.koppie/config/initializers/riiif.rb @@ -1,12 +1,10 @@ # frozen_string_literal: true -ActiveSupport::Reloader.to_prepare do - Riiif::Image.file_resolver = Riiif::HttpFileResolver.new +Rails.application.reloader.to_prepare do Riiif::Image.info_service = lambda do |id, _file| # id will look like a path to a pcdm:file # (e.g. rv042t299%2Ffiles%2F6d71677a-4f80-42f1-ae58-ed1063fd79c7) # but we just want the id for the FileSet it's attached to. - # Capture everything before the first slash fs_id = id.sub(/\A([^\/]*)\/.*/, '\1') resp = Hyrax::SolrService.get("id:#{fs_id}") doc = resp['response']['docs'].first @@ -15,24 +13,21 @@ end if Hyrax.config.use_valkyrie? - # Use Valkyrie adapter to make sure file is available locally. Riiif will just open it then - # id comes in with the format "FILE_SET_ID/files/FILE_ID" - Riiif::Image.file_resolver.id_to_uri = lambda do |id| - file_metadata = Hyrax.query_service.find_by(id: id.split('/').last) - file = Hyrax.storage_adapter.find_by(id: file_metadata.file_identifier) - file.disk_path.to_s - end + Riiif::Image.file_resolver = Hyrax::RiiifFileResolver.new else + Riiif::Image.file_resolver = Riiif::HttpFileResolver.new + Riiif::Image.file_resolver.id_to_uri = lambda do |id| Hyrax::Base.id_to_uri(CGI.unescape(id)).tap do |url| Rails.logger.info "Riiif resolved #{id} to #{url}" end end end + Riiif::Image.authorization_service = Hyrax::IiifAuthorizationService Riiif.not_found_image = Rails.root.join('app', 'assets', 'images', 'us_404.svg') Riiif.unauthorized_image = Rails.root.join('app', 'assets', 'images', 'us_404.svg') - Riiif::Engine.config.cache_duration = 365.days + Riiif::Engine.config.cache_duration = 1.day end diff --git a/app/controllers/concerns/hyrax/works_controller_behavior.rb b/app/controllers/concerns/hyrax/works_controller_behavior.rb index 1d6756c635..3ded64a870 100644 --- a/app/controllers/concerns/hyrax/works_controller_behavior.rb +++ b/app/controllers/concerns/hyrax/works_controller_behavior.rb @@ -139,8 +139,7 @@ def manifest json = iiif_manifest_builder.manifest_for(presenter: iiif_manifest_presenter) respond_to do |wants| - wants.json { render json: json } - wants.html { render json: json } + wants.any { render json: json } end end diff --git a/app/controllers/hyrax/file_sets_controller.rb b/app/controllers/hyrax/file_sets_controller.rb index b49de1e592..69c601667c 100644 --- a/app/controllers/hyrax/file_sets_controller.rb +++ b/app/controllers/hyrax/file_sets_controller.rb @@ -179,6 +179,8 @@ def attempt_update_valkyrie def revert_valkyrie Hyrax::VersioningService.create(file_metadata, current_user, Hyrax.storage_adapter.find_by(id: params[:revision])) + # update_metadata + Hyrax.publisher.publish("file.uploaded", metadata: file_set.original_file) true end diff --git a/app/indexers/hyrax/valkyrie_file_set_indexer.rb b/app/indexers/hyrax/valkyrie_file_set_indexer.rb index 8409782066..87411b7a01 100644 --- a/app/indexers/hyrax/valkyrie_file_set_indexer.rb +++ b/app/indexers/hyrax/valkyrie_file_set_indexer.rb @@ -17,7 +17,7 @@ def to_solr # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Met # Metadata from the FileSet solr_doc['file_ids_ssim'] = resource.file_ids&.map(&:to_s) - solr_doc['original_file_id_ssi'] = original_file_id + solr_doc['original_file_id_ssi'] = resource.iiif_id solr_doc['extracted_text_id_ssi'] = resource.extracted_text_id.to_s solr_doc['hasRelatedMediaFragment_ssim'] = resource.representative_id.to_s solr_doc['hasRelatedImage_ssim'] = resource.thumbnail_id.to_s @@ -106,11 +106,6 @@ def to_solr # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Met private - # Convert Valkyrie Original File Pointer to versioned url syntax expected by the iiif_presenter - def original_file_id - "#{resource.id}/files/#{resource.original_file_id}" - end - def file_format(file) if file.mime_type.present? && file.format_label.present? "#{file.mime_type.split('/').last} (#{file.format_label.join(', ')})" diff --git a/app/models/concerns/hyrax/riiif_file.rb b/app/models/concerns/hyrax/riiif_file.rb new file mode 100644 index 0000000000..5d8ddf0b9b --- /dev/null +++ b/app/models/concerns/hyrax/riiif_file.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true +module Hyrax + # Adds file locking to Riiif::File + # @see RiiifFileResolver + class RiiifFile < Riiif::File + include ActiveSupport::Benchmarkable + + attr_reader :id + def initialize(input_path, tempfile = nil, id:) + super(input_path, tempfile) + raise(ArgumentError, "must specify id") if id.blank? + @id = id + end + + # Wrap extract in a read lock and benchmark it + def extract(transformation, image_info = nil) + Riiif::Image.file_resolver.file_locks[id].with_read_lock do + benchmark "RiiifFile extracted #{path} with #{transformation.to_params}", level: :debug do + super + end + end + end + + private + + def logger + Hyrax.logger + end + end +end diff --git a/app/models/hyrax/file_set.rb b/app/models/hyrax/file_set.rb index edb3bffe75..8d2f1dd11d 100644 --- a/app/models/hyrax/file_set.rb +++ b/app/models/hyrax/file_set.rb @@ -73,6 +73,15 @@ def original_file_id original_file&.id end + # @return [String, Nil] versioned identifier suitable for use in a IIIF manifest + def iiif_id + orig_file = original_file + return nil if orig_file.nil? || orig_file.file_identifier.blank? + latest_file = Hyrax::VersioningService.latest_version_of(orig_file) + version = latest_file&.version_id ? Digest::MD5.hexdigest(latest_file.version_id) : nil + "#{id}/files/#{orig_file.id}#{'/' + version if version}" + end + # @return [Hyrax::FileMetadata, nil] def thumbnail Hyrax.custom_queries.find_thumbnail(file_set: self) diff --git a/app/services/hyrax/riiif_file_resolver.rb b/app/services/hyrax/riiif_file_resolver.rb new file mode 100644 index 0000000000..8886bb7abc --- /dev/null +++ b/app/services/hyrax/riiif_file_resolver.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true +module Hyrax + # Riiif file resolver for valkyrie resources + class RiiifFileResolver + include ActiveSupport::Benchmarkable + + # @param [String] id from iiif manifest + # @return [Riiif::File] + def find(id) + path = nil + file_locks[id].with_write_lock do + path = build_path(id) + path = build_path(id, force: true) unless File.exist?(path) # Ensures the file is locally available + end + RiiifFile.new(path, id: id) + end + + # tracks individual file locks + # @see RiiifFile + # @return [Concurrent::Map] + def file_locks + @file_locks ||= Concurrent::Map.new do |k, v| + k.compute_if_absent(v) { Concurrent::ReadWriteLock.new } + end + end + + private + + def build_path(id, force: false) + Riiif::Image.cache.fetch("riiif:" + Digest::MD5.hexdigest("path:#{id}"), + expires_in: Riiif::Image.expires_in, + force: force) do + load_file(id) + end + end + + def load_file(id) + benchmark "RiiifFileResolver loaded #{id}", level: :debug do + fs_id = id.sub(/\A([^\/]*)\/.*/, '\1') + file_set = Hyrax.query_service.find_by(id: fs_id) + file_metadata = Hyrax.custom_queries.find_original_file(file_set: file_set) + file_metadata.file.disk_path.to_s # Stores a local copy in tmpdir + end + end + + def logger + Hyrax.logger + end + end +end diff --git a/config/initializers/file_length_patch.rb b/config/initializers/file_length_patch.rb index 36568f550d..6c5ca1eac8 100644 --- a/config/initializers/file_length_patch.rb +++ b/config/initializers/file_length_patch.rb @@ -2,5 +2,9 @@ # Valkyrie::Storage::Fedora expects io objects to have #length class ::File - alias length size unless ::File.respond_to? :length + alias length size unless respond_to? :length +end + +class ::Valkyrie::StorageAdapter::StreamFile + alias length size unless respond_to? :length end diff --git a/hyrax.gemspec b/hyrax.gemspec index fbd2aa3070..b1e31fc3c3 100644 --- a/hyrax.gemspec +++ b/hyrax.gemspec @@ -41,6 +41,7 @@ SUMMARY spec.add_dependency 'browse-everything', '>= 0.16', '< 2.0' spec.add_dependency 'carrierwave', '~> 1.0' spec.add_dependency 'clipboard-rails', '~> 1.5' + spec.add_dependency 'concurrent-ruby', '~> 1.0' spec.add_dependency 'connection_pool', '~> 2.4' spec.add_dependency 'draper', '~> 4.0' spec.add_dependency 'dry-logic', '~> 1.5' diff --git a/lib/generators/hyrax/templates/config/initializers/riiif.rb b/lib/generators/hyrax/templates/config/initializers/riiif.rb index 94c78c9009..e5d3f8dfdf 100644 --- a/lib/generators/hyrax/templates/config/initializers/riiif.rb +++ b/lib/generators/hyrax/templates/config/initializers/riiif.rb @@ -1,12 +1,10 @@ # frozen_string_literal: true -ActiveSupport::Reloader.to_prepare do - Riiif::Image.file_resolver = Riiif::HttpFileResolver.new +Rails.application.reloader.to_prepare do Riiif::Image.info_service = lambda do |id, _file| # id will look like a path to a pcdm:file # (e.g. rv042t299%2Ffiles%2F6d71677a-4f80-42f1-ae58-ed1063fd79c7) # but we just want the id for the FileSet it's attached to. - # Capture everything before the first slash fs_id = id.sub(/\A([^\/]*)\/.*/, '\1') resp = Hyrax::SolrService.get("id:#{fs_id}") doc = resp['response']['docs'].first @@ -14,26 +12,22 @@ { height: doc['height_is'], width: doc['width_is'], format: doc['mime_type_ssi'], channels: doc['alpha_channels_ssi'] } end - Riiif::Image.file_resolver.id_to_uri = if Hyrax.config.use_valkyrie? - # Use Valkyrie adapter to make sure file is available locally. Riiif will just open it then - # id comes in with the format "FILE_SET_ID/files/FILE_ID" - lambda do |id| - file_metadata = Hyrax.query_service.find_by(id: id.split('/').last) - file = Hyrax.storage_adapter.find_by(id: file_metadata.file_identifier) - file.disk_path.to_s - end - else - lambda do |id| - Hyrax::Base.id_to_uri(CGI.unescape(id)).tap do |url| - Rails.logger.info "Riiif resolved #{id} to #{url}" - end - end - end + if Hyrax.config.use_valkyrie? + Riiif::Image.file_resolver = Hyrax::RiiifFileResolver.new + else + Riiif::Image.file_resolver = Riiif::HttpFileResolver.new + + Riiif::Image.file_resolver.id_to_uri = lambda do |id| + Hyrax::Base.id_to_uri(CGI.unescape(id)).tap do |url| + Rails.logger.info "Riiif resolved #{id} to #{url}" + end + end + end Riiif::Image.authorization_service = Hyrax::IiifAuthorizationService Riiif.not_found_image = Rails.root.join('app', 'assets', 'images', 'us_404.svg') Riiif.unauthorized_image = Rails.root.join('app', 'assets', 'images', 'us_404.svg') - Riiif::Engine.config.cache_duration = 365.days + Riiif::Engine.config.cache_duration = 1.month end diff --git a/lib/hyrax/specs/shared_specs/hydra_works.rb b/lib/hyrax/specs/shared_specs/hydra_works.rb index 00858bbfb6..473227fb89 100644 --- a/lib/hyrax/specs/shared_specs/hydra_works.rb +++ b/lib/hyrax/specs/shared_specs/hydra_works.rb @@ -314,6 +314,20 @@ expect(fileset.extracted_text).to eq extracted_text expect(fileset.extracted_text_id).to eq extracted_text.id end + + context 'with simulated original file' do + let(:file_metadata_double) { double("Fake Hyrax::FileMetadata", id: SecureRandom.uuid, file_identifier: SecureRandom.uuid, versions: [file_double]) } + let(:file_double) { double("Fake Valkyrie::StorageAdapter::File", id: SecureRandom.uuid, version_id: SecureRandom.uuid)} + + before do + allow(fileset).to receive(:original_file).and_return(file_metadata_double) + fileset.id = Valkyrie::ID.new(SecureRandom.uuid) + end + + it 'returns a iiif_id with matching ids' do + expect(fileset.iiif_id).to eq "#{fileset.id}/files/#{fileset.original_file_id}/#{Digest::MD5.hexdigest(file_double.version_id)}" + end + end end end end diff --git a/spec/factories/hyrax_file_metadata.rb b/spec/factories/hyrax_file_metadata.rb index f618fd5df3..c07038c9b1 100644 --- a/spec/factories/hyrax_file_metadata.rb +++ b/spec/factories/hyrax_file_metadata.rb @@ -58,8 +58,9 @@ trait :with_file do transient do - file { FactoryBot.create(:uploaded_file) } + file { FactoryBot.create(:uploaded_file, file: File.open('spec/fixtures/world.png')) } file_set { FactoryBot.valkyrie_create(:hyrax_file_set) } + user { FactoryBot.create(:user) } end after(:build) do |file_metadata, evaluator| @@ -76,6 +77,12 @@ original_filename: evaluator.file.uploader.filename) file_metadata.file_identifier = saved.id end + + after(:create) do |file_metadata, evaluator| + Hyrax::ValkyrieUpload.new.add_file_to_file_set(file_set: evaluator.file_set, + file_metadata: file_metadata, + user: evaluator.user) + end end end end diff --git a/spec/presenters/hyrax/file_set_presenter_spec.rb b/spec/presenters/hyrax/file_set_presenter_spec.rb index cc0b3d6832..8a5c38f17f 100644 --- a/spec/presenters/hyrax/file_set_presenter_spec.rb +++ b/spec/presenters/hyrax/file_set_presenter_spec.rb @@ -312,7 +312,7 @@ def uri_segment_escape(uri) let(:file_metadata) { FactoryBot.valkyrie_create(:file_metadata, :original_file, :with_file, file: file) } let(:file_set) { FactoryBot.valkyrie_create(:hyrax_file_set) } let(:request) { double('request', base_url: 'http://test.host') } - let(:id) { "#{file_set.id}/files/#{file_metadata.id}" } + let(:id) { "#{file_set.id}/files/#{file_metadata.id}/#{Digest::MD5.hexdigest(file_metadata.file.version_id)}" } describe "#display_image" do context 'without a file' do diff --git a/spec/services/hyrax/riiif_file_resolver_spec.rb b/spec/services/hyrax/riiif_file_resolver_spec.rb new file mode 100644 index 0000000000..fa91379b4e --- /dev/null +++ b/spec/services/hyrax/riiif_file_resolver_spec.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true +require 'spec_helper' + +RSpec.describe Hyrax::RiiifFileResolver do + let(:resolver) { described_class.new } + + context 'with a file' do + let(:file_metadata) { FactoryBot.valkyrie_create(:hyrax_file_metadata, :with_file) } + let(:file_set) { Hyrax.query_service.find_by(id: file_metadata.file_set_id) } + + describe '#find' do + it 'returns a locally available RiiifFile using a write lock' do + expect(resolver.file_locks[file_set.iiif_id]).to receive(:with_write_lock).and_call_original + expect(resolver.find(file_set.iiif_id)).to be_instance_of Hyrax::RiiifFile + expect(File.exist?(file_metadata.file.disk_path)).to eq true + end + end + end + + describe '#file_locks' do + it 'is a Concurrent::Map of Concurrent::ReadWriteLocks' do + expect(resolver.file_locks[SecureRandom.uuid]).to be_instance_of Concurrent::ReadWriteLock + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index a2738dac77..f7d24db0e8 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -58,7 +58,7 @@ def ci_build? Valkyrie::MetadataAdapter .register(Valkyrie::Persistence::Postgres::MetadataAdapter.new, :postgres_adapter) Valkyrie::StorageAdapter.register( - Valkyrie::Storage::Disk.new(base_path: Rails.root / 'tmp' / 'test_adapter_uploads'), + Valkyrie::Storage::VersionedDisk.new(base_path: Rails.root / 'tmp' / 'test_adapter_uploads'), :test_disk ) Valkyrie::StorageAdapter.register( From 3f32be740ef6caf4152b555205ff27532bef81fe Mon Sep 17 00:00:00 2001 From: Daniel Pierce Date: Wed, 18 Oct 2023 17:56:08 -0400 Subject: [PATCH 2/2] Remove misguided storage adapter mock; don't attempt to delete when there is no file_identifier --- lib/hyrax/transactions/steps/file_metadata_delete.rb | 4 ++-- spec/spec_helper.rb | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/hyrax/transactions/steps/file_metadata_delete.rb b/lib/hyrax/transactions/steps/file_metadata_delete.rb index f698c79763..c64f5650e0 100644 --- a/lib/hyrax/transactions/steps/file_metadata_delete.rb +++ b/lib/hyrax/transactions/steps/file_metadata_delete.rb @@ -21,7 +21,7 @@ def initialize(persister: Hyrax.persister, storage_adapter: Hyrax.storage_adapte end ## - # @param [Valkyrie::Resource] resource + # @param [Hyrax::FileMetadata] FileMetadata resource # @param [::User] the user resposible for the delete action # # @return [Dry::Monads::Result] @@ -30,7 +30,7 @@ def call(resource) @persister.delete(resource: resource) @publisher.publish('file.metadata.deleted', metadata: resource) - Valkyrie::StorageAdapter.delete(id: resource.file_identifier) + Valkyrie::StorageAdapter.delete(id: resource.file_identifier) if resource.file_identifier.present? Success(resource) end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index f7d24db0e8..78e6f6d175 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -342,9 +342,5 @@ def clean_active_fedora_repository allow(Hyrax) .to receive(:storage_adapter) .and_return(Valkyrie::StorageAdapter.find(adapter_name)) - - allow(Valkyrie::StorageAdapter) - .to receive(:adapter_for) - .and_return(Valkyrie::StorageAdapter.find(adapter_name)) end end