From 53cf2e81edcce29ccc093e02de336d847b784e8a Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Fri, 7 Jun 2024 15:57:16 -0400 Subject: [PATCH] I350 Derivatives & Update IiifPrint (#2210) * Update IiifPrint * File Set Specs Update specs based on derivative & file migration processing in Hyrax. * Update IiifPrint to released gem * Bring in most recent Hyrax updates * Remove solr_document_behavior_decorator Hyrax now redevines hydra_model so we don't have to. --- Dockerfile | 5 + Gemfile | 2 +- Gemfile.lock | 25 ++-- app/models/ability.rb | 19 +++ .../hyrax/solr_document_behavior_decorator.rb | 19 --- app/services/iiif_print/tenant_config.rb | 11 ++ app/views/hyrax/file_sets/_actions.html.erb | 57 --------- config/initializers/iiif_print.rb | 1 + ..._print_pending_relationships.iiif_print.rb | 6 +- docker-compose.bundle.yml | 1 - docker-compose.override-nodory.yml | 2 - docker-compose.production.yml | 2 - docker-compose.yml | 1 - spec/fixtures/derivatives/00-xml.xml | 23 ++++ spec/fixtures/derivatives/b6-thumbnail.jpeg | Bin 0 -> 2147 bytes .../generic_work_resource_indexer_spec.rb | 2 +- spec/indexers/image_resource_indexer_spec.rb | 2 +- .../solr_document_behavior_decorator_spec.rb | 71 ------------ spec/models/hyrax/file_set_decorator_spec.rb | 108 ++++++++++-------- 19 files changed, 137 insertions(+), 220 deletions(-) delete mode 100644 app/models/concerns/hyrax/solr_document_behavior_decorator.rb delete mode 100644 app/views/hyrax/file_sets/_actions.html.erb create mode 100644 spec/fixtures/derivatives/00-xml.xml create mode 100644 spec/fixtures/derivatives/b6-thumbnail.jpeg delete mode 100644 spec/models/concerns/hyrax/solr_document_behavior_decorator_spec.rb diff --git a/Dockerfile b/Dockerfile index 3c990b9c51..762fb858fa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,6 +46,11 @@ RUN wget https://github.com/ImageMagick/ImageMagick/archive/refs/tags/7.1.0-57.t && rm -rf ImageMagick* \ && rm -rf /var/cache/apk/* +# Install "best" training data for Tesseract +RUN echo "📚 Installing Tesseract Best (training data)!" && \ + cd /usr/share/tessdata/ && \ + wget https://github.com/tesseract-ocr/tessdata_best/blob/main/eng.traineddata?raw=true -O eng_best.traineddata + ARG VIPS_VERSION=8.11.3 RUN set -x -o pipefail \ diff --git a/Gemfile b/Gemfile index b492a53f1d..4408ca1066 100644 --- a/Gemfile +++ b/Gemfile @@ -56,7 +56,7 @@ gem 'hyrax-doi', github: 'samvera-labs/hyrax-doi', branch: 'rails_hyrax_upgrade' gem 'hyrax-iiif_av', github: 'samvera-labs/hyrax-iiif_av', branch: 'rails_hyrax_upgrade' gem 'i18n-debug', require: false, group: %i[development test] gem 'i18n-tasks', group: %i[development test] -gem 'iiif_print', github: 'scientist-softserv/iiif_print', branch: 'main' +gem 'iiif_print', '~> 2.0' gem 'jbuilder', '~> 2.5' gem 'jquery-rails' # Use jquery as the JavaScript library gem 'openssl', '>= 3.2.0' diff --git a/Gemfile.lock b/Gemfile.lock index cd26de066a..31393f59b8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -147,7 +147,7 @@ GIT GIT remote: https://github.com/samvera/hyrax.git - revision: 151fb0d38339e12eabdf99ce9d0b1a0f07685fd5 + revision: 74f29943d88749a57077818d271b29729a6d7183 branch: double_combo specs: hyrax (5.0.1) @@ -218,18 +218,6 @@ GIT public_suffix (>= 2) rack (>= 1.3.6) -GIT - remote: https://github.com/scientist-softserv/iiif_print.git - revision: 122e86634f17c8f40d9ae4d59d2d7deca00f1694 - branch: main - specs: - iiif_print (1.0.0) - blacklight_iiif_search (>= 1.0, < 3.0) - derivative-rodeo (~> 0.5) - hyrax (>= 2.5, < 6) - nokogiri (>= 1.13.2) - rdf-vocab (~> 3.0) - GIT remote: https://github.com/scientist-softserv/willow_sword.git revision: 38a0906647fae2020e8b0b08e296f85c457fcb34 @@ -695,7 +683,8 @@ GEM hiredis (0.6.3) htmlentities (4.3.4) http_logger (0.7.0) - httparty (0.21.0) + httparty (0.22.0) + csv mini_mime (>= 1.0.0) multi_xml (>= 0.5.2) httpclient (2.8.3) @@ -767,6 +756,12 @@ GEM json iiif_manifest (1.3.1) activesupport (>= 4) + iiif_print (2.0.0) + blacklight_iiif_search (>= 1.0, < 3.0) + derivative-rodeo (~> 0.5) + hyrax (>= 2.5, < 6) + nokogiri (>= 1.13.2) + rdf-vocab (~> 3.0) iso-639 (0.3.6) iso8601 (0.9.1) jaro_winkler (1.5.6) @@ -1504,7 +1499,7 @@ DEPENDENCIES hyrax-iiif_av! i18n-debug i18n-tasks - iiif_print! + iiif_print (~> 2.0) jbuilder (~> 2.5) jquery-rails json-canonicalization (= 0.3.1) diff --git a/app/models/ability.rb b/app/models/ability.rb index ac5f2a2799..92f0a4eaeb 100644 --- a/app/models/ability.rb +++ b/app/models/ability.rb @@ -110,4 +110,23 @@ def can_import_works? def can_export_works? can_create_any_work? end + + ## + # @api public + # + # Overrides hydra-head, (and restores the method from blacklight-access-controls) + def download_permissions + can :download, [::String, ::Valkyrie::ID] do |id| + test_download(id.to_s) + end + + can :download, ::SolrDocument do |obj| + if obj.pdf? && !obj.show_pdf_download_button + false + else + cache.put(obj.id, obj) + test_download(obj.id) + end + end + end end diff --git a/app/models/concerns/hyrax/solr_document_behavior_decorator.rb b/app/models/concerns/hyrax/solr_document_behavior_decorator.rb deleted file mode 100644 index 7e0db0d938..0000000000 --- a/app/models/concerns/hyrax/solr_document_behavior_decorator.rb +++ /dev/null @@ -1,19 +0,0 @@ -# frozen_string_literal: true - -# OVERRIDE Hyrax 5.0.0rc2 to account for Valkyrie migration object that end in "Resource" - -module Hyrax - module SolrDocumentBehaviorDecorator - def hydra_model(classifier: nil) - if valkyrie? - # In the future when we don't have Valkyrie migration objects, - # we wouldn't have the SomethingResource naming convention, so we use super - (first('has_model_ssim') + 'Resource')&.safe_constantize || super - else - super - end - end - end -end - -Hyrax::SolrDocumentBehavior.prepend(Hyrax::SolrDocumentBehaviorDecorator) diff --git a/app/services/iiif_print/tenant_config.rb b/app/services/iiif_print/tenant_config.rb index 14ef3642bc..4ec49e382b 100644 --- a/app/services/iiif_print/tenant_config.rb +++ b/app/services/iiif_print/tenant_config.rb @@ -100,6 +100,11 @@ module PdfSplitter mattr_accessor :iiif_print_splitter self.iiif_print_splitter = ::IiifPrint::SplitPdfs::PagesToJpgsSplitter + ## + def self.never_split_pdfs? + !TenantConfig.use_iiif_print? + end + ## # @api public def self.call(*args) @@ -147,6 +152,12 @@ def service # In Hyrax::WorkShowPresenter we're only looking at the underlying file_sets. But IiifPrint # needs to look at multiple places. module WorkShowPresenterDecorator + ## + # @return [Boolean] Identifies whether IiifPrint PDF splitting is active for this work's tenant + def split_pdfs? + TenantConfig.use_iiif_print? + end + ## # @return [Array] predicate methods (e.g. ending in "?") that reflect the types # of files we want to consider for showing in the IIIF Viewer. diff --git a/app/views/hyrax/file_sets/_actions.html.erb b/app/views/hyrax/file_sets/_actions.html.erb deleted file mode 100644 index 5c479fe8df..0000000000 --- a/app/views/hyrax/file_sets/_actions.html.erb +++ /dev/null @@ -1,57 +0,0 @@ -<%# Overridden from Hyrax 5.0.0 - To add extra download restrictions regarding PDFs and site configurations %> -<% if (can?(:download, file_set.id) || can?(:destroy, file_set.id) || can?(:edit, file_set.id)) && !workflow_restriction?(@parent) %> - <% if can?(:download, file_set.id) && !(can?(:edit, file_set.id) || can?(:destroy, file_set.id)) %> - <% if Hyrax.config.display_media_download_link? && (@presenter.show_pdf_download_button? && file_set.pdf? || !file_set.pdf?) %> - <%= link_to t('.download'), - hyrax.download_path(file_set), - class: 'btn btn-default btn-sm', - title: t('.download_title', file_set: file_set), - target: "_blank", - id: "file_download", - data: { label: file_set.id, work_id: @presenter.id, collection_ids: @presenter.member_of_collection_ids } %> - <% end %> - <% else %> -
- - - -
- <% end %> -<% end %> diff --git a/config/initializers/iiif_print.rb b/config/initializers/iiif_print.rb index e83325c644..15c567a708 100644 --- a/config/initializers/iiif_print.rb +++ b/config/initializers/iiif_print.rb @@ -22,4 +22,5 @@ # config.sort_iiif_manifest_canvases_by = :date_published config.default_iiif_manifest_version = 3 config.persistence_adapter = IiifPrint::PersistenceLayer::ValkyrieAdapter + config.additional_tesseract_options = "-l eng_best" end diff --git a/db/migrate/20240214005253_add_model_details_to_iiif_print_pending_relationships.iiif_print.rb b/db/migrate/20240214005253_add_model_details_to_iiif_print_pending_relationships.iiif_print.rb index 2343771542..ad29f1516e 100644 --- a/db/migrate/20240214005253_add_model_details_to_iiif_print_pending_relationships.iiif_print.rb +++ b/db/migrate/20240214005253_add_model_details_to_iiif_print_pending_relationships.iiif_print.rb @@ -1,8 +1,8 @@ # This migration comes from iiif_print (originally 20231110163052) class AddModelDetailsToIiifPrintPendingRelationships < ActiveRecord::Migration[5.2] def change - add_column :iiif_print_pending_relationships, :parent_model, :string - add_column :iiif_print_pending_relationships, :child_model, :string - add_column :iiif_print_pending_relationships, :file_id, :string + add_column :iiif_print_pending_relationships, :parent_model, :string unless column_exists?(:iiif_print_pending_relationships, :parent_model) + add_column :iiif_print_pending_relationships, :child_model, :string unless column_exists?(:iiif_print_pending_relationships, :child_model) + add_column :iiif_print_pending_relationships, :file_id, :string unless column_exists?(:iiif_print_pending_relationships, :file_id) end end diff --git a/docker-compose.bundle.yml b/docker-compose.bundle.yml index 1874220df4..e4e8bd1877 100644 --- a/docker-compose.bundle.yml +++ b/docker-compose.bundle.yml @@ -1,6 +1,5 @@ # Copy file to docker-compose.override.yml to override docker-compose.yml # Only use for local development -version: '3.8' services: # Uncomment to allow for the use of a ruby debugger (byebug, pry, etc) in Docker. # See http://playbook-staging.notch8.com/en/devops/docker_debugger for more info. diff --git a/docker-compose.override-nodory.yml b/docker-compose.override-nodory.yml index 933b923cb8..f8ffa58a63 100644 --- a/docker-compose.override-nodory.yml +++ b/docker-compose.override-nodory.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: solr: ports: diff --git a/docker-compose.production.yml b/docker-compose.production.yml index 8412c2ad56..ae8fcbc0b1 100644 --- a/docker-compose.production.yml +++ b/docker-compose.production.yml @@ -1,5 +1,3 @@ -version: '3.8' - x-app: &app image: ghcr.io/samvera/hyku:${TAG:-latest} env_file: diff --git a/docker-compose.yml b/docker-compose.yml index f23f27273d..83cfd6f9d5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: '3.8' x-app: &app build: context: . diff --git a/spec/fixtures/derivatives/00-xml.xml b/spec/fixtures/derivatives/00-xml.xml new file mode 100644 index 0000000000..6a59fb9c4f --- /dev/null +++ b/spec/fixtures/derivatives/00-xml.xml @@ -0,0 +1,23 @@ + + + + pixel + + + + + + + + + + + + + + + + + + + diff --git a/spec/fixtures/derivatives/b6-thumbnail.jpeg b/spec/fixtures/derivatives/b6-thumbnail.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..391e0f86a44e96b147fd6cedbec1357efc5bd2d7 GIT binary patch literal 2147 zcmeH@yA8rH5QgvK#Q2ruE&1u;>|Z|H_r+^` z0`14$2#5e%84w?^0aYZ@tb|gR38QtTt#Qs8Yh7rA^NqK*=DKN#m|W=kj(S;Bx&%|D zDvcD9w_egeiDzJgFB}vy6pV^7USKtS6+Y1tFG(7hBk3B2d0I`U`74#a?+_HqJBnd| Z)8RNT%e!nK8^{K-fovcf_|d>3-rjo}AMXGF literal 0 HcmV?d00001 diff --git a/spec/indexers/generic_work_resource_indexer_spec.rb b/spec/indexers/generic_work_resource_indexer_spec.rb index 8c50e63c96..9e3591c724 100644 --- a/spec/indexers/generic_work_resource_indexer_spec.rb +++ b/spec/indexers/generic_work_resource_indexer_spec.rb @@ -7,7 +7,7 @@ RSpec.describe GenericWorkResourceIndexer do let(:indexer_class) { described_class } - let(:resource) { GenericWorkResource.new } + let!(:resource) { Hyrax.persister.save(resource: GenericWorkResource.new) } it_behaves_like 'a Hyrax::Resource indexer' end diff --git a/spec/indexers/image_resource_indexer_spec.rb b/spec/indexers/image_resource_indexer_spec.rb index 18e9d0f00d..da0afb625a 100644 --- a/spec/indexers/image_resource_indexer_spec.rb +++ b/spec/indexers/image_resource_indexer_spec.rb @@ -7,7 +7,7 @@ RSpec.describe ImageResourceIndexer do let(:indexer_class) { described_class } - let(:resource) { ImageResource.new } + let!(:resource) { Hyrax.persister.save(resource: ImageResource.new) } it_behaves_like 'a Hyrax::Resource indexer' end diff --git a/spec/models/concerns/hyrax/solr_document_behavior_decorator_spec.rb b/spec/models/concerns/hyrax/solr_document_behavior_decorator_spec.rb deleted file mode 100644 index d7066faa38..0000000000 --- a/spec/models/concerns/hyrax/solr_document_behavior_decorator_spec.rb +++ /dev/null @@ -1,71 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe Hyrax::SolrDocumentBehavior, type: :decorator do - subject(:solr_document) { solr_document_class.new(solr_hash) } - let(:solr_hash) { {} } - - let(:solr_document_class) do - Class.new do - include Blacklight::Solr::Document - include Hyrax::SolrDocumentBehavior - end - end - - describe '#to_partial_path' do - context 'with an ActiveFedora model name' do - let(:solr_hash) { { 'has_model_ssim' => 'GenericWork' } } - - it 'resolves the correct model name' do - expect(solr_document.to_model.to_partial_path).to eq 'hyrax/generic_works/generic_work' - end - end - - context 'with a Valkyrie model name' do - let(:solr_hash) { { 'has_model_ssim' => 'GenericWorkResource' } } - - # Yes, a GenericWorkResource will resolves to the `hyrax/generic_works/generic_work` because - # we're migrating from GenericWork. - it 'resolves the correct model name' do - expect(solr_document.to_model.to_partial_path).to eq 'hyrax/generic_works/generic_work' - end - end - - context 'with a Valkyrie migration model name' do - let(:solr_hash) { { 'has_model_ssim' => 'GenericWork', 'valkyrie_bsi' => true } } - - it 'resolves the correct model name' do - expect(solr_document.to_model.to_partial_path).to eq 'hyrax/generic_works/generic_work' - end - end - end - - describe '#hydra_model' do - it 'gives ActiveFedora::Base by default' do - expect(solr_document.hydra_model).to eq ActiveFedora::Base - end - - context 'with an ActiveFedora model name' do - let(:solr_hash) { { 'has_model_ssim' => 'GenericWork' } } - - it 'resolves the correct model name' do - expect(solr_document.hydra_model).to eq GenericWork - end - end - - context 'with a Valkyrie model name' do - let(:solr_hash) { { 'has_model_ssim' => 'GenericWorkResource' } } - - it 'resolves the correct model name' do - expect(solr_document.hydra_model).to eq GenericWorkResource - end - end - - context 'with a Valkyrie migration model name' do - let(:solr_hash) { { 'has_model_ssim' => 'GenericWork', 'valkyrie_bsi' => true } } - - it 'resolves the correct model name' do - expect(solr_document.hydra_model).to eq GenericWorkResource - end - end - end -end diff --git a/spec/models/hyrax/file_set_decorator_spec.rb b/spec/models/hyrax/file_set_decorator_spec.rb index 3c0137d2c8..9b3f859c9b 100644 --- a/spec/models/hyrax/file_set_decorator_spec.rb +++ b/spec/models/hyrax/file_set_decorator_spec.rb @@ -13,59 +13,75 @@ its(:to_rdf_representation) { is_expected.to eq('FileSet') } end - # given an existing AF FileSet - let(:af_file_set) do - fs = FileSet.create(creator: ['test'], title: ['file set test']) - path_to_file = 'spec/fixtures/csv/sample.csv' - file = File.open(path_to_file, 'rb') - Hydra::Works::AddFileToFileSet.call(fs, file, :original_file) - fs - end + context 'lazy migration' do + # given an existing AF FileSet + let(:af_file_set) do + fs = FileSet.create(creator: ['test'], title: ['file set test'], label: 'sample.csv') + path_to_file = 'spec/fixtures/csv/sample.csv' + file = File.open(path_to_file, 'rb') + Hydra::Works::AddFileToFileSet.call(fs, file, :original_file) + fs + end + let(:file_set_resource) { Hyrax.query_service.find_by(id: af_file_set.id) } + let(:paths) { ['spec/fixtures/derivatives/00-xml.xml', 'spec/fixtures/derivatives/b6-thumbnail.jpeg'] } + let(:storage_adapter) { Valkyrie::StorageAdapter.find(:derivatives_disk) } - # Because we're running a job, we need to specify a tenant - it "converts an AF FileSet to a Valkyrie::FileSet", :singletenant do - ## Preamble to test a "Created in ActiveFedora FileSet" - expect { Hyrax.query_service.services.first.find_by(id: af_file_set.id) }.to raise_error(Valkyrie::Persistence::ObjectNotFoundError) - # We are lazyily migrating a FileSet to a Hyrax::FileSet - # thus it should comeback as a Hyrax::FileSet - expect(Hyrax.query_service.services.last.find_by(id: af_file_set.id)).to be_a(Hyrax::FileSet) - # Expect the goddess combo works as expected - file_set_resource = Hyrax.query_service.find_by(id: af_file_set.id) - expect(file_set_resource).to be_a(Hyrax::FileSet) + before do + # set up some derivatives + allow(Hyrax::DerivativePath).to receive(:derivatives_for_reference).and_return(paths) + allow(Hyrax::ValkyriePersistDerivatives).to receive(:fileset_for_directives).and_return(file_set_resource) + allow(Hyrax.publisher).to receive(:publish) + end - af_file_id = af_file_set.original_file.id - expect { Hyrax.query_service.services.first.find_by(id: af_file_id) }.to raise_error(Valkyrie::Persistence::ObjectNotFoundError) - # We should be able to find this "thing" in the ActiveFedora storage - expect(Hyrax.query_service.services.last.find_by(id: af_file_id)).to be_present - # Expect the goddess combo works as expected - expect(Hyrax.query_service.find_by(id: af_file_id)).to be_present + # Because we're running a job, we need to specify a tenant + it "converts an AF FileSet to a Valkyrie::FileSet", :singletenant do + ## Preamble to test a "Created in ActiveFedora FileSet" + expect { Hyrax.query_service.services.first.find_by(id: af_file_set.id) }.to raise_error(Valkyrie::Persistence::ObjectNotFoundError) + # We are lazily migrating a FileSet to a Hyrax::FileSet + # thus it should comeback as a Hyrax::FileSet + expect(Hyrax.query_service.services.last.find_by(id: af_file_set.id)).to be_a(Hyrax::FileSet) + # Expect the goddess combo works as expected + expect(file_set_resource).to be_a(Hyrax::FileSet) - # The file is in Fedora! - expect(file_set_resource.original_file.file_identifier.id).to start_with("fedora://") + expect { Hyrax.query_service.services.first.find_by(id: af_file_set.id) }.to raise_error(Valkyrie::Persistence::ObjectNotFoundError) + # We should be able to find this "thing" in the ActiveFedora storage + expect(Hyrax.query_service.services.last.find_by(id: af_file_set.id)).to be_present + # Expect the goddess combo works as expected + expect(Hyrax.query_service.find_by(id: af_file_set.id)).to be_present - ## Do the "migration" task, it will conditionally enqueue; if we don't - ## process the queue the statements after this will fail. - perform_enqueued_jobs do - Hyrax.persister.save(resource: file_set_resource) - end + # The file is in Fedora! + expect(file_set_resource.original_file.file_identifier.id).to start_with("fedora://") - # We found it in Postgresql - converted_file_set = Hyrax.query_service.services.first.find_by(id: af_file_set.id) - expect(converted_file_set).to be_a(Hyrax::FileSet) + ## Do the "migration" task: saving the resource triggers the file migrations. + # if we don't process the queue the statements after this will fail. + perform_enqueued_jobs do + Hyrax.persister.save(resource: file_set_resource) + end - # It's been converted to Postgresql - expect(Hyrax.query_service.services.first.find_by(id: af_file_id)).to be_a(Hyrax::FileMetadata) - # It's still there in ActiveFedora - expect(Hyrax.query_service.services.last.find_by(id: af_file_id)).to be_present - # Expect the goddess combo works as expected - expect(Hyrax.query_service.find_by(id: af_file_id)).to be_a(Hyrax::FileMetadata) + ## Verify FileSet migration + # We found the file_set in Postgresql + converted_file_set = Hyrax.query_service.services.first.find_by(id: af_file_set.id) + expect(converted_file_set).to be_a(Hyrax::FileSet) - file_identifier_id = converted_file_set.original_file.file_identifier.id - # Verify that the original file is now on disk (e.g. where we write files in - # the test environment) - expect(file_identifier_id).to start_with("disk://#{Rails.root}") + ## Verify File migration + af_file_id = af_file_set.original_file.id + # It's been converted to Valkyrie if we have a Hyrax::FileMetadata object. + expect(Hyrax.custom_queries.find_file_metadata_by(id: af_file_id)).to be_a(Hyrax::FileMetadata) - # Verify that the file actually exists there! - expect(File.exist?(file_identifier_id.sub("disk://", ""))).to be_truthy + file_identifier_id = converted_file_set.original_file.file_identifier.id + # Verify that the original file is now on disk (e.g. where we write files in + # the test environment) + expect(file_identifier_id).to start_with("disk://#{Rails.root}") + + # Verify that the file actually exists there! + expect(File.exist?(file_identifier_id.sub("disk://", ""))).to be_truthy + + # Verify that it's still in ActiveFedora + expect(af_file_set.original_file).to be_a(Hydra::PCDM::File) + expect(Hydra::PCDM::File.find(af_file_set.original_file.uri.to_s).content).to be_present + + # Verify that we have migrated derivatives & original file... The file_set should have 3 FileMetadata objects. + expect(Hyrax.custom_queries.find_many_file_metadata_by_ids(ids: converted_file_set.file_ids).count).to eq(3) + end end end