diff --git a/app/controllers/stash_datacite/contributors_controller.rb b/app/controllers/stash_datacite/contributors_controller.rb index 3d74fa7ac..797b3ec43 100644 --- a/app/controllers/stash_datacite/contributors_controller.rb +++ b/app/controllers/stash_datacite/contributors_controller.rb @@ -69,10 +69,10 @@ def autocomplete if partial_term.blank? render json: nil else - @affiliations = StashEngine::RorOrg.distinct.joins( + @contributors = StashEngine::RorOrg.distinct.joins( "inner join dcs_contributors on identifier_type = 'ror' and contributor_type = 'funder' and name_identifier_id = ror_id" ).find_by_ror_name(partial_term) - render json: @affiliations + render json: @contributors end end diff --git a/app/models/stash_engine/journal.rb b/app/models/stash_engine/journal.rb index 271afc97d..6a10a8d83 100644 --- a/app/models/stash_engine/journal.rb +++ b/app/models/stash_engine/journal.rb @@ -98,16 +98,16 @@ def self.find_by_issn(issn) # Replace an uncontrolled journal name (typically containing '*') # with a controlled journal reference, using an id - def self.replace_uncontrolled_journal(old_name:, new_id:) - j = StashEngine::Journal.find(new_id) + def self.replace_uncontrolled_journal(old_name:, new_journal:) + j = new_journal data = StashEngine::InternalDatum.where(value: old_name) idents = data.map(&:identifier_id) idents.each do |ident| puts " converting journal for identifier #{ident}" update_journal_for_identifier(new_title: j.title, new_issn: j.single_issn, identifier_id: ident) end - pubs = StashEngine::ResourcePublication.where(publication_name: old_name) - pubs.each do |pub| + pubs = StashEngine::ResourcePublication.where(publication_name: old_name, publication_issn: [nil, '']) + pubs.find_each do |pub| puts " converting journal for resource #{pub.resource_id}" update_journal_for_resource(new_title: j.title, new_issn: j.single_issn, resource_id: pub.resource_id) end diff --git a/documentation/apis/journals.md b/documentation/apis/journals.md index 
b857e0055..52cbb327f 100644 --- a/documentation/apis/journals.md +++ b/documentation/apis/journals.md @@ -80,64 +80,74 @@ Each journal has a primary title, but may have multiple `alternate_titles`. To add an alternate title to a journal: ```ruby -# Find the target journal and assign it to j - -# Then create the alternate title -StashEngine::JournalTitle.create(title: 'Some new title', journal: j, show_in_autocomplete: true) +StashEngine::JournalTitle.create(title: 'Some new title', journal_id: <journal_id>, show_in_autocomplete: false) ``` The `show_in_autocomplete` can be adjusted to false when adding a misspelling or other journal name that should not be listed for public selection. -Cleaning journal names -======================= +Cleaning journals +================= + +When a journal name is not recognized by the system, it is stored in the resource's `resource_publication.publication_name` without an accompanying `publication_issn`. ISSNs may also be added to this table by curators, without an accompanying entry in the `journal_issns`. Periodically, new journals should be added to the system, and old datasets should be updated to link them to the new journals. -When a journal name is not recognized by the system, the title is stored with an -asterisk appended. Periodically, new journals should be added to the system, and -old datasets should be updated to link them to the new journals. - -Process all journal titles in the system, converting any with an asterisk to -the corresponding journal that has the same name: -`rails journals:clean_titles_with_asterisks` - -Search for journals that are candidates to fix, in the database: -```sql -SELECT value, COUNT(value) -FROM stash_engine_internal_data -WHERE value like '%*%' -GROUP BY value -ORDER BY COUNT(value); +This is primarily used for related primary articles. Look at unmatched primary articles in the system, adding a publication_issn if one exists.
+ +### Unmatched primary articles + +```ruby +# primary articles with no matched journal, a relevant subset of all unmatched publications +StashEngine::Resource.latest_per_dataset.joins('join dcs_related_identifiers r on r.resource_id = stash_engine_resources.id and r.work_type = 6 and r.related_identifier is not null').joins(:resource_publication).left_outer_joins(:journal).where(journal: {id: nil}).distinct.pluck('stash_engine_resources.id', 'stash_engine_identifiers.identifier', 'r.related_identifier', 'stash_engine_resource_publications.publication_name', 'stash_engine_resource_publications.publication_issn') ``` -You can delete titles that are obviously junk or placeholders (e.g., "to be determined"). +You can sort by the entered publication to group them in order of title and see which are used more than once: `.sort_by {|s| [s[3] ? 1 : 0, s[3]]}`. This returns an array of arrays of the following format: -For each title, determine whether there is a corresponding journal in our -database. +`[<resource_id>, <identifier>, <related_identifier>, <publication_name>, <publication_issn>]` + +Visit a primary article DOI. Determine if it is from a journal already in our system, and add the journal information to the resource_publications table. You can also easily do this from the activity log UI for each dataset. -IF there is no corresponding journal, create an entry for a new journal in the -system, using a command like the the one below. Edit any -of the relevant fields, but the most critical are `title` and `issn`. Note that `issn` may contain -either a single ISSN or an array of them.
```ruby -j = StashEngine::Journal.create(title: '', issn: '', - notify_contacts: ["automated-messages@datadryad.org"], allow_review_workflow: true, - allow_embargo: false, allow_blackout: false, sponsor_id: nil) +j = StashEngine::Journal.find(<journal_id>) # get your journal +StashEngine::Resource.find(<resource_id>).resource_publication.update(publication_name: j.title, publication_issn: j.single_issn) ``` -IF a new journal does not need to be created, add the new title as an -alternate_title to the journal. +If a journal ISSN is already listed as the publication_issn, and is correct for the journal, you should add the ISSN to the journal. You can also easily do this from the journal admin UI. +```ruby +StashEngine::JournalIssn.create(id: <issn>, journal: j) ``` -StashEngine::JournalTitle.create(title: 'Some new title', journal: j, show_in_autocomplete: false) + +If the journal name is present and is a reasonable variation for the journal, consider if it should be added as an alternate title: +```ruby +StashEngine::JournalTitle.create(title: 'Some new title', journal_id: <journal_id>, show_in_autocomplete: false) ``` -Finally, replace the title throughout the system: +**NOTE: ONLY add journals that have more than 1 deposit in Dryad.** + +If there is no corresponding journal, you can create an entry for a new journal in the system. You must also create entries for each of the journal's ISSNs: ```ruby -old_name = 'The Greatest Journal*' -new_id = 123 -StashEngine::Journal.replace_uncontrolled_journal(old_name: old_name, new_id: new_id) +j = StashEngine::Journal.create(title: <journal_title>) +StashEngine::JournalIssn.create(id: <issn>, journal: j) ``` +### Unmatched manuscripts + +If all primary articles are processed, you can do a similar process for results where users have entered a publication_name and a manuscript_number but no ISSN was found.
+ +```ruby +# manuscripts with no matched journal, a relevant subset of all unmatched publications +StashEngine::Resource.latest_per_dataset.joins(:resource_publication).left_outer_joins(:journal).where(journal: {id: nil}).where.not(resource_publication: {manuscript_number: [nil, ''], publication_name: [nil, '']}).distinct.pluck('stash_engine_resources.id', 'stash_engine_identifiers.identifier', 'stash_engine_resource_publications.manuscript_number', 'stash_engine_resource_publications.publication_name', 'stash_engine_resource_publications.publication_issn') +``` + +You can sort by the entered publication to group them in order of title and see which are used more than once: `.sort_by {|s| [s[3] ? 1 : 0, s[3]]}`. This returns an array of arrays of the following format: + +`[<resource_id>, <identifier>, <manuscript_number>, <publication_name>, <publication_issn>]` + +Ignore any results for which the manuscript number or publication name are gibberish, or otherwise wrong. If they seem real and relevant, you can check and add journals as above. + +**NOTE: ONLY add journals that have more than 1 deposit in Dryad.** + + Updating journals for payment plans and integrations ==================================================== diff --git a/documentation/technical_notes/affiliations.md b/documentation/technical_notes/affiliations.md index 090fadd80..e823a037a 100644 --- a/documentation/technical_notes/affiliations.md +++ b/documentation/technical_notes/affiliations.md @@ -2,10 +2,7 @@ Author affiliations =================== -Author affiliations are associated with ROR identifiers. When there is -not corresponding ROR identifier for an affiliation, the affiliation -name is stored with an asterisk appended, so the curators can easily -see that there is no maching ROR. +Author affiliations are associated with ROR identifiers.
Overview of the UI pieces - on the view for editing a dataset, there is a partial for the given @@ -22,39 +19,29 @@ Overview of the UI pieces Cleaning affiliation names ========================== -When an affiliation name is not recognized by the system, the title is stored with an -asterisk appended. Ideally, all affiliations will appear in ROR, so we can change them to -controlled names. +When an affiliation name is not recognized by the system, it is stored without an accompanying ror_id. Ideally, all affiliations will eventually appear in ROR, so we can change them to controlled names. Search for affiliations that are candidates to fix, in the database: -```sql -SELECT long_name, COUNT(long_name) -FROM dcs_affiliations -WHERE long_name like '%*%' -GROUP BY long_name -ORDER BY COUNT(long_name); +```ruby +StashDatacite::Affiliation.where(ror_id: [nil, '']).select(:long_name).distinct ``` -For each affiliation, determine whether there is a corresponding ROR entry in our -database. +Determine whether there is a corresponding ROR entry in our database. -IF there is no corresponding ROR, leave it alone. +If there is a corresponding ROR, update the associated authors to use the correct affiliations and destroy the unmatched ones, using a process like: -IF there is a corresponding ROR, update the associated affiliation entries to have the correct values, using a process like: ```ruby -# find offending identifiers -aa = StashDatacite::Affiliation.where("long_name like '%%*'") -aa.each do |a| - a.authors.each do |auth| - puts auth.resource.identifier.identifier if auth.resource_id == auth.resource.identifier.last_submitted_resource&.id +#see if there is a correct affiliation +rep = StashDatacite::Affiliation.find_by(ror_id: <ror_id>) || StashDatacite::Affiliation.from_ror_id(ror_id: <ror_id>) +to_fix = StashDatacite::Affiliation.where(ror_id: [nil, ''], long_name: <long_name>) +to_fix.each do |aff| + if aff.authors.blank?
+ aff.destroy + next + end + aff.authors.each do |auth| + auth.affiliation = rep end -end;nil - -# for a given identifier, fix the issue -i = StashEngine::Identifier.where("identifier like '%'").first -r = i.latest_submitted_resource -# find the offending author in r -# replace their affilation with a new one -affil = StashDatacite::Affiliation.where("long_name like '%%*'").first -auth.affilation = affil + aff.destroy +end ``` diff --git a/documentation/technical_notes/contributors.md b/documentation/technical_notes/contributors.md new file mode 100644 index 000000000..4066fde77 --- /dev/null +++ b/documentation/technical_notes/contributors.md @@ -0,0 +1,24 @@ + +Contributors (Funders and facilities) +===================================== + +Contributors (`StashDatacite::Contributor`) are used for funders (`contributor_type: 'funder'`) and for research facilities (`contributor_type: 'sponsor'`), and are associated with ROR identifiers in the `name_identifier_id` column. + + +Cleaning contributor names +========================== + +When a funder name is not recognized by the system, it is stored without an accompanying `name_identifier_id`. Ideally, all institutions will eventually appear in ROR, so we can change them to controlled names. + +Search for contributors that are candidates to fix, in the database: +```ruby +StashDatacite::Contributor.where(name_identifier_id: [nil, '']).select(:contributor_name).distinct +``` + +Determine whether there is a corresponding ROR entry in our database.
+ +If there is a corresponding ROR, update the name_identifier_id columns: + +```ruby +StashDatacite::Contributor.where(name_identifier_id: [nil, ''], contributor_name: <contributor_name>).update(identifier_type: 'ror', name_identifier_id: <ror_id>) +``` diff --git a/lib/tasks/stash_engine_tasks.rake b/lib/tasks/stash_engine_tasks.rake index 1f939da33..ac789f948 100644 --- a/lib/tasks/stash_engine_tasks.rake +++ b/lib/tasks/stash_engine_tasks.rake @@ -1399,6 +1399,17 @@ namespace :curation_stats do end namespace :journals do + desc 'Link publications that have a name but no ISSN to the journal with a matching title' + task match_titles_to_issns: :environment do + StashEngine::ResourcePublication.where.not(publication_name: [nil, '']).where(publication_issn: [nil, '']).find_each do |d| + journal = StashEngine::Journal.find_by_title(d.publication_name) + next unless journal.present? + + puts "Cleaning journal: #{d.publication_name}" + StashEngine::Journal.replace_uncontrolled_journal(old_name: d.publication_name, new_journal: journal) + end + nil + end desc 'Clean journals that have exact name matches except for an asterisk' task clean_titles_with_asterisks: :environment do StashEngine::InternalDatum.where("data_type = 'publicationName' and value like '%*'").find_each do |d| @@ -1409,7 +1420,7 @@ namespace :journals do next unless j.present? puts "Cleaning journal: #{name}" - StashEngine::Journal.replace_uncontrolled_journal(old_name: name, new_id: j.id) + StashEngine::Journal.replace_uncontrolled_journal(old_name: name, new_journal: j) end StashEngine::ResourcePublication.where("publication_name like '%*'").find_each do |d| name = d.publication_name @@ -1419,7 +1430,7 @@ namespace :journals do next unless j.present? puts "Cleaning journal: #{name}" - StashEngine::Journal.replace_uncontrolled_journal(old_name: name, new_id: j.id) + StashEngine::Journal.replace_uncontrolled_journal(old_name: name, new_journal: j) end nil end