diff --git a/app/controllers/api_application_controller.rb b/app/controllers/api_application_controller.rb index 6f72ca21ee..ec8a97aa71 100644 --- a/app/controllers/api_application_controller.rb +++ b/app/controllers/api_application_controller.rb @@ -5,10 +5,13 @@ # class ApplicationController < ActionController::Base class ApiApplicationController < StashEngine::ApplicationController + include StashApi::Versioning layout 'layouts/stash_engine/application' before_action :log_request + before_action :set_response_version_header + before_action :check_requested_version skip_before_action :verify_authenticity_token DEFAULT_PAGE_SIZE = 20 diff --git a/app/controllers/stash_api/versioning.rb b/app/controllers/stash_api/versioning.rb new file mode 100644 index 0000000000..93d6481a12 --- /dev/null +++ b/app/controllers/stash_api/versioning.rb @@ -0,0 +1,34 @@ +module StashApi
+  module Versioning
+    extend ActiveSupport::Concern
+
+    private
+
+    # Record the version being served in every response; flag non-current versions as deprecated.
+    def set_response_version_header
+      response.headers['X-API-Version'] = api_version
+      return if api_version == current_version
+
+      response.headers['X-API-Deprecation'] = 'true'
+    end
+
+    # Reject requests that ask for a version other than the current one.
+    def check_requested_version
+      return if !requested_version || requested_version == current_version
+
+      render json: { error: "Unsupported API version: #{requested_version}, latest version is: #{current_version}" }, status: 400
+    end
+
+    # The latest supported API version.
+    def current_version
+      '2.1.0'
+    end
+
+    # The version being served: the requested version if one was given, otherwise inferred from the URL path.
+    def api_version
+      return requested_version if requested_version
+
+      request.path.include?('/api/v2') ? '2.1.0' : '1.0.0'
+    end
+
+    # The version the client asked for via the X-API-Version request header, if any.
+    def requested_version
+      @requested_version ||= request.headers['X-API-Version']
+    end
+  end
+end diff --git a/app/javascript/react/components/MetadataEntry/ResearchDomain.jsx b/app/javascript/react/components/MetadataEntry/ResearchDomain.jsx index d8770cbb0b..cf60756db5 100644 --- a/app/javascript/react/components/MetadataEntry/ResearchDomain.jsx +++ b/app/javascript/react/components/MetadataEntry/ResearchDomain.jsx @@ -52,6 +52,7 @@ function ResearchDomain({ formik.handleSubmit(); }} > + {subjectList.map((subj, index) => { // key made from subj + count of preceding duplicates const key = subj + subjectList.slice(0, index).filter((s) => s === subj).length; diff --git a/app/views/layouts/_best_practices.html.md b/app/views/layouts/_best_practices.html.md index 6935777a38..081a1e650e 100644 --- a/app/views/layouts/_best_practices.html.md +++ b/app/views/layouts/_best_practices.html.md @@ -19,7 +19,7 @@ Additionally, if applicable, please do not include any data visualizations that ## Make sure your data are shareable -* **All files submitted to Dryad must abide by the terms of the [Creative Commons Zero (CC0 1.0)](https://creativecommons.org/publicdomain/zero/1.0/) waiver**. Under these terms, the author releases the data to the public domain. +* **All files submitted to Dryad must abide by the terms of the [Creative Commons Zero v1.0 Universal](https://spdx.org/licenses/CC0-1.0.html) waiver**. Under these terms, the author releases the data to the public domain. * Review all files and ensure they conform to `CC0` terms and are not covered by copyright claims or other terms-of-use. We cannot archive any files that contain licenses incompatible with `CC0` (`GNU GPL, MIT, CC-BY,` etc.), but we can link to content in a dedicated software repository (Github, Zenodo, Bitbucket, or CRAN, etc.). 
* For more information see [Good data practices: Removing barriers to data reuse with CC0 licensing](https://blog.datadryad.org/2023/05/30/good-data-practices-removing-barriers-to-data-reuse-with-cc0-licensing/), [Why Does Dryad Use CC0](https://blog.datadryad.org/2011/10/05/why-does-dryad-use-cc0/), and [Some dos and don'ts for CC0](https://blog.datadryad.org/2017/09/11/some-dos-and-donts-for-cc0/). * Human subjects data must be properly anonymized and prepared under applicable legal and ethical guidelines (see tips for human subjects data). @@ -127,9 +127,9 @@ Ready to get started? [Log in](/stash/sessions/choose_login) and go to the "My d ### Additional resources * Institutional data librarians are an outstanding resource. Check with your university library's data management services team. -* [Cornell University Research Data Management Service Group's Guide to writing "readme" style metadata](https://data.research.cornell.edu/content/readme) -* [Why Dryad uses Creative Commons Zero](https://blog.datadryad.org/2011/10/05/why-does-dryad-use-cc0/) -* DataONE Primer on Data Management Best Practices -* [Introduction to Open Science: Why data versioning and data care practices are key for science and social science](http://blogs.lse.ac.uk/impactofsocialsciences/2015/02/09/data-versioning-open-science/) -* [Making data Findable, Accessible, Interoperable, and Reusable (FAIR)](https://www.force11.org/group/fairgroup/fairprinciples) -* [Data organization in spreadsheets](http://www.tandfonline.com/doi/full/10.1080/00031305.2017.1375989) +* Cornell University Research Data Management Service Group's Guide to writing "readme" style metadata (opens in new window) +* Why Dryad uses Creative Commons Zero (opens in new window) +* DataONE Primer on Data Management Best Practices (opens in new window) +* Introduction to Open Science: Why data versioning and data care practices are key for science and social science (opens in new window) +* Making data Findable, Accessible, Interoperable, and Reusable (FAIR) (opens in new window) +* Data organization in spreadsheets (opens in new window) diff --git a/app/views/layouts/_requirements.html.md b/app/views/layouts/_requirements.html.md index 0dcdec30e9..77092ba384 100644 --- a/app/views/layouts/_requirements.html.md +++ b/app/views/layouts/_requirements.html.md @@ -23,6 +23,10 @@ Most types of files can be submitted (e.g., text, spreadsheets, video, photograp * **Video**: `AVI, MPEG, MP4` * **Compressed file archive**: `TAR.GZ, 7Z, ZIP` +
Note: RAR (Roshal ARchive) is a proprietary compression format. Because users may not have access to the necessary tools to open RAR files, we cannot accept them for publication. Please use open, widely supported, and easily accessible formats like those listed above.
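If you have data in an existing `RAR` archive, you can repack it into an open format on the command line. A minimal sketch, assuming the free `unar` extractor and the `zip` utility are installed (`mydata.rar` is a placeholder filename):

```
# Extract the RAR into a working folder, then repack its contents as an open ZIP archive.
unar -o extracted mydata.rar
cd extracted && zip -r ../mydata.zip . && cd ..
```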
+Dear <%= @user_name %>,
-Your data submission has been automatically withdrawn from the Dryad platform and will be permanently deleted after one year:
+Your data submission has been automatically withdrawn from the Dryad platform + due to inactivity for more than one year. Your data will be permanently + deleted after one additional year of inactivity:
Title: <%= @resource.title %>
@@ -19,4 +21,10 @@
If you have questions about this action, please contact us at <%= @helpdesk_email %>.
+Please note that your dataset never completed the curation process and, + therefore, was not published. You should have received an email from us + outlining the changes required to have your dataset published.
+ +If you do not have access to the email or if you have any questions about the + action taken to withdraw your dataset, please contact us at + <%= @helpdesk_email %>.
diff --git a/config/licenses.yml b/config/licenses.yml index 66fe66a18a..69717d6483 100644 --- a/config/licenses.yml +++ b/config/licenses.yml @@ -2,8 +2,8 @@ # and then uri, name, explanation and badge cc0: - uri: https://creativecommons.org/publicdomain/zero/1.0/ - name: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication + uri: https://spdx.org/licenses/CC0-1.0.html + name: Creative Commons Zero v1.0 Universal explanation: > This releases your work to the public domain for any use. badge: icon_cc-zero.svg diff --git a/db/data/20241028110859_updated_cc0_license.rb b/db/data/20241028110859_updated_cc0_license.rb new file mode 100644 index 0000000000..d8fda69871 --- /dev/null +++ b/db/data/20241028110859_updated_cc0_license.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +class UpdatedCc0License < ActiveRecord::Migration[7.0] + def up + StashDatacite::Right.where(rights: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication') + .update_all(rights: 'Creative Commons Zero v1.0 Universal', rights_uri: 'https://spdx.org/licenses/CC0-1.0.html') + end + + def down + raise ActiveRecord::IrreversibleMigration + end +end diff --git a/db/data_schema.rb b/db/data_schema.rb index fbacd0e255..7a2fb2c668 100644 --- a/db/data_schema.rb +++ b/db/data_schema.rb @@ -1 +1 @@ -DataMigrate::Data.define(version: 20241022152849) +DataMigrate::Data.define(version: 20241028110859) \ No newline at end of file diff --git a/documentation/apis/README.md b/documentation/apis/README.md index 7293928210..d98a7d2eff 100644 --- a/documentation/apis/README.md +++ b/documentation/apis/README.md @@ -50,3 +50,24 @@ Dryad maintains a variety of reports regarding its content. - Reports that are automatically generated on a regular basis are available at `https://datadryad.org/api/v2/reports` - Reports that are generated less frequently are available through our [Data about Dryad](https://github.com/datadryad/dryad-data/) repository + +API Versioning +============ + +The Dryad API uses [Semantic Versioning](https://semver.org/) to track changes to the API. + +The current version of our API is `2.1.0`. This is also the only supported API version at the moment. + +To use the latest API version, you can: +- Use `https://datadryad.org/api/v2/` as the base URL for all API requests. +- Additionally, send the `X-API-Version: 2.1.0` request header. + +We added two new response headers: +- The `X-API-Version` header, which reports the version of the API that served the request. Clients may also send it as a request header to select a version. +- The `X-API-Deprecation` header, which notifies clients that the version they are using is deprecated and will be removed in the future. + +If an unsupported version number is requested, the API will respond with: +- A `400` error status. +- `{ "error": "Unsupported API version: {requested-version}, latest version is: 2.1.0" }` in the response body. +- The `X-API-Version` header set to the version of the API you requested. +- The `X-API-Deprecation` header set to `true`. This header is not returned when the version you are using is not deprecated. 
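+For example, a client can pin a version with the request header and inspect the response headers for deprecation. A sketch (the `datasets` endpoint is used only for illustration; authenticated endpoints would also need an `Authorization` header):
+
+```
+# Ask for API version 2.1.0 explicitly and dump only the response headers.
+curl -s -D - -o /dev/null \
+  -H "X-API-Version: 2.1.0" \
+  https://datadryad.org/api/v2/datasets
+
+# The response should include:
+#   X-API-Version: 2.1.0
+# and, only when the requested version is deprecated:
+#   X-API-Deprecation: true
+```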
\ No newline at end of file diff --git a/documentation/apis/embedded_submission.md b/documentation/apis/embedded_submission.md index 78963734be..b5a8d1908f 100644 --- a/documentation/apis/embedded_submission.md +++ b/documentation/apis/embedded_submission.md @@ -106,7 +106,7 @@ A sample call using the [sample dataset file](sample_dataset.json), with results "lastModificationDate": "2020-10-02", "visibility": "restricted", "userId": "0000-0003-0597-4085", - "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "license": "https://spdx.org/licenses/CC0-1.0.html", "editLink": "/stash/edit/doi%3A10.7959%2Fdryad.83bk3jc0" } ``` @@ -197,7 +197,7 @@ Sample call and (abbreviated) response: "visibility": "restricted", "sharingLink":"https://datadryad.org/stash/share/OI-tU-WmoT3I2KCOqX7Of624", "userId": 37182, - "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "license": "https://spdx.org/licenses/CC0-1.0.html", "editLink": "/stash/edit/doi%3A10.7959%2Fdryad.83bk3jc0" } ``` diff --git a/documentation/shibboleth/README.md b/documentation/shibboleth/README.md index a497703369..2fabb443b7 100644 --- a/documentation/shibboleth/README.md +++ b/documentation/shibboleth/README.md @@ -198,3 +198,27 @@ mv shibd.service shibd-old.service sudo systemctl enable shibd.service ``` +Renewing certificates for Shibboleth +==================================== + +To test the certificate, log in to the machine you are testing and run: + +`echo | openssl s_client -showcerts -servername localhost -connect localhost:443 2>/dev/null | openssl x509 -inform pem -noout -text` + +The certificate should state that it's from "Let's Encrypt", and it should have valid dates. + +To renew a certificate: +- On the server where you will generate the certificate, update the Apache configuration to respond on port 80 as well as 443. Copy the `datadryad.org.conf` file, and change the new file to respond to port 80. +- Restart Apache: `apache_restart.sh` +- In the load balancer, disable servers other than the one you are using to generate the certificate +- On the server, generate and install the certificate (change the server name in the directories to match your server): +``` +sudo certbot renew +sudo cp /etc/letsencrypt/live/sandbox.datadryad.org/fullchain.pem /etc/pki/tls/certs/letsencrypt.crt +sudo cp /etc/letsencrypt/live/sandbox.datadryad.org/privkey.pem /etc/pki/tls/private/letsencrypt.key +apache_restart.sh +``` +- Copy the certificate files to the other servers and restart Apache on those servers as well +- Re-enable all servers in the load balancer +- Remove the "extra" configuration file for Apache +- If the server is also running Shibboleth services, restart them (`sudo systemctl restart shibd`) and verify that Shibboleth is working through Apache: `curl -k https://localhost/Shibboleth.sso/Status` diff --git a/documentation/zenodo_integration/delayed_jobs.md b/documentation/zenodo_integration/delayed_jobs.md index 7686dde1dc..c1f13b053b 100644 --- a/documentation/zenodo_integration/delayed_jobs.md +++ b/documentation/zenodo_integration/delayed_jobs.md @@ -1,117 +1,10 @@ # Zenodo extra copies and software/supplemental submissions +See also the [troubleshooting document](zenodo_troubleshooting.md). + For environments that send datasets to Zenodo, the process of transferring the data only lives on one server (e.g., 01, but not 02). -## I want to go fix a number of failed Zenodo submissions. How? 1. Go to the *Datasets > Zenodo Submissions* option in the UI as a superuser. 2. 
ssh into the server and restart the delayed job daemon likw `sudo cdlsysctl restart delayed_job` just to - be sure old stuff is cleared out and it's running well. -3. Go to the `delayed_jobs` table in the database and look for items that show `SIGTERM` or `Execution expired` - I believe in the `last error` column but it may be another column. Remove these records from the table. -4. Go back to the UI and click the *Reset stalled to error state* button. This will make items correctly - show error states, even if they stalled or had another problem. -5. Sort the table by ID descending. Scroll down to about the time you saw the errors you want to correct starting. -6. Click the *Resend* buttons for items you care about (such as software or supplemental items, I'd ignore large data replications). Some items - will need prerequisites to be submitted first (will show in the table after clicking). -7. Go up the list until you get to the top. -8. You can refresh the page to see current statuses. If you want to see a chain of items for an identifier then - click on the *identifier id* and it will open a window with just submissions for that dataset. If you - want to look at error and submission details then click on the zenodo_copies.id in the first column to troubleshoot. -9. Rinse and repeat steps 4-8 until all that you can fix is fixed. If you run into bad problems with useless - crap clogging the queue then start at step 2 again. - -You may have to read over error messages and see where things are failing in Zenodo and intervene with other -solutions manually. - -## Fixing "Please remove all files to create a new version in Zenodo" errors (workaround) - -This may be an error caused only by the transition, but I wouldn't be surprised if it reappears. - -1. Find the previous submission where this was published (should be version before this one in - `stash_engine_zenodo_copies` table). Write down or copy/paste the *old deposition_id* and save it. -2. Go into the Zenodo user interface on their site, find the item that won't go through and - click the new version button. Write down the deposition_id for the *new deposition_id* (it should be - in the URL). You can leave that page open in the UI if you wish. -3. Go to the zenodo_copies table and change the old deposition_id for the previously submitted version - to the new item you just created in step 2. Also change the current submission's deposition_id to this number. - (For some reason when it's in this state it's impossible to get the new deposits created through the API - until you do this.) -4. Resubmit through the Dryad queue interface for the item. -5. After it goes through, go back to the record from step 1 and put the *old deposition_id* back in as the correct one. - -## Fixing errors because of zero length files - -Zenodo has decided they do not accept zero length files in the API, which I think is a bad decision since there are a -number of cases in software when people add zero length files to indicate something (.gitkeep files come -to mind or Passenger web server lets you touch a file to change status). They may also indicate a file -to be filled or used later. - -Anyway, if you get errors because of Zero length files, our option is to go remove them all from our -`stash_engine_generic_files` table and resubmit the item. You may need to do this for multiple versions to -get them all through. - -## Can't upload anymore files to Zenodo? - -Zenodo has now limited the number of files you can upload to 100 now. 
I suppose this means the user -must put them into a package like a zip if they want more files than that at Zenodo. - -## People put in dumb github URLs for software and the sizes don't match - -If they do this they should use the RAW URL from github, not just put in a github UI URL. They are not trying -to preserve the github UI for future generations. They want to get their software files in, not HTML -user interface files from github. - ---- - -## It's not processing? Why? -- start or restart the service `sudo cdlsysctl restart delayed_job` on the 01 server. -- There may be long (or stalled) jobs running on all workers. The `delayed_job` table shows what is trying - to run in delayed job. If some of them have a `last_error` status like `execution expired` or `SIGTERM` then - you can delete these lines out of the `delayed_job` table, restart the service and jobs stuck - behind them should run. -- You can click the "reset stalled to error state" in the UI and it will put things no longer in the queue - and with wrong statuses to "error" state and then you can resubmit the ones you want from the interface. - -## How to handle maintenance -- (On server 01) "pause" or "drain" the jobs a bit ahead with `~/bin/long_jobs.sh drain`. This just creates - the file `defer_jobs.txt` in `deploy/releases` and it will not submit new things to zenodo while it's there. -- After deploy, do `~/bin/long_jobs.sh restart` or remove the `defer_jobs.txt` explained above. (The - `hold-submissions.txt` file does something similar but for repository submissions). -- If you deployed new code you should restart delayed job. `sudo cdlsysctl restart delayed_job` - -## How to clear out a big log jam of recently failed items -This sometimes happens because Zenodo is returning lots of 504 errors or has been down. At other times -there may be a lot of huge submissions that came in and are monopolizing all 3 workers and they are spending -forever and timing out and nothing else can get through. - -- Go to the *Admin > Zenodo Submissions* option in the UI and sort by ID descending. This should show you the - items from most recent to least. The things that aren't recent probably aren't the current problem. -- Right now, submissions over 50GB rarely go in and Zenodo doesn't take items over this size in normal operations. - This will be a longer term thing to address, so go to the `stash_engine_zenodo_copies` table and set - the retries column for the huge item(s) to `100` which will prevent it from coming back and being - retried daily to try getting it in. Otherwise it'll just recreate the log jam tomorrow when it retries items. -- Restart the delayed_job daemon `sudo cdlsysctl restart delayed_job` on the 01 server. -- Remove the expired/Sigtermed items from the `delayed_jobs` table as explained in the "it's not processing" - second point. - -## Resending the stuff that failed (lets play the resubmisison game) -- You may need to remove the log jam (above) first. -- Go to *Admin > Zenodo Submissions* and sort as mentioned in the log jam section above. Sort by by ID, descending. -- Click the *Reset stalled to error state* button and the back button and refresh this page. -- Find about where the recent problems started. You want to try to get everything from there - to the top of the list to resubmit, but probably ignore larger things in a first pass because you'll - have to wait forever for those to go through. 
-Just clicking `resend` on everything in order up the list might not be optimal because items - for the same *ident.id* and type (software, data or supplemental) need to proceed in order and - there are three workers so they may arrive out of order and give an error again. -I prefer to click the *Ident.id* column for an item and open in a new tab. Then I can see if it's only one - item or sort earlier items at the top or it's easy to follow what version needs to happen before another - in a shorter list. I can resend the top one, refresh a few minutes later, resend the 2nd, etc. -Alternately you can try clicking all the "resend" buttons up the list and some will error and you - will get warnings about resending some and you can make multiple passes up the list and refreshes - of the page until you get things through. -If there are weird statuses that seem stuck you can always "reset stalled" and have another round or two of fun. ## ActiveJob / Delayed Job Background diff --git a/documentation/zenodo_integration/zenodo_troubleshooting.md b/documentation/zenodo_integration/zenodo_troubleshooting.md new file mode 100644 index 0000000000..2d3cabf493 --- /dev/null +++ b/documentation/zenodo_integration/zenodo_troubleshooting.md @@ -0,0 +1,152 @@ +I want to go fix a number of failed Zenodo submissions. How?
+=============================================================
+
+1. Go to the *Datasets > Zenodo Submissions* option in the UI as a superuser.
+2. ssh into the server and restart the delayed job daemon with `sudo systemctl restart delayed_job` just to
+   be sure old stuff is cleared out and it's running well.
+3. Go to the `delayed_jobs` table in the database and look for items that show `SIGTERM` or `Execution expired`
+   in the `last_error` column. Remove these records from the table.
+4. Go back to the UI and click the *Reset stalled to error state* button. This will make items correctly
+   show error states, even if they stalled or had another problem.
+5. Sort the table by ID descending. Scroll down to about the time you saw the errors you want to correct starting.
+6. Click the *Resend* buttons for items you care about (such as software or supplemental items, I'd ignore large data replications). Some items
+   will need prerequisites to be submitted first (will show in the table after clicking).
+7. Go up the list until you get to the top.
+8. You can refresh the page to see current statuses. If you want to see a chain of items for an identifier then
+   click on the *identifier id* and it will open a window with just submissions for that dataset. If you
+   want to look at error and submission details then click on the zenodo_copies.id in the first column to troubleshoot.
+9. Rinse and repeat steps 4-8 until all that you can fix is fixed. If you run into bad problems with useless
+   crap clogging the queue then start at step 2 again.
+
+You may have to read over error messages and see where things are failing in Zenodo and intervene with other
+solutions manually.
+
+
+Fixing "Please remove all files to create a new version in Zenodo" errors (workaround)
+=======================================================================================
+
+1. Find the previous submission where this was published (should be version before this one in
+   `stash_engine_zenodo_copies` table). Write down or copy/paste the *old deposition_id* and save it.
+2. 
Go into the Zenodo user interface on their site, find the item that won't go through and + click the new version button. Write down the deposition_id for the *new deposition_id* (it should be + in the URL). You can leave that page open in the UI if you wish. +3. Go to the zenodo_copies table and change the old deposition_id for the previously submitted version + to the new item you just created in step 2. Also change the current submission's deposition_id to this number. + (For some reason when it's in this state it's impossible to get the new deposits created through the API + until you do this.) +4. Resubmit through the Dryad queue interface for the item. +5. After it goes through, go back to the record from step 1 and put the *old deposition_id* back in as the correct one.
+
+
+Fixing errors because of zero-length files
+==========================================
+
+Zenodo has decided they do not accept zero-length files in the API, which is suboptimal since there are a
+number of cases in software when people add zero-length files to indicate something (.gitkeep files come
+to mind, or the Passenger web server lets you touch a file to change status). They may also indicate a file
+to be filled or used later.
+
+If you get errors because of zero-length files, our only option is to remove them all from our
+`stash_engine_generic_files` table and resubmit the item. You may need to do this for multiple versions to
+get them all through.
+
+
+Can't upload any more files to Zenodo?
+======================================
+
+Zenodo now limits the number of files you can upload to 100. I suppose this means the user
+must put them into a package like a zip if they want more files than that at Zenodo.
+
+
+People put in dumb GitHub URLs for software and the sizes don't match
+=====================================================================
+
+If they do this they should use the RAW URL from GitHub, not just put in a GitHub UI URL. They are not trying
+to preserve the GitHub UI for future generations. They want to get their software files in, not HTML
+user-interface files from GitHub.
+
+
+It's not processing? Why?
+==========================
+
+- Start or restart the service: `sudo systemctl restart delayed_job` on the 01 server.
+- There may be long (or stalled) jobs running on all workers. The `delayed_jobs` table shows what is trying
+  to run in delayed job. If some of them have a `last_error` status like `execution expired` or `SIGTERM` then
+  you can delete these lines out of the `delayed_jobs` table, restart the service, and jobs stuck
+  behind them should run.
+- You can click the "reset stalled to error state" in the UI and it will put things no longer in the queue
+  and with wrong statuses to "error" state, and then you can resubmit the ones you want from the interface.
+
+How to handle maintenance
+=========================
+
+- (On server 01) "pause" or "drain" the jobs a bit ahead with `touch ~/deploy/releases/defer_jobs.txt`.
+  The delayed job process will not submit new things to Zenodo while that file is there.
+- After deploy, do `rm ~/deploy/releases/defer_jobs.txt`.
+- If you deployed new code you should restart delayed job: `sudo systemctl restart delayed_job`
+
+
+How to clear out a big log jam of recently failed items
+=======================================================
+
+This sometimes happens because Zenodo is returning lots of 504 errors or has been down.
At other times +there may be a lot of huge submissions that came in and are monopolizing all 3 workers and they are spending +forever and timing out and nothing else can get through.
+
+- Go to the *Admin > Zenodo Submissions* option in the UI and sort by ID descending. This should show you the
+  items from most recent to least. The things that aren't recent probably aren't the current problem.
+- Right now, submissions over 50GB rarely go in and Zenodo doesn't take items over this size in normal operations.
+  This will be a longer term thing to address, so go to the `stash_engine_zenodo_copies` table and set
+  the retries column for the huge item(s) to `100` which will prevent it from coming back and being
+  retried daily to try getting it in. Otherwise it'll just recreate the log jam tomorrow when it retries items.
+- Restart the delayed_job daemon with `sudo systemctl restart delayed_job` on the 01 server.
+- Remove the expired/SIGTERMed items from the `delayed_jobs` table as explained in the "it's not processing"
+  second point (a SQL sketch follows after the next section).
+
+Resending the stuff that failed (let's play the resubmission game)
+=================================================================
+
+- You may need to remove the log jam (above) first.
+- Go to *Admin > Zenodo Submissions* and sort as mentioned in the log jam section above. Sort by ID, descending.
+- Click the *Reset stalled to error state* button and the back button and refresh this page.
+- Find roughly where the recent problems started. You want to try to get everything from there
+  to the top of the list to resubmit, but probably ignore larger things in a first pass because you'll
+  have to wait forever for those to go through.
+- Just clicking `resend` on everything in order up the list might not be optimal because items
+  for the same *ident.id* and type (software, data or supplemental) need to proceed in order and
+  there are three workers so they may arrive out of order and give an error again.
+- I prefer to click the *Ident.id* column for an item and open in a new tab. Then I can see if it's only one
+  item or sort earlier items at the top or it's easy to follow what version needs to happen before another
+  in a shorter list. I can resend the top one, refresh a few minutes later, resend the 2nd, etc.
+- Alternatively you can try clicking all the "resend" buttons up the list and some will error and you
+  will get warnings about resending some and you can make multiple passes up the list and refreshes
+  of the page until you get things through.
+- If there are weird statuses that seem stuck you can always "reset stalled" and have another round or two of fun.
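+A concrete sketch of the `delayed_jobs` cleanup referenced above, assuming a MySQL database named `dryad` (a hypothetical name; substitute your own) and the error strings mentioned in this document:
+
+```
+# Inspect jobs that died with SIGTERM or timed out.
+mysql dryad -e "SELECT id, attempts, LEFT(last_error, 100) AS err FROM delayed_jobs
+                WHERE last_error LIKE '%SIGTERM%' OR last_error LIKE '%execution expired%';"
+
+# Remove them so the queued jobs behind them can run, then restart the workers.
+mysql dryad -e "DELETE FROM delayed_jobs
+                WHERE last_error LIKE '%SIGTERM%' OR last_error LIKE '%execution expired%';"
+sudo systemctl restart delayed_job
+```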
+ + +Manually reprocessing Zenodo software submissions +================================================= + +Prepare the database +-------------------- + +Find all ZenodoCopies for this dataset: + +`select id,state,resource_id,copy_type from stash_engine_zenodo_copies where identifier_id=142288 order by resource_id;` + +Save the above table in a text file (or ticket) so you can reference the needed IDs + +Delete the error rows: + +`delete from stash_engine_zenodo_copies where state='error' and identifier_id='XXXXX';` + +Reprocess resources in the Rails console +---------------------------------------- + +Starting with the first resource that had errored, resend each resource in order, and wait until each is completed before sending the next: +``` +r=StashEngine::Resource.find(XXXXX) +r.send_software_to_zenodo +``` + +For entries with `copy_type=software_publish`, after the initial send, send again with `r.send_software_to_zenodo(publish: true)` diff --git a/dryad-config-example/licenses.yml b/dryad-config-example/licenses.yml index b45f7027cd..2015a6f49e 100644 --- a/dryad-config-example/licenses.yml +++ b/dryad-config-example/licenses.yml @@ -2,8 +2,8 @@ # and then uri and name cc0: - uri: https://creativecommons.org/publicdomain/zero/1.0/ - name: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication + uri: https://spdx.org/licenses/CC0-1.0.html + name: Creative Commons Zero v1.0 Universal explanation: > This releases your work to the public domain for any use. badge: icon_cc-zero.svg diff --git a/lib/stash/wrapper/license.rb b/lib/stash/wrapper/license.rb index c1302ea24f..cd48a99b6b 100644 --- a/lib/stash/wrapper/license.rb +++ b/lib/stash/wrapper/license.rb @@ -30,11 +30,11 @@ class License uri: URI('https://creativecommons.org/licenses/by/4.0/') ) - # Convenience instance for the [CC0](https://creativecommons.org/publicdomain/zero/1.0/) + # Convenience instance for the [CC0](https://spdx.org/licenses/CC0-1.0.html) # public domain declaration CC_ZERO = License.new( - name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', - uri: URI('https://creativecommons.org/publicdomain/zero/1.0/') + name: 'Creative Commons Zero v1.0 Universal', + uri: URI('https://spdx.org/licenses/CC0-1.0.html') ) end end diff --git a/lib/stash/zenodo_replicate/metadata_generator.rb b/lib/stash/zenodo_replicate/metadata_generator.rb index 0c33c25792..ddfe59bf40 100644 --- a/lib/stash/zenodo_replicate/metadata_generator.rb +++ b/lib/stash/zenodo_replicate/metadata_generator.rb @@ -78,7 +78,7 @@ def license end def license_for_data - if @resource.rights.first&.rights_uri&.include?('/zero') + if @resource.rights.first&.rights_uri&.include?('/CC0') 'cc-zero' else 'cc-by' diff --git a/lib/tasks/stash_engine_tasks.rake b/lib/tasks/stash_engine_tasks.rake index 70990158f4..adb4ea5fcc 100644 --- a/lib/tasks/stash_engine_tasks.rake +++ b/lib/tasks/stash_engine_tasks.rake @@ -1208,7 +1208,7 @@ namespace :curation_stats do desc 'Recalculate any curation stats from the past three days, not counting today' task update_recent: :environment do - (Time.now.utc.to_date - 4.days..Time.now.utc.to_date - 1.day).find_each do |date| + (Time.now.utc.to_date - 4.days..Time.now.utc.to_date - 1.day).each do |date| print '.' 
stats = StashEngine::CurationStats.find_or_create_by(date: date) stats.recalculate unless stats.created_at > 2.seconds.ago diff --git a/public/api/v2/docs/examples/dataset.json b/public/api/v2/docs/examples/dataset.json index 4e1af953ac..3872b91be1 100644 --- a/public/api/v2/docs/examples/dataset.json +++ b/public/api/v2/docs/examples/dataset.json @@ -77,5 +77,5 @@ "changedFields": [ "none" ], - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" } diff --git a/public/api/v2/docs/examples/datasets.json b/public/api/v2/docs/examples/datasets.json index efd7df1827..09f4da59dc 100644 --- a/public/api/v2/docs/examples/datasets.json +++ b/public/api/v2/docs/examples/datasets.json @@ -63,7 +63,7 @@ "publicationDate": "2020-01-01", "visibility": "public", "sharingLink": "https://datadryad.org/stash/dataset/doi:10.5072/FK2HM58Q3S", - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" }, { "_links": { @@ -194,7 +194,7 @@ "publicationDate": "2020-02-01", "visibility": "public", "sharingLink": "https://datadryad.org/stash/dataset/doi:10.5072/FK2474DX0J", - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" }, { "id": "doi:10.5072/FK22N55P2B", @@ -245,7 +245,7 @@ "publicationDate": "2020-01-07", "visibility": "public", "sharingLink": "https://datadryad.org/stash/dataset/doi:10.5072/FK28W3G720", - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" }, { "_links": { @@ -290,7 +290,7 @@ "publicationDate": "2020-01-07", "visibility": "public", "sharingLink": "https://datadryad.org/stash/dataset/doi:10.5072/FK2MK6CD6D", - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" }, { "identifier": "doi:10.5072/FK2C82BM6C", @@ -359,7 +359,7 @@ "publicationDate": "2020-04-17", "visibility": "public", "sharingLink": "https://datadryad.org/stash/dataset/doi:10.5072/FK24T6N929", - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" }, { "_links": { @@ -410,7 +410,7 @@ "publicationDate": "2020-04-19", "visibility": "public", "sharingLink": "https://datadryad.org/stash/dataset/doi:10.5072/FK2QJ7G49H", - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" }, { "_links": { @@ -455,7 +455,7 @@ "publicationDate": "2020-05-01", "visibility": "public", "sharingLink": "https://datadryad.org/stash/dataset/doi:10.5072/FK2J966H3B", - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" }, { "_links": { @@ -500,7 +500,7 @@ "publicationDate": "2020-05-10", "visibility": "public", "sharingLink": "https://datadryad.org/stash/dataset/doi:10.5072/FK2T151W7S", - "license": "https://creativecommons.org/publicdomain/zero/1.0/" + "license": "https://spdx.org/licenses/CC0-1.0.html" } ] } diff --git a/public/docs/HumanSubjectsData.pdf b/public/docs/HumanSubjectsData.pdf index a633c9112f..3d86e9b24e 100644 Binary files a/public/docs/HumanSubjectsData.pdf and b/public/docs/HumanSubjectsData.pdf differ diff --git a/spec/data/archive/mrt-datacite.xml b/spec/data/archive/mrt-datacite.xml index 26022b6052..ad2cf032d6 100644 --- a/spec/data/archive/mrt-datacite.xml +++ b/spec/data/archive/mrt-datacite.xml @@ -65,7 +65,7 @@