Skip to content

Commit

Permalink
Merge pull request #1953 from ResearchHub/create-doi-on-paper-creation
Browse files Browse the repository at this point in the history
  • Loading branch information
koutst authored Nov 1, 2024
2 parents 0e50424 + 420e884 commit 349572b
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 76 deletions.
3 changes: 3 additions & 0 deletions src/paper/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ def test_create_researchhub_paper_creates_first_version(self):
paper = Paper.objects.get(id=response.data["id"])
self.assertEqual(paper.title, "Test Paper")
self.assertEqual(paper.abstract, "Test abstract")
hubs = paper.unified_document.hubs.all()
self.assertEqual(len(hubs), 1)
self.assertEqual(hubs[0].id, hub.id)

authorship = paper.authorships.first()
self.assertEqual(authorship.author.id, author.id)
Expand Down
18 changes: 18 additions & 0 deletions src/paper/views/paper_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from researchhub.permissions import IsObjectOwnerOrModerator
from researchhub_document.permissions import HasDocumentCensorPermission
from user.related_models.author_model import Author
from utils.crossref import generate_doi, register_doi_for_paper
from utils.http import GET, POST, check_url_contains_pdf
from utils.openalex import OpenAlex
from utils.permissions import CreateOrUpdateIfAllowed, HasAPIKey, PostOnly
Expand Down Expand Up @@ -253,6 +254,7 @@ def create_researchhub_paper(self, request):
author=author_map[author_id],
source="RESEARCHHUB",
author_position=author_position,
raw_author_name=f"{author_map[author_id].first_name} {author_map[author_id].last_name}",
is_corresponding=author_data.get("is_corresponding", False),
)

Expand All @@ -262,12 +264,16 @@ def create_researchhub_paper(self, request):
# Associate hubs
if hub_ids:
paper.hubs.add(*hub_ids)
paper.unified_document.hubs.add(*hub_ids)

# Create paper version
paper_version = 1
base_doi = generate_doi()
if previous_paper:
try:
paper_version = previous_paper.version.version + 1
if previous_paper.version.base_doi:
base_doi = previous_paper.base_doi
except PaperVersion.DoesNotExist:
# If the previous paper version does not exist, create the initial version
# and set the current version to 2.
Expand All @@ -277,10 +283,22 @@ def create_researchhub_paper(self, request):
)
paper_version = 2

crossref_response = register_doi_for_paper(
authors=authors,
title=title,
base_doi=base_doi,
rh_paper=paper,
version=paper_version,
)

if crossref_response.status_code != 200:
return Response("Crossref API Failure", status=400)

PaperVersion.objects.create(
paper=paper,
version=paper_version,
message=change_description,
base_doi=base_doi,
)

# Return serialized paper
Expand Down
44 changes: 9 additions & 35 deletions src/researchhub_document/views/researchhub_post_views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import random
import string
import time
from datetime import datetime

import requests
from django.contrib.contenttypes.models import ContentType
Expand Down Expand Up @@ -56,6 +54,7 @@
)
from researchhub_document.utils import reset_unified_document_cache
from user.related_models.author_model import Author
from utils.crossref import generate_doi, register_doi_for_post
from utils.sentry import log_error
from utils.siftscience import SIFT_POST, sift_track
from utils.throttles import THROTTLE_CLASSES
Expand Down Expand Up @@ -150,6 +149,7 @@ def create_researchhub_post(self, request):
try:
with transaction.atomic():
created_by = request.user
created_by_author = created_by.author_profile
doi = generate_doi() if assign_doi else None

if assign_doi and created_by.get_balance() - CROSSREF_DOI_RSC_FEE < 0:
Expand Down Expand Up @@ -189,7 +189,9 @@ def create_researchhub_post(self, request):
rh_post.eln_src.save(file_name, full_src_file)

if assign_doi:
crossref_response = register_doi(created_by, title, doi, rh_post)
crossref_response = register_doi_for_post(
[created_by_author], title, doi, rh_post
)
if crossref_response.status_code != 200:
return Response("Crossref API Failure", status=400)
charge_doi_fee(created_by, rh_post)
Expand Down Expand Up @@ -238,6 +240,7 @@ def update_existing_researchhub_posts(self, request):
)

created_by = request.user
created_by_author = created_by.author_profile
hubs = data.pop("hubs", None)
renderable_text = data.pop("renderable_text", "")
title = data.get("title", "")
Expand Down Expand Up @@ -297,7 +300,9 @@ def update_existing_researchhub_posts(self, request):
)

if assign_doi:
crossref_response = register_doi(created_by, title, doi, rh_post)
crossref_response = register_doi_for_post(
[created_by_author], title, doi, rh_post
)
if crossref_response.status_code != 200:
return Response("Crossref API Failure", status=400)
charge_doi_fee(created_by, rh_post)
Expand All @@ -321,37 +326,6 @@ def create_unified_doc(self, request):
print("create_unified_doc: ", exception)


def generate_doi():
    """Return a new DOI: the Crossref prefix plus a random suffix.

    The suffix is CROSSREF_DOI_SUFFIX_LENGTH characters long, each drawn
    from lowercase ASCII letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    suffix_chars = [
        random.choice(alphabet) for _ in range(CROSSREF_DOI_SUFFIX_LENGTH)
    ]
    return CROSSREF_DOI_PREFIX + "".join(suffix_chars)


def register_doi(created_by, title, doi, rh_post):
    """Deposit DOI metadata for a post with Crossref.

    Renders the ``crossref.xml`` template with the creating user's author
    profile name, the title, today's date, the DOI and the post's frontend
    URL, then uploads the XML to CROSSREF_API_URL as a multipart form.

    Returns the response object from ``requests.post``.
    """
    today = datetime.today()
    profile = created_by.author_profile
    post_url = f"{BASE_FRONTEND_URL}/post/{rh_post.id}/{rh_post.slug}"

    template_context = dict(
        timestamp=int(time.time()),
        first_name=profile.first_name,
        last_name=profile.last_name,
        title=title,
        publication_month=today.month,
        publication_day=today.day,
        publication_year=today.year,
        doi=doi,
        url=post_url,
    )
    rendered_xml = render_to_string("crossref.xml", template_context)

    # Multipart form fields; a (None, value) tuple sends a plain form field
    # rather than a file part.
    form_parts = {
        "operation": (None, "doMDUpload"),
        "login_id": (None, CROSSREF_LOGIN_ID),
        "login_passwd": (None, CROSSREF_LOGIN_PASSWORD),
        "fname": ("crossref.xml", rendered_xml),
    }
    return requests.post(CROSSREF_API_URL, files=form_parts)


def charge_doi_fee(created_by, rh_post):
purchase = Purchase.objects.create(
user=created_by,
Expand Down
19 changes: 16 additions & 3 deletions src/templates/crossref.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,23 @@
<report-paper>
<report-paper_metadata language = "en">
<contributors>
<person_name sequence="first" contributor_role="author">
<given_name>{{ first_name }}</given_name>
<surname>{{ last_name }}</surname>
{% for contributor in contributors %}
<person_name sequence="{% if forloop.first %}first{% else %}additional{% endif %}" contributor_role="author">
<given_name>{{ contributor.first_name }}</given_name>
<surname>{{ contributor.last_name }}</surname>
{% if contributor.institution %}
<institution>
<institution_name>{{ contributor.institution.name }}</institution_name>
{% if contributor.institution.place %}
<institution_place>{{ contributor.institution.place }}</institution_place>
{% endif %}
</institution>
{% endif %}
{% if contributor.orcid %}
<ORCID authenticated="true">https://orcid.org/{{ contributor.orcid }}</ORCID>
{% endif %}
</person_name>
{% endfor %}
</contributors>
<titles>
<title>{{ title }}</title>
Expand Down
145 changes: 107 additions & 38 deletions src/utils/crossref.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,30 @@
import logging
import random
import re
import string
import time
from datetime import datetime

import habanero
import requests
from django.apps import apps
from django.template.loader import render_to_string
from django.utils import timezone
import habanero

from researchhub.settings import (
BASE_FRONTEND_URL,
CROSSREF_API_URL,
CROSSREF_DOI_PREFIX,
CROSSREF_DOI_SUFFIX_LENGTH,
CROSSREF_LOGIN_ID,
CROSSREF_LOGIN_PASSWORD,
)


class Crossref:
def __init__(self, id=None, query=None):
# TODO: Handle query case
self.cr = habanero.Crossref(mailto='dev@quantfive.org')
self.cr = habanero.Crossref(mailto="dev@quantfive.org")

self.data = None
self.data_message = None
Expand All @@ -28,86 +43,76 @@ def __init__(self, id=None, query=None):
def handle_id(self):
try:
self.data = self.cr.works(ids=[self.id])
self.data_message = self.data['message']
self.data_message = self.data["message"]
except Exception as e:
self.data_message = None
logging.warning(e)
else:
self.abstract = self.data_message.get('abstract', None)
self.abstract = self.data_message.get("abstract", None)

# Remove any jat xml tags
if self.abstract is not None:
self.abstract = re.sub(r'<[^<]+>', '', self.abstract)
self.abstract = re.sub(r"<[^<]+>", "", self.abstract)

self.doi = self.data_message.get('DOI', None)
self.arxiv_id = self.data_message.get('arxiv', None)
self.doi = self.data_message.get("DOI", None)
self.arxiv_id = self.data_message.get("arxiv", None)

self.paper_publish_date = get_crossref_issued_date(
self.data_message
)
self.paper_publish_date = get_crossref_issued_date(self.data_message)

self.publication_type = self.data_message.get('type', None)
self.publication_type = self.data_message.get("type", None)

self.reference_count = self.data_message.get(
'reference-count',
None
)
self.reference_count = self.data_message.get("reference-count", None)
self.referenced_by_count = self.data_message.get(
'is-referenced-by-count',
None
"is-referenced-by-count", None
)
if self.reference_count > 0:
try:
self.references = self.data_message.get('reference', [])
self.references = self.data_message.get("reference", [])
except Exception as e:
logging.warning(
f'Reference count > 0 but found error: {e}'
)
logging.warning(f"Reference count > 0 but found error: {e}")
if self.referenced_by_count > 0:
try:
relation = self.data_message.get('relation', None)
relation = self.data_message.get("relation", None)
if relation is not None:
self.referenced_by = relation.get('cites', [])
self.referenced_by = relation.get("cites", [])
except Exception as e:
logging.warning(
f'Referenced by count > 0 but found error: {e}'
)
logging.warning(f"Referenced by count > 0 but found error: {e}")

self.title = None
title = self.data_message.get('title', [None])
if (type(title) is list):
if (len(title) > 0):
title = self.data_message.get("title", [None])
if type(title) is list:
if len(title) > 0:
self.title = title[0]
elif type(title) is str and (title != ''):
elif type(title) is str and (title != ""):
self.title = title
if self.title is None:
logging.warning('Crossref did not find title')
logging.warning("Crossref did not find title")

self.url = self.data_message.get('URL', None)
self.url = self.data_message.get("URL", None)

def create_paper(self, is_public=False):
Paper = apps.get_model('paper.Paper')
Paper = apps.get_model("paper.Paper")
if self.data_message is not None:
if self.publication_type == 'journal-article':
if self.publication_type == "journal-article":
if self.id is not None:
paper = Paper.objects.create(
title=self.title,
paper_title=self.title,
doi=self.doi,
alternate_ids={'arxiv': self.arxiv_id},
alternate_ids={"arxiv": self.arxiv_id},
url=self.url,
paper_publish_date=self.paper_publish_date,
publication_type=self.publication_type,
external_source='crossref',
external_source="crossref",
retrieved_from_external_source=True,
is_public=is_public
is_public=is_public,
)
return paper
return None


def get_crossref_issued_date(item):
parts = item['issued']['date-parts'][0]
parts = item["issued"]["date-parts"][0]
day = 1
month = 1
year = None
Expand All @@ -121,3 +126,67 @@ def get_crossref_issued_date(item):
return timezone.datetime(year, month, day)
else:
return None


def generate_doi():
    """Return a new DOI: the Crossref prefix plus a random suffix.

    The suffix is CROSSREF_DOI_SUFFIX_LENGTH characters long, each drawn
    from lowercase ASCII letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    suffix_chars = [
        random.choice(alphabet) for _ in range(CROSSREF_DOI_SUFFIX_LENGTH)
    ]
    return CROSSREF_DOI_PREFIX + "".join(suffix_chars)


def register_doi_for_post(authors, title, base_doi, rh_post, version=None):
    """Register a DOI that points at the frontend page of ``rh_post``.

    Builds the post URL from BASE_FRONTEND_URL, the post id and its slug,
    then delegates to ``register_doi`` and returns its result.
    """
    post_url = f"{BASE_FRONTEND_URL}/post/{rh_post.id}/{rh_post.slug}"
    crossref_response = register_doi(
        authors,
        title,
        base_doi,
        post_url,
        version,
    )
    return crossref_response


def register_doi_for_paper(authors, title, base_doi, rh_paper, version=None):
    """Register a DOI that points at the frontend page of ``rh_paper``.

    Builds the paper URL from BASE_FRONTEND_URL, the paper id and its slug,
    then delegates to ``register_doi`` and returns its result.
    """
    paper_url = f"{BASE_FRONTEND_URL}/paper/{rh_paper.id}/{rh_paper.slug}"
    crossref_response = register_doi(
        authors,
        title,
        base_doi,
        paper_url,
        version,
    )
    return crossref_response


def register_doi(authors, title, base_doi, url, version=None):
    """Deposit DOI metadata for a document with Crossref.

    Renders the ``crossref.xml`` template with one contributor entry per
    author and uploads the resulting XML to CROSSREF_API_URL as a
    multipart form.

    Args:
        authors: Iterable of author objects. Each is read for
            ``first_name``, ``last_name``, ``orcid_id`` and ``university``
            (whose ``name``, ``city`` and ``state`` are used when set).
        title: Title placed in the deposit metadata.
        base_doi: DOI to register. When ``version`` is given, the
            registered DOI is ``"<base_doi>.<version>"``.
        url: URL the DOI should resolve to.
        version: Optional version appended to ``base_doi``.

    Returns:
        The response object returned by ``requests.post``.
    """
    dt = datetime.today()

    # Versioned documents get the version appended to the base DOI.
    doi = base_doi if version is None else f"{base_doi}.{version}"

    contributors = []
    for author in authors:
        institution = None
        university = author.university
        if university:
            place = None
            if university.city:
                # The original used a plain string here (missing "f" prefix),
                # so the literal placeholder text was sent to Crossref.
                # Joining also avoids emitting "None" when state is unset.
                place = ", ".join(
                    str(part)
                    for part in (university.city, university.state)
                    if part
                )
            institution = {
                "name": university.name,
                "place": place,
            }

        contributors.append(
            {
                "first_name": author.first_name,
                "last_name": author.last_name,
                "orcid": author.orcid_id,
                "institution": institution,
            }
        )

    context = {
        "timestamp": int(time.time()),
        "contributors": contributors,
        "title": title,
        "publication_month": dt.month,
        "publication_day": dt.day,
        "publication_year": dt.year,
        "doi": doi,
        "url": url,
    }
    crossref_xml = render_to_string("crossref.xml", context)

    # Multipart form fields; a (None, value) tuple sends a plain form field
    # rather than a file part.
    files = {
        "operation": (None, "doMDUpload"),
        "login_id": (None, CROSSREF_LOGIN_ID),
        "login_passwd": (None, CROSSREF_LOGIN_PASSWORD),
        "fname": ("crossref.xml", crossref_xml),
    }
    return requests.post(CROSSREF_API_URL, files=files)

0 comments on commit 349572b

Please sign in to comment.