From dc20355f01bf4d48f5ae5175ad4e94e5dd6a5f08 Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Wed, 15 Jun 2022 14:41:46 -0400 Subject: [PATCH 01/12] Added missing argparse import --- ezidapp/management/commands/diag-db-stats.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ezidapp/management/commands/diag-db-stats.py b/ezidapp/management/commands/diag-db-stats.py index 9376e2613..ce0c07ab0 100644 --- a/ezidapp/management/commands/diag-db-stats.py +++ b/ezidapp/management/commands/diag-db-stats.py @@ -14,6 +14,7 @@ import contextlib import logging +import argparse import django.apps import django.conf From 055b41ef393603e7a57eeb0cdcc0f6c215c00aae Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Wed, 15 Jun 2022 17:04:54 -0400 Subject: [PATCH 02/12] Remove spurious encode, add a little logging --- ezidapp/management/commands/proc-download.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py index f6f5f50cb..13d22e8b8 100644 --- a/ezidapp/management/commands/proc-download.py +++ b/ezidapp/management/commands/proc-download.py @@ -148,6 +148,7 @@ def _flushFile(self, f): def _createFile(self, r): f = None + log.debug("createFile: %s", self._path(r, 1)) try: f = open(self._path(r, 1), "w", newline='', encoding="utf-8") if r.format == ezidapp.models.async_queue.DownloadQueue.CSV: @@ -258,7 +259,7 @@ def _writeXml(self, f, id, metadata): v = impl.util.removeXmlDeclaration(v) else: v = impl.util.xmlEscape(v) - f.write(f'{v}'.encode("utf-8")) + f.write(f'{v}') f.write("") def _harvest1(self, r, f): @@ -273,6 +274,7 @@ def _harvest1(self, r, f): .order_by("identifier") ) ids = list(qs[:1000]) + log.debug("End harvest query, count = %s", len(ids)) if len(ids) == 0: break try: From 319aff7fb319c509c2d07940243abe12f75e2523 Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Wed, 15 Jun 2022 17:09:00 -0400 Subject: [PATCH 03/12] Added diagnostic tool for showing current queue statuses --- .../management/commands/diag-queue-stats.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 ezidapp/management/commands/diag-queue-stats.py diff --git a/ezidapp/management/commands/diag-queue-stats.py b/ezidapp/management/commands/diag-queue-stats.py new file mode 100644 index 000000000..d5f070d5f --- /dev/null +++ b/ezidapp/management/commands/diag-queue-stats.py @@ -0,0 +1,54 @@ +import contextlib +import json +import logging + +import django.apps +import django.conf +import django.contrib.auth.models +import django.core.management +import django.db.models +import django.db.transaction + +import ezidapp.models.async_queue +import ezidapp.models.identifier + +_L = logging.getLogger(__name__) + +class Command(django.core.management.BaseCommand): + help = __doc__ + + def __init__(self): + super(Command, self).__init__() + + def handle(self, *_, **opt): + queue_classes = [ + ("binder", ezidapp.models.async_queue.BinderQueue), + ("datacite", ezidapp.models.async_queue.DataciteQueue), + ("crossref", ezidapp.models.async_queue.CrossrefQueue), + ("searchindexer", ezidapp.models.async_queue.SearchIndexerQueue), + ] + queue_stats = { + 'download': {} + } + #Download queue is a different beast + _L.info("Processing queue: download...") + res = ezidapp.models.async_queue.DownloadQueue.objects\ + .all()\ + .values('stage')\ + .annotate(total=django.db.models.Count('stage'))\ + .order_by() + for row in res: + queue_stats['download'][row['stage']] = row['total'] + + for q_class in queue_classes: + q_name = q_class[0] + _L.info(f"Processing queue: {q_name}") + res = q_class[1].objects\ + .all()\ + .values('status')\ + .annotate(total=django.db.models.Count('status'))\ + .order_by() + queue_stats[q_name] = {} + for row in res: + queue_stats[q_name][row['status']] = row['total'] + print(json.dumps(queue_stats, indent=2)) From d9c39cf35f501ee7909c60eb1328519a70faf895 Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Wed, 15 Jun 2022 17:13:19 -0400 Subject: [PATCH 04/12] Docstring --- .../management/commands/diag-queue-stats.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/ezidapp/management/commands/diag-queue-stats.py b/ezidapp/management/commands/diag-queue-stats.py index d5f070d5f..e4c21bbb7 100644 --- a/ezidapp/management/commands/diag-queue-stats.py +++ b/ezidapp/management/commands/diag-queue-stats.py @@ -1,4 +1,20 @@ -import contextlib +# Copyright©2021, Regents of the University of California +# http://creativecommons.org/licenses/BSD + +"""Report queue statuses + +For each queue, report the number of entries at each status level. + +For queues other than download: + U = Unsubmitted + C = Unchecked + S = Submitted + W = Warning + F = Failure + I = Ignored + O = Success +""" + import json import logging From 4eb7fbee6569abb70835503205cb64009115b76f Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Thu, 16 Jun 2022 09:50:35 -0400 Subject: [PATCH 05/12] Adjusting proc-download logging --- ezidapp/management/commands/proc-download.py | 33 ++++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py index 13d22e8b8..b263387c1 100644 --- a/ezidapp/management/commands/proc-download.py +++ b/ezidapp/management/commands/proc-download.py @@ -41,7 +41,7 @@ import impl.util import impl.util2 -log = logging.getLogger(__name__) +#log = logging.getLogger(__name__) SUFFIX_FORMAT_DICT = { @@ -74,7 +74,7 @@ def run(self): self._remove_expired_files() doSleep = False except Exception as e: - log.exception('Exception') + self.log.exception('Exception') impl.log.otherError("download.run", e) doSleep = True @@ -148,7 +148,7 @@ def _flushFile(self, f): def _createFile(self, r): f = None - log.debug("createFile: %s", self._path(r, 1)) + self.log.debug("createFile: %s", self._path(r, 1)) try: f = open(self._path(r, 1), "w", newline='', encoding="utf-8") if r.format == ezidapp.models.async_queue.DownloadQueue.CSV: @@ -163,7 +163,7 @@ def _createFile(self, r): # probe the file to find its size. n = f.tell() except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error creating file", e) else: r.stage = ezidapp.models.async_queue.DownloadQueue.HARVEST @@ -266,6 +266,7 @@ def _harvest1(self, r, f): columns = self._decode(r.columns) constraints = self._decode(r.constraints) options = self._decode(r.options) + _total = 0 while not self.terminated(): qs = ( ezidapp.models.identifier.SearchIdentifier.objects.filter(identifier__gt=r.lastId) @@ -273,8 +274,9 @@ def _harvest1(self, r, f): .select_related("owner", "ownergroup", "datacenter", "profile") .order_by("identifier") ) + self.log.debug("Query issued: %s", str(qs.query)) ids = list(qs[:1000]) - log.debug("End harvest query, count = %s", len(ids)) + self.log.debug("Matches = %s", len(ids)) if len(ids) == 0: break try: @@ -289,13 +291,18 @@ def _harvest1(self, r, f): self._writeXml(f, id, m) else: assert False, "unhandled case" + _total += 1 self._flushFile(f) except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error writing file", e) r.lastId = ids[-1].identifier r.fileSize = f.tell() r.save() + if self.terminated(): + self.log.info("Terminated.") + else: + self.log.info("Total records exported: %s", _total) def _harvest(self, r): f = None @@ -306,7 +313,7 @@ def _harvest(self, r): f.seek(r.fileSize) f.truncate() except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error re-opening/seeking/truncating file", e) start = r.currentIndex for i in range(r.currentIndex, len(r.toHarvest.split(","))): @@ -320,7 +327,7 @@ def _harvest(self, r): f.write("") self._flushFile(f) except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error writing file footer", e) r.stage = ezidapp.models.async_queue.DownloadQueue.COMPRESS r.save() @@ -371,7 +378,7 @@ def _compressFile(self, r): p.returncode == 0 and stderr == b'' ), f"compression command returned status code {p.returncode:d}, stderr '{stderr}'" except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error compressing file", e) else: r.stage = ezidapp.models.async_queue.DownloadQueue.DELETE @@ -387,7 +394,7 @@ def _deleteUncompressedFile(self, r): if os.path.exists(self._path(r, 1)): os.unlink(self._path(r, 1)) except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error deleting uncompressed file", e) else: r.stage = ezidapp.models.async_queue.DownloadQueue.MOVE @@ -400,7 +407,7 @@ def _moveCompressedFile(self, r): else: assert os.path.exists(self._path(r, 3)), "file has disappeared" except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error moving compressed file", e) else: r.stage = ezidapp.models.async_queue.DownloadQueue.NOTIFY @@ -414,7 +421,7 @@ def _notifyRequestor(self, r): f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest.encode('utf-8')}\n" ) except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error writing sidecar file", e) finally: if f: @@ -450,7 +457,7 @@ def _notifyRequestor(self, r): fail_silently=True, ) except Exception as e: - log.exception('Exception') + self.log.exception('Exception') raise self._wrapException("error sending email", e) r.delete() From 93e6c08fcbc2713174f8247fa69dee720f1f689d Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Thu, 16 Jun 2022 11:58:05 -0400 Subject: [PATCH 06/12] Adjust download logging, add typehints, fix bytes output to sidecar file --- ezidapp/management/commands/proc-download.py | 72 ++++++++++++-------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py index b263387c1..0d68f45c9 100644 --- a/ezidapp/management/commands/proc-download.py +++ b/ezidapp/management/commands/proc-download.py @@ -13,18 +13,19 @@ """ import csv -import logging import os import os.path import pathlib import re import subprocess import time +import typing import django.conf import django.core.mail import django.core.management import django.db +import django.db.models import ezidapp.management.commands.proc_base import ezidapp.models.async_queue @@ -41,8 +42,6 @@ import impl.util import impl.util2 -#log = logging.getLogger(__name__) - SUFFIX_FORMAT_DICT = { ezidapp.models.async_queue.DownloadQueue.ANVL: "txt", @@ -66,11 +65,12 @@ def run(self): if doSleep: self.sleep(django.conf.settings.DAEMONS_DOWNLOAD_PROCESSING_IDLE_SLEEP) try: - r = ezidapp.models.async_queue.DownloadQueue.objects.all().order_by("seq")[:1] - if len(r) == 0: + rs = ezidapp.models.async_queue.DownloadQueue.objects.all().order_by("seq")[:1] + if len(rs) == 0: + # Don't sleep while work is in progress doSleep = True continue - self._proc_stage(r) + self._proc_stage(rs) self._remove_expired_files() doSleep = False except Exception as e: @@ -78,8 +78,12 @@ def run(self): impl.log.otherError("download.run", e) doSleep = True - def _proc_stage(self, r): - r = r[0] + def _proc_stage(self, rs): + # rs is a list of ezidapp.models.async_queue.DownloadQueue + # Only process one download request at a time + # Once completed, current is deleted, so the + # next one becomes index 0 + r = rs[0] if r.stage == ezidapp.models.async_queue.DownloadQueue.CREATE: self._createFile(r) elif r.stage == ezidapp.models.async_queue.DownloadQueue.HARVEST: @@ -122,7 +126,7 @@ def _wrapException(self, context, exception): m = ": " + m return Exception(f"batch download error: {context}: {type(exception).__name__}{m}") - def _path(self, r, i): + def _path(self, r:ezidapp.models.async_queue.DownloadQueue, i:int): # i=1: uncompressed work file # i=2: compressed work file # i=3: compressed delivery file @@ -142,11 +146,11 @@ def _path(self, r, i): def _csvEncode(self, s): return impl.util.oneLine(s).encode("utf-8") - def _flushFile(self, f): + def _flushFile(self, f:typing.TextIO): f.flush() os.fsync(f.fileno()) - def _createFile(self, r): + def _createFile(self, r:ezidapp.models.async_queue.DownloadQueue): f = None self.log.debug("createFile: %s", self._path(r, 1)) try: @@ -214,9 +218,9 @@ def _satisfiesConstraints(self, id_model, constraints): def _prepareMetadata( self, - id_model: ezidapp.models.identifier.Identifier, + id_model: ezidapp.models.identifier.SearchIdentifier, convertTimestamps: object, - ) -> object: + ) -> dict: d = id_model.toLegacy() ezidapp.models.model_util.convertLegacyToExternal(d) if id_model.isDoi: @@ -226,13 +230,13 @@ def _prepareMetadata( d["_updated"] = impl.util.formatTimestampZulu(int(d["_updated"])) return d - def _writeAnvl(self, f, id_model, metadata): + def _writeAnvl(self, f:typing.TextIO, id_model:ezidapp.models.identifier.SearchIdentifier, metadata:dict): if f.tell() > 0: f.write("\n") f.write(f":: {id_model.identifier}\n") f.write(impl.anvl.format(metadata).encode("utf-8")) - def _writeCsv(self, f, columns, id_model, metadata): + def _writeCsv(self, f:typing.TextIO, columns, id_model:ezidapp.models.identifier.SearchIdentifier, metadata:dict): w = csv.writer(f) l = [] for c in columns: @@ -252,7 +256,7 @@ def _writeCsv(self, f, columns, id_model, metadata): l.append(metadata.get(c, "")) w.writerow([self._csvEncode(c).decode('utf-8', errors='replace') for c in l]) - def _writeXml(self, f, id, metadata): + def _writeXml(self, f:typing.TextIO, id:ezidapp.models.identifier.SearchIdentifier, metadata:dict): f.write(f'') for k, v in list(metadata.items()): if k in ["datacite", "crossref"]: @@ -262,7 +266,7 @@ def _writeXml(self, f, id, metadata): f.write(f'{v}') f.write("") - def _harvest1(self, r, f): + def _harvest1(self, r:ezidapp.models.async_queue.DownloadQueue, f:typing.TextIO): columns = self._decode(r.columns) constraints = self._decode(r.constraints) options = self._decode(r.options) @@ -274,9 +278,9 @@ def _harvest1(self, r, f): .select_related("owner", "ownergroup", "datacenter", "profile") .order_by("identifier") ) - self.log.debug("Query issued: %s", str(qs.query)) + #self.log.debug("Query issued: %s", str(qs.query)) ids = list(qs[:1000]) - self.log.debug("Matches = %s", len(ids)) + self.log.debug("Total query matches: %s", len(ids)) if len(ids) == 0: break try: @@ -300,11 +304,11 @@ def _harvest1(self, r, f): r.fileSize = f.tell() r.save() if self.terminated(): - self.log.info("Terminated.") + self.log.warning("Harvest terminated.") else: self.log.info("Total records exported: %s", _total) - def _harvest(self, r): + def _harvest(self, r:ezidapp.models.async_queue.DownloadQueue): f = None try: try: @@ -335,7 +339,7 @@ def _harvest(self, r): if f: f.close() - def _compressFile(self, r): + def _compressFile(self, r:ezidapp.models.async_queue.DownloadQueue): infile = None outfile = None try: @@ -389,7 +393,7 @@ def _compressFile(self, r): if outfile: outfile.close() - def _deleteUncompressedFile(self, r): + def _deleteUncompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue): try: if os.path.exists(self._path(r, 1)): os.unlink(self._path(r, 1)) @@ -400,7 +404,7 @@ def _deleteUncompressedFile(self, r): r.stage = ezidapp.models.async_queue.DownloadQueue.MOVE r.save() - def _moveCompressedFile(self, r): + def _moveCompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue): try: if os.path.exists(self._path(r, 2)): os.rename(self._path(r, 2), self._path(r, 3)) @@ -413,12 +417,12 @@ def _moveCompressedFile(self, r): r.stage = ezidapp.models.async_queue.DownloadQueue.NOTIFY r.save() - def _notifyRequestor(self, r): + def _notifyRequestor(self, r:ezidapp.models.async_queue.DownloadQueue): f = None try: - f = open(self._path(r, 4), "w") + f = open(self._path(r, 4), mode="w", encoding="utf-8") f.write( - f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest.encode('utf-8')}\n" + f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest}\n" ) except Exception as e: self.log.exception('Exception') @@ -461,23 +465,31 @@ def _notifyRequestor(self, r): raise self._wrapException("error sending email", e) r.delete() - def _unescape(self, s): + def _unescape(self, s:str)->str: return re.sub("%([0-9A-F][0-9A-F])", lambda m: chr(int(m.group(1), 16)), s) - def _decode(self, s): + def _decode(self, s:str): + ''' + Decodes DownloadQueue.constraint + ''' if s[0] == "B": + # boolean return s[1:] == "True" elif s[0] == "I": + # integer return int(s[1:]) elif s[0] == "S": + # string return s[1:] elif s[0] == "L": + # list, from comma separated string of constraints if len(s) > 1: return [self._decode(self._unescape(i)) for i in s[1:].split(",")] else: return [] elif s[0] == "D": + # dict, from comma separated list of k=v if len(s) > 1: return dict( list( @@ -495,7 +507,7 @@ def _decode(self, s): assert False, "unhandled case" - def _fileSuffix(self, r): + def _fileSuffix(self, r:ezidapp.models.async_queue.DownloadQueue): if r.compression == ezidapp.models.async_queue.DownloadQueue.GZIP: return SUFFIX_FORMAT_DICT[r.format] + ".gz" else: From 41316357b552660f787b8788852ec050a4867fcb Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Thu, 16 Jun 2022 12:03:34 -0400 Subject: [PATCH 07/12] remove unused import --- ezidapp/management/commands/proc-download.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py index 0d68f45c9..7be9865e9 100644 --- a/ezidapp/management/commands/proc-download.py +++ b/ezidapp/management/commands/proc-download.py @@ -25,7 +25,6 @@ import django.core.mail import django.core.management import django.db -import django.db.models import ezidapp.management.commands.proc_base import ezidapp.models.async_queue From e60b124b512900ba0607586292571d51ba6e177f Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Thu, 16 Jun 2022 12:04:10 -0400 Subject: [PATCH 08/12] black formatting --- ezidapp/management/commands/proc-download.py | 49 +++++++++++--------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py index 7be9865e9..9038fe8ea 100644 --- a/ezidapp/management/commands/proc-download.py +++ b/ezidapp/management/commands/proc-download.py @@ -48,6 +48,7 @@ ezidapp.models.async_queue.DownloadQueue.XML: "xml", } + class Command(ezidapp.management.commands.proc_base.AsyncProcessingCommand): help = __doc__ name = __name__ @@ -125,7 +126,7 @@ def _wrapException(self, context, exception): m = ": " + m return Exception(f"batch download error: {context}: {type(exception).__name__}{m}") - def _path(self, r:ezidapp.models.async_queue.DownloadQueue, i:int): + def _path(self, r: ezidapp.models.async_queue.DownloadQueue, i: int): # i=1: uncompressed work file # i=2: compressed work file # i=3: compressed delivery file @@ -145,11 +146,11 @@ def _path(self, r:ezidapp.models.async_queue.DownloadQueue, i:int): def _csvEncode(self, s): return impl.util.oneLine(s).encode("utf-8") - def _flushFile(self, f:typing.TextIO): + def _flushFile(self, f: typing.TextIO): f.flush() os.fsync(f.fileno()) - def _createFile(self, r:ezidapp.models.async_queue.DownloadQueue): + def _createFile(self, r: ezidapp.models.async_queue.DownloadQueue): f = None self.log.debug("createFile: %s", self._path(r, 1)) try: @@ -229,13 +230,21 @@ def _prepareMetadata( d["_updated"] = impl.util.formatTimestampZulu(int(d["_updated"])) return d - def _writeAnvl(self, f:typing.TextIO, id_model:ezidapp.models.identifier.SearchIdentifier, metadata:dict): + def _writeAnvl( + self, f: typing.TextIO, id_model: ezidapp.models.identifier.SearchIdentifier, metadata: dict + ): if f.tell() > 0: f.write("\n") f.write(f":: {id_model.identifier}\n") f.write(impl.anvl.format(metadata).encode("utf-8")) - def _writeCsv(self, f:typing.TextIO, columns, id_model:ezidapp.models.identifier.SearchIdentifier, metadata:dict): + def _writeCsv( + self, + f: typing.TextIO, + columns, + id_model: ezidapp.models.identifier.SearchIdentifier, + metadata: dict, + ): w = csv.writer(f) l = [] for c in columns: @@ -255,7 +264,9 @@ def _writeCsv(self, f:typing.TextIO, columns, id_model:ezidapp.models.identifier l.append(metadata.get(c, "")) w.writerow([self._csvEncode(c).decode('utf-8', errors='replace') for c in l]) - def _writeXml(self, f:typing.TextIO, id:ezidapp.models.identifier.SearchIdentifier, metadata:dict): + def _writeXml( + self, f: typing.TextIO, id: ezidapp.models.identifier.SearchIdentifier, metadata: dict + ): f.write(f'') for k, v in list(metadata.items()): if k in ["datacite", "crossref"]: @@ -265,7 +276,7 @@ def _writeXml(self, f:typing.TextIO, id:ezidapp.models.identifier.SearchIdentifi f.write(f'{v}') f.write("") - def _harvest1(self, r:ezidapp.models.async_queue.DownloadQueue, f:typing.TextIO): + def _harvest1(self, r: ezidapp.models.async_queue.DownloadQueue, f: typing.TextIO): columns = self._decode(r.columns) constraints = self._decode(r.constraints) options = self._decode(r.options) @@ -277,7 +288,7 @@ def _harvest1(self, r:ezidapp.models.async_queue.DownloadQueue, f:typing.TextIO) .select_related("owner", "ownergroup", "datacenter", "profile") .order_by("identifier") ) - #self.log.debug("Query issued: %s", str(qs.query)) + # self.log.debug("Query issued: %s", str(qs.query)) ids = list(qs[:1000]) self.log.debug("Total query matches: %s", len(ids)) if len(ids) == 0: @@ -307,7 +318,7 @@ def _harvest1(self, r:ezidapp.models.async_queue.DownloadQueue, f:typing.TextIO) else: self.log.info("Total records exported: %s", _total) - def _harvest(self, r:ezidapp.models.async_queue.DownloadQueue): + def _harvest(self, r: ezidapp.models.async_queue.DownloadQueue): f = None try: try: @@ -338,7 +349,7 @@ def _harvest(self, r:ezidapp.models.async_queue.DownloadQueue): if f: f.close() - def _compressFile(self, r:ezidapp.models.async_queue.DownloadQueue): + def _compressFile(self, r: ezidapp.models.async_queue.DownloadQueue): infile = None outfile = None try: @@ -392,7 +403,7 @@ def _compressFile(self, r:ezidapp.models.async_queue.DownloadQueue): if outfile: outfile.close() - def _deleteUncompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue): + def _deleteUncompressedFile(self, r: ezidapp.models.async_queue.DownloadQueue): try: if os.path.exists(self._path(r, 1)): os.unlink(self._path(r, 1)) @@ -403,7 +414,7 @@ def _deleteUncompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue): r.stage = ezidapp.models.async_queue.DownloadQueue.MOVE r.save() - def _moveCompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue): + def _moveCompressedFile(self, r: ezidapp.models.async_queue.DownloadQueue): try: if os.path.exists(self._path(r, 2)): os.rename(self._path(r, 2), self._path(r, 3)) @@ -416,13 +427,11 @@ def _moveCompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue): r.stage = ezidapp.models.async_queue.DownloadQueue.NOTIFY r.save() - def _notifyRequestor(self, r:ezidapp.models.async_queue.DownloadQueue): + def _notifyRequestor(self, r: ezidapp.models.async_queue.DownloadQueue): f = None try: f = open(self._path(r, 4), mode="w", encoding="utf-8") - f.write( - f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest}\n" - ) + f.write(f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest}\n") except Exception as e: self.log.exception('Exception') raise self._wrapException("error writing sidecar file", e) @@ -464,11 +473,10 @@ def _notifyRequestor(self, r:ezidapp.models.async_queue.DownloadQueue): raise self._wrapException("error sending email", e) r.delete() - def _unescape(self, s:str)->str: + def _unescape(self, s: str) -> str: return re.sub("%([0-9A-F][0-9A-F])", lambda m: chr(int(m.group(1), 16)), s) - - def _decode(self, s:str): + def _decode(self, s: str): ''' Decodes DownloadQueue.constraint ''' @@ -505,8 +513,7 @@ def _decode(self, s:str): else: assert False, "unhandled case" - - def _fileSuffix(self, r:ezidapp.models.async_queue.DownloadQueue): + def _fileSuffix(self, r: ezidapp.models.async_queue.DownloadQueue): if r.compression == ezidapp.models.async_queue.DownloadQueue.GZIP: return SUFFIX_FORMAT_DICT[r.format] + ".gz" else: From f8c9af74f9eba559bab855770c0a536fc9f285ef Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Wed, 22 Jun 2022 15:58:16 -0400 Subject: [PATCH 09/12] Adding minimal testing for data validation --- tests/test_docs/datacite_metadata_01.txt | 7 +++++++ tests/test_validation.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/test_docs/datacite_metadata_01.txt create mode 100644 tests/test_validation.py diff --git a/tests/test_docs/datacite_metadata_01.txt b/tests/test_docs/datacite_metadata_01.txt new file mode 100644 index 000000000..af3ee31c6 --- /dev/null +++ b/tests/test_docs/datacite_metadata_01.txt @@ -0,0 +1,7 @@ +# For input to client testing datacite minting +# client.py l admin mint doi:10.5072/FK2 @datacite_metadata_01.txt +datacite.creator: Dave +datacite.title: Test doc +datacite.publicationyear: 1961 +datacite.resourcetype: Event +datacite.publisher: Tester diff --git a/tests/test_validation.py b/tests/test_validation.py new file mode 100644 index 000000000..f1f2ca232 --- /dev/null +++ b/tests/test_validation.py @@ -0,0 +1,19 @@ +# Copyright©2021, Regents of the University of California +# http://creativecommons.org/licenses/BSD + +"""Test ezidapp.models.validation +""" + +import pytest + +import ezidapp.models.validation + +#@pytest.mark.django_db +@pytest.mark.parametrize("test,expected",[ + ('1961', '1961'), + ('196104','1961-04'), + ('20201201', '2020-12-01'), +]) +def test_publicationDate(test, expected): + res = ezidapp.models.validation.publicationDate(test) + assert res == expected From 585209cbc1607ca684d37602439acefc5a2136f3 Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Wed, 22 Jun 2022 15:59:37 -0400 Subject: [PATCH 10/12] a tuple is not valid variable for the specified format string --- ezidapp/models/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ezidapp/models/validation.py b/ezidapp/models/validation.py index 7f6b93c09..74e64c2be 100644 --- a/ezidapp/models/validation.py +++ b/ezidapp/models/validation.py @@ -114,7 +114,7 @@ def publicationDate(date): # return ("%04d", "%04d-%02d", "%04d-%02d-%02d")[numComponents - 1] % t[:numComponents] return ("{:04d}", "{:04d}-{:02d}", "{:04d}-{:02d}-{:02d}")[ numComponents - 1 - ].format(t[:numComponents]) + ].format(*t[:numComponents]) except Exception: pass raise django.core.exceptions.ValidationError( From 6f3a63ecd085ba5626945310225536349aafed4e Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Wed, 22 Jun 2022 16:58:01 -0400 Subject: [PATCH 11/12] added reminder to flesh out this test case --- tests/test_validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_validation.py b/tests/test_validation.py index f1f2ca232..1fdcfd6ef 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -8,7 +8,7 @@ import ezidapp.models.validation -#@pytest.mark.django_db +#TODO: Flesh out the test cases to match all the possibilities in the tested method @pytest.mark.parametrize("test,expected",[ ('1961', '1961'), ('196104','1961-04'), @@ -17,3 +17,4 @@ def test_publicationDate(test, expected): res = ezidapp.models.validation.publicationDate(test) assert res == expected + From 7be309694205f2453bd0bfb5f3eef0d595c7e3a9 Mon Sep 17 00:00:00 2001 From: datadavev <605409+datadavev@users.noreply.github.com> Date: Wed, 22 Jun 2022 17:01:48 -0400 Subject: [PATCH 12/12] Make import at module level --- impl/mapping.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/impl/mapping.py b/impl/mapping.py index a0125abe5..b0be56860 100644 --- a/impl/mapping.py +++ b/impl/mapping.py @@ -21,7 +21,7 @@ import re -# import ezidapp.models.validation +import ezidapp.models.validation import impl.datacite import impl.erc import impl.util @@ -52,8 +52,8 @@ def __init__( def validatedDate(self): if self.date is not None: try: - import ezidapp.models.validation - + #2022-06-22 Not clear why this import was within the method instead of module level + #import ezidapp.models.validation return ezidapp.models.validation.publicationDate(self.date) except Exception: return None