From dc20355f01bf4d48f5ae5175ad4e94e5dd6a5f08 Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Wed, 15 Jun 2022 14:41:46 -0400
Subject: [PATCH 01/12] Added missing argparse import
---
ezidapp/management/commands/diag-db-stats.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/ezidapp/management/commands/diag-db-stats.py b/ezidapp/management/commands/diag-db-stats.py
index 9376e2613..ce0c07ab0 100644
--- a/ezidapp/management/commands/diag-db-stats.py
+++ b/ezidapp/management/commands/diag-db-stats.py
@@ -14,6 +14,7 @@
import contextlib
import logging
+import argparse
import django.apps
import django.conf
From 055b41ef393603e7a57eeb0cdcc0f6c215c00aae Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Wed, 15 Jun 2022 17:04:54 -0400
Subject: [PATCH 02/12] Remove spurious encode, add a little logging
---
ezidapp/management/commands/proc-download.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py
index f6f5f50cb..13d22e8b8 100644
--- a/ezidapp/management/commands/proc-download.py
+++ b/ezidapp/management/commands/proc-download.py
@@ -148,6 +148,7 @@ def _flushFile(self, f):
def _createFile(self, r):
f = None
+ log.debug("createFile: %s", self._path(r, 1))
try:
f = open(self._path(r, 1), "w", newline='', encoding="utf-8")
if r.format == ezidapp.models.async_queue.DownloadQueue.CSV:
@@ -258,7 +259,7 @@ def _writeXml(self, f, id, metadata):
v = impl.util.removeXmlDeclaration(v)
else:
v = impl.util.xmlEscape(v)
- f.write(f'{v}'.encode("utf-8"))
+ f.write(f'{v}')
f.write("")
def _harvest1(self, r, f):
@@ -273,6 +274,7 @@ def _harvest1(self, r, f):
.order_by("identifier")
)
ids = list(qs[:1000])
+ log.debug("End harvest query, count = %s", len(ids))
if len(ids) == 0:
break
try:
From 319aff7fb319c509c2d07940243abe12f75e2523 Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Wed, 15 Jun 2022 17:09:00 -0400
Subject: [PATCH 03/12] Added diagnostic tool for showing current queue
statuses
---
.../management/commands/diag-queue-stats.py | 54 +++++++++++++++++++
1 file changed, 54 insertions(+)
create mode 100644 ezidapp/management/commands/diag-queue-stats.py
diff --git a/ezidapp/management/commands/diag-queue-stats.py b/ezidapp/management/commands/diag-queue-stats.py
new file mode 100644
index 000000000..d5f070d5f
--- /dev/null
+++ b/ezidapp/management/commands/diag-queue-stats.py
@@ -0,0 +1,54 @@
+import contextlib
+import json
+import logging
+
+import django.apps
+import django.conf
+import django.contrib.auth.models
+import django.core.management
+import django.db.models
+import django.db.transaction
+
+import ezidapp.models.async_queue
+import ezidapp.models.identifier
+
+_L = logging.getLogger(__name__)
+
+class Command(django.core.management.BaseCommand):
+    # No class docstring on purpose: it would shadow the module-level
+    # __doc__ that supplies the help text below.
+    help = __doc__
+
+    # One entry per queue: (name used in the report, model class, name of
+    # the field to group by). The download queue tracks progress in
+    # 'stage'; every other queue uses 'status'.
+    QUEUES = (
+        ("download", ezidapp.models.async_queue.DownloadQueue, "stage"),
+        ("binder", ezidapp.models.async_queue.BinderQueue, "status"),
+        ("datacite", ezidapp.models.async_queue.DataciteQueue, "status"),
+        ("crossref", ezidapp.models.async_queue.CrossrefQueue, "status"),
+        ("searchindexer", ezidapp.models.async_queue.SearchIndexerQueue, "status"),
+    )
+
+    def handle(self, *_, **opt):
+        """Count the rows of every queue per status/stage and write the result.
+
+        Positional arguments and options passed in by Django are ignored.
+        The output is a JSON object keyed by queue name; each value maps
+        a status (or stage, for the download queue) to its row count.
+        """
+        queue_stats = {}
+        for q_name, q_model, field in self.QUEUES:
+            _L.info("Processing queue: %s", q_name)
+            # values() + annotate() produces one row per distinct value of
+            # the field; the empty order_by() clears any ordering on the
+            # queryset.
+            rows = (
+                q_model.objects.values(field)
+                .annotate(total=django.db.models.Count(field))
+                .order_by()
+            )
+            queue_stats[q_name] = {row[field]: row["total"] for row in rows}
+        # Write through self.stdout rather than print() so the report can be
+        # captured or redirected, e.g. via django.core.management.call_command().
+        self.stdout.write(json.dumps(queue_stats, indent=2))
From d9c39cf35f501ee7909c60eb1328519a70faf895 Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Wed, 15 Jun 2022 17:13:19 -0400
Subject: [PATCH 04/12] Docstring
---
.../management/commands/diag-queue-stats.py | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/ezidapp/management/commands/diag-queue-stats.py b/ezidapp/management/commands/diag-queue-stats.py
index d5f070d5f..e4c21bbb7 100644
--- a/ezidapp/management/commands/diag-queue-stats.py
+++ b/ezidapp/management/commands/diag-queue-stats.py
@@ -1,4 +1,20 @@
-import contextlib
+# Copyright©2021, Regents of the University of California
+# http://creativecommons.org/licenses/BSD
+
+"""Report queue statuses
+
+For each queue, report the number of entries at each status level.
+
+For queues other than download:
+ U = Unsubmitted
+ C = Unchecked
+ S = Submitted
+ W = Warning
+ F = Failure
+ I = Ignored
+ O = Success
+"""
+
import json
import logging
From 4eb7fbee6569abb70835503205cb64009115b76f Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Thu, 16 Jun 2022 09:50:35 -0400
Subject: [PATCH 05/12] Adjusting proc-download logging
---
ezidapp/management/commands/proc-download.py | 33 ++++++++++++--------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py
index 13d22e8b8..b263387c1 100644
--- a/ezidapp/management/commands/proc-download.py
+++ b/ezidapp/management/commands/proc-download.py
@@ -41,7 +41,7 @@
import impl.util
import impl.util2
-log = logging.getLogger(__name__)
+#log = logging.getLogger(__name__)
SUFFIX_FORMAT_DICT = {
@@ -74,7 +74,7 @@ def run(self):
self._remove_expired_files()
doSleep = False
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
impl.log.otherError("download.run", e)
doSleep = True
@@ -148,7 +148,7 @@ def _flushFile(self, f):
def _createFile(self, r):
f = None
- log.debug("createFile: %s", self._path(r, 1))
+ self.log.debug("createFile: %s", self._path(r, 1))
try:
f = open(self._path(r, 1), "w", newline='', encoding="utf-8")
if r.format == ezidapp.models.async_queue.DownloadQueue.CSV:
@@ -163,7 +163,7 @@ def _createFile(self, r):
# probe the file to find its size.
n = f.tell()
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error creating file", e)
else:
r.stage = ezidapp.models.async_queue.DownloadQueue.HARVEST
@@ -266,6 +266,7 @@ def _harvest1(self, r, f):
columns = self._decode(r.columns)
constraints = self._decode(r.constraints)
options = self._decode(r.options)
+ _total = 0
while not self.terminated():
qs = (
ezidapp.models.identifier.SearchIdentifier.objects.filter(identifier__gt=r.lastId)
@@ -273,8 +274,9 @@ def _harvest1(self, r, f):
.select_related("owner", "ownergroup", "datacenter", "profile")
.order_by("identifier")
)
+ self.log.debug("Query issued: %s", str(qs.query))
ids = list(qs[:1000])
- log.debug("End harvest query, count = %s", len(ids))
+ self.log.debug("Matches = %s", len(ids))
if len(ids) == 0:
break
try:
@@ -289,13 +291,18 @@ def _harvest1(self, r, f):
self._writeXml(f, id, m)
else:
assert False, "unhandled case"
+ _total += 1
self._flushFile(f)
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error writing file", e)
r.lastId = ids[-1].identifier
r.fileSize = f.tell()
r.save()
+ if self.terminated():
+ self.log.info("Terminated.")
+ else:
+ self.log.info("Total records exported: %s", _total)
def _harvest(self, r):
f = None
@@ -306,7 +313,7 @@ def _harvest(self, r):
f.seek(r.fileSize)
f.truncate()
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error re-opening/seeking/truncating file", e)
start = r.currentIndex
for i in range(r.currentIndex, len(r.toHarvest.split(","))):
@@ -320,7 +327,7 @@ def _harvest(self, r):
f.write("")
self._flushFile(f)
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error writing file footer", e)
r.stage = ezidapp.models.async_queue.DownloadQueue.COMPRESS
r.save()
@@ -371,7 +378,7 @@ def _compressFile(self, r):
p.returncode == 0 and stderr == b''
), f"compression command returned status code {p.returncode:d}, stderr '{stderr}'"
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error compressing file", e)
else:
r.stage = ezidapp.models.async_queue.DownloadQueue.DELETE
@@ -387,7 +394,7 @@ def _deleteUncompressedFile(self, r):
if os.path.exists(self._path(r, 1)):
os.unlink(self._path(r, 1))
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error deleting uncompressed file", e)
else:
r.stage = ezidapp.models.async_queue.DownloadQueue.MOVE
@@ -400,7 +407,7 @@ def _moveCompressedFile(self, r):
else:
assert os.path.exists(self._path(r, 3)), "file has disappeared"
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error moving compressed file", e)
else:
r.stage = ezidapp.models.async_queue.DownloadQueue.NOTIFY
@@ -414,7 +421,7 @@ def _notifyRequestor(self, r):
f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest.encode('utf-8')}\n"
)
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error writing sidecar file", e)
finally:
if f:
@@ -450,7 +457,7 @@ def _notifyRequestor(self, r):
fail_silently=True,
)
except Exception as e:
- log.exception('Exception')
+ self.log.exception('Exception')
raise self._wrapException("error sending email", e)
r.delete()
From 93e6c08fcbc2713174f8247fa69dee720f1f689d Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Thu, 16 Jun 2022 11:58:05 -0400
Subject: [PATCH 06/12] Adjust download logging, add typehints, fix bytes
output to sidecar file
---
ezidapp/management/commands/proc-download.py | 72 ++++++++++++--------
1 file changed, 42 insertions(+), 30 deletions(-)
diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py
index b263387c1..0d68f45c9 100644
--- a/ezidapp/management/commands/proc-download.py
+++ b/ezidapp/management/commands/proc-download.py
@@ -13,18 +13,19 @@
"""
import csv
-import logging
import os
import os.path
import pathlib
import re
import subprocess
import time
+import typing
import django.conf
import django.core.mail
import django.core.management
import django.db
+import django.db.models
import ezidapp.management.commands.proc_base
import ezidapp.models.async_queue
@@ -41,8 +42,6 @@
import impl.util
import impl.util2
-#log = logging.getLogger(__name__)
-
SUFFIX_FORMAT_DICT = {
ezidapp.models.async_queue.DownloadQueue.ANVL: "txt",
@@ -66,11 +65,12 @@ def run(self):
if doSleep:
self.sleep(django.conf.settings.DAEMONS_DOWNLOAD_PROCESSING_IDLE_SLEEP)
try:
- r = ezidapp.models.async_queue.DownloadQueue.objects.all().order_by("seq")[:1]
- if len(r) == 0:
+ rs = ezidapp.models.async_queue.DownloadQueue.objects.all().order_by("seq")[:1]
+ if len(rs) == 0:
+ # Queue is empty: sleep before polling again (no sleep while work remains)
doSleep = True
continue
- self._proc_stage(r)
+ self._proc_stage(rs)
self._remove_expired_files()
doSleep = False
except Exception as e:
@@ -78,8 +78,12 @@ def run(self):
impl.log.otherError("download.run", e)
doSleep = True
- def _proc_stage(self, r):
- r = r[0]
+ def _proc_stage(self, rs):
+ # rs is a list of ezidapp.models.async_queue.DownloadQueue
+ # Only process one download request at a time
+ # Once completed, current is deleted, so the
+ # next one becomes index 0
+ r = rs[0]
if r.stage == ezidapp.models.async_queue.DownloadQueue.CREATE:
self._createFile(r)
elif r.stage == ezidapp.models.async_queue.DownloadQueue.HARVEST:
@@ -122,7 +126,7 @@ def _wrapException(self, context, exception):
m = ": " + m
return Exception(f"batch download error: {context}: {type(exception).__name__}{m}")
- def _path(self, r, i):
+ def _path(self, r:ezidapp.models.async_queue.DownloadQueue, i:int):
# i=1: uncompressed work file
# i=2: compressed work file
# i=3: compressed delivery file
@@ -142,11 +146,11 @@ def _path(self, r, i):
def _csvEncode(self, s):
return impl.util.oneLine(s).encode("utf-8")
- def _flushFile(self, f):
+ def _flushFile(self, f:typing.TextIO):
f.flush()
os.fsync(f.fileno())
- def _createFile(self, r):
+ def _createFile(self, r:ezidapp.models.async_queue.DownloadQueue):
f = None
self.log.debug("createFile: %s", self._path(r, 1))
try:
@@ -214,9 +218,9 @@ def _satisfiesConstraints(self, id_model, constraints):
def _prepareMetadata(
self,
- id_model: ezidapp.models.identifier.Identifier,
+ id_model: ezidapp.models.identifier.SearchIdentifier,
convertTimestamps: object,
- ) -> object:
+ ) -> dict:
d = id_model.toLegacy()
ezidapp.models.model_util.convertLegacyToExternal(d)
if id_model.isDoi:
@@ -226,13 +230,13 @@ def _prepareMetadata(
d["_updated"] = impl.util.formatTimestampZulu(int(d["_updated"]))
return d
- def _writeAnvl(self, f, id_model, metadata):
+ def _writeAnvl(self, f:typing.TextIO, id_model:ezidapp.models.identifier.SearchIdentifier, metadata:dict):
if f.tell() > 0:
f.write("\n")
f.write(f":: {id_model.identifier}\n")
f.write(impl.anvl.format(metadata).encode("utf-8"))
- def _writeCsv(self, f, columns, id_model, metadata):
+ def _writeCsv(self, f:typing.TextIO, columns, id_model:ezidapp.models.identifier.SearchIdentifier, metadata:dict):
w = csv.writer(f)
l = []
for c in columns:
@@ -252,7 +256,7 @@ def _writeCsv(self, f, columns, id_model, metadata):
l.append(metadata.get(c, ""))
w.writerow([self._csvEncode(c).decode('utf-8', errors='replace') for c in l])
- def _writeXml(self, f, id, metadata):
+ def _writeXml(self, f:typing.TextIO, id:ezidapp.models.identifier.SearchIdentifier, metadata:dict):
f.write(f'')
for k, v in list(metadata.items()):
if k in ["datacite", "crossref"]:
@@ -262,7 +266,7 @@ def _writeXml(self, f, id, metadata):
f.write(f'{v}')
f.write("")
- def _harvest1(self, r, f):
+ def _harvest1(self, r:ezidapp.models.async_queue.DownloadQueue, f:typing.TextIO):
columns = self._decode(r.columns)
constraints = self._decode(r.constraints)
options = self._decode(r.options)
@@ -274,9 +278,9 @@ def _harvest1(self, r, f):
.select_related("owner", "ownergroup", "datacenter", "profile")
.order_by("identifier")
)
- self.log.debug("Query issued: %s", str(qs.query))
+ #self.log.debug("Query issued: %s", str(qs.query))
ids = list(qs[:1000])
- self.log.debug("Matches = %s", len(ids))
+ self.log.debug("Total query matches: %s", len(ids))
if len(ids) == 0:
break
try:
@@ -300,11 +304,11 @@ def _harvest1(self, r, f):
r.fileSize = f.tell()
r.save()
if self.terminated():
- self.log.info("Terminated.")
+ self.log.warning("Harvest terminated.")
else:
self.log.info("Total records exported: %s", _total)
- def _harvest(self, r):
+ def _harvest(self, r:ezidapp.models.async_queue.DownloadQueue):
f = None
try:
try:
@@ -335,7 +339,7 @@ def _harvest(self, r):
if f:
f.close()
- def _compressFile(self, r):
+ def _compressFile(self, r:ezidapp.models.async_queue.DownloadQueue):
infile = None
outfile = None
try:
@@ -389,7 +393,7 @@ def _compressFile(self, r):
if outfile:
outfile.close()
- def _deleteUncompressedFile(self, r):
+ def _deleteUncompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue):
try:
if os.path.exists(self._path(r, 1)):
os.unlink(self._path(r, 1))
@@ -400,7 +404,7 @@ def _deleteUncompressedFile(self, r):
r.stage = ezidapp.models.async_queue.DownloadQueue.MOVE
r.save()
- def _moveCompressedFile(self, r):
+ def _moveCompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue):
try:
if os.path.exists(self._path(r, 2)):
os.rename(self._path(r, 2), self._path(r, 3))
@@ -413,12 +417,12 @@ def _moveCompressedFile(self, r):
r.stage = ezidapp.models.async_queue.DownloadQueue.NOTIFY
r.save()
- def _notifyRequestor(self, r):
+ def _notifyRequestor(self, r:ezidapp.models.async_queue.DownloadQueue):
f = None
try:
- f = open(self._path(r, 4), "w")
+ f = open(self._path(r, 4), mode="w", encoding="utf-8")
f.write(
- f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest.encode('utf-8')}\n"
+ f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest}\n"
)
except Exception as e:
self.log.exception('Exception')
@@ -461,23 +465,31 @@ def _notifyRequestor(self, r):
raise self._wrapException("error sending email", e)
r.delete()
- def _unescape(self, s):
+ def _unescape(self, s:str)->str:
return re.sub("%([0-9A-F][0-9A-F])", lambda m: chr(int(m.group(1), 16)), s)
- def _decode(self, s):
+ def _decode(self, s:str):
+ '''
+ Decodes DownloadQueue.constraint
+ '''
if s[0] == "B":
+ # boolean
return s[1:] == "True"
elif s[0] == "I":
+ # integer
return int(s[1:])
elif s[0] == "S":
+ # string
return s[1:]
elif s[0] == "L":
+ # list, from comma separated string of constraints
if len(s) > 1:
return [self._decode(self._unescape(i)) for i in s[1:].split(",")]
else:
return []
elif s[0] == "D":
+ # dict, from comma separated list of k=v
if len(s) > 1:
return dict(
list(
@@ -495,7 +507,7 @@ def _decode(self, s):
assert False, "unhandled case"
- def _fileSuffix(self, r):
+ def _fileSuffix(self, r:ezidapp.models.async_queue.DownloadQueue):
if r.compression == ezidapp.models.async_queue.DownloadQueue.GZIP:
return SUFFIX_FORMAT_DICT[r.format] + ".gz"
else:
From 41316357b552660f787b8788852ec050a4867fcb Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Thu, 16 Jun 2022 12:03:34 -0400
Subject: [PATCH 07/12] remove unused import
---
ezidapp/management/commands/proc-download.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py
index 0d68f45c9..7be9865e9 100644
--- a/ezidapp/management/commands/proc-download.py
+++ b/ezidapp/management/commands/proc-download.py
@@ -25,7 +25,6 @@
import django.core.mail
import django.core.management
import django.db
-import django.db.models
import ezidapp.management.commands.proc_base
import ezidapp.models.async_queue
From e60b124b512900ba0607586292571d51ba6e177f Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Thu, 16 Jun 2022 12:04:10 -0400
Subject: [PATCH 08/12] black formatting
---
ezidapp/management/commands/proc-download.py | 49 +++++++++++---------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/ezidapp/management/commands/proc-download.py b/ezidapp/management/commands/proc-download.py
index 7be9865e9..9038fe8ea 100644
--- a/ezidapp/management/commands/proc-download.py
+++ b/ezidapp/management/commands/proc-download.py
@@ -48,6 +48,7 @@
ezidapp.models.async_queue.DownloadQueue.XML: "xml",
}
+
class Command(ezidapp.management.commands.proc_base.AsyncProcessingCommand):
help = __doc__
name = __name__
@@ -125,7 +126,7 @@ def _wrapException(self, context, exception):
m = ": " + m
return Exception(f"batch download error: {context}: {type(exception).__name__}{m}")
- def _path(self, r:ezidapp.models.async_queue.DownloadQueue, i:int):
+ def _path(self, r: ezidapp.models.async_queue.DownloadQueue, i: int):
# i=1: uncompressed work file
# i=2: compressed work file
# i=3: compressed delivery file
@@ -145,11 +146,11 @@ def _path(self, r:ezidapp.models.async_queue.DownloadQueue, i:int):
def _csvEncode(self, s):
return impl.util.oneLine(s).encode("utf-8")
- def _flushFile(self, f:typing.TextIO):
+ def _flushFile(self, f: typing.TextIO):
f.flush()
os.fsync(f.fileno())
- def _createFile(self, r:ezidapp.models.async_queue.DownloadQueue):
+ def _createFile(self, r: ezidapp.models.async_queue.DownloadQueue):
f = None
self.log.debug("createFile: %s", self._path(r, 1))
try:
@@ -229,13 +230,21 @@ def _prepareMetadata(
d["_updated"] = impl.util.formatTimestampZulu(int(d["_updated"]))
return d
- def _writeAnvl(self, f:typing.TextIO, id_model:ezidapp.models.identifier.SearchIdentifier, metadata:dict):
+ def _writeAnvl(
+ self, f: typing.TextIO, id_model: ezidapp.models.identifier.SearchIdentifier, metadata: dict
+ ):
if f.tell() > 0:
f.write("\n")
f.write(f":: {id_model.identifier}\n")
f.write(impl.anvl.format(metadata).encode("utf-8"))
- def _writeCsv(self, f:typing.TextIO, columns, id_model:ezidapp.models.identifier.SearchIdentifier, metadata:dict):
+ def _writeCsv(
+ self,
+ f: typing.TextIO,
+ columns,
+ id_model: ezidapp.models.identifier.SearchIdentifier,
+ metadata: dict,
+ ):
w = csv.writer(f)
l = []
for c in columns:
@@ -255,7 +264,9 @@ def _writeCsv(self, f:typing.TextIO, columns, id_model:ezidapp.models.identifier
l.append(metadata.get(c, ""))
w.writerow([self._csvEncode(c).decode('utf-8', errors='replace') for c in l])
- def _writeXml(self, f:typing.TextIO, id:ezidapp.models.identifier.SearchIdentifier, metadata:dict):
+ def _writeXml(
+ self, f: typing.TextIO, id: ezidapp.models.identifier.SearchIdentifier, metadata: dict
+ ):
f.write(f'')
for k, v in list(metadata.items()):
if k in ["datacite", "crossref"]:
@@ -265,7 +276,7 @@ def _writeXml(self, f:typing.TextIO, id:ezidapp.models.identifier.SearchIdentifi
f.write(f'{v}')
f.write("")
- def _harvest1(self, r:ezidapp.models.async_queue.DownloadQueue, f:typing.TextIO):
+ def _harvest1(self, r: ezidapp.models.async_queue.DownloadQueue, f: typing.TextIO):
columns = self._decode(r.columns)
constraints = self._decode(r.constraints)
options = self._decode(r.options)
@@ -277,7 +288,7 @@ def _harvest1(self, r:ezidapp.models.async_queue.DownloadQueue, f:typing.TextIO)
.select_related("owner", "ownergroup", "datacenter", "profile")
.order_by("identifier")
)
- #self.log.debug("Query issued: %s", str(qs.query))
+ # self.log.debug("Query issued: %s", str(qs.query))
ids = list(qs[:1000])
self.log.debug("Total query matches: %s", len(ids))
if len(ids) == 0:
@@ -307,7 +318,7 @@ def _harvest1(self, r:ezidapp.models.async_queue.DownloadQueue, f:typing.TextIO)
else:
self.log.info("Total records exported: %s", _total)
- def _harvest(self, r:ezidapp.models.async_queue.DownloadQueue):
+ def _harvest(self, r: ezidapp.models.async_queue.DownloadQueue):
f = None
try:
try:
@@ -338,7 +349,7 @@ def _harvest(self, r:ezidapp.models.async_queue.DownloadQueue):
if f:
f.close()
- def _compressFile(self, r:ezidapp.models.async_queue.DownloadQueue):
+ def _compressFile(self, r: ezidapp.models.async_queue.DownloadQueue):
infile = None
outfile = None
try:
@@ -392,7 +403,7 @@ def _compressFile(self, r:ezidapp.models.async_queue.DownloadQueue):
if outfile:
outfile.close()
- def _deleteUncompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue):
+ def _deleteUncompressedFile(self, r: ezidapp.models.async_queue.DownloadQueue):
try:
if os.path.exists(self._path(r, 1)):
os.unlink(self._path(r, 1))
@@ -403,7 +414,7 @@ def _deleteUncompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue):
r.stage = ezidapp.models.async_queue.DownloadQueue.MOVE
r.save()
- def _moveCompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue):
+ def _moveCompressedFile(self, r: ezidapp.models.async_queue.DownloadQueue):
try:
if os.path.exists(self._path(r, 2)):
os.rename(self._path(r, 2), self._path(r, 3))
@@ -416,13 +427,11 @@ def _moveCompressedFile(self, r:ezidapp.models.async_queue.DownloadQueue):
r.stage = ezidapp.models.async_queue.DownloadQueue.NOTIFY
r.save()
- def _notifyRequestor(self, r:ezidapp.models.async_queue.DownloadQueue):
+ def _notifyRequestor(self, r: ezidapp.models.async_queue.DownloadQueue):
f = None
try:
f = open(self._path(r, 4), mode="w", encoding="utf-8")
- f.write(
- f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest}\n"
- )
+ f.write(f"{ezidapp.models.util.getUserByPid(r.requestor).username}\n{r.rawRequest}\n")
except Exception as e:
self.log.exception('Exception')
raise self._wrapException("error writing sidecar file", e)
@@ -464,11 +473,10 @@ def _notifyRequestor(self, r:ezidapp.models.async_queue.DownloadQueue):
raise self._wrapException("error sending email", e)
r.delete()
- def _unescape(self, s:str)->str:
+ def _unescape(self, s: str) -> str:
return re.sub("%([0-9A-F][0-9A-F])", lambda m: chr(int(m.group(1), 16)), s)
-
- def _decode(self, s:str):
+ def _decode(self, s: str):
'''
Decodes DownloadQueue.constraint
'''
@@ -505,8 +513,7 @@ def _decode(self, s:str):
else:
assert False, "unhandled case"
-
- def _fileSuffix(self, r:ezidapp.models.async_queue.DownloadQueue):
+ def _fileSuffix(self, r: ezidapp.models.async_queue.DownloadQueue):
if r.compression == ezidapp.models.async_queue.DownloadQueue.GZIP:
return SUFFIX_FORMAT_DICT[r.format] + ".gz"
else:
From f8c9af74f9eba559bab855770c0a536fc9f285ef Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Wed, 22 Jun 2022 15:58:16 -0400
Subject: [PATCH 09/12] Adding minimal testing for data validation
---
tests/test_docs/datacite_metadata_01.txt | 7 +++++++
tests/test_validation.py | 19 +++++++++++++++++++
2 files changed, 26 insertions(+)
create mode 100644 tests/test_docs/datacite_metadata_01.txt
create mode 100644 tests/test_validation.py
diff --git a/tests/test_docs/datacite_metadata_01.txt b/tests/test_docs/datacite_metadata_01.txt
new file mode 100644
index 000000000..af3ee31c6
--- /dev/null
+++ b/tests/test_docs/datacite_metadata_01.txt
@@ -0,0 +1,7 @@
+# For input to client testing datacite minting
+# client.py l admin mint doi:10.5072/FK2 @datacite_metadata_01.txt
+datacite.creator: Dave
+datacite.title: Test doc
+datacite.publicationyear: 1961
+datacite.resourcetype: Event
+datacite.publisher: Tester
diff --git a/tests/test_validation.py b/tests/test_validation.py
new file mode 100644
index 000000000..f1f2ca232
--- /dev/null
+++ b/tests/test_validation.py
@@ -0,0 +1,19 @@
+# Copyright©2021, Regents of the University of California
+# http://creativecommons.org/licenses/BSD
+
+"""Test ezidapp.models.validation
+"""
+
+import pytest
+
+import ezidapp.models.validation
+
+#@pytest.mark.django_db
+@pytest.mark.parametrize("test,expected",[
+ ('1961', '1961'),
+ ('196104','1961-04'),
+ ('20201201', '2020-12-01'),
+])
+def test_publicationDate(test, expected):
+ res = ezidapp.models.validation.publicationDate(test)
+ assert res == expected
From 585209cbc1607ca684d37602439acefc5a2136f3 Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Wed, 22 Jun 2022 15:59:37 -0400
Subject: [PATCH 10/12] A tuple is not a valid argument for the specified format
string
---
ezidapp/models/validation.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ezidapp/models/validation.py b/ezidapp/models/validation.py
index 7f6b93c09..74e64c2be 100644
--- a/ezidapp/models/validation.py
+++ b/ezidapp/models/validation.py
@@ -114,7 +114,7 @@ def publicationDate(date):
# return ("%04d", "%04d-%02d", "%04d-%02d-%02d")[numComponents - 1] % t[:numComponents]
return ("{:04d}", "{:04d}-{:02d}", "{:04d}-{:02d}-{:02d}")[
numComponents - 1
- ].format(t[:numComponents])
+ ].format(*t[:numComponents])
except Exception:
pass
raise django.core.exceptions.ValidationError(
From 6f3a63ecd085ba5626945310225536349aafed4e Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Wed, 22 Jun 2022 16:58:01 -0400
Subject: [PATCH 11/12] added reminder to flesh out this test case
---
tests/test_validation.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/test_validation.py b/tests/test_validation.py
index f1f2ca232..1fdcfd6ef 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -8,7 +8,7 @@
import ezidapp.models.validation
-#@pytest.mark.django_db
+#TODO: Flesh out the test cases to match all the possibilities in the tested method
@pytest.mark.parametrize("test,expected",[
('1961', '1961'),
('196104','1961-04'),
@@ -17,3 +17,4 @@
def test_publicationDate(test, expected):
res = ezidapp.models.validation.publicationDate(test)
assert res == expected
+
From 7be309694205f2453bd0bfb5f3eef0d595c7e3a9 Mon Sep 17 00:00:00 2001
From: datadavev <605409+datadavev@users.noreply.github.com>
Date: Wed, 22 Jun 2022 17:01:48 -0400
Subject: [PATCH 12/12] Make import at module level
---
impl/mapping.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/impl/mapping.py b/impl/mapping.py
index a0125abe5..b0be56860 100644
--- a/impl/mapping.py
+++ b/impl/mapping.py
@@ -21,7 +21,7 @@
import re
-# import ezidapp.models.validation
+import ezidapp.models.validation
import impl.datacite
import impl.erc
import impl.util
@@ -52,8 +52,8 @@ def __init__(
def validatedDate(self):
if self.date is not None:
try:
- import ezidapp.models.validation
-
+ #2022-06-22 Not clear why this import was within the method instead of module level
+ #import ezidapp.models.validation
return ezidapp.models.validation.publicationDate(self.date)
except Exception:
return None