From 9a4196081958893b2545c95dd0b4afaa6d69ef93 Mon Sep 17 00:00:00 2001 From: sfisher Date: Thu, 3 Oct 2024 12:48:22 -0700 Subject: [PATCH 1/2] Updates the SearchIdentifier updateTime (and OpenSearch update_time) from link checker. 1) only updates if changed, 2) now OS update takes the time, 3) updated test. --- .../commands/proc-link-checker-update.py | 17 ++++++++++++++--- impl/open_search_doc.py | 6 ++++-- tests/test_open_search_doc.py | 5 +++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/ezidapp/management/commands/proc-link-checker-update.py b/ezidapp/management/commands/proc-link-checker-update.py index 2cefc3eb..ee312800 100644 --- a/ezidapp/management/commands/proc-link-checker-update.py +++ b/ezidapp/management/commands/proc-link-checker-update.py @@ -18,6 +18,7 @@ import impl.log from impl.open_search_doc import OpenSearchDoc import opensearchpy.exceptions +import time log = logging.getLogger(__name__) @@ -96,11 +97,21 @@ def run(self): si2 = ezidapp.models.identifier.SearchIdentifier.objects.get( identifier=si.identifier ) + # get before values for OpenSearchDoc for link/issues + before_link_is_broken = si2.linkIsBroken + before_has_issues = si2.hasIssues + si2.linkIsBroken = newValue si2.computeHasIssues() - si2.save(update_fields=["linkIsBroken", "hasIssues"]) - open_s = OpenSearchDoc(identifier=si2) - open_s.update_link_issues(link_is_broken=si2.linkIsBroken, has_issues=si2.hasIssues) + + # only update if the record is "dirty" and the values have changed + if before_link_is_broken != si2.linkIsBroken or before_has_issues != si2.hasIssues: + si2.updateTime = int(time.time()) + si2.save(update_fields=["updateTime", "linkIsBroken", "hasIssues"]) + open_s = OpenSearchDoc(identifier=si2) + open_s.update_link_issues(link_is_broken=si2.linkIsBroken, + has_issues=si2.hasIssues, + update_time=si2.updateTime) except ezidapp.models.identifier.SearchIdentifier.DoesNotExist: log.exception('SearchIdentifier.DoesNotExist') except 
opensearchpy.exceptions.OpenSearchException as e: diff --git a/impl/open_search_doc.py b/impl/open_search_doc.py index 27fb8b1d..cd320c2d 100644 --- a/impl/open_search_doc.py +++ b/impl/open_search_doc.py @@ -18,6 +18,7 @@ from opensearchpy.exceptions import NotFoundError from django.conf import settings import urllib +import time # the functools allows memoizing the results of functions, so they're not recalculated every time (ie cached # results if called more than once on the same instance) @@ -138,10 +139,11 @@ def remove_from_index(self): return True return False - def update_link_issues(self, link_is_broken=False, has_issues=False): + # Note that this time is passed in as an integer, but it's converted to an iso datetime for opensearch + def update_link_issues(self, link_is_broken=False, has_issues=False, update_time=int(time.time())): dict_to_update = { 'open_search_updated': datetime.datetime.now().isoformat(), - 'update_time': datetime.datetime.now().isoformat(), + 'update_time': datetime.datetime.utcfromtimestamp(update_time).isoformat(), 'link_is_broken': link_is_broken, 'has_issues': has_issues } diff --git a/tests/test_open_search_doc.py b/tests/test_open_search_doc.py index 28a9a5df..e021a888 100644 --- a/tests/test_open_search_doc.py +++ b/tests/test_open_search_doc.py @@ -272,7 +272,7 @@ def test_update_link_issues(mock_client, open_search_doc): mock_client.update.return_value = mock_response # Act - result = open_search_doc.update_link_issues(link_is_broken=True, has_issues=True) + result = open_search_doc.update_link_issues(link_is_broken=True, has_issues=True, update_time=1727984570) # Assert mock_client.update.assert_called_once_with( @@ -282,7 +282,8 @@ def test_update_link_issues(mock_client, open_search_doc): 'open_search_updated': ANY, # Use unittest.mock.ANY if the exact value doesn't matter 'update_time': ANY, 'link_is_broken': True, - 'has_issues': True + 'has_issues': True, + 'update_time': '2024-10-03T19:42:50' }} ) assert result is 
True From 215b35c5809d7fbe7abf8c7916858fb939fb9a90 Mon Sep 17 00:00:00 2001 From: sfisher Date: Tue, 8 Oct 2024 13:40:56 -0700 Subject: [PATCH 2/2] Removed conditional update, only when values changed. The current code *might* do this, logic is unclear. --- .../commands/proc-link-checker-update.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/ezidapp/management/commands/proc-link-checker-update.py b/ezidapp/management/commands/proc-link-checker-update.py index ee312800..42d51ccd 100644 --- a/ezidapp/management/commands/proc-link-checker-update.py +++ b/ezidapp/management/commands/proc-link-checker-update.py @@ -97,21 +97,15 @@ def run(self): si2 = ezidapp.models.identifier.SearchIdentifier.objects.get( identifier=si.identifier ) - # get before values for OpenSearchDoc for link/issues - before_link_is_broken = si2.linkIsBroken - before_has_issues = si2.hasIssues - si2.linkIsBroken = newValue si2.computeHasIssues() - # only update if the record is "dirty" and the values have changed - if before_link_is_broken != si2.linkIsBroken or before_has_issues != si2.hasIssues: - si2.updateTime = int(time.time()) - si2.save(update_fields=["updateTime", "linkIsBroken", "hasIssues"]) - open_s = OpenSearchDoc(identifier=si2) - open_s.update_link_issues(link_is_broken=si2.linkIsBroken, - has_issues=si2.hasIssues, - update_time=si2.updateTime) + si2.updateTime = int(time.time()) + si2.save(update_fields=["updateTime", "linkIsBroken", "hasIssues"]) + open_s = OpenSearchDoc(identifier=si2) + open_s.update_link_issues(link_is_broken=si2.linkIsBroken, + has_issues=si2.hasIssues, + update_time=si2.updateTime) except ezidapp.models.identifier.SearchIdentifier.DoesNotExist: log.exception('SearchIdentifier.DoesNotExist') except opensearchpy.exceptions.OpenSearchException as e: