From 5d5bdb5af860940505e33a42e638ce3c2a8bac42 Mon Sep 17 00:00:00 2001
From: Benjamin Pritchard
Date: Mon, 6 May 2024 20:10:02 -0400
Subject: [PATCH] Add periodics job to delete old access logs

---
 .../qcarchivetesting/testing_classes.py       |  1 +
 .../qcfractal/components/serverinfo/socket.py | 40 +++++++++++++++
 qcfractal/qcfractal/config.py                 |  5 +-
 qcfractal/qcfractal/test_periodics.py         | 49 +++++++++++++++++++
 4 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/qcarchivetesting/qcarchivetesting/testing_classes.py b/qcarchivetesting/qcarchivetesting/testing_classes.py
index dbf290296..12299658b 100644
--- a/qcarchivetesting/qcarchivetesting/testing_classes.py
+++ b/qcarchivetesting/qcarchivetesting/testing_classes.py
@@ -146,6 +146,7 @@ def __init__(
         qcf_config["database"] = {"pool_size": 0}
         qcf_config["log_access"] = log_access
+        qcf_config["access_log_keep"] = 1
 
         if ip_tests_enabled:
             qcf_config["geoip2_dir"] = geoip_path
diff --git a/qcfractal/qcfractal/components/serverinfo/socket.py b/qcfractal/qcfractal/components/serverinfo/socket.py
index 682cd865f..bfbcb1912 100644
--- a/qcfractal/qcfractal/components/serverinfo/socket.py
+++ b/qcfractal/qcfractal/components/serverinfo/socket.py
@@ -58,6 +58,8 @@ def __init__(self, root_socket: SQLAlchemySocket):
 
         # Set up access logging
         self._access_log_enabled = root_socket.qcf_config.log_access
+        self._delete_access_log_frequency = 60 * 60 * 24  # one day
+        self._access_log_keep = root_socket.qcf_config.access_log_keep
         self._geoip2_enabled = geoip2_found and self._access_log_enabled
 
         # MOTD contents
@@ -78,6 +80,31 @@ def __init__(self, root_socket: SQLAlchemySocket):
             # Updating the access log with geolocation info. Don't do it right at startup
             self.add_internal_job_geolocate_accesses(self._geolocate_accesses_frequency)
 
+        # Deleting old access logs. Wait a bit after startup
+        self.add_internal_job_delete_accesses(2.0)
+
+    def add_internal_job_delete_accesses(self, delay: float, *, session: Optional[Session] = None):
+        """
+        Adds an internal job to delete old access log entries
+        """
+
+        # Only add this if we are going to delete some
+        if self._access_log_keep <= 0 or not self._access_log_enabled:
+            return
+
+        with self.root_socket.optional_session(session) as session:
+            self.root_socket.internal_jobs.add(
+                "delete_access_log",
+                now_at_utc() + timedelta(seconds=delay),
+                "serverinfo.delete_old_access_logs",
+                {},
+                user_id=None,
+                unique_name=True,
+                after_function="serverinfo.add_internal_job_delete_accesses",
+                after_function_kwargs={"delay": self._delete_access_log_frequency},  # wait one day
+                session=session,
+            )
+
     def add_internal_job_update_geoip2(self, delay: float, *, session: Optional[Session] = None):
         """
         Adds an internal job to update the geoip database
@@ -275,6 +302,19 @@ def _lookup_ip(ip_address: str) -> Dict[str, Any]:
 
         session.commit()
 
+    def delete_old_access_logs(self, session: Session, job_progress: JobProgress) -> None:
+        """
+        Deletes old access logs (as defined by the configuration)
+        """
+
+        # we check when adding the job, but double check here
+        if self._access_log_keep <= 0 or not self._access_log_enabled:
+            return
+
+        before = now_at_utc() - timedelta(days=self._access_log_keep)
+        num_deleted = self.delete_access_logs(before, session=session)
+        self._logger.info(f"Deleted {num_deleted} access logs before {before}")
+
     def _load_motd(self, *, session: Optional[Session] = None):
         stmt = select(MessageOfTheDayORM).order_by(MessageOfTheDayORM.id)
         with self.root_socket.optional_session(session, True) as session:
diff --git a/qcfractal/qcfractal/config.py b/qcfractal/qcfractal/config.py
index c25b55176..abc88b1dc 100644
--- a/qcfractal/qcfractal/config.py
+++ b/qcfractal/qcfractal/config.py
@@ -369,6 +369,7 @@ class FractalConfig(ConfigBase):
 
     # Access logging
     log_access: bool = Field(False, description="Store API access in the database")
+    access_log_keep: int = Field(0, description="Number of days of access logs to keep. 0 means keep all")
 
     # maxmind_account_id: Optional[int] = Field(None, description="Account ID for MaxMind GeoIP2 service")
     maxmind_license_key: Optional[str] = Field(
@@ -416,8 +417,8 @@ def _root_validator(cls, values):
             values.pop("statistics_frequency")
             logger.warning("The 'statistics_frequency' setting is no longer and is now ignored")
 
-        if "get_server_stats" in values['api_limits']:
-            values['api_limits'].pop("get_server_stats")
+        if "get_server_stats" in values["api_limits"]:
+            values["api_limits"].pop("get_server_stats")
             logger.warning("The 'get_server_stats' setting in 'api_limits' is no longer and is now ignored")
 
         return values
diff --git a/qcfractal/qcfractal/test_periodics.py b/qcfractal/qcfractal/test_periodics.py
index 1deaa9329..1d022e014 100644
--- a/qcfractal/qcfractal/test_periodics.py
+++ b/qcfractal/qcfractal/test_periodics.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import time
+from datetime import timedelta
 from typing import TYPE_CHECKING
 
 import pytest
@@ -9,6 +10,8 @@
 from qcfractal.components.torsiondrive.testing_helpers import submit_test_data as submit_td_test_data
 from qcportal.managers import ManagerName, ManagerStatusEnum
 from qcportal.record_models import RecordStatusEnum
+from qcportal.serverinfo.models import AccessLogQueryFilters
+from qcportal.utils import now_at_utc
 
 if TYPE_CHECKING:
     from qcarchivetesting.testing_classes import QCATestingSnowflake
@@ -70,3 +73,49 @@ def test_periodics_service_iteration(snowflake: QCATestingSnowflake):
     rec = storage_socket.records.get([id_1, id_2])
     assert rec[0]["status"] == RecordStatusEnum.running
     assert rec[1]["status"] == RecordStatusEnum.running
+
+
+def test_periodics_delete_old_access_logs(secure_snowflake: QCATestingSnowflake):
+    storage_socket = secure_snowflake.get_storage_socket()
+
+    read_id = storage_socket.users.get("read_user")["id"]
+
+    access1 = {
+        "module": "api",
+        "method": "GET",
+        "full_uri": "/api/v1/datasets",
+        "ip_address": "127.0.0.1",
+        "user_agent": "Fake user agent",
+        "request_duration": 0.24,
+        "user_id": read_id,
+        "request_bytes": 123,
+        "response_bytes": 18273,
+        "timestamp": now_at_utc() - timedelta(days=2),
+    }
+
+    access2 = {
+        "module": "api",
+        "method": "POST",
+        "full_uri": "/api/v1/records",
+        "ip_address": "127.0.0.2",
+        "user_agent": "Fake user agent",
+        "request_duration": 0.45,
+        "user_id": read_id,
+        "request_bytes": 456,
+        "response_bytes": 12671,
+        "timestamp": now_at_utc(),
+    }
+
+    storage_socket.serverinfo.save_access(access1)
+    storage_socket.serverinfo.save_access(access2)
+
+    accesses = storage_socket.serverinfo.query_access_log(AccessLogQueryFilters())
+    n_access = len(accesses)
+
+    secure_snowflake.start_job_runner()
+    # There's a set delay at startup before we delete old logs
+    time.sleep(5.0)
+
+    # we only removed the really "old" (manually added) one
+    accesses = storage_socket.serverinfo.query_access_log(AccessLogQueryFilters())
+    assert len(accesses) == n_access - 1
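
For reference, the retention rule this patch introduces can be summarized with a short standalone sketch. This is not part of the commit: the helper name access_log_cutoff and the use of datetime.now(timezone.utc) in place of qcportal's now_at_utc are illustrative assumptions; the cutoff mirrors the logic in delete_old_access_logs, where entries older than access_log_keep days are removed and a value of 0 (the default) keeps everything.

# Minimal sketch (not part of the patch) of the retention cutoff, assuming
# access_log_keep is given in days and 0 disables deletion entirely.
from datetime import datetime, timedelta, timezone
from typing import Optional


def access_log_cutoff(access_log_keep: int) -> Optional[datetime]:
    """Return the timestamp before which access log entries would be deleted."""
    if access_log_keep <= 0:
        return None  # deletion disabled; keep all access logs
    return datetime.now(timezone.utc) - timedelta(days=access_log_keep)


# Example: with access_log_keep = 1 (as in the testing config above), an entry
# stamped two days ago falls before the cutoff and is eligible for deletion.
print(access_log_cutoff(1))

Because the internal job reschedules itself through after_function with a one-day delay, this cutoff is re-evaluated roughly once per day while the server is running.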