Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add internal job to delete old access log entries #831

Merged
merged 1 commit into the base branch on
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions qcarchivetesting/qcarchivetesting/testing_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def __init__(

qcf_config["database"] = {"pool_size": 0}
qcf_config["log_access"] = log_access
qcf_config["access_log_keep"] = 1

if ip_tests_enabled:
qcf_config["geoip2_dir"] = geoip_path
Expand Down
40 changes: 40 additions & 0 deletions qcfractal/qcfractal/components/serverinfo/socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def __init__(self, root_socket: SQLAlchemySocket):

# Set up access logging
self._access_log_enabled = root_socket.qcf_config.log_access
self._delete_access_log_frequency = 60 * 60 * 24 # one day
self._access_log_keep = root_socket.qcf_config.access_log_keep
self._geoip2_enabled = geoip2_found and self._access_log_enabled

# MOTD contents
Expand All @@ -78,6 +80,31 @@ def __init__(self, root_socket: SQLAlchemySocket):
# Updating the access log with geolocation info. Don't do it right at startup
self.add_internal_job_geolocate_accesses(self._geolocate_accesses_frequency)

# Deleting old access logs. Wait a bit after startup
self.add_internal_job_delete_accesses(2.0)

def add_internal_job_delete_accesses(self, delay: float, *, session: Optional[Session] = None):
    """
    Schedule an internal job that purges old access log entries.

    The job is only scheduled when access logging is enabled and a positive
    retention period (in days) has been configured; otherwise this is a no-op.

    Parameters
    ----------
    delay
        Schedule the job to run this many seconds from now
    session
        An existing SQLAlchemy session to use. If None, one will be created
    """

    # Nothing to schedule unless logging is on and a retention window is set
    keep_days = self._access_log_keep
    if not self._access_log_enabled or keep_days <= 0:
        return

    first_run = now_at_utc() + timedelta(seconds=delay)

    with self.root_socket.optional_session(session) as ses:
        self.root_socket.internal_jobs.add(
            "delete_access_log",
            first_run,
            "serverinfo.delete_old_access_logs",
            {},
            user_id=None,
            unique_name=True,
            # The job re-schedules itself so the cleanup repeats (one day between runs)
            after_function="serverinfo.add_internal_job_delete_accesses",
            after_function_kwargs={"delay": self._delete_access_log_frequency},
            session=ses,
        )

def add_internal_job_update_geoip2(self, delay: float, *, session: Optional[Session] = None):
"""
Adds an internal job to update the geoip database
Expand Down Expand Up @@ -275,6 +302,19 @@ def _lookup_ip(ip_address: str) -> Dict[str, Any]:

session.commit()

def delete_old_access_logs(self, session: Session, job_progress: JobProgress) -> None:
    """
    Remove access log entries older than the configured retention period.

    The cutoff is ``access_log_keep`` days before now. If access logging is
    disabled or the retention period is not positive, nothing is deleted.

    Parameters
    ----------
    session
        SQLAlchemy session supplied by the internal job runner
    job_progress
        Progress handle for the internal job (not used here)
    """

    # The scheduling side already checks this, but stay defensive in case the
    # configuration changed after the job was queued
    keep_days = self._access_log_keep
    if not self._access_log_enabled or keep_days <= 0:
        return

    before = now_at_utc() - timedelta(days=keep_days)
    num_deleted = self.delete_access_logs(before, session=session)
    self._logger.info(f"Deleted {num_deleted} access logs before {before}")

def _load_motd(self, *, session: Optional[Session] = None):
stmt = select(MessageOfTheDayORM).order_by(MessageOfTheDayORM.id)
with self.root_socket.optional_session(session, True) as session:
Expand Down
5 changes: 3 additions & 2 deletions qcfractal/qcfractal/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ class FractalConfig(ConfigBase):

# Access logging
log_access: bool = Field(False, description="Store API access in the database")
access_log_keep: int = Field(0, description="Number of days of access logs to keep. 0 means keep all")

# maxmind_account_id: Optional[int] = Field(None, description="Account ID for MaxMind GeoIP2 service")
maxmind_license_key: Optional[str] = Field(
Expand Down Expand Up @@ -416,8 +417,8 @@ def _root_validator(cls, values):
values.pop("statistics_frequency")
logger.warning("The 'statistics_frequency' setting is no longer and is now ignored")

if "get_server_stats" in values['api_limits']:
values['api_limits'].pop("get_server_stats")
if "get_server_stats" in values["api_limits"]:
values["api_limits"].pop("get_server_stats")
logger.warning("The 'get_server_stats' setting in 'api_limits' is no longer and is now ignored")

return values
Expand Down
49 changes: 49 additions & 0 deletions qcfractal/qcfractal/test_periodics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import time
from datetime import timedelta
from typing import TYPE_CHECKING

import pytest
Expand All @@ -9,6 +10,8 @@
from qcfractal.components.torsiondrive.testing_helpers import submit_test_data as submit_td_test_data
from qcportal.managers import ManagerName, ManagerStatusEnum
from qcportal.record_models import RecordStatusEnum
from qcportal.serverinfo.models import AccessLogQueryFilters
from qcportal.utils import now_at_utc

if TYPE_CHECKING:
from qcarchivetesting.testing_classes import QCATestingSnowflake
Expand Down Expand Up @@ -70,3 +73,49 @@ def test_periodics_service_iteration(snowflake: QCATestingSnowflake):
rec = storage_socket.records.get([id_1, id_2])
assert rec[0]["status"] == RecordStatusEnum.running
assert rec[1]["status"] == RecordStatusEnum.running


def test_periodics_delete_old_access_logs(secure_snowflake: QCATestingSnowflake):
    """
    The internal 'delete_access_log' job should remove entries older than the
    configured retention period (1 day for the testing snowflake) while
    keeping recent entries.
    """

    storage_socket = secure_snowflake.get_storage_socket()

    read_id = storage_socket.users.get("read_user")["id"]

    # Old entry - past the 1-day retention window, so it should be deleted
    access1 = {
        "module": "api",
        "method": "GET",
        "full_uri": "/api/v1/datasets",
        "ip_address": "127.0.0.1",
        "user_agent": "Fake user agent",
        "request_duration": 0.24,
        "user_id": read_id,
        "request_bytes": 123,
        "response_bytes": 18273,
        "timestamp": now_at_utc() - timedelta(days=2),
    }

    # Recent entry - should survive the cleanup
    access2 = {
        "module": "api",
        "method": "POST",
        "full_uri": "/api/v1/records",
        "ip_address": "127.0.0.2",
        "user_agent": "Fake user agent",
        "request_duration": 0.45,
        "user_id": read_id,
        "request_bytes": 456,
        "response_bytes": 12671,
        "timestamp": now_at_utc(),
    }

    storage_socket.serverinfo.save_access(access1)
    storage_socket.serverinfo.save_access(access2)

    n_access = len(storage_socket.serverinfo.query_access_log(AccessLogQueryFilters()))

    secure_snowflake.start_job_runner()

    # The cleanup job runs after a short startup delay. Poll with a deadline
    # rather than a single fixed sleep - less flaky on slow machines, and
    # finishes early on fast ones
    deadline = time.monotonic() + 15.0
    accesses = storage_socket.serverinfo.query_access_log(AccessLogQueryFilters())
    while len(accesses) >= n_access and time.monotonic() < deadline:
        time.sleep(0.25)
        accesses = storage_socket.serverinfo.query_access_log(AccessLogQueryFilters())

    # Only the manually-added "old" entry should have been removed
    assert len(accesses) == n_access - 1
Loading