Skip to content

Commit

Permalink
Improve logging and reduce db inaccuracies (#97)
Browse files Browse the repository at this point in the history
* Reduce logging verbosity by reducing script to 10 chars instead of many

* Increase logging verbosity

* Attempt improved mitigation in event of logs being too long in pod

* black auto commit

---------

Co-authored-by: github-actions <github-actions@github.com>
  • Loading branch information
Pasarus and github-actions authored Aug 2, 2023
1 parent ef3674f commit aa1dbd7
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 12 deletions.
6 changes: 3 additions & 3 deletions job_controller/database/db_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This module is responsible for holding the SQL Classes that SQLAlchemy will use and then formatting the SQL queries
via SQLAlchemy via pre-made functions.
"""

import textwrap
from typing import Any, Dict, List

from sqlalchemy import ( # type: ignore[attr-defined]
Expand Down Expand Up @@ -301,7 +301,7 @@ def add_completed_run(
str(state),
status_message,
output_files,
reduction_script,
textwrap.shorten(reduction_script, width=10, placeholder="..."),
)
with self.session_maker_func() as session:
script = session.query(Script).filter_by(script=reduction_script).first()
Expand All @@ -325,7 +325,7 @@ def add_completed_run(
str(state),
status_message,
output_files,
reduction_script,
textwrap.shorten(reduction_script, width=10, placeholder="..."),
)


Expand Down
21 changes: 16 additions & 5 deletions job_controller/job_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ def watch(self) -> None:
for event in watch_.stream(v1.list_job_for_all_namespaces):
self.process_event(event)
except Exception as exception: # pylint: disable=broad-exception-caught
logger.error("Job watching failed due to an exception: %s", str(exception))
logger.error("JobWatcher for job %s failed", self.job_name)
logger.exception(exception)
return
logger.info("Ending JobWatcher for job %s", self.job_name)

Expand Down Expand Up @@ -139,21 +140,31 @@ def process_event_success(self) -> None:
f"namespace returned None when looking for a pod."
)
v1_core = client.CoreV1Api()
logs = v1_core.read_namespaced_pod_log(name=pod_name, namespace=self.namespace)
output = logs.split("\n")[-2] # Get second last line (last line is empty)
logger.info("Job %s has been completed with output: %s", self.job_name, output)
# Convert message from JSON string to python dict
try:
logs = v1_core.read_namespaced_pod_log(name=pod_name, namespace=self.namespace)
output = logs.split("\n")[-2] # Get second last line (last line is empty)
logger.info("Job %s has been completed with output: %s", self.job_name, output)
job_output = json.loads(output)
except JSONDecodeError as exception:
logger.error("Last message from job is not a JSON string: %s", str(exception))
logger.error("Last message from job is not a JSON string")
logger.exception(exception)
job_output = {
"status": "Unsuccessful",
"output_files": [],
"status_message": f"{str(exception)}",
}
except TypeError as exception:
logger.error("Last message from job is not a string: %s", str(exception))
logger.exception(exception)
job_output = {
"status": "Unsuccessful",
"output_files": [],
"status_message": f"{str(exception)}",
}
except Exception as exception:
logger.error("There was a problem recovering the job output")
logger.exception(exception)
job_output = {
"status": "Unsuccessful",
"output_files": [],
Expand Down
3 changes: 2 additions & 1 deletion job_controller/topic_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,6 @@ def start_consuming(self, run_once: bool = False) -> None:
logger.info("Message decoded as: %s", message_obj)
self.message_callback(message_obj)
except json.JSONDecodeError as exception:
logger.error("Error attempting to decode JSON: %s", str(exception))
logger.error("Error attempting to decode JSON: %s", message_str)
logger.exception(exception)
continue
7 changes: 4 additions & 3 deletions test/test_job_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ def raise_exception(_):
self.job_watcher.watch()

watch_.stream.assert_called_once_with(v1.list_job_for_all_namespaces)
logger.error.assert_called_once_with(
"Job watching failed due to an exception: %s", str(Exception("EVERYTHING IS ON FIRE"))
)
logger.error.assert_called_once()
logger.error.call_args_list[0]("JobWatcher for job %s failed", str(self.job_watcher.job_name))
logger.exception.assert_called_once()
logger.exception.call_args_list[0](Exception("EVERYTHING IS ON FIRE"))

@mock.patch("job_controller.job_watcher.watch")
@mock.patch("job_controller.job_watcher.client")
Expand Down

0 comments on commit aa1dbd7

Please sign in to comment.