Skip to content

Commit

Permalink
✨ feat(formatters): Add JSONFormatter for structured logging with key…
Browse files Browse the repository at this point in the history
…-value support

- Introduced BaseStructuredFormatter, which serves as the base class for extracting and formatting log specifiers.
- Added JSONFormatter, a custom log formatter that formats log records as JSON strings.
- JSONFormatter parses 'key=value' patterns in log messages into key-value pairs in the resulting JSON.
- Handles complex data types such as lists, dicts, and tuples, and cleans up unnecessary newlines and tabs in messages.
- Exception information is also included in the formatted output if available.

Closes Lazarus-org#92
  • Loading branch information
MEHRSHAD-MIRSHEKARY committed Oct 4, 2024
1 parent b76afa1 commit 509e67e
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 0 deletions.
103 changes: 103 additions & 0 deletions django_logging/formatters/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import re
from datetime import datetime
from logging import Formatter, LogRecord
from typing import Any, Dict, List, Optional, Union


class BaseStructuredFormatter(Formatter):
    """Base class for custom formatters that extract specific fields from log
    records based on a format string.

    Attributes:
    ----------
        specifiers (List[str]): List of specifiers extracted from the provided format string.

    """

    # Matches either an escaped percent ("%%") or a named specifier
    # ("%(name)"). Consuming "%%" first keeps the literal text "%%(name)s"
    # from being mistaken for a specifier; only the named form fills the
    # capture group.
    _SPECIFIER_PATTERN = re.compile(r"%%|%\((.*?)\)")

    def __init__(
        self, fmt: Optional[str] = None, datefmt: Optional[str] = None
    ) -> None:
        """Initializes the formatter by extracting the format specifiers from
        the format string.

        Args:
        ----
            fmt (Optional[str]): The log format string, e.g., "%(levelname)s | %(asctime)s | %(message)s".
            datefmt (Optional[str]): The date format string for formatting 'asctime'.

        """
        super().__init__(fmt, datefmt)
        self.specifiers = self._extract_specifiers(fmt)

    def _extract_specifiers(self, fmt: Optional[str]) -> List[str]:
        """Extracts format specifiers (e.g., %(levelname)s) from the format
        string.

        Escaped percent signs ("%%") are skipped, so "%%(name)s" -- which
        printf-style formatting renders as the literal text "%(name)s" -- is
        not reported as a specifier.

        Args:
        ----
            fmt (Optional[str]): The format string to extract specifiers from.

        Returns:
        -------
            List[str]: The extracted specifier names, in order of appearance.
                Empty when ``fmt`` is None.

        """
        if fmt is None:
            return []
        # findall yields "" for every "%%" match (the group did not
        # participate); those entries are dropped here.
        return [name for name in self._SPECIFIER_PATTERN.findall(fmt) if name]

    def _get_field_value(self, record: LogRecord, specifier: str) -> Optional[Any]:
        """Retrieves the value for a given specifier from the log record.

        Args:
        ----
            record (logging.LogRecord): The log record object.
            specifier (str): The field name to retrieve from the log record.

        Returns:
        -------
            Optional[Any]: The value of the field, or None if the field is not found.

        """
        # "message" and "asctime" are computed on demand rather than read as
        # plain attributes of the record.
        if specifier == "message":
            return record.getMessage()
        if specifier == "asctime":
            return self.formatTime(record, self.datefmt)
        return getattr(record, specifier, None)

    def _handle_complex_value(
        self, value: Any
    ) -> Union[str, Dict[str, Any], List[Any]]:
        """Recursively converts a value into nested dicts, lists and strings.

        Dictionaries keep their structure (keys are converted with ``str()``
        so the result matches the declared ``Dict[str, Any]`` type), lists and
        tuples become lists, and every other value -- numbers, ``None``,
        datetime objects, ... -- is converted with ``str()``.

        Args:
        ----
            value (Any): The value to process.

        Returns:
        -------
            Union[str, Dict[str, Any], List[Any]]: The processed value.

        """
        if isinstance(value, dict):
            return {
                str(key): self._handle_complex_value(item)
                for key, item in value.items()
            }
        if isinstance(value, (list, tuple)):
            return [self._handle_complex_value(item) for item in value]

        return str(value)

    def _add_exception(self, record: LogRecord, data: Dict[str, Any]) -> None:
        """Adds exception information to the data structure, if present in the
        log record.

        Args:
        ----
            record (logging.LogRecord): The log record object.
            data (Dict[str, Any]): The dictionary to which exception information
                will be added under the "exception" key. Modified in place.

        """
        if record.exc_info:
            data["exception"] = self.formatException(record.exc_info)
141 changes: 141 additions & 0 deletions django_logging/formatters/json_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import ast
import json
import re
from logging import LogRecord
from typing import Any

from django_logging.formatters.base import BaseStructuredFormatter


class JSONFormatter(BaseStructuredFormatter):
    """A custom log formatter that formats log records as JSON strings, and
    converts 'key=value' patterns in the log message to key-value pairs in
    JSON.

    It also handles complex types such as lists, dicts, and tuples.

    Note:
    ----
        Keys extracted from the message are merged with ``dict.update`` and
        therefore overwrite same-named fields taken from the format string;
        the "message" (and, when present, "exception") entries are written
        afterwards and always win.

    """

    # A value is a {...}, [...] or (...) group, or otherwise a run of
    # non-whitespace characters. The bracketed alternatives are lazy, so they
    # end at the FIRST closing bracket: nested literals are not captured whole.
    key_value_pattern = re.compile(
        r"(?P<key>\w+)=(?P<value>\{.*?\}|\[.*?\]|\(.*?\)|\S+)"
    )

    def format(self, record: LogRecord) -> str:
        """Formats the log record as a JSON string, and converts 'key=value'
        patterns in the log message to key-value pairs in JSON.

        Args:
        ----
            record (logging.LogRecord): The log record object.

        Returns:
        -------
            str: The formatted JSON string (indented by two spaces).

        """
        # Format the log data based on specifiers
        log_data = {
            specifier: self._handle_complex_value(
                self._get_field_value(record, specifier)
            )
            for specifier in self.specifiers
        }

        # Parse 'key=value' pairs from the message if present
        message = record.getMessage()
        key_value_pairs = self._extract_key_value_pairs(message)

        # If key-value pairs are extracted, update the log data and remove
        # them from the message to avoid duplicating the information.
        if key_value_pairs:
            log_data.update(key_value_pairs)
            message = self._remove_key_value_pairs(message)

        # Store the message with newlines and tabs flattened to spaces
        log_data["message"] = self._clean_message(message)

        # Add any exception information if available
        self._add_exception(record, log_data)

        # Values parsed from the message may be Python literals JSON cannot
        # represent (set, bytes, complex, tuple dict keys, ...); normalise
        # them first so that formatting a record never raises TypeError.
        return json.dumps(self._make_json_safe(log_data), indent=2)

    def _extract_key_value_pairs(self, message: str) -> dict:
        """Extracts 'key=value' pairs from the log message and returns them as
        a dictionary. Supports complex structures like dict, list, and tuple.

        When a key occurs more than once, the last occurrence wins.

        Args:
        ----
            message (str): The log message string.

        Returns:
        -------
            dict: A dictionary of extracted key-value pairs, with each value
                converted by ``_convert_value``.

        """
        return {
            match.group("key"): self._convert_value(match.group("value"))
            for match in self.key_value_pattern.finditer(message)
        }

    def _remove_key_value_pairs(self, message: str) -> str:
        """Removes key=value pairs from the log message string to avoid
        duplication.

        Args:
        ----
            message (str): The original log message string.

        Returns:
        -------
            str: The message without key=value pairs, stripped of leading and
                trailing whitespace.

        """
        return self.key_value_pattern.sub("", message).strip()

    def _clean_message(self, message: str) -> str:
        """Cleans up the log message by replacing every newline and tab with
        a single space and stripping surrounding whitespace.

        Args:
        ----
            message (str): The log message string to clean.

        Returns:
        -------
            str: The cleaned message without newlines and tabs.

        """
        return message.replace("\n", " ").replace("\t", " ").strip()

    def _convert_value(self, value: str) -> Any:
        """Tries to convert a string value to an appropriate type (int, float,
        bool, dict, list, tuple). If conversion fails, returns the value as a
        string.

        Args:
        ----
            value (str): The string value to convert.

        Returns:
        -------
            Any: The converted value, or ``value`` unchanged when it is not a
                valid Python literal.

        """
        # literal_eval expects Python's capitalised True/False, so booleans
        # are matched case-insensitively up front.
        lowered = value.lower()
        if lowered in ("true", "false"):
            return lowered == "true"

        try:
            # Use ast.literal_eval to safely parse literals from the string
            return ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Besides ValueError/SyntaxError for non-literals, literal_eval
            # raises TypeError for e.g. an unhashable dict key ("{[1]: 2}")
            # and MemoryError/RecursionError for oversized or deeply nested
            # input. A log message must never break formatting, so fall back
            # to the original string in all of these cases.
            return value

    def _make_json_safe(self, value: Any) -> Any:
        """Recursively converts a value into something ``json.dumps`` accepts.

        Strings, numbers, booleans and None are returned unchanged. Dicts keep
        keys that JSON can encode natively and convert every other key with
        ``str()``; lists and tuples become lists; sets become lists ordered by
        ``repr`` so the output is stable; anything else is converted with
        ``str()``.

        Args:
        ----
            value (Any): The value to normalise.

        Returns:
        -------
            Any: A structure made only of dict, list, str, int, float, bool
                and None.

        """
        native = (str, int, float, bool)
        if value is None or isinstance(value, native):
            return value
        if isinstance(value, dict):
            return {
                (
                    key
                    if key is None or isinstance(key, native)
                    else str(key)
                ): self._make_json_safe(item)
                for key, item in value.items()
            }
        if isinstance(value, (list, tuple)):
            return [self._make_json_safe(item) for item in value]
        if isinstance(value, (set, frozenset)):
            return [self._make_json_safe(item) for item in sorted(value, key=repr)]
        return str(value)

0 comments on commit 509e67e

Please sign in to comment.