Skip to content
This repository has been archived by the owner on Feb 8, 2024. It is now read-only.

Commit

Permalink
CORTX-34050: Apply limit_time filter on .gz files.
Browse files Browse the repository at this point in the history
Signed-off-by: Shriya Deshmukh <shriya.deshmukh@seagate.com>
  • Loading branch information
shriya-deshmukh committed Aug 30, 2022
1 parent 9df754d commit f06b58a
Showing 1 changed file with 78 additions and 36 deletions.
114 changes: 78 additions & 36 deletions py-utils/src/utils/support_framework/log_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import re
import errno
import shutil
import gzip
from datetime import datetime, timedelta

from cortx.utils.support_framework.errors import BundleError
Expand Down Expand Up @@ -159,7 +160,7 @@ def limit_time(src_dir, dest_dir, duration, file_name_reg_ex,
Args:
src_dir [str]: source directory from where logs will be fetched
dest_dir [str]: destination directory where logs will be dumped
duration [str]: Duraion in ISO 8601 format,
duration [str]: Duration in ISO 8601 format,
eg: 2020-09-06T05:30:00P5DT3H3S
file_name_reg_ex [str]: File name regex
log_timestamp_regex [str] : regex format for timestamp to be
Expand All @@ -177,43 +178,84 @@ def limit_time(src_dir, dest_dir, duration, file_name_reg_ex,
"""
invalid_chars = re.findall("[^PTDHMS0-9-:]+", duration)
if invalid_chars:
raise BundleError(errno.EINVAL,"Invalid duration passed! "
+ f"unexpected charecters: {invalid_chars}")
raise BundleError(errno.EINVAL, "Invalid duration passed! "
+ f"unexpected characters: {invalid_chars}")

include_lines_without_timestamp = False
start_time, end_time = FilterLog._parse_duration(duration)
for file in os.listdir(src_dir):
# sort files based on timestamp
list_of_files = filter(lambda f: os.path.isfile(os.path.join(src_dir, f)),
os.listdir(src_dir))
# sort the files based on last modification time in descending order
list_of_files = sorted(list_of_files,
key=lambda f: os.path.getmtime(os.path.join(src_dir, f)),
reverse=True)
for file in list_of_files:
op_file = os.path.join(dest_dir, 'tmp_' + file)
# TODO: Handle time based log collection for rotated files('.gz') files
if file.startswith(file_name_reg_ex) and not file.endswith('.gz'):
if file.startswith(file_name_reg_ex):
in_file = os.path.join(src_dir, file)
with open(in_file, 'r') as fd_in, open(op_file, 'a') as fd_out:
line = fd_in.readline()
while(line):
regex_res = re.search(log_timestamp_regex, line)
log_duration = regex_res.group(0) if regex_res else None
if log_duration:
# convert log timestamp to datetime object
try:
log_time = datetime.strptime(log_duration, datetime_format)
if start_time <= log_time and log_time <= end_time:
include_lines_without_timestamp = True
fd_out.write(line)
elif log_time > end_time and include_lines_without_timestamp:
include_lines_without_timestamp = False
break
# There will be some log lines which lies under passed duration,
# but has no timestamp, those lines will throw ValueError while
# trying to fetch log timestamp,
# to capture such logs, we have set a flag
# include_lines_without_timestamp = True
except ValueError:
if include_lines_without_timestamp:
fd_out.write(line)
line = fd_in.readline()
try:
if file.endswith('.gz'):
with gzip.open(in_file, 'rt') as fd_in:
lines = fd_in.readlines()
else:
with open(in_file, 'r') as fd_in:
lines = fd_in.readlines()
if not lines:
continue
if file.endswith('.gz'):
with gzip.open(op_file, 'a') as fd_out:
is_log_exceeded, is_log_written_to_file = FilterLog._add_logs_in_dest_file(
lines, log_timestamp_regex, datetime_format,
duration, fd_out, encode=True)
else:
with open(op_file, 'a') as fd_out:
is_log_exceeded, is_log_written_to_file = FilterLog._add_logs_in_dest_file(
lines, log_timestamp_regex, datetime_format,
duration, fd_out)
try:
if is_log_written_to_file:
final_op_file = os.path.join(dest_dir, file)
os.rename(op_file, final_op_file)
except OSError as e:
raise BundleError(errno.EINVAL, "Failed to filter log " +
f"file based on time duration. ERROR:{e}")
else:
os.remove(op_file)
if is_log_exceeded:
break
except OSError as e:
raise BundleError(errno.EINVAL, "Failed to filter log " +
f"file based on time duration. ERROR:{e}")

@staticmethod
def _add_logs_in_dest_file(contents: list, timestamp_regex: str,
datetime_format: str, duration: str, fd_out, encode=False):
"""The logs which are in given time duration add those logs in dest_dir."""
include_lines_without_timestamp = False
is_log_exceeded = False
is_log_written_to_file = False
start_time, end_time = FilterLog._parse_duration(duration)
for line in contents:
regex_res = re.search(timestamp_regex, line)
log_duration = regex_res.group(0) if regex_res else None
if log_duration:
# convert log timestamp to datetime object
try:
log_time = datetime.strptime(log_duration, datetime_format)
if start_time <= log_time and log_time <= end_time:
include_lines_without_timestamp = True
is_log_written_to_file = True
if encode:
line = line.encode('utf-8')
fd_out.write(line)
elif log_time > end_time and include_lines_without_timestamp:
include_lines_without_timestamp = False
is_log_exceeded = True
break
# There will be some log lines which lies under passed duration,
# but has no timestamp, those lines will throw ValueError while
# trying to fetch log timestamp,
# to capture such logs, we have set a flag
# include_lines_without_timestamp = True
except ValueError:
if include_lines_without_timestamp:
is_log_written_to_file = True
if encode:
line = line.encode('utf-8')
fd_out.write(line)
return is_log_exceeded, is_log_written_to_file

0 comments on commit f06b58a

Please sign in to comment.