Skip to content

Commit

Permalink
for metagen, changed logging level to DEBUG, turn down logging for everything else and move some messages to debug for clarity
Browse files Browse the repository at this point in the history
  • Loading branch information
danellecline committed Sep 12, 2024
1 parent a017606 commit ebc1bf4
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 31 deletions.
25 changes: 16 additions & 9 deletions pbp/main_meta_generator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
from datetime import datetime
from pathlib import Path
import loguru
import sys
import copy

from pbp.meta_gen.gen_nrs import NRSMetadataGenerator
from pbp.meta_gen.gen_iclisten import IcListenMetadataGenerator
Expand All @@ -15,15 +18,19 @@
def main():
opts = parse_arguments()

# pylint: disable=import-outside-toplevel
from pbp.logging_helper import create_logger

log = create_logger(
log_filename_and_level=(
f"{opts.output_dir}/{opts.recorder}{opts.start}_{opts.end}.log",
"INFO",
),
console_level="INFO",
loguru.logger.remove()
log = copy.deepcopy(loguru.logger)
info_format = "{message}"
default_format = "{time} {level} {message}"
log_filename = (f"{opts.output_dir}/{opts.recorder}{opts.start}_{opts.end}.log",)
log.add(
sys.stdout,
level="INFO",
format=info_format,
filter=lambda record: record["level"].name == "INFO",
)
log.add(
sink=open(log_filename, "w"), level="DEBUG", format=default_format, enqueue=True
)

log_dir = Path(opts.output_dir)
Expand Down
8 changes: 4 additions & 4 deletions pbp/meta_gen/gen_iclisten.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def run(self):
)
)

self.log.info(
self.log.debug(
f"{self.log_prefix} Found {len(wav_files)} files to process that "
f"cover the expanded period {start_dt} - {end_dt}"
)
Expand All @@ -154,8 +154,8 @@ def run(self):
wav_files.sort(key=lambda x: x.start)

# create a dataframe from the wav files
self.log.info(
f"{self.log_prefix} Creating dataframe from {len(wav_files)} files "
self.log.debug(
f"{self.log_prefix} creating dataframe from {len(wav_files)} files "
f"spanning {wav_files[0].start} to {wav_files[-1].start}..."
)

Expand Down Expand Up @@ -183,7 +183,7 @@ def run(self):
plot_file = plot_daily_coverage(
InstrumentType.ICLISTEN, self.df, self.json_base_dir, self.start, self.end
)
self.log.info(f"Plot file: {plot_file}")
self.log.info(f"Coverage plot saved to: {plot_file}")


if __name__ == "__main__":
Expand Down
8 changes: 4 additions & 4 deletions pbp/meta_gen/gen_nrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def run(self):
if f_dt is None:
continue
if start_dt <= f_dt <= end_dt:
self.log.info(f"Found file {filename} with timestamp {f_dt}")
self.log.debug(f"Found file {filename} with timestamp {f_dt}")
if ext == "*.flac":
sound_files.append(FlacFile(self.log, str(filename), f_dt))
if ext == "*.wav":
Expand All @@ -102,13 +102,13 @@ def run(self):
if f_dt is None:
continue
if start_dt <= f_dt <= end_dt:
self.log.info(f"Found file {blob.name} with timestamp {f_dt}")
self.log.debug(f"Found file {blob.name} with timestamp {f_dt}")
if re.search(r"\.flac$", blob.name):
sound_files.append(FlacFile(self.log, f_path, f_dt))
if re.search(r"\.wav$", blob.name):
sound_files.append(WavFile(self.log, f_path, f_dt))
# delay to avoid 400 error
if i % 100 == 0:
if i % 100 == 0 and i > 0:
self.log.info(
f"{i} files searched...found {len(sound_files)} files that match the search pattern"
)
Expand All @@ -135,7 +135,7 @@ def run(self):
for day in pd.date_range(self.start, self.end, freq="D"):
try:
# create a dataframe from the flac files
self.log.info(
self.log.debug(
f"Creating dataframe from {len(sound_files)} "
f"files spanning {sound_files[0].start} to {sound_files[-1].start} in self.json_base_dir..."
)
Expand Down
6 changes: 3 additions & 3 deletions pbp/meta_gen/gen_soundtrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def run(self):
else:
# if the audio_loc is a s3 url, then we need to list the files in buckets that cover the start and end
# dates
self.log.info(f"Searching between {start_dt} and {end_dt}")
self.log.debug(f"Searching between {start_dt} and {end_dt}")

client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
paginator = client.get_paginator("list_objects")
Expand All @@ -148,7 +148,7 @@ def run(self):
if start_dt <= key_dt <= end_dt and key.endswith(".wav"):
# download the associated xml file to the wav file and create a SoundTrapWavFile object
try:
self.log.info(f"Downloading {key_xml} ...")
self.log.debug(f"Downloading {key_xml} ...")
client.download_file(bucket, key_xml, xml_path)
wav_files.append(SoundTrapWavFile(uri, xml_path, key_dt))
except Exception as ex:
Expand All @@ -168,7 +168,7 @@ def run(self):
wav_files.sort(key=lambda x: x.start)

# create a dataframe from the wav files
self.log.info(
self.log.debug(
f"Creating dataframe from {len(wav_files)} files spanning "
f"{wav_files[0].start} to {wav_files[-1].start}..."
)
Expand Down
16 changes: 5 additions & 11 deletions pbp/meta_gen/json_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,6 @@ def run(self):
| ((self.raw_df["end"] >= self.day) & (self.raw_df["start"] < self.day))
]

self.log.info(
f"Creating metadata for day {self.day} from {len(day_df)} files..."
)

if len(day_df) == 0:
self.log.warning(f"No metadata found for day {self.day}")
return
Expand All @@ -85,7 +81,7 @@ def run(self):
day_df["end"] = pd.to_datetime(day_df["end"])

# get the file list that covers the requested day
self.log.info(
self.log.debug(
f'Found {len(day_df)} files for day {self.day}, between {day_df.iloc[0]["start"]} and {day_df.iloc[-1]["end"]}'
)

Expand Down Expand Up @@ -159,10 +155,6 @@ def run(self):

except Exception as e:
self.log.exception(f"Error correcting metadata for {self.day}. {e}")
finally:
self.log.info(
f"Done correcting metadata for {self.day}. Saved to {self.json_base_dir}"
)

def no_jitter(self, day_df: pd.DataFrame) -> pd.DataFrame:
"""
Expand All @@ -172,7 +164,7 @@ def no_jitter(self, day_df: pd.DataFrame) -> pd.DataFrame:
:return:
The corrected dataframe
"""
self.log.info(
self.log.debug(
"Using file start times as is, setting jitter to 0 and calculating end times."
)
# calculate the difference between each row start time and save as diff in a copy of the dataframe
Expand Down Expand Up @@ -236,4 +228,6 @@ def save_day(self, day: datetime.datetime, day_df: pd.DataFrame, prefix: str = "
output_path = Path(self.json_base_dir, str(day.year))
output_path.mkdir(parents=True, exist_ok=True)
shutil.copy2(temp_metadata.as_posix(), output_path)
self.log.info(f"Wrote {output_path}/{temp_metadata.name}")
self.log.info(
f"Done correcting metadata for {self.day}. Saved to {output_path}/{temp_metadata.name}"
)

0 comments on commit ebc1bf4

Please sign in to comment.