For metagen, changed the logging level to DEBUG, turned down logging for everything else, and moved some messages to debug for clarity
mbari-org · Sep 12, 2024 · ebc1bf4 · ebc1bf4
1 parent a017606
commit ebc1bf4
Show file tree

Hide file tree

Showing 5 changed files with 32 additions and 31 deletions.
diff --git a/pbp/main_meta_generator.py b/pbp/main_meta_generator.py
@@ -1,6 +1,9 @@
 import os
 from datetime import datetime
 from pathlib import Path
+import loguru
+import sys
+import copy
 
 from pbp.meta_gen.gen_nrs import NRSMetadataGenerator
 from pbp.meta_gen.gen_iclisten import IcListenMetadataGenerator
@@ -15,15 +18,19 @@
 def main():
     opts = parse_arguments()
 
-    # pylint: disable=import-outside-toplevel
-    from pbp.logging_helper import create_logger
-
-    log = create_logger(
-        log_filename_and_level=(
-            f"{opts.output_dir}/{opts.recorder}{opts.start}_{opts.end}.log",
-            "INFO",
-        ),
-        console_level="INFO",
+    loguru.logger.remove()
+    log = copy.deepcopy(loguru.logger)
+    info_format = "{message}"
+    default_format = "{time} {level} {message}"
+    log_filename = (f"{opts.output_dir}/{opts.recorder}{opts.start}_{opts.end}.log",)
+    log.add(
+        sys.stdout,
+        level="INFO",
+        format=info_format,
+        filter=lambda record: record["level"].name == "INFO",
+    )
+    log.add(
+        sink=open(log_filename, "w"), level="DEBUG", format=default_format, enqueue=True
     )
 
     log_dir = Path(opts.output_dir)

diff --git a/pbp/meta_gen/gen_iclisten.py b/pbp/meta_gen/gen_iclisten.py
@@ -138,7 +138,7 @@ def run(self):
                                             )
                                         )
 
-                self.log.info(
+                self.log.debug(
                     f"{self.log_prefix} Found {len(wav_files)} files to process that "
                     f"cover the expanded period {start_dt} - {end_dt}"
                 )
@@ -154,8 +154,8 @@ def run(self):
                 wav_files.sort(key=lambda x: x.start)
 
                 # create a dataframe from the wav files
-                self.log.info(
-                    f"{self.log_prefix}  Creating dataframe from {len(wav_files)} files "
+                self.log.debug(
+                    f"{self.log_prefix} creating dataframe from {len(wav_files)} files "
                     f"spanning {wav_files[0].start} to {wav_files[-1].start}..."
                 )
 
@@ -183,7 +183,7 @@ def run(self):
         plot_file = plot_daily_coverage(
             InstrumentType.ICLISTEN, self.df, self.json_base_dir, self.start, self.end
         )
-        self.log.info(f"Plot file: {plot_file}")
+        self.log.info(f"Coverage plot saved to: {plot_file}")
 
 
 if __name__ == "__main__":

diff --git a/pbp/meta_gen/gen_nrs.py b/pbp/meta_gen/gen_nrs.py
@@ -83,7 +83,7 @@ def run(self):
                     if f_dt is None:
                         continue
                     if start_dt <= f_dt <= end_dt:
-                        self.log.info(f"Found file {filename} with timestamp {f_dt}")
+                        self.log.debug(f"Found file {filename} with timestamp {f_dt}")
                         if ext == "*.flac":
                             sound_files.append(FlacFile(self.log, str(filename), f_dt))
                         if ext == "*.wav":
@@ -102,13 +102,13 @@ def run(self):
                 if f_dt is None:
                     continue
                 if start_dt <= f_dt <= end_dt:
-                    self.log.info(f"Found file {blob.name} with timestamp {f_dt}")
+                    self.log.debug(f"Found file {blob.name} with timestamp {f_dt}")
                     if re.search(r"\.flac$", blob.name):
                         sound_files.append(FlacFile(self.log, f_path, f_dt))
                     if re.search(r"\.wav$", blob.name):
                         sound_files.append(WavFile(self.log, f_path, f_dt))
                 # delay to avoid 400 error
-                if i % 100 == 0:
+                if i % 100 == 0 and i > 0:
                     self.log.info(
                         f"{i} files searched...found {len(sound_files)} files that match the search pattern"
                     )
@@ -135,7 +135,7 @@ def run(self):
         for day in pd.date_range(self.start, self.end, freq="D"):
             try:
                 # create a dataframe from the flac files
-                self.log.info(
+                self.log.debug(
                     f"Creating dataframe from {len(sound_files)} "
                     f"files spanning {sound_files[0].start} to {sound_files[-1].start} in self.json_base_dir..."
                 )

diff --git a/pbp/meta_gen/gen_soundtrap.py b/pbp/meta_gen/gen_soundtrap.py
@@ -121,7 +121,7 @@ def run(self):
             else:
                 # if the audio_loc is a s3 url, then we need to list the files in buckets that cover the start and end
                 # dates
-                self.log.info(f"Searching between {start_dt} and {end_dt}")
+                self.log.debug(f"Searching between {start_dt} and {end_dt}")
 
                 client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
                 paginator = client.get_paginator("list_objects")
@@ -148,7 +148,7 @@ def run(self):
                         if start_dt <= key_dt <= end_dt and key.endswith(".wav"):
                             # download the associated xml file to the wav file and create a SoundTrapWavFile object
                             try:
-                                self.log.info(f"Downloading {key_xml} ...")
+                                self.log.debug(f"Downloading {key_xml} ...")
                                 client.download_file(bucket, key_xml, xml_path)
                                 wav_files.append(SoundTrapWavFile(uri, xml_path, key_dt))
                             except Exception as ex:
@@ -168,7 +168,7 @@ def run(self):
             wav_files.sort(key=lambda x: x.start)
 
             # create a dataframe from the wav files
-            self.log.info(
+            self.log.debug(
                 f"Creating dataframe from {len(wav_files)} files spanning "
                 f"{wav_files[0].start} to {wav_files[-1].start}..."
             )

diff --git a/pbp/meta_gen/json_generator.py b/pbp/meta_gen/json_generator.py
@@ -71,10 +71,6 @@ def run(self):
                 | ((self.raw_df["end"] >= self.day) & (self.raw_df["start"] < self.day))
             ]
 
-            self.log.info(
-                f"Creating metadata for day {self.day} from {len(day_df)} files..."
-            )
-
             if len(day_df) == 0:
                 self.log.warning(f"No metadata found for day {self.day}")
                 return
@@ -85,7 +81,7 @@ def run(self):
             day_df["end"] = pd.to_datetime(day_df["end"])
 
             # get the file list that covers the requested day
-            self.log.info(
+            self.log.debug(
                 f'Found {len(day_df)} files for day {self.day}, between {day_df.iloc[0]["start"]} and {day_df.iloc[-1]["end"]}'
             )
 
@@ -159,10 +155,6 @@ def run(self):
 
         except Exception as e:
             self.log.exception(f"Error correcting metadata for  {self.day}. {e}")
-        finally:
-            self.log.info(
-                f"Done correcting metadata for {self.day}. Saved to {self.json_base_dir}"
-            )
 
     def no_jitter(self, day_df: pd.DataFrame) -> pd.DataFrame:
         """
@@ -172,7 +164,7 @@ def no_jitter(self, day_df: pd.DataFrame) -> pd.DataFrame:
         :return:
             The corrected dataframe
         """
-        self.log.info(
+        self.log.debug(
             "Using file start times as is, setting jitter to 0 and calculating end times."
         )
         # calculate the difference between each row start time and save as diff in a copy of the dataframe
@@ -236,4 +228,6 @@ def save_day(self, day: datetime.datetime, day_df: pd.DataFrame, prefix: str = "
             output_path = Path(self.json_base_dir, str(day.year))
             output_path.mkdir(parents=True, exist_ok=True)
             shutil.copy2(temp_metadata.as_posix(), output_path)
-            self.log.info(f"Wrote {output_path}/{temp_metadata.name}")
+            self.log.info(
+                f"Done correcting metadata for {self.day}. Saved to {output_path}/{temp_metadata.name}"
+            )