Merge pull request #56 from mjs/fix-logging
Prevent stray output to stdout - log to stderr
mjs authored Apr 12, 2019
2 parents 30d8cba + 9a103b7 commit 414f72d
Showing 15 changed files with 93 additions and 82 deletions.
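The pattern throughout the diff: stdout is reserved for machine-readable output (the JSON metadata emitted when meta_to_stdout is set), and all human-readable logging moves to stderr. A minimal sketch of the idea, with an invented clip name and metadata purely for illustration:

    import json
    import logging
    import sys

    # Route log records to stderr so stdout carries nothing but data.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)

    logging.info("processing clip.cptv")        # -> stderr
    print(json.dumps({"primary_tag": "none"}))  # -> stdout, safe to pipe and parse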
4 changes: 3 additions & 1 deletion build.py
@@ -18,6 +18,7 @@

import numpy as np

from ml_tools.logs import init_logging
from ml_tools.trackdatabase import TrackDatabase
from ml_tools.dataset import Dataset

@@ -356,6 +357,7 @@ def get_bin_split(filename):
return test_bins

def main():
init_logging()

global dataset
global db
@@ -394,4 +396,4 @@ def main():


if __name__ == "__main__":
main()
main()
11 changes: 3 additions & 8 deletions classify/clipclassifier.py
@@ -39,9 +39,6 @@ def __init__(self, config, tracking_config):
# enables exporting detailed information for each track. If preview mode is enabled, also enables track previews.
self.enable_per_track_information = False

# writes metadata to standard out instead of a file.
self.write_meta_to_stdout = False

def preprocess(self, frame, thermal_reference):
"""
Applies preprocessing to frame required by the model.
@@ -223,7 +220,6 @@ def get_meta_data(self, filename):
elif len(tags) == 1:
tag = tags[0] if tags[0] else "none"
else:
print(tags)
tag = 'multi'
meta_data["primary_tag"] = tag
return meta_data
@@ -347,8 +343,7 @@ def save_metadata(self, filename, meta_filename, tracker):
track_info['positions'] = positions

if self.config.classify.meta_to_stdout:
output = json.dumps(save_file, indent=4, cls=tools.CustomJSONEncoder)
print(output)
print(json.dumps(save_file, cls=tools.CustomJSONEncoder))
else:
f = open(meta_filename, 'w')
json.dump(save_file, f, indent=4, cls=tools.CustomJSONEncoder)
with open(meta_filename, 'w') as f:
json.dump(save_file, f, indent=4, cls=tools.CustomJSONEncoder)
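Keeping stdout clean is what lets a wrapper consume the metadata directly — the --processor-folder option below notes that "the metadata will be sent to stdout". A hypothetical consumer sketch; the command line is illustrative, not the real entry point:

    import json
    import subprocess

    # Hypothetical invocation; the real script name and flags may differ.
    proc = subprocess.run(
        ["python", "classify.py", "clip.cptv", "--processor-folder", "/tmp/clips"],
        capture_output=True, text=True,
    )
    meta = json.loads(proc.stdout)  # clean JSON, thanks to stderr-only logging
    logs = proc.stderr              # human-readable progress and warnings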
22 changes: 5 additions & 17 deletions classify/main.py
@@ -1,26 +1,14 @@
import argparse
import os
import logging
import sys
from datetime import datetime

from ml_tools.logs import init_logging
from ml_tools import tools
from ml_tools.config import Config
from ml_tools.previewer import Previewer
from .clipclassifier import ClipClassifier

def log_to_stdout():
""" Outputs all log entries to standard out. """
# taken from https://stackoverflow.com/questions/14058453/making-python-loggers-output-all-messages-to-stdout-in-addition-to-log
root = logging.getLogger()
root.setLevel(logging.INFO)

ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.INFO)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
root.addHandler(ch)

def main():
parser = argparse.ArgumentParser()

@@ -34,10 +22,13 @@ def main():
'--end-date', help='Only clips on or before this day will be processed (format YYYY-MM-DD)')
parser.add_argument('-c', '--config-file', help="Path to config file to use")
parser.add_argument('--processor-folder', help="When running from thermal-processing use this to specify the folder for both the source cptv and output mp4. With this option the metadata will be sent to stdout.")

parser.add_argument('-T', '--timestamps', action="store_true", help="Emit log timestamps")
args = parser.parse_args()

config = Config.load_from_file(args.config_file)

init_logging(args.timestamps)

# parse command line arguments
if args.create_previews:
config.classify.preview = Previewer.PREVIEW_CLASSIFIED
Expand All @@ -59,9 +50,6 @@ def main():
if args.end_date:
clip_classifier.end_date = datetime.strptime(args.end_date, "%Y-%m-%d")

if not config.classify.meta_to_stdout:
log_to_stdout()

if config.classify.preview != Previewer.PREVIEW_NONE:
logging.info("Creating previews")

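With init_logging() wired in above, the new -T/--timestamps flag simply prepends %(asctime)s to the format defined in ml_tools/logs.py (shown further down), so the same record renders either way, roughly:

       INFO Creating previews                        (default)
    2019-04-12 09:15:03    INFO Creating previews    (with -T)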
7 changes: 6 additions & 1 deletion evaluate.py
@@ -21,14 +21,17 @@
import json
from datetime import datetime, timedelta
import dateutil.parser
from ml_tools import tools
import matplotlib.pyplot as plt
from sklearn import metrics
import numpy as np
import itertools
import argparse
import seaborn as sns

from ml_tools.logs import init_logging
from ml_tools import tools


# number of seconds between clips required to trigger a new visit
NEW_VISIT_THRESHOLD = 3*60

@@ -576,6 +579,8 @@ def print_evaluation(visits):
show_errors_by_score(visits)

def main():
init_logging()

parser = argparse.ArgumentParser()

parser.add_argument('-s', '--source-folder', default=os.path.join(DEFAULT_SOURCE_FOLDER), help='Source folder containing .txt files exported by classify.py')
13 changes: 9 additions & 4 deletions extract.py
@@ -3,9 +3,12 @@
"""

import argparse
import cv2
import logging
import os

import cv2

from ml_tools.logs import init_logging
from ml_tools.trackdatabase import TrackDatabase
from ml_tools import trackdatabase
from ml_tools import tools
@@ -29,6 +32,8 @@ def parse_params():
print(cv2.getBuildInformation())
return

init_logging()

config = Config.load_from_file(args.config_file)
if args.create_previews:
config.extract.preview = "tracking"
@@ -43,17 +48,17 @@
source_file = tools.find_file_from_cmd_line(config.source_folder, args.target)
if source_file is None:
return
print("Processing file '" + source_file + "'")
logging.info("Processing file '" + source_file + "'")
tag = os.path.basename(os.path.dirname(source_file))
extractor.process_file(source_file, tag=tag)
return

if args.target.lower() == 'test':
print("Running test suite")
logging.info("Running test suite")
extractor.run_tests(args.source_folder, args.test_file)
return

print('Processing tag "{0}"'.format(args.target))
logging.info('Processing tag "{0}"'.format(args.target))

if args.target.lower() == 'all':
extractor.clean_all()
19 changes: 9 additions & 10 deletions ml_tools/cptvfileprocessor.py
@@ -1,7 +1,7 @@
import logging
import multiprocessing
import os
import time
import multiprocessing
import traceback

def process_job(job):
@@ -12,9 +12,8 @@ def process_job(job):

try:
processor.process_file(path, **params)
except Exception as e:
print("Warning - error processing job:",e)
traceback.print_exc()
except Exception:
logging.exception("Warning - error processing job")

time.sleep(0.001) # apparently gives me a chance to catch the control-c

@@ -60,10 +59,9 @@ def needs_processing(self, filename):
def process_folder(self, folder_path, worker_pool_args=None, **kwargs):
"""Processes all files within a folder."""

jobs = []

print('processing',folder_path)
logging.info('processing %s', folder_path)

jobs = []
for file_name in os.listdir(folder_path):
full_path = os.path.join(folder_path, file_name)
if os.path.isfile(full_path) and os.path.splitext(full_path)[1].lower() == '.cptv':
@@ -91,7 +89,7 @@ def process_job_list(self, jobs, worker_pool_args=None):
pool.close()
pool.join()
except KeyboardInterrupt:
print("KeyboardInterrupt, terminating.")
logging.info("KeyboardInterrupt, terminating.")
pool.terminate()
exit()
except Exception:
@@ -102,12 +100,13 @@
def log_message(self, message):
""" Record message in stdout. Will be printed if verbose is enabled. """
# note, python has really good logging... I should probably make use of this.
if self.tracker_config.verbose: print(message)
if self.tracker_config.verbose:
logging.info(message)

def log_warning(self, message):
""" Record warning message in stdout."""
# note, python has really good logging... I should probably make use of this.
print("Warning:",message)
logging.warning("Warning: %s", message)

if __name__ == '__main__':
# for some reason the fork method seems to memory leak, and unix defaults to this so we
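The logging.exception call in process_job above is what makes the separate traceback.print_exc() redundant: it logs the message at ERROR level and appends the active exception's traceback automatically. A standalone sketch:

    import logging

    logging.basicConfig(level=logging.INFO)  # StreamHandler defaults to stderr

    try:
        1 / 0
    except Exception:
        # Emits the message at ERROR level, followed by the full traceback.
        logging.exception("Warning - error processing job")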
16 changes: 5 additions & 11 deletions ml_tools/dataset.py
@@ -9,6 +9,7 @@
"""

import queue
import logging
import threading
import multiprocessing
import cv2
@@ -446,14 +447,7 @@ def next_batch(self, n, disable_async=False, force_no_augmentation=False):
batch_y.append(self.labels.index(segment.label))

if np.isnan(data).any():
print("Warning NaN found in data from source {}".format(segment.clip_id))

# I've been getting some NaN's come through so I check here to make sure input is reasonable.
# note: this is a really good idea, however my data has some '0' values in the thermal data which come
# through as -reference level (i.e. -3000) so I've disabled this for now.
#if np.max(np.abs(data)) > 2000:
# print("Extreme values found in batch from source {} with value +-{:.1f}".format(segment.clip_id,
# np.max(np.abs(data))))
logging.warning("NaN found in data from source: %r", segment.clip_id)

# Half float should be fine here. When using process based async loading we have to pickle the batch between
# processes, so having it half the size helps a lot. Also it reduces the memory required for the read buffers
@@ -637,7 +631,7 @@ def fetch_segment(self, segment: SegmentHeader, augment=False):
last_frame)

if len(data) != self.segment_width:
print("ERROR, invalid segment length {}, expected {}", len(data), self.segment_width)
logging.error("invalid segment length %d, expected %d", len(data), self.segment_width)

data = Preprocessor.apply(data,
segment.track.thermal_reference_level[first_frame:last_frame],
@@ -855,8 +849,8 @@ def stop_async_load(self):
# continue to read examples until queue is full
def preloader(q, dataset):
""" add a segment into buffer """
print(" -started async fetcher for {} with augment={} segment_width={}".format(
dataset.name, dataset.enable_augmentation, dataset.segment_width))
logging.info(" -started async fetcher for %s with augment=%s segment_width=%s",
dataset.name, dataset.enable_augmentation, dataset.segment_width)
loads = 0
timer = time.time()
while not dataset.preloader_stop_flag:
16 changes: 16 additions & 0 deletions ml_tools/logs.py
@@ -0,0 +1,16 @@
import sys
import logging


def init_logging(timestamps=False):
"""Set up logging for use by various classifier pipeline scripts.
Logs will go to stderr.
"""

fmt = "%(levelname)7s %(message)s"
if timestamps:
fmt = "%(asctime)s " + fmt
logging.basicConfig(
stream=sys.stderr, level=logging.INFO, format=fmt, datefmt="%Y-%m-%d %H:%M:%S"
)
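Typical use, matching the call sites added in build.py, evaluate.py, extract.py and classify/main.py: call init_logging() once at startup, then log through the stdlib as usual. A minimal sketch:

    import logging

    from ml_tools.logs import init_logging

    init_logging()               # or init_logging(timestamps=True)
    logging.info("starting up")  # "   INFO starting up", written to stderr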
2 changes: 1 addition & 1 deletion ml_tools/model.py
@@ -286,7 +286,7 @@ def eval_model(self, writer=None):
print("-"*60)
self.datasets.test.load_all()
test_accuracy, _ = self.eval_batch(self.datasets.test.X, self.datasets.test.y, writer=writer)
print("Test Accuracy {0:.2f}% (error {1:.2f}%)".format(test_accuracy*100,(1.0-test_accuracy)*100))
logging.info("Test Accuracy %.2f (error %.2f%%)", test_accuracy * 100, (1.0 - test_accuracy) * 100)
return test_accuracy

def benchmark_model(self):
7 changes: 4 additions & 3 deletions ml_tools/previewer.py
@@ -1,3 +1,4 @@
import logging
from os import path

import numpy as np
@@ -61,7 +62,7 @@ def colourmap(self):
if path.exists(colourmap):
self.colormap = tools.load_colormap(colourmap)
else:
print("using default colour map")
logging.info("using default colour map")
self.colormap = plt.get_cmap('jet')

return globs._previewer_colour_map
@@ -93,7 +94,7 @@ def export_clip_preview(self, filename, tracker: TrackExtractor, track_predictio
self.auto_max = tracker.stats['max_temp']
self.auto_min = tracker.stats['min_temp']
else:
print("Do not have temperatures to use.")
logging.error("Do not have temperatures to use.")
return

if bool(track_predictions) and self.preview_type == self.PREVIEW_CLASSIFIED:
@@ -151,7 +152,7 @@ def create_individual_track_previews(self, filename, tracker:TrackExtractor):
img = img.resize((frame_width, frame_height), Image.NEAREST)
video_frames.append(np.asarray(img))

print("creating preview {}".format(filename_format.format(id + 1)))
logging.info("creating preview %s", filename_format.format(id + 1))
tools.write_mpeg(filename_format.format(id + 1), video_frames)


2 changes: 1 addition & 1 deletion ml_tools/tools.py
@@ -155,7 +155,7 @@ def find_file_from_cmd_line(root, cmd_line_input):
if os.path.isfile(cmd_line_input):
return cmd_line_input

print("Could not locate file '" + cmd_line_input + "'")
logging.warning("Could not locate %r", cmd_line_input)
return None

def get_ffmpeg_command(filename, width, height, quality=21):
4 changes: 3 additions & 1 deletion ml_tools/trackdatabase.py
@@ -9,7 +9,9 @@
"""

import os
import logging
from multiprocessing import Lock

import h5py
import tables # required for blosc compression to work
import numpy as np
@@ -56,7 +58,7 @@ def __init__(self, database_filename):
self.database = database_filename

if not os.path.exists(database_filename):
print("Creating new database {}".format(database_filename))
logging.info("Creating new database %s", database_filename)
f = h5py.File(database_filename, 'w')
f.create_group("clips")
f.close()