From 24b94470ffb7652ee5e3f24436e2b5c235be22ec Mon Sep 17 00:00:00 2001 From: Christian Winther Date: Mon, 2 Dec 2024 09:34:29 +0100 Subject: [PATCH] Added verification towards disk space (#276) * Added verification towards disk space * Added documentation * Added changelog entry --- CHANGELOG | 2 ++ src/common/io.py | 42 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 7be62932..a8c03b9a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ --- CHANGELOG --- --- Future --- + * Added verification against the available free disk space when storing the result on disk (i.e. a protection + against running out of disk space). * Changed testing framework from `nose` to `pytest`. * Removed tests from the PyFMI installation. * Moved test files from src/pyfmi/tests/... to tests/files/... diff --git a/src/common/io.py b/src/common/io.py index 9e91ace0..64460440 100644 --- a/src/common/io.py +++ b/src/common/io.py @@ -25,6 +25,7 @@ import logging as logging_module from functools import reduce from typing import Union +from shutil import disk_usage import numpy as np import scipy @@ -42,6 +43,23 @@ SYS_LITTLE_ENDIAN = sys.byteorder == 'little' NCP_LARGE = 5000 +DISK_PROTECTION = 50*1024**2 #50 MB + +def get_available_disk_space(local_path): + """ + Get the available disk space at a given local path. If the local path does not exist, it returns a very large + value (sys.maxsize). Furthermore, 50 MB is subtracted from the available free space. The reason for doing + so is that running out of disk space can lead to any number of bad things; making sure that there is at + least some available disk space left leads to a controlled environment where the user can take appropriate + actions. + """ + try: + free_space = disk_usage(local_path).free - DISK_PROTECTION + + #If the user is saving to a stream or something other than a file - catch it here and return a large value.
+ except (FileNotFoundError, TypeError): + free_space = sys.maxsize + return free_space class Trajectory: """ @@ -1848,7 +1866,7 @@ def integration_point(self, solver = None): sys.getsizeof(self.int_sol) + sys.getsizeof(self.bool_sol) + \ sys.getsizeof(self.param_sol) - verify_result_size(self._first_point, current_size, previous_size, max_size, self.options["ncp"], self.model.time) + verify_result_size("", self._first_point, current_size, previous_size, max_size, self.options["ncp"], self.model.time) self._first_point = False def get_result(self): @@ -2035,7 +2053,7 @@ def integration_point(self, solver = None): max_size = self.options.get("result_max_size", None) if max_size is not None: - verify_result_size(self._first_point, self._current_file_size, previous_size, max_size, self.options["ncp"], self.model.time) + verify_result_size(self.file_name, self._first_point, self._current_file_size, previous_size, max_size, self.options["ncp"], self.model.time) self._first_point = False def simulation_end(self): @@ -2465,7 +2483,7 @@ def integration_point(self, solver = None):#parameter_data=[]): max_size = self.options.get("result_max_size", None) if max_size is not None: - verify_result_size(self._first_point, self._current_file_size, previous_size, max_size, self.options["ncp"], self.model.time) + verify_result_size(self.file_name, self._first_point, self._current_file_size, previous_size, max_size, self.options["ncp"], self.model.time) self._first_point = False def simulation_end(self): @@ -2833,7 +2851,7 @@ def _make_consistent(self, diag=False): max_size = self.options.get("result_max_size", None) if max_size is not None: - verify_result_size(self._first_point, file_pos, file_pos-self._size_point, max_size, self.options["ncp"], self.model.time) + verify_result_size(self.file_name, self._first_point, file_pos, file_pos-self._size_point, max_size, self.options["ncp"], self.model.time) #We can go in here before we've stored a full result point (due to storing 
diagnostic points). So check that a point has been fully stored if self._first_point and self._size_point > 0: self._first_point = False @@ -2860,12 +2878,21 @@ def get_result(self): """ return ResultDymolaBinary(self.file_name) -def verify_result_size(first_point, current_size, previous_size, max_size, ncp, time): +def verify_result_size(file_name, first_point, current_size, previous_size, max_size, ncp, time): + free_space = get_available_disk_space(file_name) + if first_point: point_size = current_size - previous_size estimate = ncp*point_size + previous_size + + msg = "" if estimate > max_size: - msg = "The result is estimated to exceed the allowed maximum size (limit: %g GB, estimate: %g GB). "%(max_size/1024**3, estimate/1024**3) + msg = msg + "The result is estimated to exceed the allowed maximum size (limit: %g GB, estimate: %g GB). "%(max_size/1024**3, estimate/1024**3) + + if estimate > free_space: + msg = msg + "The result is estimated to exceed the available disk space (available: %g GB, estimate: %g GB). "%(free_space/1024**3, estimate/1024**3) + + if msg != "": if ncp > NCP_LARGE: msg = msg + "The number of result points is large (%d), consider reducing the number of points. "%ncp raise ResultSizeError(msg + "To change the maximum allowed result file size, please use the option 'result_max_size'") @@ -2876,6 +2903,9 @@ def verify_result_size(first_point, current_size, previous_size, max_size, ncp, "'result_max_size' or consider reducing the number of communication " "points alternatively the number of variables to store result for."%(max_size/1024**3, time)) + if free_space <= 0: + raise ResultSizeError("Not enough disk space to continue to save the result at time t=%g."%time) + def get_result_handler(model, opts): result_handler = None