Skip to content

Commit

Permalink
Changed expected size calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
jrenaud90 committed Oct 3, 2023
1 parent 44eb9ca commit b93c410
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 764 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ New Features:
- `CySolver.change_y0_pointer` - Changes the y0 pointer without having to pass memoryviews.
- `CySolver.change_t_eval_pointer` - Changes the t_eval pointer without having to pass memoryviews.

Performance:
- Improved how `CySolver` and `cyrk_ode` expected size is calculated and how much it grows with each concat.

Other Changes:
- Moved some common constants for both `CySolver` and `cyrk_ode` out of their files and into `cy.common`.

#### v0.8.2 (2023-09-25)

New Features:
Expand Down
15 changes: 15 additions & 0 deletions CyRK/cy/common.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@ from libcpp cimport bool as bool_cpp_t

from CyRK.rk.rk cimport double_numeric

cdef double SAFETY
cdef double MIN_FACTOR
cdef double MAX_FACTOR
cdef double MAX_STEP
cdef double INF
cdef double EPS
cdef double EPS_10
cdef double EPS_100
cdef Py_ssize_t MAX_INT_SIZE
cdef Py_ssize_t MAX_SIZET_SIZE
cdef double MIN_ARRAY_PREALLOCATE_SIZE
cdef double MAX_ARRAY_PREALLOCATE_SIZE
cdef double ARRAY_PREALLOC_TABS_SCALE
cdef double ARRAY_PREALLOC_RTOL_SCALE

cdef void interpolate(
double* time_domain_full,
double* time_domain_reduced,
Expand Down
22 changes: 21 additions & 1 deletion CyRK/cy/common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,30 @@
# cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True, initializedcheck=False
import cython

from libc.math cimport INFINITY as INF
from libc.float cimport DBL_EPSILON as EPS
from libc.stdint cimport SIZE_MAX, INT32_MAX
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free

from CyRK.array.interp cimport interp_array_ptr, interp_complex_array_ptr

# # Integration Constants
# Multiply steps computed from asymptotic behaviour of errors by this.
cdef double SAFETY = 0.9
cdef double MIN_FACTOR = 0.2 # Minimum allowed decrease in a step size.
cdef double MAX_FACTOR = 10. # Maximum allowed increase in a step size.
cdef double MAX_STEP = INF
cdef double EPS_10 = EPS * 10.
cdef double EPS_100 = EPS * 100.
cdef Py_ssize_t MAX_SIZET_SIZE = <Py_ssize_t>(0.95 * SIZE_MAX)
cdef Py_ssize_t MAX_INT_SIZE = <Py_ssize_t>(0.95 * INT32_MAX)

# # Memory management constants
cdef double MIN_ARRAY_PREALLOCATE_SIZE = 100.
cdef double MAX_ARRAY_PREALLOCATE_SIZE = 1_000_000.
cdef double ARRAY_PREALLOC_TABS_SCALE = 1000. # A delta_t_abs higher than this value will start to grow array size.
cdef double ARRAY_PREALLOC_RTOL_SCALE = 1.0e-6 # A rtol lower than this value will start to grow array size.


cdef void interpolate(
double* time_domain_full,
double* time_domain_reduced,
Expand Down
66 changes: 40 additions & 26 deletions CyRK/cy/cyrk.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,9 @@ from libcpp cimport bool as bool_cpp_t
from libc.math cimport sqrt, fabs, nextafter, fmax, fmin, NAN

from CyRK.rk.rk cimport double_numeric, find_rk_properties, populate_rk_arrays
from CyRK.cy.common cimport interpolate

# # Integration Constants
# Multiply steps computed from asymptotic behaviour of errors by this.
cdef double SAFETY = 0.9
cdef double MIN_FACTOR = 0.2 # Minimum allowed decrease in a step size.
cdef double MAX_FACTOR = 10. # Maximum allowed increase in a step size.
cdef double MAX_STEP = np.inf
cdef double INF = np.inf
cdef double EPS = np.finfo(np.float64).eps
cdef double EPS_10 = EPS * 10.
cdef double EPS_100 = EPS * 100.
cdef Py_ssize_t MAX_INT_SIZE = int(0.95 * sys.maxsize)
from CyRK.cy.common cimport interpolate, SAFETY, MIN_FACTOR, MAX_FACTOR, MAX_STEP, INF, EPS, EPS_10, EPS_100, \
MAX_INT_SIZE, MIN_ARRAY_PREALLOCATE_SIZE, MAX_ARRAY_PREALLOCATE_SIZE, \
ARRAY_PREALLOC_TABS_SCALE, ARRAY_PREALLOC_RTOL_SCALE


cdef double cabs(
Expand Down Expand Up @@ -288,20 +278,42 @@ def cyrk_ode(
else:
max_num_steps = min(max_num_steps, MAX_INT_SIZE)


# Expected size of output arrays.

cdef Py_ssize_t expected_size_to_use, num_concats, current_size
cdef double temp_expected_size
cdef Py_ssize_t expected_size_to_use, num_concats, new_size
if expected_size == 0:
# CySolver will attempt to guess the best size for the output arrays.
temp_expected_size = t_delta_abs * fmax(1., (1.e-6 / rtol_min))
temp_expected_size = fmax(temp_expected_size, 500.)
temp_expected_size = fmin(temp_expected_size, 1_000_000.)
# CySolver will attempt to guess on a best size for the arrays.
# Assume a safe bet on CPU cache size is 300KB.
temp_expected_size = 300_000.
# Doubles are (most likely) 8 bytes.
temp_expected_size = temp_expected_size / sizeof(double)
# Then there are going to be (y + num_extra) number of doubles
if capture_extra:
temp_expected_size = temp_expected_size / (y_size_dbl + <double>num_extra)
else:
temp_expected_size = temp_expected_size / y_size_dbl
# If t_delta_abs is very large or rtol is very small, then we may need more.
temp_expected_size = \
fmax(
temp_expected_size,
fmax(
fmax(1., t_delta_abs / ARRAY_PREALLOC_TABS_SCALE),
fmax(1., (ARRAY_PREALLOC_RTOL_SCALE / rtol_min))
)
)
# Fix values that are very small/large
temp_expected_size = fmax(temp_expected_size, MIN_ARRAY_PREALLOCATE_SIZE)
temp_expected_size = fmin(temp_expected_size, MAX_ARRAY_PREALLOCATE_SIZE)
# Store result as int
expected_size_to_use = <Py_ssize_t>temp_expected_size
else:
expected_size_to_use = <Py_ssize_t>expected_size
# This variable tracks how many times the storage arrays have been appended.
# It starts at 1 since there is at least one storage array present.
num_concats = 1
# Set the current size to the expected size.
# `expected_size` should never change but current might grow if expected size is not large enough.
current_size = expected_size_to_use
num_concats = 1

# Initialize live variable arrays
cdef double_numeric* y_old_ptr
Expand Down Expand Up @@ -824,26 +836,28 @@ def cyrk_ode(
dy_old_ptr[i] = dy_ptr[i]

# Store data
if len_t >= (num_concats * expected_size_to_use):
if len_t >= current_size:
# There is more data then we have room in our arrays.
# Build new arrays with more space.
# OPT: Note this is an expensive operation.
num_concats += 1
new_size = num_concats * expected_size_to_use

# Grow the array by 50% its current value
current_size = <Py_ssize_t>(<double>current_size * (1.5))

time_domain_array_ptr = <double *> PyMem_Realloc(time_domain_array_ptr,
new_size * sizeof(double))
current_size * sizeof(double))
if not time_domain_array_ptr:
raise MemoryError()

y_results_array_ptr = <double_numeric *> PyMem_Realloc(y_results_array_ptr,
y_size * new_size * sizeof(double_numeric))
y_size * current_size * sizeof(double_numeric))
if not y_results_array_ptr:
raise MemoryError()

if capture_extra:
extra_array_ptr = <double_numeric *> PyMem_Realloc(extra_array_ptr,
num_extra * new_size * sizeof(double_numeric))
num_extra * current_size * sizeof(double_numeric))
if not extra_array_ptr:
raise MemoryError()

Expand Down
2 changes: 1 addition & 1 deletion CyRK/cy/cysolver.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ cdef class CySolver:
cdef double* atols_ptr
cdef double first_step, max_step
cdef Py_ssize_t max_num_steps
cdef Py_ssize_t expected_size, num_concats,
cdef Py_ssize_t expected_size, current_size, num_concats
cdef bool_cpp_t recalc_first_step

# -- Interpolation info
Expand Down
86 changes: 51 additions & 35 deletions CyRK/cy/cysolver.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,9 @@ cimport numpy as np
np.import_array()

from CyRK.rk.rk cimport find_rk_properties, populate_rk_arrays
from CyRK.cy.common cimport interpolate

# # Integration Constants
# Multiply steps computed from asymptotic behaviour of errors by this.
cdef double SAFETY = 0.9
cdef double MIN_FACTOR = 0.2 # Minimum allowed decrease in a step size.
cdef double MAX_FACTOR = 10. # Maximum allowed increase in a step size.
cdef double MAX_STEP = np.inf
cdef double INF = np.inf
cdef double EPS = np.finfo(np.float64).eps
cdef double EPS_10 = EPS * 10.
cdef double EPS_100 = EPS * 100.
cdef Py_ssize_t MAX_INT_SIZE = int(0.95 * sys.maxsize)
from CyRK.cy.common cimport interpolate, SAFETY, MIN_FACTOR, MAX_FACTOR, MAX_STEP, INF, EPS, EPS_10, EPS_100, \
MAX_INT_SIZE, MIN_ARRAY_PREALLOCATE_SIZE, MAX_ARRAY_PREALLOCATE_SIZE, \
ARRAY_PREALLOC_TABS_SCALE, ARRAY_PREALLOC_RTOL_SCALE

cdef (double, double) EMPTY_T_SPAN = (NAN, NAN)

Expand Down Expand Up @@ -405,17 +395,54 @@ cdef class CySolver:
raise AttributeError('Negative number of max steps provided.')
else:
self.max_num_steps = min(max_num_steps, MAX_INT_SIZE)

# Determine extra outputs
self.capture_extra = capture_extra
# To avoid memory access violations we need to set the extra output arrays no matter if they are used.
# If not used, just set them to size zero.
if self.capture_extra:
if num_extra <= 0:
self.status = -8
raise AttributeError('Capture extra set to True, but number of extra set to 0 (or negative).')
self.num_extra = num_extra
else:
# Even though we are not capturing extra, we still want num_extra to be equal to 1 so that nan arrays
# are properly initialized
self.num_extra = 1

# Expected size of output arrays.
cdef double temp_expected_size
if expected_size == 0:
# CySolver will attempt to guess on a best size for the arrays.
temp_expected_size = 100. * self.t_delta_abs * fmax(1., (1.e-6 / rtol_min))
temp_expected_size = fmax(temp_expected_size, 100.)
temp_expected_size = fmin(temp_expected_size, 10_000_000.)
# Assume a safe bet on CPU cache size is 300KB.
temp_expected_size = 300_000.
# Doubles are (most likely) 8 bytes.
temp_expected_size = temp_expected_size / sizeof(double)
# Then there are going to be (y + num_extra) number of doubles
if self.capture_extra:
temp_expected_size = temp_expected_size / (self.y_size_dbl + <double>self.num_extra)
else:
temp_expected_size = temp_expected_size / self.y_size_dbl
# If t_delta_abs is very large or rtol is very small, then we may need more.
temp_expected_size = \
fmax(
temp_expected_size,
fmax(
fmax(1., self.t_delta_abs / ARRAY_PREALLOC_TABS_SCALE),
fmax(1., (ARRAY_PREALLOC_RTOL_SCALE / rtol_min))
)
)
# Fix values that are very small/large
temp_expected_size = fmax(temp_expected_size, MIN_ARRAY_PREALLOCATE_SIZE)
temp_expected_size = fmin(temp_expected_size, MAX_ARRAY_PREALLOCATE_SIZE)
# Store result as int
self.expected_size = <Py_ssize_t>temp_expected_size
else:
self.expected_size = <Py_ssize_t>expected_size
# Set the current size to the expected size.
# `expected_size` should never change but current might grow if expected size is not large enough.
self.current_size = self.expected_size

# This variable tracks how many times the storage arrays have been appended.
# It starts at 1 since there is at least one storage array present.
self.num_concats = 1
Expand Down Expand Up @@ -451,20 +478,6 @@ cdef class CySolver:
if not self.dy_old_ptr:
raise MemoryError()

# Determine extra outputs
self.capture_extra = capture_extra
# To avoid memory access violations we need to set the extra output arrays no matter if they are used.
# If not used, just set them to size zero.
if self.capture_extra:
if num_extra <= 0:
self.status = -8
raise AttributeError('Capture extra set to True, but number of extra set to 0 (or negative).')
self.num_extra = num_extra
else:
# Even though we are not capturing extra, we still want num_extra to be equal to 1 so that nan arrays
# are properly initialized
self.num_extra = 1

self.extra_output_init_ptr = <double *> PyMem_Malloc(self.num_extra * sizeof(double))
if not self.extra_output_init_ptr:
raise MemoryError()
Expand Down Expand Up @@ -650,6 +663,7 @@ cdef class CySolver:
self.step_size = self.first_step

# Reset output storage
self.current_size = self.expected_size
self.num_concats = 1

# Reset storage variables to clear any old solutions and to avoid access violations if solve() is not called.
Expand Down Expand Up @@ -1050,26 +1064,28 @@ cdef class CySolver:
break

# Store data
if self.len_t >= (self.num_concats * self.expected_size):
if self.len_t >= self.current_size:
# There is more data then we have room in our arrays.
# Build new arrays with more space.
# OPT: Note this is an expensive operation.
self.num_concats += 1
new_size = self.num_concats * self.expected_size

# Grow the array by 50% its current value
self.current_size = <Py_ssize_t>(<double>self.current_size * (1.5))

time_domain_array_ptr = <double *> PyMem_Realloc(time_domain_array_ptr,
new_size * sizeof(double))
self.current_size * sizeof(double))
if not time_domain_array_ptr:
raise MemoryError()

y_results_array_ptr = <double *> PyMem_Realloc(y_results_array_ptr,
self.y_size * new_size * sizeof(double))
self.y_size * self.current_size * sizeof(double))
if not y_results_array_ptr:
raise MemoryError()

if self.capture_extra:
extra_array_ptr = <double *> PyMem_Realloc(extra_array_ptr,
self.num_extra * new_size * sizeof(double))
self.num_extra * self.current_size * sizeof(double))
if not extra_array_ptr:
raise MemoryError()

Expand Down
Loading

0 comments on commit b93c410

Please sign in to comment.