diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f6002efee1f..af116d2e362 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -31,6 +31,8 @@ New Features the option ``use_sky_coords=True`` to `FindAdaptiveMom` to apply this automatically. (#1219) - Added some utility functions that we have found useful in our test suite, and which other people might want to use to the installed galsim.utilities. (#1240) +- Added `galsim.utilities.merge_sorted` which merges two or more sorted numpy arrays faster than + the available numpy options. (#1243) Performance Improvements @@ -43,6 +45,8 @@ Performance Improvements Now it only adds it if there is not already one given by the user. (#1229) - Work around an OMP bug that disables multiprocessing on some systems when omp_get_max_threads is called. (#1241) +- The `combine_wave_lists` function is faster now, using the new `merge_sorted` function. + (#1243) Bug Fixes diff --git a/devel/time_combine_waves.py b/devel/time_combine_waves.py new file mode 100644 index 00000000000..3c21bab6f77 --- /dev/null +++ b/devel/time_combine_waves.py @@ -0,0 +1,129 @@ +# Copyright (c) 2012-2022 by the GalSim developers team on GitHub +# https://github.com/GalSim-developers +# +# This file is part of GalSim: The modular galaxy image simulation toolkit. +# https://github.com/GalSim-developers/GalSim +# +# GalSim is free software: redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the following +# conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions, and the disclaimer given in the accompanying LICENSE +# file. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the disclaimer given in the documentation +# and/or other materials provided with the distribution. +# + +import timeit +import galsim +import numpy as np + +from galsim.utilities import Profile + +def old_combine_wave_list(*args): + if len(args) == 1: + if isinstance(args[0], (list, tuple)): + args = args[0] + else: + raise TypeError("Single input argument must be a list or tuple") + + if len(args) == 0: + return np.array([], dtype=float), 0.0, np.inf + + if len(args) == 1: + obj = args[0] + return obj.wave_list, getattr(obj, 'blue_limit', 0.0), getattr(obj, 'red_limit', np.inf) + + blue_limit = np.max([getattr(obj, 'blue_limit', 0.0) for obj in args]) + red_limit = np.min([getattr(obj, 'red_limit', np.inf) for obj in args]) + if blue_limit > red_limit: + raise GalSimError("Empty wave_list intersection.") + + waves = [np.asarray(obj.wave_list) for obj in args] + waves = [w[(blue_limit <= w) & (w <= red_limit)] for w in waves] + wave_list = np.union1d(waves[0], waves[1]) + for w in waves[2:]: + wave_list = np.union1d(wave_list, w) + # Make sure both limits are included in final list + if len(wave_list) > 0 and (wave_list[0] != blue_limit or wave_list[-1] != red_limit): + wave_list = np.union1d([blue_limit, red_limit], wave_list) + return wave_list, blue_limit, red_limit + +# This edit was suggested by Jim Chiang to not merge things if they are all equal. +# (Slightly improved to use np.array_equal, rather than all(waves[0] == w).) +# It helps a lot when the inputs are equal, but not quite as much as the new C++ code. +def jims_combine_wave_list(*args): + if len(args) == 1: + if isinstance(args[0], (list, tuple)): + args = args[0] + else: + raise TypeError("Single input argument must be a list or tuple") + + if len(args) == 0: + return np.array([], dtype=float), 0.0, np.inf + + if len(args) == 1: + obj = args[0] + return obj.wave_list, getattr(obj, 'blue_limit', 0.0), getattr(obj, 'red_limit', np.inf) + + blue_limit = np.max([getattr(obj, 'blue_limit', 0.0) for obj in args]) + red_limit = np.min([getattr(obj, 'red_limit', np.inf) for obj in args]) + if blue_limit > red_limit: + raise GalSimError("Empty wave_list intersection.") + + waves = [np.asarray(obj.wave_list) for obj in args] + waves = [w[(blue_limit <= w) & (w <= red_limit)] for w in waves] + if (len(waves[0]) == len(waves[1]) + and all(np.array_equal(waves[0], w) for w in waves[1:])): + wave_list = waves[0] + else: + wave_list = np.union1d(waves[0], waves[1]) + for w in waves[2:]: + wave_list = np.union1d(wave_list, w) + # Make sure both limits are included in final list + if len(wave_list) > 0 and (wave_list[0] != blue_limit or wave_list[-1] != red_limit): + wave_list = np.union1d([blue_limit, red_limit], wave_list) + return wave_list, blue_limit, red_limit + + +sed_list = [ galsim.SED(name, wave_type='ang', flux_type='flambda') for name in + ['CWW_E_ext.sed', 'CWW_Im_ext.sed', 'CWW_Sbc_ext.sed', 'CWW_Scd_ext.sed'] ] + +ref_wave, ref_bl, ref_rl = old_combine_wave_list(sed_list) +wave_list, blue_limit, red_limit = galsim.utilities.combine_wave_list(sed_list) +np.testing.assert_array_equal(wave_list, ref_wave) +assert blue_limit == ref_bl +assert red_limit == ref_rl + +n = 10000 +t1 = min(timeit.repeat(lambda: old_combine_wave_list(sed_list), number=n)) +t2 = min(timeit.repeat(lambda: jims_combine_wave_list(sed_list), number=n)) +t3 = min(timeit.repeat(lambda: galsim.utilities.combine_wave_list(sed_list), number=n)) + +print(f'Time for {n} iterations of combine_wave_list') +print('old time = ',t1) +print('jims time = ',t2) +print('new time = ',t3) + +# Check when all wave_lists are equal. +sed_list = [ galsim.SED(name, wave_type='ang', flux_type='flambda') for name in + ['CWW_E_ext.sed', 'CWW_E_ext.sed', 'CWW_E_ext.sed', 'CWW_E_ext.sed'] ] + +ref_wave, ref_bl, ref_rl = old_combine_wave_list(sed_list) +jims_wave, jims_bl, jims_rl = jims_combine_wave_list(sed_list) +wave_list, blue_limit, red_limit = galsim.utilities.combine_wave_list(sed_list) + +np.testing.assert_array_equal(wave_list, ref_wave) +assert blue_limit == ref_bl +assert red_limit == ref_rl + +t1 = min(timeit.repeat(lambda: old_combine_wave_list(sed_list), number=n)) +t2 = min(timeit.repeat(lambda: jims_combine_wave_list(sed_list), number=n)) +t3 = min(timeit.repeat(lambda: galsim.utilities.combine_wave_list(sed_list), number=n)) + +print(f'Time for {n} iterations of combine_wave_list with identical wave_lists') +print('old time = ',t1) +print('jims time = ',t2) +print('new time = ',t3) diff --git a/docs/misc.rst b/docs/misc.rst index 04b79a43520..83d344ddecc 100644 --- a/docs/misc.rst +++ b/docs/misc.rst @@ -77,6 +77,8 @@ Utilities Related to NumPy Functions .. autofunction:: galsim.utilities.kxky +.. autofunction:: galsim.utilities.merge_sorted + Test Suite Helper Functions and Contexts ---------------------------------------- diff --git a/galsim/utilities.py b/galsim/utilities.py index b9d7854b8c1..057bec62363 100644 --- a/galsim/utilities.py +++ b/galsim/utilities.py @@ -1302,6 +1302,32 @@ def structure_function(image): return lambda r: 2*(tab(0.0, 0.0) - np.mean(tab(r*np.cos(thetas), r*np.sin(thetas)))) +def merge_sorted(arrays): + r"""Merge 2 or more numpy arrays into a single merged array. + + Each of the input arrays must be already sorted. + + This is equivalent to np.unique(np.concatenate(arrays)), but much faster. + + Parameters: + arrays: A list of the arrays to merge. + + Returns: + A single numpy.array with the merged values. + """ + try: + return _galsim.MergeSorted(list(arrays)) + except Exception as e: + # Probably the inputs are not sorted. Try to give the user more information. + for i,a in enumerate(arrays): + if not np.all(np.diff(a)>=0): + raise GalSimValueError("Not all arrays input to merge_sorted are sorted." + + "The first such case is at index %s"%i, + value=a) + else: + # That wasn't it. Just reraise the exception, converted to a GalSimError. + raise GalSimError(str(e)) from None + def combine_wave_list(*args): """Combine wave_list attributes of all objects in obj_list while respecting blue_limit and red_limit attributes. Should work with any combination of `SED`, `Bandpass`, and @@ -1313,10 +1339,6 @@ def combine_wave_list(*args): Returns: wave_list, blue_limit, red_limit """ - from .sed import SED - from .bandpass import Bandpass - from .gsobject import GSObject - from .chromatic import ChromaticObject if len(args) == 1: if isinstance(args[0], (list, tuple)): args = args[0] @@ -1337,12 +1359,11 @@ def combine_wave_list(*args): waves = [np.asarray(obj.wave_list) for obj in args] waves = [w[(blue_limit <= w) & (w <= red_limit)] for w in waves] - wave_list = np.union1d(waves[0], waves[1]) - for w in waves[2:]: - wave_list = np.union1d(wave_list, w) # Make sure both limits are included in final list - if len(wave_list) > 0 and (wave_list[0] != blue_limit or wave_list[-1] != red_limit): - wave_list = np.union1d([blue_limit, red_limit], wave_list) + if any(len(w) > 0 for w in waves): + waves.append([blue_limit, red_limit]) + wave_list = merge_sorted(waves) + return wave_list, blue_limit, red_limit def functionize(f): diff --git a/pysrc/PyBind11Helper.h b/pysrc/PyBind11Helper.h index 9ec04e71639..2e1301e5d9f 100644 --- a/pysrc/PyBind11Helper.h +++ b/pysrc/PyBind11Helper.h @@ -28,6 +28,7 @@ #include #include #include +#include namespace py = pybind11; #endif diff --git a/pysrc/Utilities.cpp b/pysrc/Utilities.cpp new file mode 100644 index 00000000000..faeeacbfda9 --- /dev/null +++ b/pysrc/Utilities.cpp @@ -0,0 +1,153 @@ +/* -*- c++ -*- + * Copyright (c) 2012-2022 by the GalSim developers team on GitHub + * https://github.com/GalSim-developers + * + * This file is part of GalSim: The modular galaxy image simulation toolkit. + * https://github.com/GalSim-developers/GalSim + * + * GalSim is free software: redistribution and use in source and binary forms, + * with or without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions, and the disclaimer given in the accompanying LICENSE + * file. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions, and the disclaimer given in the documentation + * and/or other materials provided with the distribution. + */ + +//#define DEBUGLOGGING + +#include +#include "PyBind11Helper.h" +#include "Std.h" + +namespace galsim { + + static py::array_t MergeSorted(py::list& arrays) + { + dbg<<"Start MergeSorted: "< 0); + py::array_t a0 = arrays[0].cast >(); + int n0 = a0.size(); + dbg<<"size of array 0 = "<(a0.data()); + for(int k=1; k ak = arrays[k].cast >(); + int nk = ak.size(); + dbg<<"size of array "<(a0.data()); + const double* ak_p1 = static_cast(ak.data()); + const double* a0_p2 = a0_p1 + n0; + const double* ak_p2 = ak_p1 + nk; + while (a0_p1 != a0_p2 && ak_p1 != ak_p2 && *a0_p1 == *ak_p1) { + ++a0_p1; ++ak_p1; + } + while (a0_p1 != a0_p2 && ak_p1 != ak_p2 && *(a0_p2-1) == *(ak_p2-1)) { + --a0_p2; --ak_p2; + } + int n_left = ak_p2 - ak_p1; + dbg<<"For array "<