diff --git a/.github/labeler.yml b/.github/labeler.yml index 447790b87..54f7453da 100755 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -5,3 +5,5 @@ Plotting: - Diagnostics/** Nu Osc/Xsec: - covariance/** +Documentation: + - Doc/** diff --git a/CMakeLists.txt b/CMakeLists.txt index db5dbd63f..b6d983b5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) #KS: Enable language, necessary when using CUDA enable_language(CXX) -SET(MaCh3_VERSION 1.1.0) +SET(MaCh3_VERSION 1.1.1) # Try to find CUDA find_package(CUDAToolkit) @@ -127,14 +127,14 @@ else() #KS: Consider in future __attribute__((always_inline)) see https://indico.cern.ch/event/386232/sessions/159923/attachments/771039/1057534/always_inline_performance.pdf #https://gcc.gnu.org/onlinedocs/gcc-3.3.6/gcc/Optimize-Options.html target_compile_options(MaCh3CompilerOptions INTERFACE - -O3 # Optimize code for maximum speed - -funroll-loops # Unroll loops where possible for performance - --param=max-vartrack-size=100000000 # Set maximum size of variable tracking data to avoid excessive memory usage - -finline-limit=100000000 # Increase the limit for inlining functions to improve performance - #-flto # FIXME need more testing # Enable link-time optimization (commented out for now, needs more testing) + -O3 # Optimize code for maximum speed + -finline-limit=100000000 # Increase the limit for inlining functions to improve performance + # KS: After benchmarking below didn't in fact worse performance, leave it for future tests and documentation + #-funroll-loops # Unroll loops where possible for performance + #--param=max-vartrack-size=100000000 # Set maximum size of variable tracking data to avoid excessive memory usage + #-flto # Enable link-time optimization (commented out for now, needs more testing) ) #KS: add Link-Time Optimization (LTO) - # FIXME previously it wasn't used correctly but would need more testing #target_link_libraries(MaCh3CompilerOptions INTERFACE -flto) endif() diff --git a/Doc/Doxyfile b/Doc/Doxyfile index 8a7ccceed..b79bf9b03 100644 --- a/Doc/Doxyfile +++ b/Doc/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "MaCh3" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 1.1.0 +PROJECT_NUMBER = 1.1.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -670,7 +670,7 @@ LAYOUT_FILE = # search path. Do not use file names with spaces, bibtex cannot handle them. See # also \cite for info how to create references. -CITE_BIB_FILES = +CITE_BIB_FILES = ../Doc/bibliography.bib #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages @@ -1080,7 +1080,7 @@ HTML_STYLESHEET = # see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_STYLESHEET = +HTML_EXTRA_STYLESHEET = ../Doc/MaCh3.css # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note diff --git a/Doc/MaCh3.css b/Doc/MaCh3.css new file mode 100755 index 000000000..cb675d8de --- /dev/null +++ b/Doc/MaCh3.css @@ -0,0 +1,7 @@ +/* MaCh3 doxygen HTML_EXTRA_STYLESHEET */ + +div.contents { + max-width: 100em; + margin-right: 5em; + margin-left: 5em; +} diff --git a/Doc/bibliography.bib b/Doc/bibliography.bib new file mode 100755 index 000000000..d97d04fb8 --- /dev/null +++ b/Doc/bibliography.bib @@ -0,0 +1,165 @@ +@article{gelman1996posterior, + title = {Posterior predictive assessment of model fitness via realized discrepancies}, + author = {Gelman, A. and Meng, X. L. and Stern, H.}, + journal = {Statistica Sinica}, + pages = {733--760}, + year = {1996}, + publisher = {JSTOR}, + url = {http://www.stat.columbia.edu/~gelman/research/published/A6n41.pdf} +} + +@inproceedings{Conway:2011in, + author = {Conway, J. S.}, + title = {Incorporating Nuisance Parameters in Likelihoods for Multisource Spectra}, + booktitle = {PHYSTAT 2011}, + eprint = {1103.0354}, + archivePrefix= {arXiv}, + primaryClass = {physics.data-an}, + doi = {10.5170/CERN-2011-006.115}, + pages = {115--120}, + year = {2011} +} + +@article{Arguelles:2019izp, + author = {Argüelles, Carlos A. and Schneider, Austin and Yuan, Tianlu}, + title = {A binned likelihood for stochastic models}, + eprint = {1901.04645}, + archivePrefix= {arXiv}, + primaryClass = {physics.data-an}, + doi = {10.1007/JHEP06(2019)030}, + journal = {JHEP}, + volume = {06}, + pages = {030}, + year = {2019} +} + +@article{Dembinski:2022ios, + author = {Dembinski, Hans Peter and Abdelmotteleb, Ahmed}, + title = {A new maximum-likelihood method for template fits}, + eprint = {2206.12346}, + archivePrefix= {arXiv}, + primaryClass = {stat.ME}, + doi = {10.1140/epjc/s10052-022-11019-z}, + journal = {Eur. Phys. J. C}, + volume = {82}, + number = {11}, + pages = {1043}, + year = {2022} +} + +@misc{Fang2014GewekeDiagnostics, + author = {Qijun Fang}, + title = {A Brief Introduction to Geweke’s Diagnostics}, + howpublished = {\url{https://math.arizona.edu/~piegorsch/675/GewekeDiagnostics.pdf}}, + note = {STAT 675, University of Arizona}, + year = {2014}, + month = {May}, + day = {7} +} + +@mastersthesis{karlsbakk2011, + author = {Jarle Karlsbakk}, + title = {Likelihood Inference and Comparison of Time-Inhomogeneous Markov Processes}, + school = {Stockholm University, Department of Mathematics}, + year = {2011}, + type = {Master's Thesis}, + note = {Accessed: 2024-09-08}, + url = {https://www2.math.su.se/matstat/reports/master/2011/rep2/report.pdf}, + chapter = {3.1} +} + +@article{Dunkley:2004sv, + author = {Dunkley, Joanna and Bucher, Martin and Ferreira, Pedro G. and Moodley, Kavilan and Skordis, Constantinos}, + title = {Fast and reliable MCMC for cosmological parameter estimation}, + eprint = {astro-ph/0405462}, + archivePrefix= {arXiv}, + doi = {10.1111/j.1365-2966.2004.08464.x}, + journal = {Mon. Not. Roy. Astron. Soc.}, + volume = {356}, + pages = {925--936}, + year = {2005} +} + +@manual{StanManual, + title = {Stan Reference Manual: Effective Sample Size}, + author = {{Stan Development Team}}, + year = {2018}, + note = {Version 2.18}, + url = {https://mc-stan.org/docs/2_18/reference-manual/effective-sample-size-section.html} +} + +@misc{hanson2008mcmc, + author = {Kenneth M. Hanson}, + title = {Tutorial on Markov Chain Monte Carlo}, + year = {2008}, + note = {Revised 14/05/08, LA-UR-05-5680}, + howpublished = {Presented at the 29th International Workshop on Bayesian Inference and Maximum Entropy Methods in Science and Technology, Gif-sur-Yvette, France, July 8–13, 2009}, + url = {https://kmh-lanl.hansonhub.com/talks/maxent00b.pdf}, + institution = {Los Alamos National Laboratory} +} + +@article{roberts2009adaptive, + author = {Gareth O. Roberts and Jeffrey S. Rosenthal}, + title = {Examples of Adaptive MCMC}, + journal = {Journal of Computational and Graphical Statistics}, + volume = {18}, + number = {2}, + year = {2009}, + pages = {349--367}, + url = {http://www.jstor.org/stable/25651249}, + note = {Accessed: 2024-09-08} +} + +@article{James:2004xla, + author = {James, Fred and Winkler, Matthias}, + title = {MINUIT User's Guide}, + month = {June}, + year = {2004} +} + +@book{jeffreys1998theory, + author = {H. Jeffreys}, + title = {The Theory of Probability}, + publisher = {UOP Oxford}, + year = {1998}, + doi = {10.2307/3619118} +} + +@book{press1992numerical, + author = {William H. Press and Saul A. Teukolsky and William T. Vetterling and Brian P. Flannery}, + title = {Numerical Recipes in C: The Art of Scientific Computing}, + edition = {Second Edition}, + publisher = {Cambridge University Press}, + year = {1992}, + note = {William H. Press: Harvard-Smithsonian Center for Astrophysics, Saul A. Teukolsky: Department of Physics, Cornell University, William T. Vetterling: Polaroid Corporation, Brian P. Flannery: EXXON Research and Engineering Company} +} + +@misc{gabry2024visual, + author = {Jonah Gabry and Martin Modr{\'a}k}, + title = {Visual MCMC diagnostics using the bayesplot package}, + year = {2024}, + month = {January}, + day = {30}, + howpublished = {\url{https://mc-stan.org/bayesplot/articles/visual-mcmc-diagnostics.html}}, + note = {Source: vignettes/visual-mcmc-diagnostics.Rmd} +} + +@misc{chakraborty2019estimating, + author = {Saptarshi Chakraborty and Suman K. Bhattacharya and Kshitij Khare}, + title = {Estimating accuracy of the MCMC variance estimator: a central limit theorem for batch means estimators}, + year = {2019}, + eprint = {1911.00915}, + archivePrefix= {arXiv}, + primaryClass = {stat.CO}, + url = {https://doi.org/10.48550/arXiv.1911.00915} +} + +@misc{rossetti2024batch, + author = {Manuel D. Rossetti}, + title = {The Batch Means Method}, + booktitle = {Simulation Modeling and Arena, 2nd Edition}, + year = {2024}, + chapter = {5.4}, + url = {https://rossetti.github.io/RossettiArenaBook/ch5-BatchMeansMethod.html}, + note = {Accessed: 2024-09-08} +} diff --git a/Doc/mainpage.md b/Doc/mainpage.md index 6786972f1..ec4507b90 100644 --- a/Doc/mainpage.md +++ b/Doc/mainpage.md @@ -1,9 +1,21 @@ \mainpage %MaCh3 Reference Documentation -### Introduction +## Introduction Welcome to %MaCh3! This is the Reference Guide of the MaCh3 software. You can find additional documentation on our [Wiki](https://github.com/mach3-software/MaCh3/wiki) +If you are new we recommend to start from our [Tutorial](https://github.com/mach3-software/MaCh3Validations) + +If something is unclear please contact us via +- [Mailing lists](https://www.jiscmail.ac.uk/cgi-bin/webadmin?A0=MACH3) +- [Slack](https://t2k-experiment.slack.com/archives/C06EM0C6D7W/p1705599931356889) +- [Discussions](https://github.com/mach3-software/MaCh3/discussions) + + +### About us +The Markov Chain 3 flavour is a framework born in 2013 as a Bayesian MCMC fitter for [T2K](https://t2k-experiment.org/pl/) oscillation analysis. It has now been used for multiple T2K Oscillation analyses both at the Near and Far detectors throughout the years and is also used by the DUNE and HK oscillation analysis groups as well as for joint fits between T2K and NOvA and T2K and SK's atmospheric data. + +The framework has also evolved to allow non MCMC modules to interrogate the likelihoods implemented. diff --git a/cmake/Templates/Doxyfile.in b/cmake/Templates/Doxyfile.in index b99637401..712ed3c3c 100644 --- a/cmake/Templates/Doxyfile.in +++ b/cmake/Templates/Doxyfile.in @@ -670,7 +670,7 @@ LAYOUT_FILE = # search path. Do not use file names with spaces, bibtex cannot handle them. See # also \cite for info how to create references. -CITE_BIB_FILES = +CITE_BIB_FILES = ../Doc/bibliography.bib #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages @@ -1080,7 +1080,7 @@ HTML_STYLESHEET = # see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_STYLESHEET = +HTML_EXTRA_STYLESHEET = ../Doc/MaCh3.css # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note diff --git a/manager/gpuUtils.cuh b/manager/gpuUtils.cuh index 211a8c595..6a34d9a85 100644 --- a/manager/gpuUtils.cuh +++ b/manager/gpuUtils.cuh @@ -20,9 +20,8 @@ /// KS: Need it for shared memory, there is way to use dynamic shared memory but I am lazy right now #define _BlockSize_ 1024 -//KS: TODO -// There is plenty of useful stuff here https://github.com/NVIDIA/cuda-samples/blob/master/Samples/1_Utilities/deviceQuery/deviceQuery.cpp -// We might want to port some of these utilities, for example having bool if there is unified memory etc. +/// @todo KS: There is plenty of useful stuff here https://github.com/NVIDIA/cuda-samples/blob/master/Samples/1_Utilities/deviceQuery/deviceQuery.cpp +/// @todo KS: We might want to port some of these utilities, for example having bool if there is unified memory etc. // CUDA_ERROR_CHECK is now defined in the makefile instead //#define CUDA_ERROR_CHECK diff --git a/mcmc/MCMCProcessor.cpp b/mcmc/MCMCProcessor.cpp index a32c3c088..e373223ad 100644 --- a/mcmc/MCMCProcessor.cpp +++ b/mcmc/MCMCProcessor.cpp @@ -1,6 +1,7 @@ #include "MCMCProcessor.h" #include "TChain.h" +#include "TF1.h" //Only if GPU is enabled #ifdef CUDA @@ -1332,7 +1333,7 @@ void MCMCProcessor::MakeCovariance_MP(bool Mute) { // ********************* -// Based on https://www.jstor.org/stable/25651249?seq=3, +// Based on @cite roberts2009adaptive // all credits for finding and studying it goes to Henry void MCMCProcessor::MakeSubOptimality(const int NIntervals) { // ********************* @@ -3793,8 +3794,8 @@ void MCMCProcessor::PrepareGPU_AutoCorr(const int nLags) { // ************************** -// KS: calc Effective Sample Size Following https://mc-stan.org/docs/2_18/reference-manual/effective-sample-size-section.html -// Furthermore we calculate Sampling efficiency following https://kmh-lanl.hansonhub.com/talks/maxent00b.pdf +// KS: calc Effective Sample Size Following @cite StanManual +// Furthermore we calculate Sampling efficiency following @cite hanson2008mcmc // Rule of thumb is to have efficiency above 25% void MCMCProcessor::CalculateESS(const int nLags, double** LagL) { // ************************** @@ -3814,7 +3815,7 @@ void MCMCProcessor::CalculateESS(const int nLags, double** LagL) { const double Thresholds[Nhists+1] = {1, 0.02, 0.005, 0.001, 0.0001, 0.0}; const Color_t ESSColours[Nhists] = {kGreen, kGreen+2, kYellow, kOrange, kRed}; - //KS: This histogram is inspired by the following: https://mc-stan.org/bayesplot/articles/visual-mcmc-diagnostics.html + //KS: This histogram is inspired by the following: @cite gabry2024visual TH1D **EffectiveSampleSizeHist = new TH1D*[Nhists](); for(int i = 0; i < Nhists; ++i) { @@ -3979,9 +3980,9 @@ void MCMCProcessor::BatchedAnalysis() { throw MaCh3Exception(__FILE__ , __LINE__ ); } - // Calculate variance estimator using batched means following https://arxiv.org/pdf/1911.00915.pdf see Eq. 1.2 + // Calculate variance estimator using batched means following @cite chakraborty2019estimating see Eq. 1.2 TVectorD* BatchedVariance = new TVectorD(nDraw); - //KS: The hypothesis is rejected if C > z α for a given confidence level α. If the batch means do not pass the test, Correlated is reported for the half-width on the statistical reports following https://rossetti.github.io/RossettiArenaBook/ch5-BatchMeansMethod.html alternatively for more old-school see Alexopoulos and Seila 1998 section 3.4.3 + //KS: The hypothesis is rejected if C > z α for a given confidence level α. If the batch means do not pass the test, Correlated is reported for the half-width on the statistical reports following @cite rossetti2024batch alternatively for more old-school see Alexopoulos and Seila 1998 section 3.4.3 TVectorD* C_Test_Statistics = new TVectorD(nDraw); double* OverallBatchMean = new double[nDraw](); @@ -4089,7 +4090,7 @@ void MCMCProcessor::BatchedAnalysis() { } // ************************** -// RC: Perform spectral analysis of MCMC based on http://arxiv.org/abs/astro-ph/0405462 +// RC: Perform spectral analysis of MCMC based on @cite Dunkley:2004sv void MCMCProcessor::PowerSpectrumAnalysis() { // ************************** TStopwatch clock; @@ -4112,7 +4113,7 @@ void MCMCProcessor::PowerSpectrumAnalysis() { int nPrams = nDraw; - //KS: WARNING Code is awfully slow... I know how to make it faster (GPU scream in a distant) but for now just make it for two params, bit hacky sry... + /// @todo KS: Code is awfully slow... I know how to make it faster (GPU scream in a distant) but for now just make it for two params, bit hacky sry... nPrams = 2; std::vector> k_j(nPrams, std::vector(v_size, 0.0)); @@ -4217,11 +4218,10 @@ void MCMCProcessor::PowerSpectrumAnalysis() { // ************************** // Geweke Diagnostic based on -// https://www.math.arizona.edu/~piegorsch/675/GewekeDiagnostics.pdf -// https://www2.math.su.se/matstat/reports/master/2011/rep2/report.pdf Chapter 3.1 +// @cite Fang2014GewekeDiagnostics +// @cite karlsbakk2011 Chapter 3.1 void MCMCProcessor::GewekeDiagnostic() { // ************************** - MACH3LOG_INFO("Making Geweke Diagnostic"); //KS: Up refers to upper limit we check, it stays constant, in literature it is mostly 50% thus using 0.5 for threshold @@ -4379,7 +4379,6 @@ void MCMCProcessor::GewekeDiagnostic() { OutputFile->cd(); } - // ************************** // Acceptance Probability void MCMCProcessor::AcceptanceProbabilities() { diff --git a/mcmc/MCMCProcessor.h b/mcmc/MCMCProcessor.h index e7a64e5e3..3eeb5a408 100644 --- a/mcmc/MCMCProcessor.h +++ b/mcmc/MCMCProcessor.h @@ -19,7 +19,6 @@ #include "TString.h" #include "TH1.h" #include "TH2.h" -#include "TF1.h" #include "TGraphErrors.h" #include "TVectorD.h" #include "TColor.h" @@ -41,11 +40,11 @@ //KS: Joy of forward declaration https://gieseanw.wordpress.com/2018/02/25/the-joys-of-forward-declarations-results-from-the-real-world/ class TChain; +class TF1; -// TODO -// Apply reweighted weight to plotting and Bayes Factor -// 2D Reweighing like DayaBay -// Implement Diagnostics/GetPenaltyTerm.cpp here +/// @todo KS: Apply reweighted weight to plotting and Bayes Factor. +/// @todo KS: Implement 2D reweighing like DayaBay. +/// @todo KS: Implement Diagnostics/GetPenaltyTerm.cpp here. /// KS: Enum for different covariance classes enum ParameterEnum { @@ -80,6 +79,7 @@ class MCMCProcessor { /// @param Mute Allow silencing many messages, especially important if we calculate matrix many times void MakeCovariance_MP(const bool Mute = false); /// @brief Make and Draw SubOptimality + /// @cite roberts2009adaptive void MakeSubOptimality(const int NIntervals = 10); /// @brief Reset 2D posteriors, in case we would like to calculate in again with different BurnInCut @@ -280,23 +280,30 @@ class MCMCProcessor { inline void ParamTraces(); /// @brief KS: Calculate autocorrelations supports both OpenMP and CUDA :) inline void AutoCorrelation(); - /// @brief KS: calc Effective Sample Size Following https://mc-stan.org/docs/2_18/reference-manual/effective-sample-size-section.html + /// @brief KS: calc Effective Sample Size /// @param nLags Should be the same nLags as used in AutoCorrelation() /// @param LagL Value of LagL for each dial and each Lag /// /// This function computes the Effective Sample Size (ESS) using the autocorrelations /// calculated by AutoCorrelation(). Ensure that the parameter nLags here matches /// the number of lags used in AutoCorrelation() to obtain accurate results. + /// @cite StanManual + /// @cite hanson2008mcmc + /// @cite gabry2024visual inline void CalculateESS(const int nLags, double **LagL); /// @brief Get the batched means variance estimation and variable indicating if number of batches is sensible + /// @cite chakraborty2019estimating + /// @cite rossetti2024batch inline void BatchedAnalysis(); /// @brief CW: Batched means, literally read from an array and chuck into TH1D inline void BatchedMeans(); - /// @brief Geweke Diagnostic based on https://www.math.arizona.edu/~piegorsch/675/GewekeDiagnostics.pdf + /// @brief Geweke Diagnostic based on the methods described by Fang (2014) and Karlsbakk (2011). + /// @cite Fang2014GewekeDiagnostics + /// @cite karlsbakk2011 inline void GewekeDiagnostic(); /// @brief Acceptance Probability inline void AcceptanceProbabilities(); - /// @brief RC: Perform spectral analysis of MCMC based on http://arxiv.org/abs/astro-ph/0405462 + /// @brief RC: Perform spectral analysis of MCMC based on @cite Dunkley:2004sv inline void PowerSpectrumAnalysis(); /// Name of MCMC file diff --git a/mcmc/MinuitFit.h b/mcmc/MinuitFit.h index 856ce229f..1ccbb2e8b 100644 --- a/mcmc/MinuitFit.h +++ b/mcmc/MinuitFit.h @@ -9,6 +9,7 @@ #include "Math/Functor.h" /// @brief Implementation of Minuit fitting algorithm +/// @cite James:2004xla class MinuitFit : public LikelihoodFit { public: /// @brief Constructor diff --git a/mcmc/SampleSummary.h b/mcmc/SampleSummary.h index f187529cd..6e15ad02e 100644 --- a/mcmc/SampleSummary.h +++ b/mcmc/SampleSummary.h @@ -20,8 +20,8 @@ #include "mcmc/MCMCProcessor.h" // ******************* -/// @brief Class to calculate pvalue produce posterior predictive and many fancy Bayesian stuff -/// @see For more details, visit the [Wiki](https://github.com/mach3-software/MaCh3/wiki/10.-Posterior-Predictive,-p%E2%80%90value-etc.). +/// @brief Class to calculate pvalue produce posterior predictive and many fancy Bayesian stuff \cite gelman1996posterior +/// @details For more information, visit the [Wiki](https://github.com/mach3-software/MaCh3/wiki/10.-Posterior-Predictive,-p%E2%80%90value-etc.). class SampleSummary { // ******************* public: diff --git a/mcmc/StatisticalUtils.h b/mcmc/StatisticalUtils.h index 5c19f67be..06af3164a 100644 --- a/mcmc/StatisticalUtils.h +++ b/mcmc/StatisticalUtils.h @@ -13,7 +13,7 @@ #include "manager/manager.h" // ************************** -/// @brief KS: Following H. Jeffreys. The theory of probability. UOP Oxford, 1998. DOI: 10.2307/3619118. +/// @brief KS: Following H. Jeffreys @cite jeffreys1998theory /// @param BayesFactor Obtained value of Bayes factor inline std::string GetJeffreysScale(const double BayesFactor){ // ************************** @@ -99,6 +99,7 @@ inline double GetBIC(const double llh, const int data, const int nPars){ // **************** /// @brief KS: See 14.3.10 in Numerical Recipes in C +/// @cite press1992numerical inline double GetNeffective(const int N1, const int N2) { // **************** @@ -155,14 +156,12 @@ inline void CheckBonferoniCorrectedpValue(const std::vector& Sample std::cout<<""<& EigenValues, const int TotalTarameters) { // ********************* diff --git a/samplePDF/Structs.h b/samplePDF/Structs.h index c8745c32a..6aaffc892 100644 --- a/samplePDF/Structs.h +++ b/samplePDF/Structs.h @@ -201,7 +201,7 @@ inline std::string SystType_ToString(const SystType i) { // *************************** // A handy namespace for variables extraction namespace MaCh3Utils { - // *************************** +// *************************** // *************************** /// @brief Return mass for given PDG @@ -357,11 +357,11 @@ inline int ProbsToPDG(ProbNu NuType){ /// Make an enum of the test statistic that we're using enum TestStatistic { kPoisson, //!< Standard Poisson likelihood - kBarlowBeeston, //!< Barlow-Beeston following Conway https://cds.cern.ch/record/1333496? - kIceCube, //!< Based on https://arxiv.org/abs/1901.04645 + kBarlowBeeston, //!< Barlow-Beeston following Conway \cite Conway:2011in + kIceCube, //!< Based on \cite Arguelles:2019izp kPearson, //!< Standard Pearson likelihood - kDembinskiAbdelmottele, //!< Based on arXiv:2206.12346v2 - kNTestStatistics //!< This only enumerates statistic + kDembinskiAbdelmottele, //!< Based on \cite Dembinski:2022ios + kNTestStatistics //!< Number of test statistics }; // ************************************************** diff --git a/samplePDF/samplePDFFDBase.h b/samplePDF/samplePDFFDBase.h index fb9e72383..69d2ad21a 100644 --- a/samplePDF/samplePDFFDBase.h +++ b/samplePDF/samplePDFFDBase.h @@ -42,7 +42,7 @@ class samplePDFFDBase : virtual public samplePDFBase void addData(TH2D* Data); void addData(std::vector &data); void addData(std::vector< std::vector > &data); - /// DB Multi-threaded GetLikelihood + /// @brief DB Multi-threaded GetLikelihood double GetLikelihood(); //=============================================================================== @@ -72,37 +72,26 @@ class samplePDFFDBase : virtual public samplePDFBase void SetXsecCov(covarianceXsec* xsec_cov); void SetOscCov(covarianceOsc* osc_cov); - //============================= Should be deprecated ============================= - // Note: the following functions aren't used any more! (From 14/1/2015) - KD. Just kept in for backwards compatibility in compiling, but they have no effect. - // DB 27/08/2020 The following functions shouldn't be used (Currently included for backwards compatibility) - - //DB Incredibly hardcoded - Could probably make 'LetsPrintSomeWeights' do the same functionality and remove this? - - virtual void DumpWeights(std::string outname){return;}; - // ETA - in the future it would be nice to have some generic getHIst functions - // although, this introduces a root dependence into the core code? - //TH1D *getModeHist1D(int s, int m, int style = 0); - //TH2D *getModeHist2D(int s, int m, int style = 0); - // Direct translation of getModeHist1D(s,m,style) = get1DVarHist(kPDFBinning,m,s,style) - //TH1D* get1DVarHist(ND280KinematicTypes Var1, int fModeToFill=-1, int fSampleToFill=-1, int WeightStyle=0, TAxis* Axis=0); - //TH1D* get1DVarHist(ND280KinematicTypes Var1, std::vector< std::vector > Selection, int WeightStyle=0, TAxis* Axis=0); - // Direct translation of getModeHist2D(s,m,style) = get2DVarHist(kPDFBinning,kPDFBinning,m,s,style) - //TH2D* get2DVarHist(ND280KinematicTypes Var1, ND280KinematicTypes Var2, int fModeToFill=-1, int fSampleToFill=-1, int WeightStyle=0, TAxis* XAxis=0, TAxis* YAxis=0); - //TH2D* get2DVarHist(ND280KinematicTypes Var1, ND280KinematicTypes Var2, std::vector< std::vector > Selection, int WeightStyle=0, TAxis* XAxis=0, TAxis* YAxis=0); - //TH3D* get3DVarHist(ND280KinematicTypes Var1, ND280KinematicTypes Var2, ND280KinematicTypes Var3, int kModeToFill, int kChannelToFill, int WeightStyle, TAxis* XAxis=0, TAxis* YAxis=0, TAxis* ZAxis=0); - + /// @deprecated The `DumpWeights` function is deprecated and should not be used. + /// It was kept for backwards compatibility in compiling but has no effect. + /// + /// @note This function was marked for deprecation as of 14/01/2015 by KD. + /// - DB (27/08/2020): The function is incredibly hardcoded. + /// - DB Consider using 'LetsPrintSomeWeights' to achieve the same functionality. + /// + /// @param outname The name of the output file. + virtual void DumpWeights(std::string outname) { return; }; //================================================================================ virtual void setupSplines(fdmc_base *skobj, const char *splineFile, int nutype, int signal){}; - // LW - Setup Osc + /// @brief LW - Setup Osc void virtual SetupOscCalc(double PathLength, double Density); void SetOscillator(Oscillator* Osc_); void FindEventOscBin(); protected: - //TODO - I think this will be tricky to abstract. fdmc_base will have to contain the pointers to the appropriate weights, can probably pass the number of these weights to constructor? - //DB Function to determine which weights apply to which types of samples - //pure virtual!! + /// @todo - I think this will be tricky to abstract. fdmc_base will have to contain the pointers to the appropriate weights, can probably pass the number of these weights to constructor? + /// @brief DB Function to determine which weights apply to which types of samples pure virtual!! virtual void SetupWeightPointers() = 0; splineFDBase *splineFile; @@ -135,27 +124,25 @@ class samplePDFFDBase : virtual public samplePDFBase std::vector SampleYBins; //=============================================================================== - //ETA - a function to setup and pass values to functional parameters where - //you need to pass a value to some custom reweight calc or engine + /// @brief ETA - a function to setup and pass values to functional parameters where you need to pass a value to some custom reweight calc or engine virtual void PrepFunctionalParameters(){}; - //ETA - generic function applying shifts + /// @brief ETA - generic function applying shifts virtual void applyShifts(int iSample, int iEvent){}; - //DB Function which determines if an event is selected, where Selection double looks like {{ND280KinematicTypes Var1, douuble LowBound} + /// @brief DB Function which determines if an event is selected, where Selection double looks like {{ND280KinematicTypes Var1, douuble LowBound} bool IsEventSelected(int iSample, int iEvent); bool IsEventSelected(std::vector< std::string > ParameterStr, int iSample, int iEvent); bool IsEventSelected(std::vector< std::string > ParameterStr, std::vector< std::vector > &Selection, int iSample, int iEvent); void CalcXsecNormsBins(int iSample); - //This just gets read in from a yaml file + /// @brief This just gets read in from a yaml file bool GetIsRHC() {return IsRHC;} - // Calculate the spline weight for a given event + /// @brief Calculate the spline weight for a given event double CalcXsecWeightSpline(const int iSample, const int iEvent); - // Calculate the norm weight for a given event + /// @brief Calculate the norm weight for a given event double CalcXsecWeightNorm(const int iSample, const int iEvent); - //Virtual so this can be over-riden in an experiment derived class + /// @brief Virtual so this can be over-riden in an experiment derived class virtual double CalcXsecWeightFunc(int iSample, int iEvent){return 1.0;}; - //virtual double ReturnKinematicParameter(KinematicTypes Var, int i) = 0; //Returns parameter Var for event j in sample i virtual double ReturnKinematicParameter(std::string KinematicParamter, int iSample, int iEvent) = 0; virtual double ReturnKinematicParameter(double KinematicVariable, int iSample, int iEvent) = 0; virtual std::vector ReturnKinematicParameterBinning(std::string KinematicParameter) = 0; //Returns binning for parameter Var @@ -176,13 +163,13 @@ class samplePDFFDBase : virtual public samplePDFBase void fill1DHist(); void fill2DHist(); - //DB Nice new multi-threaded function which calculates the event weights and fills the relevant bins of an array + /// @brief DB Nice new multi-threaded function which calculates the event weights and fills the relevant bins of an array #ifdef MULTITHREAD void fillArray_MP(); #endif void fillArray(); - // Helper function to reset histograms + /// @brief Helper function to reset histograms inline void ResetHistograms(); #ifndef USE_PROB3 @@ -191,18 +178,15 @@ class samplePDFFDBase : virtual public samplePDFBase //=============================================================================== //DB Variables required for GetLikelihood // - //DB Vectors to hold bin edges + /// DB Vectors to hold bin edges std::vector XBinEdges; std::vector YBinEdges; - //std::vector XVarStr; - //std::vector YVarStr; - - //DB Array to be filled after reweighting + /// DB Array to be filled after reweighting double** samplePDFFD_array; - //KS Array used for MC stat + /// KS Array used for MC stat double** samplePDFFD_array_w2; - //DB Array to be filled in AddData + /// DB Array to be filled in AddData double** samplePDFFD_data; //=============================================================================== @@ -214,10 +198,10 @@ class samplePDFFDBase : virtual public samplePDFBase //=============================================================================== //=============================================================================== - //DB Variables required for oscillation + /// DB Variables required for oscillation Oscillator *Osc = NULL; - // An axis to set binned oscillation weights + /// An axis to set binned oscillation weights TAxis *osc_binned_axis; //=============================================================================== @@ -235,18 +219,16 @@ class samplePDFFDBase : virtual public samplePDFBase //ETA - binning opt can probably go soon... int BinningOpt; - /// @var const int nDimensions - /// @brief keep track of the dimensions of the sample binning + /// keep track of the dimensions of the sample binning int nDimensions; int SampleDetID; bool IsRHC; - /// @var std::vector SplineBinnedVars - /// @brief holds "TrueNeutrinoEnergy" and the strings used for the sample binning. + /// holds "TrueNeutrinoEnergy" and the strings used for the sample binning. std::vector SplineBinnedVars; std::string samplename; - //Information to store for normalisation pars + /// Information to store for normalisation pars std::vector xsec_norms; int nFuncParams; std::vector funcParsNames; @@ -271,9 +253,4 @@ class samplePDFFDBase : virtual public samplePDFBase //=========================================================================== // - - //ETA - trying new way of doing shift parameters - // leave this for now but coming soon! - //std::vector< EnergyScale* > ShiftFunctors; - //std::vector< BaseFuncPar* > ShiftFunctors; }; diff --git a/splines/splineFDBase.cpp b/splines/splineFDBase.cpp index bb209f10a..fc5e672ae 100644 --- a/splines/splineFDBase.cpp +++ b/splines/splineFDBase.cpp @@ -14,6 +14,33 @@ splineFDBase::splineFDBase(covarianceXsec *xsec_) MonolithIndex=0; //Keeps track of the monolith index we're on when filling arrays (declared here so we can have multiple FillSampleArray calls) CoeffIndex=0; //Keeps track of our indexing the coefficient arrays [x, ybcd] } +//**************************************** +splineFDBase::~splineFDBase(){ +//**************************************** + +} +//**************************************** +void splineFDBase::cleanUpMemory() { +//**************************************** + + //Call once everything's been allocated in samplePDFSKBase, cleans up junk from memory! + //Not a huge saving but it's better than leaving everything up to the compiler + MACH3LOG_INFO("Cleaning up spline memory"); + + indexvec.clear(); + indexvec.shrink_to_fit(); + SplineFileParPrefixNames.clear(); + SplineFileParPrefixNames.shrink_to_fit(); + SplineBinning.clear(); + SplineBinning.shrink_to_fit(); + GlobalSystIndex.clear(); + GlobalSystIndex.shrink_to_fit(); + UniqueSystNames.clear(); + UniqueSystNames.shrink_to_fit(); + splinevec_Monolith.clear(); + splinevec_Monolith.shrink_to_fit(); + if(isflatarray != nullptr) delete isflatarray; +} //**************************************** bool splineFDBase::AddSample(std::string SampleName, int NSplineDimensions, int DetID, std::vector OscChanFileNames, std::vector SplineVarNames) @@ -83,12 +110,12 @@ void splineFDBase::TransferToMonolith() if(MonolithSize!=MonolithIndex){ MACH3LOG_ERROR("Something's gone wrong when we tried to get the size of your monolith"); - MACH3LOG_ERROR("MonolithSize is {}", MonolithSize); - MACH3LOG_ERROR("MonolithIndex is {}", MonolithIndex); + MACH3LOG_ERROR("MonolithSize is {}", MonolithSize); + MACH3LOG_ERROR("MonolithIndex is {}", MonolithIndex); throw; } - MACH3LOG_INFO("Now transfering splines to a monolith if size {}", MonolithSize); + MACH3LOG_INFO("Now transferring splines to a monolith if size {}", MonolithSize); uniquesplinevec_Monolith.resize(MonolithSize); weightvec_Monolith.resize(MonolithSize); @@ -351,7 +378,7 @@ void splineFDBase::BuildSampleIndexingArray(std::string SampleName) std::vector indexvec_Var3; for (int iVar3 = 0; iVar3 < (SplineBinning[iSample][iOscChan][2])->GetNbins(); iVar3++) { // Loop over third dimension - indexvec_Var3.push_back(0); //Don't start counting yet! + indexvec_Var3.push_back(0); //Don't start counting yet! } // end iVar3 loop indexvec_Var2.push_back(indexvec_Var3); } // end iVar2 loop @@ -733,13 +760,13 @@ void splineFDBase::PrepForReweight() std::cout << "Number of combinations of Sample, OscChan, Syst and Mode which have entirely flat response:" << nCombinations_FlatSplines << " / " << nCombinations_All << std::endl; } +//**************************************** // Rather work with spline coefficients in the splines, let's copy ND and use coefficient arrays void splineFDBase::getSplineCoeff_SepMany(int splineindex, _float_* &xArray, _float_* &manyArray){ - +//**************************************** // Initialise all arrays to 1.0 int nPoints; - //No point evalutating a flat spline - + //No point evaluating a flat spline nPoints = splinevec_Monolith[splineindex]->GetNp(); for (int i = 0; i < nPoints; i++) { @@ -781,9 +808,9 @@ void splineFDBase::getSplineCoeff_SepMany(int splineindex, _float_* &xArray, _fl splinevec_Monolith[splineindex] = NULL; } +//**************************************** //ETA - this may need to be virtual and then we can define this in the experiment. //Equally though could just use KinematicVariable to map back -//**************************************** std::string splineFDBase::getDimLabel(int iSample, unsigned int Axis) //**************************************** { @@ -795,7 +822,6 @@ std::string splineFDBase::getDimLabel(int iSample, unsigned int Axis) return DimensionLabels.at(iSample).at(Axis); } - //Returns sample index in int splineFDBase::getSampleIndex(std::string SampleName){ int SampleIndex = -1; diff --git a/splines/splineFDBase.h b/splines/splineFDBase.h index 990f00532..599c37cfb 100644 --- a/splines/splineFDBase.h +++ b/splines/splineFDBase.h @@ -11,40 +11,20 @@ class splineFDBase : public SplineBase { //ETA - do all of these functions and members actually need to be public? public: - //splineFDBase(const char *spline, int nutype, int nevents, int DetID, covarianceXsec* xsec_cov = NULL); // constructor for etrue-var1 splines - //splineFDBase(const char *spline, int nutype, int nevents, double BinningOpt, int DetID, covarianceXsec* xsec_cov = NULL); // constructor for etrue-var1-var2 splines - splineFDBase(covarianceXsec *xsec_ = NULL); - virtual ~splineFDBase(){}; - void SetupSplines(); - void SetupSplines(int BinningOpt); + splineFDBase(covarianceXsec *xsec_ = NULL); + /// @brief Destructor + /// @todo it need some love + virtual ~splineFDBase(); /// @brief CW: This Eval should be used when using two separate x,{y,a,b,c,d} arrays to store the weights; probably the best one here! Same thing but pass parameter spline segments instead of variations void Evaluate(); //Spline Monolith things - //Essential methods used externally - //Move these to splineFDBase in core - bool AddSample(std::string SampleName, int BinningOpt, int DetID, std::vector OscChanFileNames, std::vector SplineVarNames); - void TransferToMonolith(); - void cleanUpMemory(){ - //Call once everything's been allocated in samplePDFSKBase, cleans up junk from memory! - //Not a huge saving but it's better than leaving everything up to the compiler - std::cout<<"Cleaning up spline memory"< OscChanFileNames, std::vector SplineVarNames); + void TransferToMonolith(); + void cleanUpMemory(); //Have to define this in your own class virtual void FillSampleArray(std::string SampleName, std::vector OscChanFileNames)=0; @@ -54,7 +34,6 @@ class splineFDBase : public SplineBase { std::vector FindSplineBinning(std::string FileName, std::string SampleName); int CountNumberOfLoadedSplines(bool NonFlat=false, int Verbosity=0); - //int getNDim(int BinningOpt); std::string getDimLabel(int iSample, unsigned int Axis); std::vector> DimensionLabels; @@ -74,7 +53,6 @@ class splineFDBase : public SplineBase { return &weightvec_Monolith[index]; } - protected: inline void FindSplineSegment() override; inline void CalcSplineWeights() override; @@ -90,17 +68,17 @@ class splineFDBase : public SplineBase { std::vector nSplineParams; std::vector nOscChans; - //This holds the global spline index and is used to grab the current parameter value - // to evaluate splines at. Each internal vector will be of size of the number of spline - // systematics which affect that sample. - std::vector< std::vector > GlobalSystIndex; + /// This holds the global spline index and is used to grab the current parameter value + /// to evaluate splines at. Each internal vector will be of size of the number of spline + /// systematics which affect that sample. + std::vector< std::vector > GlobalSystIndex; std::vector< std::vector< std::vector > > SplineBinning; /// std::vector< std::vector > SplineFileParPrefixNames; - //A vector of vectors of the spline modes that a systematic applies to - //This gets compared against the event mode to figure out if a syst should - //apply to an event or not - std::vector< std::vector< std::vector > > SplineModeVecs; + /// A vector of vectors of the spline modes that a systematic applies to + /// This gets compared against the event mode to figure out if a syst should + /// apply to an event or not + std::vector< std::vector< std::vector > > SplineModeVecs; int nUniqueSysts; std::vector UniqueSystNames; @@ -114,18 +92,21 @@ class splineFDBase : public SplineBase { std::vector coeffindexvec; std::vectoruniquecoeffindices; //Unique coefficient indices - std::vector splinevec_Monolith; + std::vector splinevec_Monolith; - int MonolithSize; - int MonolithIndex; - int CoeffIndex; + int MonolithSize; + int MonolithIndex; + int CoeffIndex; - //Probably need to clear these arrays up at some point - _float_ *xVarArray; - bool *isflatarray; // Need to keep track of which splines are flat and which aren't - _float_ *xcoeff_arr; //x coefficients for each spline - _float_ *manycoeff_arr; //ybcd coefficients for each spline + //Probably need to clear these arrays up at some point + _float_ *xVarArray; + /// Need to keep track of which splines are flat and which aren't + bool *isflatarray; + /// x coefficients for each spline + _float_ *xcoeff_arr; + /// ybcd coefficients for each spline + _float_ *manycoeff_arr; - std::vector weightvec_Monolith; - std::vector uniquesplinevec_Monolith; + std::vector weightvec_Monolith; + std::vector uniquesplinevec_Monolith; };