Implement .bib bibliography within doxygen and minor fixes

mach3-software · Sep 9, 2024 · 5b52865 · 5b52865
1 parent 35e5001
commit 5b52865
Show file tree

Hide file tree

Showing 17 changed files with 335 additions and 160 deletions.
diff --git a/.github/labeler.yml b/.github/labeler.yml
@@ -5,3 +5,5 @@ Plotting:
   - Diagnostics/**
 Nu Osc/Xsec:
   - covariance/**
+Documentation:
+  - Doc/**
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
 
 #KS: Enable language, necessary when using CUDA
 enable_language(CXX)
-SET(MaCh3_VERSION 1.1.0)
+SET(MaCh3_VERSION 1.1.1)
 
 # Try to find CUDA
 find_package(CUDAToolkit)
@@ -127,14 +127,14 @@ else()
   #KS: Consider in future __attribute__((always_inline)) see https://indico.cern.ch/event/386232/sessions/159923/attachments/771039/1057534/always_inline_performance.pdf
   #https://gcc.gnu.org/onlinedocs/gcc-3.3.6/gcc/Optimize-Options.html
   target_compile_options(MaCh3CompilerOptions INTERFACE
-    -O3                                  # Optimize code for maximum speed
-    -funroll-loops                       # Unroll loops where possible for performance
-    --param=max-vartrack-size=100000000  # Set maximum size of variable tracking data to avoid excessive memory usage
-    -finline-limit=100000000             # Increase the limit for inlining functions to improve performance
-    #-flto # FIXME need more testing     # Enable link-time optimization (commented out for now, needs more testing)
+    -O3                                   # Optimize code for maximum speed
+    -finline-limit=100000000              # Increase the limit for inlining functions to improve performance
+    # KS: After benchmarking, the flags below did not improve (in fact worsened) performance; keep them here for future tests and documentation
+    #-funroll-loops                       # Unroll loops where possible for performance
+    #--param=max-vartrack-size=100000000  # Set maximum size of variable tracking data to avoid excessive memory usage
+    #-flto                                # Enable link-time optimization (commented out for now, needs more testing)
   )
   #KS: add Link-Time Optimization (LTO)
-  # FIXME previously it wasn't used correctly but would need more testing
   #target_link_libraries(MaCh3CompilerOptions INTERFACE -flto)
 endif()
 

diff --git a/Doc/Doxyfile b/Doc/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "MaCh3"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 1.1.0
+PROJECT_NUMBER         = 1.1.1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -670,7 +670,7 @@ LAYOUT_FILE            =
 # search path. Do not use file names with spaces, bibtex cannot handle them. See
 # also \cite for info how to create references.
 
-CITE_BIB_FILES         =
+CITE_BIB_FILES         = ../Doc/bibliography.bib
 
 #---------------------------------------------------------------------------
 # Configuration options related to warning and progress messages
@@ -1080,7 +1080,7 @@ HTML_STYLESHEET        =
 # see the documentation.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_EXTRA_STYLESHEET  =
+HTML_EXTRA_STYLESHEET  = ../Doc/MaCh3.css
 
 # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
 # other source files which should be copied to the HTML output directory. Note

diff --git a/Doc/MaCh3.css b/Doc/MaCh3.css
@@ -0,0 +1,7 @@
+/* MaCh3 doxygen HTML_EXTRA_STYLESHEET */
+
+div.contents {
+    max-width: 100em;
+    margin-right: 5em;
+    margin-left: 5em;
+}
diff --git a/Doc/bibliography.bib b/Doc/bibliography.bib
@@ -0,0 +1,165 @@
+@article{gelman1996posterior,
+  title    = {Posterior predictive assessment of model fitness via realized discrepancies},
+  author   = {Gelman, A. and Meng, X. L. and Stern, H.},
+  journal  = {Statistica Sinica},
+  pages    = {733--760},
+  year     = {1996},
+  publisher = {JSTOR},
+  url      = {http://www.stat.columbia.edu/~gelman/research/published/A6n41.pdf}
+}
+
+@inproceedings{Conway:2011in,
+  author       = {Conway, J. S.},
+  title        = {Incorporating Nuisance Parameters in Likelihoods for Multisource Spectra},
+  booktitle    = {PHYSTAT 2011},
+  eprint       = {1103.0354},
+  archivePrefix= {arXiv},
+  primaryClass = {physics.data-an},
+  doi          = {10.5170/CERN-2011-006.115},
+  pages        = {115--120},
+  year         = {2011}
+}
+
+@article{Arguelles:2019izp,
+  author       = {Arg{\"u}elles, Carlos A. and Schneider, Austin and Yuan, Tianlu},
+  title        = {A binned likelihood for stochastic models},
+  eprint       = {1901.04645},
+  archivePrefix= {arXiv},
+  primaryClass = {physics.data-an},
+  doi          = {10.1007/JHEP06(2019)030},
+  journal      = {JHEP},
+  volume       = {06},
+  pages        = {030},
+  year         = {2019}
+}
+
+@article{Dembinski:2022ios,
+  author       = {Dembinski, Hans Peter and Abdelmotteleb, Ahmed},
+  title        = {A new maximum-likelihood method for template fits},
+  eprint       = {2206.12346},
+  archivePrefix= {arXiv},
+  primaryClass = {stat.ME},
+  doi          = {10.1140/epjc/s10052-022-11019-z},
+  journal      = {Eur. Phys. J. C},
+  volume       = {82},
+  number       = {11},
+  pages        = {1043},
+  year         = {2022}
+}
+
+@misc{Fang2014GewekeDiagnostics,
+  author       = {Qijun Fang},
+  title        = {A Brief Introduction to Geweke's Diagnostics},
+  howpublished = {\url{https://math.arizona.edu/~piegorsch/675/GewekeDiagnostics.pdf}},
+  note         = {STAT 675, University of Arizona},
+  year         = {2014},
+  month        = {May},
+  day          = {7}
+}
+
+@mastersthesis{karlsbakk2011,
+  author       = {Jarle Karlsbakk},
+  title        = {Likelihood Inference and Comparison of Time-Inhomogeneous Markov Processes},
+  school       = {Stockholm University, Department of Mathematics},
+  year         = {2011},
+  type         = {Master's Thesis},
+  note         = {Accessed: 2024-09-08},
+  url          = {https://www2.math.su.se/matstat/reports/master/2011/rep2/report.pdf},
+  chapter      = {3.1}
+}
+
+@article{Dunkley:2004sv,
+  author       = {Dunkley, Joanna and Bucher, Martin and Ferreira, Pedro G. and Moodley, Kavilan and Skordis, Constantinos},
+  title        = {Fast and reliable MCMC for cosmological parameter estimation},
+  eprint       = {astro-ph/0405462},
+  archivePrefix= {arXiv},
+  doi          = {10.1111/j.1365-2966.2004.08464.x},
+  journal      = {Mon. Not. Roy. Astron. Soc.},
+  volume       = {356},
+  pages        = {925--936},
+  year         = {2005}
+}
+
+@manual{StanManual,
+  title        = {Stan Reference Manual: Effective Sample Size},
+  author       = {{Stan Development Team}},
+  year         = {2018},
+  note         = {Version 2.18},
+  url          = {https://mc-stan.org/docs/2_18/reference-manual/effective-sample-size-section.html}
+}
+
+@misc{hanson2008mcmc,
+  author       = {Kenneth M. Hanson},
+  title        = {Tutorial on Markov Chain Monte Carlo},
+  year         = {2008},
+  note         = {Revised 14/05/08, LA-UR-05-5680},
+  howpublished = {Presented at the 29th International Workshop on Bayesian Inference and Maximum Entropy Methods in Science and Technology, Gif-sur-Yvette, France, July 8--13, 2009},
+  url          = {https://kmh-lanl.hansonhub.com/talks/maxent00b.pdf},
+  institution  = {Los Alamos National Laboratory}
+}
+
+@article{roberts2009adaptive,
+  author       = {Gareth O. Roberts and Jeffrey S. Rosenthal},
+  title        = {Examples of Adaptive MCMC},
+  journal      = {Journal of Computational and Graphical Statistics},
+  volume       = {18},
+  number       = {2},
+  year         = {2009},
+  pages        = {349--367},
+  url          = {http://www.jstor.org/stable/25651249},
+  note         = {Accessed: 2024-09-08}
+}
+
+@article{James:2004xla,
+  author       = {James, Fred and Winkler, Matthias},
+  title        = {MINUIT User's Guide},
+  month        = {June},
+  year         = {2004}
+}
+
+@book{jeffreys1998theory,
+  author       = {H. Jeffreys},
+  title        = {The Theory of Probability},
+  publisher    = {UOP Oxford},
+  year         = {1998},
+  doi          = {10.2307/3619118}
+}
+
+@book{press1992numerical,
+  author       = {William H. Press and Saul A. Teukolsky and William T. Vetterling and Brian P. Flannery},
+  title        = {Numerical Recipes in C: The Art of Scientific Computing},
+  edition      = {Second Edition},
+  publisher    = {Cambridge University Press},
+  year         = {1992},
+  note         = {William H. Press: Harvard-Smithsonian Center for Astrophysics, Saul A. Teukolsky: Department of Physics, Cornell University, William T. Vetterling: Polaroid Corporation, Brian P. Flannery: EXXON Research and Engineering Company}
+}
+
+@misc{gabry2024visual,
+  author       = {Jonah Gabry and Martin Modr{\'a}k},
+  title        = {Visual MCMC diagnostics using the bayesplot package},
+  year         = {2024},
+  month        = {January},
+  day          = {30},
+  howpublished = {\url{https://mc-stan.org/bayesplot/articles/visual-mcmc-diagnostics.html}},
+  note         = {Source: vignettes/visual-mcmc-diagnostics.Rmd}
+}
+
+@misc{chakraborty2019estimating,
+  author       = {Saptarshi Chakraborty and Suman K. Bhattacharya and Kshitij Khare},
+  title        = {Estimating accuracy of the MCMC variance estimator: a central limit theorem for batch means estimators},
+  year         = {2019},
+  eprint       = {1911.00915},
+  archivePrefix= {arXiv},
+  primaryClass = {stat.CO},
+  url          = {https://doi.org/10.48550/arXiv.1911.00915}
+}
+
+@misc{rossetti2024batch,
+  author       = {Manuel D. Rossetti},
+  title        = {The Batch Means Method},
+  booktitle    = {Simulation Modeling and Arena, 2nd Edition},
+  year         = {2024},
+  chapter      = {5.4},
+  url          = {https://rossetti.github.io/RossettiArenaBook/ch5-BatchMeansMethod.html},
+  note         = {Accessed: 2024-09-08}
+}
diff --git a/Doc/mainpage.md b/Doc/mainpage.md
@@ -1,9 +1,21 @@
 \mainpage %MaCh3 Reference Documentation
 
-### Introduction
+## Introduction
 Welcome to %MaCh3!
 
 This is the Reference Guide of the MaCh3 software.
 
 You can find additional documentation on our [Wiki](https://github.com/mach3-software/MaCh3/wiki)
 
+If you are new, we recommend starting with our [Tutorial](https://github.com/mach3-software/MaCh3Validations)
+
+If something is unclear please contact us via
+- [Mailing lists](https://www.jiscmail.ac.uk/cgi-bin/webadmin?A0=MACH3)
+- [Slack](https://t2k-experiment.slack.com/archives/C06EM0C6D7W/p1705599931356889)
+- [Discussions](https://github.com/mach3-software/MaCh3/discussions)
+
+
+### About us
+The Markov Chain 3 flavour is a framework born in 2013 as a Bayesian MCMC fitter for [T2K](https://t2k-experiment.org/pl/) oscillation analysis. It has now been used for multiple T2K Oscillation analyses both at the Near and Far detectors throughout the years and is also used by the DUNE and HK oscillation analysis groups as well as for joint fits between T2K and NOvA and T2K and SK's atmospheric data.
+
+The framework has also evolved to allow non-MCMC modules to interrogate the implemented likelihoods.
diff --git a/cmake/Templates/Doxyfile.in b/cmake/Templates/Doxyfile.in
@@ -670,7 +670,7 @@ LAYOUT_FILE            =
 # search path. Do not use file names with spaces, bibtex cannot handle them. See
 # also \cite for info how to create references.
 
-CITE_BIB_FILES         =
+CITE_BIB_FILES         = ../Doc/bibliography.bib
 
 #---------------------------------------------------------------------------
 # Configuration options related to warning and progress messages
@@ -1080,7 +1080,7 @@ HTML_STYLESHEET        =
 # see the documentation.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_EXTRA_STYLESHEET  =
+HTML_EXTRA_STYLESHEET  = ../Doc/MaCh3.css
 
 # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
 # other source files which should be copied to the HTML output directory. Note

diff --git a/manager/gpuUtils.cuh b/manager/gpuUtils.cuh
@@ -20,9 +20,8 @@
 /// KS: Need it for shared memory, there is way to use dynamic shared memory but I am lazy right now
 #define _BlockSize_ 1024
 
-//KS: TODO
-// There is plenty of useful stuff here https://github.com/NVIDIA/cuda-samples/blob/master/Samples/1_Utilities/deviceQuery/deviceQuery.cpp
-// We might want to port some of these utilities, for example having bool if there is unified memory etc.
+/// @todo KS: There is plenty of useful stuff here https://github.com/NVIDIA/cuda-samples/blob/master/Samples/1_Utilities/deviceQuery/deviceQuery.cpp
+/// @todo KS: We might want to port some of these utilities, for example having bool if there is unified memory etc.
 
 // CUDA_ERROR_CHECK is now defined in the makefile instead
 //#define CUDA_ERROR_CHECK

diff --git a/mcmc/MCMCProcessor.cpp b/mcmc/MCMCProcessor.cpp
@@ -1,6 +1,7 @@
 #include "MCMCProcessor.h"
 
 #include "TChain.h"
+#include "TF1.h"
 
 //Only if GPU is enabled
 #ifdef CUDA
@@ -1332,7 +1333,7 @@ void MCMCProcessor::MakeCovariance_MP(bool Mute) {
 
 
 // *********************
-// Based on https://www.jstor.org/stable/25651249?seq=3,
+// Based on @cite roberts2009adaptive
 // all credits for finding and studying it goes to Henry
 void MCMCProcessor::MakeSubOptimality(const int NIntervals) {
 // *********************
@@ -3723,8 +3724,8 @@ void MCMCProcessor::PrepareGPU_AutoCorr(const int nLags) {
 
 
 // **************************
-// KS: calc Effective Sample Size Following https://mc-stan.org/docs/2_18/reference-manual/effective-sample-size-section.html
-// Furthermore we calculate Sampling efficiency following https://kmh-lanl.hansonhub.com/talks/maxent00b.pdf
+// KS: calc Effective Sample Size Following @cite StanManual
+// Furthermore we calculate Sampling efficiency following @cite hanson2008mcmc
 // Rule of thumb is to have efficiency above 25%
 void MCMCProcessor::CalculateESS(const int nLags, double** LagL) {
 // **************************
@@ -3744,7 +3745,7 @@ void MCMCProcessor::CalculateESS(const int nLags, double** LagL) {
   const double Thresholds[Nhists+1] = {1, 0.02, 0.005, 0.001, 0.0001, 0.0};
   const Color_t ESSColours[Nhists] = {kGreen, kGreen+2, kYellow, kOrange, kRed};
 
-  //KS: This histogram is inspired by the following: https://mc-stan.org/bayesplot/articles/visual-mcmc-diagnostics.html
+  //KS: This histogram is inspired by the following: @cite gabry2024visual
   TH1D **EffectiveSampleSizeHist = new TH1D*[Nhists]();
   for(int i = 0; i < Nhists; ++i)
   {
@@ -3909,9 +3910,9 @@ void MCMCProcessor::BatchedAnalysis() {
     throw MaCh3Exception(__FILE__ , __LINE__ );
   }
 
-  // Calculate variance estimator using batched means following https://arxiv.org/pdf/1911.00915.pdf see Eq. 1.2
+  // Calculate variance estimator using batched means following @cite chakraborty2019estimating see Eq. 1.2
   TVectorD* BatchedVariance = new TVectorD(nDraw);
-  //KS: The hypothesis is rejected if C > z α for a given confidence level α. If the batch means do not pass the test, Correlated is reported for the half-width on the statistical reports following https://rossetti.github.io/RossettiArenaBook/ch5-BatchMeansMethod.html alternatively for more old-school see Alexopoulos and Seila 1998 section 3.4.3
+  //KS: The hypothesis is rejected if C > z α for a given confidence level α. If the batch means do not pass the test, Correlated is reported for the half-width on the statistical reports following @cite rossetti2024batch alternatively for more old-school see Alexopoulos and Seila 1998 section 3.4.3
   TVectorD* C_Test_Statistics = new TVectorD(nDraw);
 
   double* OverallBatchMean = new double[nDraw]();
@@ -4019,7 +4020,7 @@ void MCMCProcessor::BatchedAnalysis() {
 }
 
 // **************************
-// RC: Perform spectral analysis of MCMC based on http://arxiv.org/abs/astro-ph/0405462
+// RC: Perform spectral analysis of MCMC based on @cite Dunkley:2004sv
 void MCMCProcessor::PowerSpectrumAnalysis() {
 // **************************
   TStopwatch clock;
@@ -4042,7 +4043,7 @@ void MCMCProcessor::PowerSpectrumAnalysis() {
 
 
   int nPrams = nDraw;
-  //KS: WARNING Code is awfully slow... I know how to make it faster (GPU scream in a distant) but for now just make it for two params, bit hacky sry...
+  /// @todo KS: Code is awfully slow... I know how to make it faster (GPUs scream in the distance) but for now just run it for two params, a bit hacky, sorry...
   nPrams = 2;
 
   std::vector<std::vector<float>> k_j(nPrams, std::vector<float>(v_size, 0.0));
@@ -4147,11 +4148,10 @@ void MCMCProcessor::PowerSpectrumAnalysis() {
 
 // **************************
 // Geweke Diagnostic based on
-// https://www.math.arizona.edu/~piegorsch/675/GewekeDiagnostics.pdf
-// https://www2.math.su.se/matstat/reports/master/2011/rep2/report.pdf Chapter 3.1
+// @cite Fang2014GewekeDiagnostics
+// @cite karlsbakk2011 Chapter 3.1
 void MCMCProcessor::GewekeDiagnostic() {
 // **************************
-
   MACH3LOG_INFO("Making Geweke Diagnostic");
 
   //KS: Up refers to upper limit we check, it stays constant, in literature it is mostly 50% thus using 0.5 for threshold
@@ -4309,7 +4309,6 @@ void MCMCProcessor::GewekeDiagnostic() {
   OutputFile->cd();
 }
 
-
 // **************************
 // Acceptance Probability
 void MCMCProcessor::AcceptanceProbabilities() {