From 3f97580dc30cd7ce4fb682ef529e90563d487108 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Thu, 11 Jan 2024 16:31:10 +0100 Subject: [PATCH 01/10] Fix problems with integer ranges in clock.h --- inst/include/clock.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/inst/include/clock.h b/inst/include/clock.h index 393be4a..6d874e6 100644 --- a/inst/include/clock.h +++ b/inst/include/clock.h @@ -25,7 +25,7 @@ namespace Rcpp class Clock { using tp = sc::high_resolution_clock::time_point; - using keypair = std::pair; + using keypair = std::pair; using timesmap = std::map; private: @@ -33,7 +33,7 @@ namespace Rcpp public: std::string name; - std::vector timers; + std::vector timers; std::vector names; // Init - Set name of R object @@ -58,7 +58,7 @@ namespace Rcpp #pragma omp critical { timers.push_back( - sc::duration_cast( + sc::duration_cast( sc::high_resolution_clock::now() - tickmap[key]) .count()); @@ -73,30 +73,29 @@ namespace Rcpp std::vector unique_names = names; remove_duplicates(unique_names); - std::vector> - table(unique_names.size()); - std::vector averages(unique_names.size()); - std::vector counts(unique_names.size()); + std::vector counts(unique_names.size()); // Loop over unique names for (unsigned int i = 0; i < unique_names.size(); i++) { - int sum = 0; - int count = 0; + unsigned long long int sum = 0; + unsigned long int count = 0; // Loop over all names - for (unsigned int j = 0; j < names.size(); j++) + for (unsigned long int j = 0; j < names.size(); j++) { if (names[j] == unique_names[i]) { + // Sum up all timers with the same name sum += timers[j]; count++; } } - // Calculate average, convert to milliseconds, round to 3 dec - averages[i] = (std::round((sum * 1e-3) / double(count)) / 1e+3); + // Calculate average, round to 3 decimal places, + // and convert from microseconds to milliseconds + averages[i] = std::round(sum / double(count)) / 1e+3; counts[i] = count; } From 
a5cd6f2ccc34387a113705a2ae4189487877e6b3 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Thu, 11 Jan 2024 17:19:46 +0100 Subject: [PATCH 02/10] Update clock.h Generalize so that more code can be shared with the python version --- inst/include/clock.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/inst/include/clock.h b/inst/include/clock.h index 6d874e6..af90d4e 100644 --- a/inst/include/clock.h +++ b/inst/include/clock.h @@ -30,6 +30,9 @@ namespace Rcpp private: timesmap tickmap; + std::vector averages; + std::vector counts; + std::vector unique_names; public: std::string name; @@ -70,12 +73,9 @@ namespace Rcpp void aggregate() { // Create copy of names called unique_names - std::vector unique_names = names; + unique_names = names; remove_duplicates(unique_names); - std::vector averages(unique_names.size()); - std::vector counts(unique_names.size()); - // Loop over unique names for (unsigned int i = 0; i < unique_names.size(); i++) { @@ -95,9 +95,16 @@ namespace Rcpp // Calculate average, round to 3 decimal places, // and convert from microseconds to milliseconds - averages[i] = std::round(sum / double(count)) / 1e+3; - counts[i] = count; + averages.push_back(std::round(sum / double(count)) / 1e+3); + + counts.push_back(count); } + } + + // Pass data to R / Python + void stop() + { + aggregate(); DataFrame df = DataFrame::create( Named("Name") = unique_names, @@ -107,11 +114,6 @@ namespace Rcpp env[name] = df; } - void stop() - { - aggregate(); - } - // Destructor ~Clock() { From 929de299b46bd7f5fe18c19cee62c92f6d0b7edc Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Thu, 11 Jan 2024 17:43:55 +0100 Subject: [PATCH 03/10] Investigate rlang error in rdevel ci --- inst/include/clock.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/include/clock.h b/inst/include/clock.h index af90d4e..cb73b85 100644 --- a/inst/include/clock.h +++ b/inst/include/clock.h @@ -106,12 +106,12 
@@ namespace Rcpp { aggregate(); - DataFrame df = DataFrame::create( - Named("Name") = unique_names, - Named("Milliseconds") = averages, - Named("Count") = counts); - Environment env = Environment::global_env(); - env[name] = df; + // DataFrame df = DataFrame::create( + // Named("Name") = unique_names, + // Named("Milliseconds") = averages, + // Named("Count") = counts); + // Environment env = Environment::global_env(); + // env[name] = df; } // Destructor From 9280cfffff40033f839c1877b2c91e6e9b08d150 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Thu, 11 Jan 2024 18:17:13 +0100 Subject: [PATCH 04/10] Revert "Investigate rlang error in rdevel ci" This reverts commit 929de299b46bd7f5fe18c19cee62c92f6d0b7edc. --- inst/include/clock.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/include/clock.h b/inst/include/clock.h index cb73b85..af90d4e 100644 --- a/inst/include/clock.h +++ b/inst/include/clock.h @@ -106,12 +106,12 @@ namespace Rcpp { aggregate(); - // DataFrame df = DataFrame::create( - // Named("Name") = unique_names, - // Named("Milliseconds") = averages, - // Named("Count") = counts); - // Environment env = Environment::global_env(); - // env[name] = df; + DataFrame df = DataFrame::create( + Named("Name") = unique_names, + Named("Milliseconds") = averages, + Named("Count") = counts); + Environment env = Environment::global_env(); + env[name] = df; } // Destructor From 50cc7b868c5b2efd4af6aa018562aa8c60866eb4 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Thu, 11 Jan 2024 18:20:04 +0100 Subject: [PATCH 05/10] Use unsigned long int for counts --- inst/include/clock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/include/clock.h b/inst/include/clock.h index af90d4e..bd4db14 100644 --- a/inst/include/clock.h +++ b/inst/include/clock.h @@ -31,7 +31,7 @@ namespace Rcpp private: timesmap tickmap; std::vector averages; - std::vector counts; + std::vector counts; std::vector unique_names; 
public: From 4cb25741acd235f7e8b35572c35ab2b50a01e1b2 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Fri, 12 Jan 2024 12:27:52 +0100 Subject: [PATCH 06/10] Fix online function for edge cases * Fixed online() function for cases where the regret is exactly zero. This can happen if: * * Only a single expert is used * * Only two experts are provided and they both have the same predictions (in the beginning). --- DESCRIPTION | 2 +- NEWS.md | 12 +++++++ src/conline.cpp | 7 ++-- tests/testthat/test-single_same_experts.R | 40 +++++++++++++++++++++++ 4 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-single_same_experts.R diff --git a/DESCRIPTION b/DESCRIPTION index 775d55f..ff8146d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: profoc Type: Package Title: Probabilistic Forecast Combination Using CRPS Learning -Version: 1.3.0 +Version: 1.3.1 Date: 2024-01-09 Authors@R: c( person(given = "Jonathan", diff --git a/NEWS.md b/NEWS.md index f3e0517..599bd0f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +profoc 1.3.1 +============== + +## Improvements +* Adjusted the clock.h code so that a larger share of code can be shared between the R and Python versions of that file. + +## Fixes +* Fixed an integer overflow in the clock.h code which caused the package to fail on some systems. +* Fixed online() function for cases where the regret is exactly zero. This can happen if: +* * Only a single expert is used +* * Only two experts are provided and they both have the same predictions (in the beginning). 
+ profoc 1.3.0 ============== diff --git a/src/conline.cpp b/src/conline.cpp index 1ebf2bc..f9299e3 100644 --- a/src/conline.cpp +++ b/src/conline.cpp @@ -380,12 +380,13 @@ void conline::learn() { V(x).tube(dr, pr) = vectorise(V(x).tube(dr, pr)).t() * (1 - params["forget_regret"](x)) + square(r.t()); - E(x).tube(dr, pr) = max(vectorise(E(x).tube(dr, pr)).t() * (1 - params["forget_regret"](x)), abs(r.t())); + E(x).tube(dr, pr) = pmax_arma(max(vectorise(E(x).tube(dr, pr)).t() * (1 - params["forget_regret"](x)), abs(r.t())), exp(-350)); - eta(x).tube(dr, pr) = + eta(x) + .tube(dr, pr) = pmin_arma( min(1 / (2 * vectorise(E(x).tube(dr, pr))), - sqrt(-log(vectorise(beta0field(x).tube(dr, pr))) / vectorise(V(x).tube(dr, pr)))), + sqrt(-log(vectorise(beta0field(x).tube(dr, pr))) / pmax_arma(vectorise(V(x).tube(dr, pr)), exp(-350)))), exp(350)); vec r_reg = r - vectorise(eta(x).tube(dr, pr)) % square(r); diff --git a/tests/testthat/test-single_same_experts.R b/tests/testthat/test-single_same_experts.R new file mode 100644 index 0000000..038f47f --- /dev/null +++ b/tests/testthat/test-single_same_experts.R @@ -0,0 +1,40 @@ +skip_if(debug_mode) +# %% Test online "combination" of a single expert +set.seed(1) + +mod <- online( + y = array(rnorm(30), + dim = c(5, 3) + ), array(rnorm(30), + dim = c(5, 3, 1) + ), + tau = .5, + trace = FALSE +) + +expect_true(all(mod$weights == 1)) +# %% + + +# %% Test online "combination" of two experts that are the same +set.seed(1) + +experts <- array(NA, + dim = c(5, 3, 2) +) + +experts[, , 1] <- array(rnorm(30), + dim = c(5, 3) +) +experts[, , 2] <- experts[, , 1] + +mod <- online( + y = array(rnorm(30), + dim = c(5, 3) + ), experts, + tau = .5, + trace = FALSE +) + +expect_true(all(mod$weights == 0.5)) +# %% From 0fe2f2349d1fe2cd73204c476a2faeb5132c6815 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Fri, 12 Jan 2024 18:18:22 +0100 Subject: [PATCH 07/10] Use welford online algorithm in clock.h --- inst/include/clock.h | 47 
+++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/inst/include/clock.h b/inst/include/clock.h index bd4db14..011263d 100644 --- a/inst/include/clock.h +++ b/inst/include/clock.h @@ -1,7 +1,7 @@ #ifndef clock_h #define clock_h -#include +#include #include #include #include @@ -29,19 +29,19 @@ namespace Rcpp using timesmap = std::map; private: - timesmap tickmap; - std::vector averages; - std::vector counts; - std::vector unique_names; + std::string name; // Name of R object to return + timesmap tickmap; // Map of start times + std::vector names, // Vector of identifiers + unique_names; // Vector of unique identifiers + std::vector counts; // Count occurence of identifiers + std::vector means, sds; // Output vecs of mean and sd + std::vector // Observed durations + timers; public: - std::string name; - std::vector timers; - std::vector names; - // Init - Set name of R object Clock() : name("times") {} - Clock(std::string name_) : name(name_) {} + Clock(std::string name) : name(name) {} // start a timer - save time void tick(std::string &&name) @@ -76,28 +76,34 @@ namespace Rcpp unique_names = names; remove_duplicates(unique_names); - // Loop over unique names for (unsigned int i = 0; i < unique_names.size(); i++) { - unsigned long long int sum = 0; unsigned long int count = 0; + double mean = 0, M2 = 0, variance = 0; - // Loop over all names for (unsigned long int j = 0; j < names.size(); j++) { if (names[j] == unique_names[i]) { - // Sum up all timers with the same name - sum += timers[j]; + // Welford's online algorithm for mean and variance + double delta = timers[j] - mean; count++; + mean += delta / count; + M2 += delta * (timers[j] - mean) * 1e-3; } } - // Calculate average, round to 3 decimal places, - // and convert from microseconds to milliseconds - averages.push_back(std::round(sum / double(count)) / 1e+3); - + // Save count counts.push_back(count); + + // Save average, round to 3 decimal places + 
means.push_back(std::round(mean) * 1e-3); + + // Calculate sample variance + variance = M2 / (count); + // Save standard deviation, round to 3 decimal places + sds.push_back( + std::round(std::sqrt(variance * 1e-3) * 1e+3) * 1e-3); } } @@ -108,7 +114,8 @@ namespace Rcpp DataFrame df = DataFrame::create( Named("Name") = unique_names, - Named("Milliseconds") = averages, + Named("Milliseconds") = means, + Named("SD") = sds, Named("Count") = counts); Environment env = Environment::global_env(); env[name] = df; From fee15dc6b4db981ff70ddbd0c0621d60aa38f748 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Fri, 12 Jan 2024 18:19:59 +0100 Subject: [PATCH 08/10] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 599bd0f..9850cc9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ profoc 1.3.1 ## Improvements * Adjusted the clock.h code so that a larger share of code can be shared between the R and Python versions of that file. +* clock.h now uses Welford's online algorithm to calculate the mean and variance of the timings. SD is reported in the times table. ## Fixes * Fixed an integer overflow in the clock.h code which caused the package to fail on some systems. 
From 8cedba659d3e66120e4224e82361e1d2faae45aa Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Fri, 12 Jan 2024 18:58:54 +0100 Subject: [PATCH 09/10] Prepare Release --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ff8146d..d10b0e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: profoc Type: Package Title: Probabilistic Forecast Combination Using CRPS Learning Version: 1.3.1 -Date: 2024-01-09 +Date: 2024-01-12 Authors@R: c( person(given = "Jonathan", family = "Berrisch", From a7a53eeffac2e98bb83d7d37dcb243d2c7391650 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Fri, 12 Jan 2024 19:01:50 +0100 Subject: [PATCH 10/10] Update CRAN-SUBMISSION --- CRAN-SUBMISSION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index aa340d4..d9e9791 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 1.3.0 -Date: 2024-01-09 19:31:21 UTC -SHA: e5500ba623c38eecbf21ba4e990d78daa8ad8e99 +Version: 1.3.1 +Date: 2024-01-12 17:59:54 UTC +SHA: 8cedba659d3e66120e4224e82361e1d2faae45aa