Merge pull request #13 from BerriJ/develop

Release 1.3.1
BerriJ · Jan 15, 2024 · 56b6e5c · 56b6e5c
2 parents 4a4c412 + a7a53ee
commit 56b6e5c
Show file tree

Hide file tree

Showing 6 changed files with 101 additions and 39 deletions.
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -1,3 +1,3 @@
-Version: 1.3.0
-Date: 2024-01-09 19:31:21 UTC
-SHA: e5500ba623c38eecbf21ba4e990d78daa8ad8e99
+Version: 1.3.1
+Date: 2024-01-12 17:59:54 UTC
+SHA: 8cedba659d3e66120e4224e82361e1d2faae45aa
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: profoc
 Type: Package
 Title: Probabilistic Forecast Combination Using CRPS Learning
-Version: 1.3.0
-Date: 2024-01-09
+Version: 1.3.1
+Date: 2024-01-12
 Authors@R: c(
     person(given = "Jonathan",
              family = "Berrisch",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,16 @@
+profoc 1.3.1
+==============
+
+## Improvements
+* Adjusted the clock.h code so that more of it can be shared between the R and Python versions of that file.
+* clock.h now uses Welford's online algorithm to calculate the mean and variance of the timings. The standard deviation (SD) is reported in the times table.
+
+## Fixes
+* Fixed an integer overflow in the clock.h code which caused the package to fail on some systems.
+* Fixed the online() function for cases where the regret is exactly zero. This can happen if:
+  * Only a single expert is used.
+  * Only two experts are provided and they both have the same predictions (in the beginning).
+
 profoc 1.3.0
 ==============
 

diff --git a/inst/include/clock.h b/inst/include/clock.h
@@ -1,7 +1,7 @@
 #ifndef clock_h
 #define clock_h
 
-#include <RcppArmadillo.h>
+#include <Rcpp.h>
 #include <chrono>
 #include <string>
 #include <vector>
@@ -25,20 +25,23 @@ namespace Rcpp
     class Clock
     {
         using tp = sc::high_resolution_clock::time_point;
-        using keypair = std::pair<std::string, int>;
+        using keypair = std::pair<std::string, unsigned int>;
         using timesmap = std::map<keypair, tp>;
 
     private:
-        timesmap tickmap;
+        std::string name;                      // Name of R object to return
+        timesmap tickmap;                      // Map of start times
+        std::vector<std::string> names,        // Vector of identifiers
+            unique_names;                      // Vector of unique identifiers
+        std::vector<unsigned long int> counts; // Count occurrence of identifiers
+        std::vector<double> means, sds;        // Output vecs of mean and sd
+        std::vector<unsigned long long int>    // Observed durations
+            timers;
 
     public:
-        std::string name;
-        std::vector<double> timers;
-        std::vector<std::string> names;
-
         // Init - Set name of R object
         Clock() : name("times") {}
-        Clock(std::string name_) : name(name_) {}
+        Clock(std::string name) : name(name) {}
 
         // start a timer - save time
         void tick(std::string &&name)
@@ -58,7 +61,7 @@ namespace Rcpp
 #pragma omp critical
             {
                 timers.push_back(
-                    sc::duration_cast<sc::nanoseconds>(
+                    sc::duration_cast<sc::microseconds>(
                         sc::high_resolution_clock::now() -
                         tickmap[key])
                         .count());
@@ -70,49 +73,54 @@ namespace Rcpp
         void aggregate()
         {
             // Create copy of names called unique_names
-            std::vector<std::string> unique_names = names;
+            unique_names = names;
             remove_duplicates(unique_names);
 
-            std::vector<std::tuple<std::string, double, int>>
-                table(unique_names.size());
-
-            std::vector<double> averages(unique_names.size());
-            std::vector<int> counts(unique_names.size());
-
-            // Loop over unique names
             for (unsigned int i = 0; i < unique_names.size(); i++)
             {
-                int sum = 0;
-                int count = 0;
+                unsigned long int count = 0;
+                double mean = 0, M2 = 0, variance = 0;
 
-                // Loop over all names
-                for (unsigned int j = 0; j < names.size(); j++)
+                for (unsigned long int j = 0; j < names.size(); j++)
                 {
                     if (names[j] == unique_names[i])
                     {
-                        sum += timers[j];
+                        // Welford's online algorithm for mean and variance
+                        double delta = timers[j] - mean;
                         count++;
+                        mean += delta / count;
+                        M2 += delta * (timers[j] - mean) * 1e-3;
                     }
                 }
 
-                // Calculate average, convert to milliseconds, round to 3 dec
-                averages[i] = (std::round((sum * 1e-3) / double(count)) / 1e+3);
-                counts[i] = count;
+                // Save count
+                counts.push_back(count);
+
+                // Save average, round to 3 decimal places
+                means.push_back(std::round(mean) * 1e-3);
+
+                // Calculate population variance (M2 is divided by count, not count - 1)
+                variance = M2 / (count);
+                // Save standard deviation, round to 3 decimal places
+                sds.push_back(
+                    std::round(std::sqrt(variance * 1e-3) * 1e+3) * 1e-3);
             }
+        }
+
+        // Pass data to R / Python
+        void stop()
+        {
+            aggregate();
 
             DataFrame df = DataFrame::create(
                 Named("Name") = unique_names,
-                Named("Milliseconds") = averages,
+                Named("Milliseconds") = means,
+                Named("SD") = sds,
                 Named("Count") = counts);
             Environment env = Environment::global_env();
             env[name] = df;
         }
 
-        void stop()
-        {
-            aggregate();
-        }
-
         // Destructor
         ~Clock()
         {

diff --git a/src/conline.cpp b/src/conline.cpp
@@ -380,12 +380,13 @@ void conline::learn()
                     {
                         V(x).tube(dr, pr) = vectorise(V(x).tube(dr, pr)).t() * (1 - params["forget_regret"](x)) + square(r.t());
 
-                        E(x).tube(dr, pr) = max(vectorise(E(x).tube(dr, pr)).t() * (1 - params["forget_regret"](x)), abs(r.t()));
+                        E(x).tube(dr, pr) = pmax_arma(max(vectorise(E(x).tube(dr, pr)).t() * (1 - params["forget_regret"](x)), abs(r.t())), exp(-350));
 
-                        eta(x).tube(dr, pr) =
+                        eta(x)
+                            .tube(dr, pr) =
                             pmin_arma(
                                 min(1 / (2 * vectorise(E(x).tube(dr, pr))),
-                                    sqrt(-log(vectorise(beta0field(x).tube(dr, pr))) / vectorise(V(x).tube(dr, pr)))),
+                                    sqrt(-log(vectorise(beta0field(x).tube(dr, pr))) / pmax_arma(vectorise(V(x).tube(dr, pr)), exp(-350)))),
                                 exp(350));
 
                         vec r_reg = r - vectorise(eta(x).tube(dr, pr)) % square(r);

diff --git a/tests/testthat/test-single_same_experts.R b/tests/testthat/test-single_same_experts.R
@@ -0,0 +1,40 @@
+skip_if(debug_mode)
+# %% Test online "combination" of a single expert
+set.seed(1)
+
+mod <- online(
+    y = array(rnorm(30),
+        dim = c(5, 3)
+    ), array(rnorm(30),
+        dim = c(5, 3, 1)
+    ),
+    tau = .5,
+    trace = FALSE
+)
+
+expect_true(all(mod$weights == 1))
+# %%
+
+
+# %% Test online "combination" of two experts that are the same
+set.seed(1)
+
+experts <- array(NA,
+    dim = c(5, 3, 2)
+)
+
+experts[, , 1] <- array(rnorm(30),
+    dim = c(5, 3)
+)
+experts[, , 2] <- experts[, , 1]
+
+mod <- online(
+    y = array(rnorm(30),
+        dim = c(5, 3)
+    ), experts,
+    tau = .5,
+    trace = FALSE
+)
+
+expect_true(all(mod$weights == 0.5))
+# %%