Skip to content

Commit

Permalink
adding forecasts to thresholded models (#333)
Browse files Browse the repository at this point in the history
* adding forecasts to thresholded model, part 1

* refactor

* consistency between external and internal shingling

* streaming impute and standard consistency

* timed range vector and unifying different modes

* more tests and cleanup

* comments

* more tests + examples

* fixes and more examples
  • Loading branch information
sudiptoguha authored Jun 28, 2022
1 parent 27ec2b1 commit 5630173
Show file tree
Hide file tree
Showing 24 changed files with 2,088 additions and 147 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
package com.amazon.randomcutforest.returntypes;

import static com.amazon.randomcutforest.CommonUtils.checkArgument;
import static java.lang.Math.max;
import static java.lang.Math.min;

import java.util.Arrays;

Expand Down Expand Up @@ -76,4 +78,22 @@ public RangeVector(RangeVector base) {
this.upper = Arrays.copyOf(base.upper, dimensions);
this.lower = Arrays.copyOf(base.lower, dimensions);
}

/**
 * Translates the i-th coordinate (value and both bounds) by the given amount.
 *
 * @param i     index of the coordinate to shift
 * @param shift additive translation applied to value, upper and lower
 * @throws IllegalArgumentException if the index is out of range
 */
public void shift(int i, float shift) {
    checkArgument(i >= 0 && i < values.length, "incorrect index");
    float shiftedValue = values[i] + shift;
    values[i] = shiftedValue;
    // clamp the bounds to the shifted central value so that floating point
    // rounding cannot leave upper below (or lower above) the value
    upper[i] = max(shiftedValue, upper[i] + shift);
    lower[i] = min(shiftedValue, lower[i] + shift);
}

/**
 * Multiplies the i-th coordinate (value and both bounds) by a positive weight.
 *
 * @param i      index of the coordinate to scale
 * @param weight multiplicative factor, must be strictly positive
 * @throws IllegalArgumentException if the index is out of range or the weight
 *                                  is not strictly positive
 */
public void scale(int i, float weight) {
    checkArgument(i >= 0 && i < values.length, "incorrect index");
    // the check rejects zero as well as negative weights; the previous message
    // (" negative weight not permitted") understated the condition
    checkArgument(weight > 0, "weight must be positive");
    values[i] = values[i] * weight;
    // managing precision: keep upper >= value >= lower after rounding
    upper[i] = max(upper[i] * weight, values[i]);
    lower[i] = min(lower[i] * weight, values[i]);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.randomcutforest.examples.parkservices;

import static java.lang.Math.min;

import com.amazon.randomcutforest.config.Precision;
import com.amazon.randomcutforest.config.TransformMethod;
import com.amazon.randomcutforest.examples.Example;
import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
import com.amazon.randomcutforest.parkservices.returntypes.TimedRangeVector;
import com.amazon.randomcutforest.returntypes.RangeVector;
import com.amazon.randomcutforest.testutils.MultiDimDataWithKey;
import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys;

/**
 * Example demonstrating multi-step forecasting with a
 * {@link ThresholdedRandomCutForest}: before each observation is processed, the
 * forest is asked to extrapolate {@code horizon} steps ahead, and the RMSE of
 * the central, upper and lower forecasts is reported per lead time.
 */
public class ThresholdedForecast implements Example {

    public static void main(String[] args) throws Exception {
        new com.amazon.randomcutforest.examples.parkservices.ThresholdedForecast().run();
    }

    @Override
    public String command() {
        return "Thresholded_Forecast_example";
    }

    @Override
    public String description() {
        return "Example of Forecast using Thresholded RCF";
    }

    /**
     * Streams synthetic single-dimensional data through a thresholded forest and
     * prints, for each lead time, the RMSE of the forecast value and of its
     * likely upper/lower bounds. Forecasts are produced strictly before the
     * corresponding value is shown to the model.
     *
     * @throws Exception propagated from the example runner contract
     */
    @Override
    public void run() throws Exception {

        int sampleSize = 256;
        int baseDimensions = 1;

        long seed = 100L;

        int length = 4 * sampleSize;
        int outputAfter = 128;

        // as the ratio of amplitude (signal) to noise is changed, the estimation range
        // in forecast
        // (or any other inference) should increase
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length, 50, 100, 10, seed,
                baseDimensions, true);
        System.out.println(dataWithKeys.changes.length + " anomalies injected ");

        // horizon/lookahead can be larger than shingleSize for transformations that do
        // not
        // involve differencing -- but longer horizon would have larger error
        int horizon = 60;
        int shingleSize = 30;

        // if the useSlope is set as true then it is recommended to use NORMALIZE or
        // SUBTRACT_MA as
        // transformation methods to adjust to the linear drift

        ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(baseDimensions * shingleSize).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(true).shingleSize(shingleSize).outputAfter(outputAfter)
                .transformMethod(TransformMethod.NORMALIZE).build();

        if (forest.getTransformMethod() == TransformMethod.NORMALIZE_DIFFERENCE
                || forest.getTransformMethod() == TransformMethod.DIFFERENCE) {
            // single step differencing will not produce stable forecasts over long horizons
            horizon = min(horizon, shingleSize / 2 + 1);
        }
        double[] error = new double[horizon];
        double[] lowerError = new double[horizon];
        double[] upperError = new double[horizon];
        // number of squared-error samples accumulated per lead time; counting
        // explicitly fixes the off-by-one of the previous closed-form divisor
        // (length - shingleSize + 1 - outputAfter - i), which exceeded the true
        // sample count (length - shingleSize - outputAfter - i) by one
        int[] count = new int[horizon];

        for (int j = 0; j < dataWithKeys.data.length; j++) {
            // forecast first; change centrality to achieve a control over the sampling
            // setting centrality = 0 would correspond to random sampling from the leaves
            // reached by
            // impute visitor

            // the following prints
            // <sequenceNo> <predicted_next_value> <likely_upper_bound> <likely_lower_bound>
            // where the sequence number varies between next-to-be-read .. (next + horizon
            // -1 )
            //
            // Every new element corresponds to a new set of horizon forecasts; we measure
            // the
            // errors keeping the leadtime fixed.
            //
            // verify that forecast is done before seeing the actual value (in the process()
            // function)
            //

            TimedRangeVector extrapolate = forest.extrapolate(horizon, true, 1.0);
            RangeVector forecast = extrapolate.rangeVector;
            for (int i = 0; i < horizon; i++) {
                System.out.println(
                        (j + i) + " " + forecast.values[i] + " " + forecast.upper[i] + " " + forecast.lower[i]);
                // compute errors once the model is warmed up and the true value is available
                if (j > outputAfter + shingleSize - 1 && j + i < dataWithKeys.data.length) {
                    double t = dataWithKeys.data[j + i][0] - forecast.values[i];
                    error[i] += t * t;
                    t = dataWithKeys.data[j + i][0] - forecast.lower[i];
                    lowerError[i] += t * t;
                    t = dataWithKeys.data[j + i][0] - forecast.upper[i];
                    upperError[i] += t * t;
                    ++count[i];
                }
            }
            System.out.println();
            System.out.println();
            forest.process(dataWithKeys.data[j], j);
        }

        System.out.println(forest.getTransformMethod().name() + " RMSE (as horizon increases) ");
        for (int i = 0; i < horizon; i++) {
            System.out.print(Math.sqrt(error[i] / count[i]) + " ");
        }
        System.out.println();
        System.out.println("RMSE Lower (as horizon increases)");
        for (int i = 0; i < horizon; i++) {
            System.out.print(Math.sqrt(lowerError[i] / count[i]) + " ");
        }
        System.out.println();
        System.out.println("RMSE Upper (as horizon increases)");
        for (int i = 0; i < horizon; i++) {
            System.out.print(Math.sqrt(upperError[i] / count[i]) + " ");
        }
        System.out.println();

    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.randomcutforest.examples.parkservices;

import java.util.Arrays;
import java.util.Random;

import com.amazon.randomcutforest.config.ForestMode;
import com.amazon.randomcutforest.config.ImputationMethod;
import com.amazon.randomcutforest.config.Precision;
import com.amazon.randomcutforest.config.TransformMethod;
import com.amazon.randomcutforest.examples.Example;
import com.amazon.randomcutforest.parkservices.AnomalyDescriptor;
import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
import com.amazon.randomcutforest.testutils.MultiDimDataWithKey;
import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys;

/**
 * Example of a {@link ThresholdedRandomCutForest} operating in
 * {@code STREAMING_IMPUTE} mode: roughly 20% of the observations are dropped at
 * random, and the forest imputes the missing entries from the irregular
 * timestamps while still scoring for anomalies.
 */
public class ThresholdedImpute implements Example {

    public static void main(String[] args) throws Exception {
        new ThresholdedImpute().run();
    }

    @Override
    public String command() {
        return "Thresholded_Imputation_example";
    }

    @Override
    public String description() {
        return "Thresholded Imputation Example";
    }

    /**
     * Streams synthetic data with injected anomalies, randomly dropping
     * observations (never the injected anomalies themselves), and prints each
     * detected anomaly together with the expected values the forest would have
     * preferred.
     *
     * @throws Exception propagated from the example runner contract
     */
    @Override
    public void run() throws Exception {
        // Create and populate a random cut forest

        int shingleSize = 4;
        int numberOfTrees = 50;
        int sampleSize = 256;
        Precision precision = Precision.FLOAT_32;
        int dataSize = 4 * sampleSize;
        int baseDimensions = 1;

        long count = 0;

        int dropped = 0;

        int dimensions = baseDimensions * shingleSize;
        ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize)
                .sampleSize(sampleSize).precision(precision).anomalyRate(0.01).imputationMethod(ImputationMethod.RCF)
                .forestMode(ForestMode.STREAMING_IMPUTE).transformMethod(TransformMethod.NORMALIZE_DIFFERENCE)
                .adjustThreshold(true).build();

        long seed = new Random().nextLong();
        Random noisePRG = new Random(0);

        System.out.println("seed = " + seed);
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50,
                100, 5, seed, baseDimensions);

        // as we loop over the data we will be dropping observations with probability
        // 0.2
        // note that as a result the predictor correct method would likely be more
        // error-prone
        // note that estimation of the number of entries to be imputed is also another
        // estimation
        // therefore the overall method may have runaway effects if more values are
        // dropped.

        int keyCounter = 0;
        for (double[] point : dataWithKeys.data) {

            if (noisePRG.nextDouble() < 0.2 && !(keyCounter < dataWithKeys.changeIndices.length
                    && count == dataWithKeys.changeIndices[keyCounter])) {
                // drop this observation; the condition above guarantees an injected
                // anomaly is never dropped, so the nested re-check (and its println)
                // that used to live here was unreachable and has been removed
                dropped++;
            } else {
                long newStamp = 100 * count + 2 * noisePRG.nextInt(10) - 5;
                AnomalyDescriptor result = forest.process(point, newStamp);

                if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
                    System.out.println("sequence " + (count) + " INPUT " + Arrays.toString(point) + " CHANGE "
                            + Arrays.toString(dataWithKeys.changes[keyCounter]));
                    ++keyCounter;
                }

                if (result.getAnomalyGrade() != 0) {
                    System.out.print("sequence " + (count) + " RESULT value ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getCurrentInput()[i] + ", ");
                    }
                    System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");

                    if (result.isExpectedValuesPresent()) {
                        if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
                            // the anomaly actually began a few observations ago; report the
                            // past value alongside what was expected at that point
                            System.out.print(-result.getRelativeIndex() + " steps ago, instead of ");
                            for (int i = 0; i < baseDimensions; i++) {
                                System.out.print(result.getPastValues()[i] + ", ");
                            }
                            System.out.print("expected ");
                            for (int i = 0; i < baseDimensions; i++) {
                                System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                                if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
                                    System.out.print(
                                            "( " + (result.getPastValues()[i] - result.getExpectedValuesList()[0][i])
                                                    + " ) ");
                                }
                            }
                        } else {
                            System.out.print("expected ");
                            for (int i = 0; i < baseDimensions; i++) {
                                System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                                if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
                                    System.out.print(
                                            "( " + (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i])
                                                    + " ) ");
                                }
                            }
                        }
                    }
                    System.out.println();
                }
            }
            ++count;
        }
        System.out.println("Dropped " + dropped + " out of " + count);
    }

}
Loading

0 comments on commit 5630173

Please sign in to comment.