Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove standard deviation from stats aggregation #1788

Merged
merged 1 commit into from
Jan 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/aggregation/agg_req.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ impl BucketAggregationType {
/// called multi-value numeric metrics aggregation.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum MetricAggregation {
/// Computes the average.
/// Computes the average of the extracted values.
#[serde(rename = "avg")]
Average(AverageAggregation),
/// Counts the number of extracted values.
Expand All @@ -252,10 +252,11 @@ pub enum MetricAggregation {
/// Finds the minimum value.
#[serde(rename = "min")]
Min(MinAggregation),
/// Calculates stats sum, average, min, max, standard_deviation on a field.
/// Computes a collection of statistics (`min`, `max`, `sum`, `count`, and `avg`) over the
/// extracted values.
#[serde(rename = "stats")]
Stats(StatsAggregation),
/// Computes the sum.
/// Computes the sum of the extracted values.
#[serde(rename = "sum")]
Sum(SumAggregation),
}
Expand Down
1 change: 0 additions & 1 deletion src/aggregation/bucket/histogram/histogram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1366,7 +1366,6 @@ mod tests {
"min": Value::Null,
"max": Value::Null,
"avg": Value::Null,
"standard_deviation": Value::Null,
}
})
);
Expand Down
55 changes: 14 additions & 41 deletions src/aggregation/metric/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ pub struct Stats {
pub count: u64,
/// The sum of the fast field values.
pub sum: f64,
/// The standard deviation of the fast field values. `None` if count equals zero.
pub standard_deviation: Option<f64>,
/// The min value of the fast field values.
pub min: Option<f64>,
/// The max value of the fast field values.
Expand All @@ -58,7 +56,6 @@ impl Stats {
match agg_property {
"count" => Ok(Some(self.count as f64)),
"sum" => Ok(Some(self.sum)),
"standard_deviation" => Ok(self.standard_deviation),
"min" => Ok(self.min),
"max" => Ok(self.max),
"avg" => Ok(self.avg),
Expand All @@ -74,53 +71,32 @@ impl Stats {
/// results.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateStats {
/// The number of values.
pub count: u64,
/// The sum of the values.
pub sum: f64,
/// The sum of the squared values.
pub squared_sum: f64,
/// The min value of the values.
pub min: f64,
/// The max value of the values.
pub max: f64,
/// The number of extracted values.
count: u64,
/// The sum of the extracted values.
sum: f64,
/// The min value.
min: f64,
/// The max value.
max: f64,
}

impl Default for IntermediateStats {
fn default() -> Self {
Self {
count: 0,
sum: 0.0,
squared_sum: 0.0,
min: f64::MAX,
max: f64::MIN,
}
}
}

impl IntermediateStats {
fn avg(&self) -> Option<f64> {
if self.count == 0 {
None
} else {
Some(self.sum / (self.count as f64))
}
}

fn square_mean(&self) -> f64 {
self.squared_sum / (self.count as f64)
}

fn standard_deviation(&self) -> Option<f64> {
self.avg()
.map(|average| (self.square_mean() - average * average).sqrt())
}

/// Merges the other stats intermediate result into self.
pub fn merge_fruits(&mut self, other: IntermediateStats) {
self.count += other.count;
self.sum += other.sum;
self.squared_sum += other.squared_sum;
self.min = self.min.min(other.min);
self.max = self.max.max(other.max);
}
Expand All @@ -137,21 +113,24 @@ impl IntermediateStats {
} else {
Some(self.max)
};
let avg = if self.count == 0 {
None
} else {
Some(self.sum / (self.count as f64))
};
Stats {
count: self.count,
sum: self.sum,
standard_deviation: self.standard_deviation(),
min,
max,
avg: self.avg(),
avg,
}
}

#[inline]
fn collect(&mut self, value: f64) {
self.count += 1;
self.sum += value;
self.squared_sum += value * value;
self.min = self.min.min(value);
self.max = self.max.max(value);
}
Expand Down Expand Up @@ -255,7 +234,6 @@ mod tests {
"count": 0,
"max": Value::Null,
"min": Value::Null,
"standard_deviation": Value::Null,
"sum": 0.0
})
);
Expand Down Expand Up @@ -332,7 +310,6 @@ mod tests {
"count": 7,
"max": 44.0,
"min": 1.0,
"standard_deviation": 13.65313748796613,
"sum": 85.0
})
);
Expand All @@ -344,7 +321,6 @@ mod tests {
"count": 7,
"max": 44.0,
"min": 1.0,
"standard_deviation": 13.65313748796613,
"sum": 85.0
})
);
Expand All @@ -356,7 +332,6 @@ mod tests {
"count": 7,
"max": 44.5,
"min": 1.0,
"standard_deviation": 13.819905785437443,
"sum": 85.5
})
);
Expand All @@ -368,7 +343,6 @@ mod tests {
"count": 3,
"max": 14.0,
"min": 7.0,
"standard_deviation": 2.867441755680877,
"sum": 32.0
})
);
Expand All @@ -380,7 +354,6 @@ mod tests {
"count": 0,
"max": serde_json::Value::Null,
"min": serde_json::Value::Null,
"standard_deviation": serde_json::Value::Null,
"sum": 0.0,
})
);
Expand Down