Skip to content

Commit

Permalink
Implement rate aggregation (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
iamazy authored Oct 30, 2021
1 parent e1d2928 commit 9eeb145
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/search/aggregations/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ mod avg_aggregation;
mod cardinality_aggregation;
mod max_aggregation;
mod min_aggregation;
mod rate_aggregation;
mod sum_aggregation;
mod top_hits_aggregation;

pub use self::avg_aggregation::*;
pub use self::cardinality_aggregation::*;
pub use self::max_aggregation::*;
pub use self::min_aggregation::*;
pub use self::rate_aggregation::*;
pub use self::sum_aggregation::*;
pub use self::top_hits_aggregation::*;
100 changes: 100 additions & 0 deletions src/search/aggregations/metrics/rate_aggregation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use crate::util::*;
use crate::{Aggregation, CalendarInterval, RateMode};

/// A `rate` metrics aggregation can be used only inside a `date_histogram` and calculates a rate of
/// documents or a field in each `date_histogram` bucket. The field values can be extracted from
/// specific numeric or [histogram fields](https://www.elastic.co/guide/en/elasticsearch/reference/current/histogram.html)
/// in the documents.
///
/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-rate-aggregation.html>
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct RateAggregation {
/// Name of the aggregation; excluded from the serialized body via `skip_serializing`.
#[serde(skip_serializing)]
pub(crate) name: String,
/// Aggregation parameters, serialized under the `rate` key.
rate: RateAggregationInner,
}

/// Body of the `rate` aggregation.
///
/// Every parameter is optional; a parameter for which `ShouldSkip::should_skip` returns `true`
/// is left out of the serialized output.
#[derive(Debug, Clone, Serialize, PartialEq)]
struct RateAggregationInner {
/// Field whose values are used to calculate the rate.
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
field: Option<String>,
/// Interval the rate is expressed in.
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
unit: Option<CalendarInterval>,
/// Whether field values are summed or counted.
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
mode: Option<RateMode>,
}

impl Aggregation {
    /// Creates an instance of [`RateAggregation`] with no `field`, `unit` or `mode` set.
    ///
    /// - `name` - name of the aggregation
    pub fn rate(name: impl Into<String>) -> RateAggregation {
        // Start from an empty parameter set; the builder methods fill it in.
        let rate = RateAggregationInner {
            field: None,
            unit: None,
            mode: None,
        };

        RateAggregation {
            name: name.into(),
            rate,
        }
    }
}

impl RateAggregation {
    /// Sets the `field` whose values are summed or counted to calculate the rate.
    pub fn field(mut self, field: impl Into<String>) -> Self {
        let field: String = field.into();
        self.rate.field = Some(field);
        self
    }

    /// The `rate` aggregation supports all rates that can be used with the
    /// [calendar_intervals parameter](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html#calendar_intervals)
    /// of the `date_histogram` aggregation. The specified rate should be compatible with the
    /// `date_histogram` aggregation interval, i.e. it should be possible to convert the bucket
    /// size into the rate. By default the interval of the `date_histogram` is used.
    ///
    /// There is also an additional limitation if the date histogram is not a direct parent of
    /// the rate aggregation. In this case both the rate interval and the histogram interval have
    /// to be in the same group: `[second, minute, hour, day, week]` or `[month, quarter, year]`.
    /// For example, if the date histogram is `month` based, only rate intervals of `month`,
    /// `quarter` or `year` are supported. If the date histogram is `day` based, only `second`,
    /// `minute`, `hour`, `day`, and `week` rate intervals are supported.
    pub fn unit(mut self, unit: impl Into<CalendarInterval>) -> Self {
        let unit: CalendarInterval = unit.into();
        self.rate.unit = Some(unit);
        self
    }

    /// By default `sum` mode is used.
    ///
    /// Setting the `mode` parameter to `value_count` changes the calculation from the sum of
    /// the field values to the number of values of the field.
    pub fn mode(mut self, mode: impl Into<RateMode>) -> Self {
        let mode: RateMode = mode.into();
        self.rate.mode = Some(mode);
        self
    }
}

#[cfg(test)]
mod tests {
use super::*;

// Each case pairs a builder expression with the JSON it is expected to produce
// (the comparison itself is performed by the `test_serialization!` macro).
test_serialization! {
// No optional setter is called: the `rate` object is empty and the aggregation
// name does not appear in the body.
with_required_fields(
Aggregation::rate("test_rate"),
json!({ "rate": { } })
);

// All setters are called: `field`, `unit` and `mode` are all present in the body.
with_all_fields(
Aggregation::rate("test_rate")
.field("price")
.unit(CalendarInterval::Day)
.mode(RateMode::ValueCount),
json!({
"rate": {
"field": "price",
"unit": "day",
"mode": "value_count"
}
})
);
}
}
3 changes: 3 additions & 0 deletions src/search/aggregations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
pub mod bucket;
pub mod metrics;
pub mod params;
pub mod pipeline;

pub use self::bucket::*;
pub use self::metrics::*;
pub use self::params::*;
pub use self::pipeline::*;

macro_rules! aggregation {
Expand Down Expand Up @@ -62,6 +64,7 @@ aggregation!(Aggregation {
Max(MaxAggregation),
Min(MinAggregation),
Sum(SumAggregation),
Rate(RateAggregation),
});

/// Type alias for a collection of aggregations
Expand Down
7 changes: 7 additions & 0 deletions src/search/aggregations/params/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//! Value types accepted by aggregation clauses
// Common parameters
mod rate_mode;

// Public re-exports
pub use self::rate_mode::*;
9 changes: 9 additions & 0 deletions src/search/aggregations/params/rate_mode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/// Calculate sum or number of values of the field for
/// [`RateAggregation`](crate::search::RateAggregation).
///
/// Variants are serialized in `snake_case` (e.g. `value_count`).
#[derive(Debug, PartialEq, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum RateMode {
/// Calculate the sum of all values of the field
Sum,
/// Use the number of values in the field
ValueCount,
}
39 changes: 39 additions & 0 deletions src/search/params/units.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,45 @@ impl Serialize for Time {
}
}

/// Calendar-aware intervals are configured with the `calendar_interval` parameter. You can specify
/// calendar intervals using the unit name, such as `month`, or as a single unit quantity, such as
/// `1M`. For example, `day` and `1d` are equivalent. Multiple quantities, such as `2d`, are not supported.
///
/// Variants are serialized as the lowercase unit name (e.g. `day`).
///
/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html#calendar_intervals>
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CalendarInterval {
/// All minutes begin at 00 seconds. One minute is the interval between 00 seconds of the first
/// minute and 00 seconds of the following minute in the specified time zone, compensating for
/// any intervening leap seconds, so that the number of minutes and seconds past the hour is the
/// same at the start and end.
Minute,
/// All hours begin at 00 minutes and 00 seconds. One hour (1h) is the interval between 00:00
/// minutes of the first hour and 00:00 minutes of the following hour in the specified time zone,
/// compensating for any intervening leap seconds, so that the number of minutes and seconds past
/// the hour is the same at the start and end.
Hour,
/// All days begin at the earliest possible time, which is usually 00:00:00 (midnight). One day
/// (1d) is the interval between the start of the day and the start of the following day in the
/// specified time zone, compensating for any intervening time changes.
Day,
/// One week is the interval between the start day_of_week:hour:minute:second and the same day
/// of the week and time of the following week in the specified time zone.
Week,
/// One month is the interval between the start day of the month and time of day and the same
/// day of the month and time of the following month in the specified time zone, so that the day
/// of the month and time of day are the same at the start and end.
Month,
/// One quarter is the interval between the start day of the month and time of day and the same
/// day of the month and time of day three months later, so that the day of the month and time
/// of day are the same at the start and end.
Quarter,
/// One year is the interval between the start day of the month and time of day and the same day
/// of the month and time of day the following year in the specified time zone, so that the date
/// and time are the same at the start and end.
Year,
}

/// Whenever the byte size of data needs to be specified, e.g. when setting a
/// buffer size parameter, the value must specify the unit,
/// like `10kb` for 10 kilobytes.
Expand Down

0 comments on commit 9eeb145

Please sign in to comment.