mito2/compaction/
buckets.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use common_time::timestamp::TimeUnit;
16use common_time::Timestamp;
17
18use crate::sst::file::FileHandle;
19
20/// Infers the suitable time bucket duration.
21/// Now it simply find the max and min timestamp across all SSTs in level and fit the time span
22/// into time bucket.
23pub(crate) fn infer_time_bucket<'a>(files: impl Iterator<Item = &'a FileHandle>) -> i64 {
24    let mut max_ts = Timestamp::new(i64::MIN, TimeUnit::Second);
25    let mut min_ts = Timestamp::new(i64::MAX, TimeUnit::Second);
26
27    for f in files {
28        let (start, end) = f.time_range();
29        min_ts = min_ts.min(start);
30        max_ts = max_ts.max(end);
31    }
32
33    // safety: Convert whatever timestamp into seconds will not cause overflow.
34    let min_sec = min_ts.convert_to(TimeUnit::Second).unwrap().value();
35    let max_sec = max_ts.convert_to(TimeUnit::Second).unwrap().value();
36
37    max_sec
38        .checked_sub(min_sec)
39        .map(|span| TIME_BUCKETS.fit_time_bucket(span)) // return the max bucket on subtraction overflow.
40        .unwrap_or_else(|| TIME_BUCKETS.max()) // safety: TIME_BUCKETS cannot be empty.
41}
42
43pub(crate) struct TimeBuckets([i64; 5]);
44
45impl TimeBuckets {
46    /// Fits a given time span into time bucket by find the minimum bucket that can cover the span.
47    /// Returns the max bucket if no such bucket can be found.
48    fn fit_time_bucket(&self, span_sec: i64) -> i64 {
49        assert!(span_sec >= 0);
50        match self.0.binary_search(&span_sec) {
51            Ok(idx) => self.0[idx],
52            Err(idx) => {
53                if idx < self.0.len() {
54                    self.0[idx]
55                } else {
56                    self.0.last().copied().unwrap()
57                }
58            }
59        }
60    }
61
62    #[cfg(test)]
63    fn get(&self, idx: usize) -> i64 {
64        self.0[idx]
65    }
66
67    fn max(&self) -> i64 {
68        self.0.last().copied().unwrap()
69    }
70}
71
72/// A set of predefined time buckets.
73pub(crate) const TIME_BUCKETS: TimeBuckets = TimeBuckets([
74    60 * 60,          // one hour
75    2 * 60 * 60,      // two hours
76    12 * 60 * 60,     // twelve hours
77    24 * 60 * 60,     // one day
78    7 * 24 * 60 * 60, // one week
79]);
80
81#[cfg(test)]
82mod tests {
83    use super::*;
84    use crate::compaction::test_util::new_file_handle;
85    use crate::sst::file::FileId;
86
87    #[test]
88    fn test_time_bucket() {
89        assert_eq!(TIME_BUCKETS.get(0), TIME_BUCKETS.fit_time_bucket(1));
90        assert_eq!(TIME_BUCKETS.get(0), TIME_BUCKETS.fit_time_bucket(60 * 60));
91        assert_eq!(
92            TIME_BUCKETS.get(1),
93            TIME_BUCKETS.fit_time_bucket(60 * 60 + 1)
94        );
95
96        assert_eq!(
97            TIME_BUCKETS.get(2),
98            TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(2) - 1)
99        );
100        assert_eq!(
101            TIME_BUCKETS.get(2),
102            TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(2))
103        );
104        assert_eq!(
105            TIME_BUCKETS.get(3),
106            TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(3) - 1)
107        );
108        assert_eq!(TIME_BUCKETS.get(4), TIME_BUCKETS.fit_time_bucket(i64::MAX));
109    }
110
111    #[test]
112    fn test_infer_time_buckets() {
113        assert_eq!(
114            TIME_BUCKETS.get(0),
115            infer_time_bucket(
116                [
117                    new_file_handle(FileId::random(), 0, TIME_BUCKETS.get(0) * 1000 - 1, 0),
118                    new_file_handle(FileId::random(), 1, 10_000, 0)
119                ]
120                .iter()
121            )
122        );
123    }
124}