mito2/compaction/
buckets.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use common_time::timestamp::TimeUnit;
16use common_time::Timestamp;
17
18use crate::sst::file::FileHandle;
19
20/// Infers the suitable time bucket duration.
21/// Now it simply find the max and min timestamp across all SSTs in level and fit the time span
22/// into time bucket.
23pub(crate) fn infer_time_bucket<'a>(files: impl Iterator<Item = &'a FileHandle>) -> i64 {
24    let mut max_ts = Timestamp::new(i64::MIN, TimeUnit::Second);
25    let mut min_ts = Timestamp::new(i64::MAX, TimeUnit::Second);
26
27    for f in files {
28        let (start, end) = f.time_range();
29        min_ts = min_ts.min(start);
30        max_ts = max_ts.max(end);
31    }
32
33    // safety: Convert whatever timestamp into seconds will not cause overflow.
34    let min_sec = min_ts.convert_to(TimeUnit::Second).unwrap().value();
35    let max_sec = max_ts.convert_to(TimeUnit::Second).unwrap().value();
36
37    max_sec
38        .checked_sub(min_sec)
39        .map(|span| TIME_BUCKETS.fit_time_bucket(span)) // return the max bucket on subtraction overflow.
40        .unwrap_or_else(|| TIME_BUCKETS.max()) // safety: TIME_BUCKETS cannot be empty.
41}
42
43pub(crate) struct TimeBuckets([i64; 7]);
44
45impl TimeBuckets {
46    /// Fits a given time span into time bucket by find the minimum bucket that can cover the span.
47    /// Returns the max bucket if no such bucket can be found.
48    fn fit_time_bucket(&self, span_sec: i64) -> i64 {
49        assert!(span_sec >= 0);
50        match self.0.binary_search(&span_sec) {
51            Ok(idx) => self.0[idx],
52            Err(idx) => {
53                if idx < self.0.len() {
54                    self.0[idx]
55                } else {
56                    self.0.last().copied().unwrap()
57                }
58            }
59        }
60    }
61
62    #[cfg(test)]
63    fn get(&self, idx: usize) -> i64 {
64        self.0[idx]
65    }
66
67    fn max(&self) -> i64 {
68        self.0.last().copied().unwrap()
69    }
70}
71
72/// A set of predefined time buckets.
73pub(crate) const TIME_BUCKETS: TimeBuckets = TimeBuckets([
74    60 * 60,                 // one hour
75    2 * 60 * 60,             // two hours
76    12 * 60 * 60,            // twelve hours
77    24 * 60 * 60,            // one day
78    7 * 24 * 60 * 60,        // one week
79    365 * 24 * 60 * 60,      // one year
80    10 * 365 * 24 * 60 * 60, // ten years
81]);
82
83#[cfg(test)]
84mod tests {
85    use super::*;
86    use crate::compaction::test_util::new_file_handle;
87    use crate::sst::file::FileId;
88
89    #[test]
90    fn test_time_bucket() {
91        assert_eq!(TIME_BUCKETS.get(0), TIME_BUCKETS.fit_time_bucket(1));
92        assert_eq!(TIME_BUCKETS.get(0), TIME_BUCKETS.fit_time_bucket(60 * 60));
93        assert_eq!(
94            TIME_BUCKETS.get(1),
95            TIME_BUCKETS.fit_time_bucket(60 * 60 + 1)
96        );
97
98        assert_eq!(
99            TIME_BUCKETS.get(2),
100            TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(2) - 1)
101        );
102        assert_eq!(
103            TIME_BUCKETS.get(2),
104            TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(2))
105        );
106        assert_eq!(
107            TIME_BUCKETS.get(3),
108            TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(3) - 1)
109        );
110        assert_eq!(TIME_BUCKETS.get(6), TIME_BUCKETS.fit_time_bucket(i64::MAX));
111    }
112
113    #[test]
114    fn test_infer_time_buckets() {
115        assert_eq!(
116            TIME_BUCKETS.get(0),
117            infer_time_bucket(
118                [
119                    new_file_handle(FileId::random(), 0, TIME_BUCKETS.get(0) * 1000 - 1, 0),
120                    new_file_handle(FileId::random(), 1, 10_000, 0)
121                ]
122                .iter()
123            )
124        );
125    }
126}