meta_srv/gc/
options.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::time::Duration;
16
17use serde::{Deserialize, Serialize};
18use snafu::ensure;
19
20use crate::error::{self, Result};
21
/// Period of the GC ticker: five minutes.
#[allow(unused)]
pub(crate) const TICKER_INTERVAL: Duration = Duration::from_secs(5 * 60);
25
/// Configuration for GC operations.
///
/// Because of `#[serde(default)]`, any field missing from a deserialized
/// configuration takes its value from the `Default` implementation of this
/// struct. Use [`GcSchedulerOptions::validate`] to check the constraints noted
/// on the individual fields.
///
/// TODO(discord9): not expose most config to users for now, until GC scheduler is fully stable.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct GcSchedulerOptions {
    /// Whether GC is enabled. Defaults to `false`.
    /// If set to false, no GC will be performed, and potentially some
    /// files from datanodes will never be deleted.
    pub enable: bool,
    /// Maximum number of tables to process concurrently.
    /// Defaults to 10; must be greater than 0.
    pub max_concurrent_tables: usize,
    /// Maximum number of retries per region when GC fails.
    /// Defaults to 3; must be greater than 0.
    pub max_retries_per_region: usize,
    /// Concurrency for region GC within a table.
    /// Defaults to 16; must be greater than 0.
    pub region_gc_concurrency: usize,
    /// Backoff duration between retries.
    /// Defaults to 5 seconds; must be non-zero.
    pub retry_backoff_duration: Duration,
    /// Minimum region size threshold for GC (in bytes).
    /// Defaults to 100 MiB (`100 * 1024 * 1024`); not checked by `validate`.
    pub min_region_size_threshold: u64,
    /// Weight for SST file count in GC scoring.
    /// Defaults to 1.0; must be non-negative.
    pub sst_count_weight: f64,
    /// Weight for file removal rate in GC scoring.
    /// Defaults to 0.5; must be non-negative.
    // NOTE(review): the field name says "count" while the description says
    // "rate" — confirm which quantity the scorer actually multiplies.
    pub file_removed_count_weight: f64,
    /// Cooldown period between GC operations on the same region.
    /// Defaults to 5 minutes; must be non-zero.
    pub gc_cooldown_period: Duration,
    /// Maximum number of regions to select for GC per table.
    /// Defaults to 20; must be greater than 0.
    pub regions_per_table_threshold: usize,
    /// Timeout duration for mailbox communication with datanodes.
    /// Defaults to 60 seconds; must be non-zero.
    pub mailbox_timeout: Duration,
    /// Interval for performing full file listing during GC to find orphan files.
    /// Full file listing is expensive but necessary to clean up orphan files.
    /// Set to a larger value (e.g., 24 hours) to balance performance and cleanup.
    /// Every Nth GC cycle will use full file listing, where N = full_file_listing_interval / TICKER_INTERVAL.
    /// Defaults to 24 hours; must be non-zero.
    pub full_file_listing_interval: Duration,
    /// Interval for cleaning up stale region entries from the GC tracker.
    /// This removes entries for regions that no longer exist (e.g., after table drops).
    /// Set to a larger value (e.g., 6 hours) since this is just for memory cleanup.
    /// Defaults to 6 hours; must be non-zero.
    pub tracker_cleanup_interval: Duration,
}
66
67impl Default for GcSchedulerOptions {
68    fn default() -> Self {
69        Self {
70            enable: false,
71            max_concurrent_tables: 10,
72            max_retries_per_region: 3,
73            retry_backoff_duration: Duration::from_secs(5),
74            region_gc_concurrency: 16,
75            min_region_size_threshold: 100 * 1024 * 1024, // 100MB
76            sst_count_weight: 1.0,
77            file_removed_count_weight: 0.5,
78            gc_cooldown_period: Duration::from_secs(60 * 5), // 5 minutes
79            regions_per_table_threshold: 20,                 // Select top 20 regions per table
80            mailbox_timeout: Duration::from_secs(60),        // 60 seconds
81            // Perform full file listing every 24 hours to find orphan files
82            full_file_listing_interval: Duration::from_secs(60 * 60 * 24),
83            // Clean up stale tracker entries every 6 hours
84            tracker_cleanup_interval: Duration::from_secs(60 * 60 * 6),
85        }
86    }
87}
88
89impl GcSchedulerOptions {
90    /// Validates the configuration options.
91    pub fn validate(&self) -> Result<()> {
92        ensure!(
93            self.max_concurrent_tables > 0,
94            error::InvalidArgumentsSnafu {
95                err_msg: "max_concurrent_tables must be greater than 0",
96            }
97        );
98
99        ensure!(
100            self.max_retries_per_region > 0,
101            error::InvalidArgumentsSnafu {
102                err_msg: "max_retries_per_region must be greater than 0",
103            }
104        );
105
106        ensure!(
107            self.region_gc_concurrency > 0,
108            error::InvalidArgumentsSnafu {
109                err_msg: "region_gc_concurrency must be greater than 0",
110            }
111        );
112
113        ensure!(
114            !self.retry_backoff_duration.is_zero(),
115            error::InvalidArgumentsSnafu {
116                err_msg: "retry_backoff_duration must be greater than 0",
117            }
118        );
119
120        ensure!(
121            self.sst_count_weight >= 0.0,
122            error::InvalidArgumentsSnafu {
123                err_msg: "sst_count_weight must be non-negative",
124            }
125        );
126
127        ensure!(
128            self.file_removed_count_weight >= 0.0,
129            error::InvalidArgumentsSnafu {
130                err_msg: "file_removal_rate_weight must be non-negative",
131            }
132        );
133
134        ensure!(
135            !self.gc_cooldown_period.is_zero(),
136            error::InvalidArgumentsSnafu {
137                err_msg: "gc_cooldown_period must be greater than 0",
138            }
139        );
140
141        ensure!(
142            self.regions_per_table_threshold > 0,
143            error::InvalidArgumentsSnafu {
144                err_msg: "regions_per_table_threshold must be greater than 0",
145            }
146        );
147
148        ensure!(
149            !self.mailbox_timeout.is_zero(),
150            error::InvalidArgumentsSnafu {
151                err_msg: "mailbox_timeout must be greater than 0",
152            }
153        );
154
155        ensure!(
156            !self.full_file_listing_interval.is_zero(),
157            error::InvalidArgumentsSnafu {
158                err_msg: "full_file_listing_interval must be greater than 0",
159            }
160        );
161
162        ensure!(
163            !self.tracker_cleanup_interval.is_zero(),
164            error::InvalidArgumentsSnafu {
165                err_msg: "tracker_cleanup_interval must be greater than 0",
166            }
167        );
168
169        Ok(())
170    }
171}