meta_srv/gc/options.rs
1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::time::Duration;
16
17use serde::{Deserialize, Serialize};
18use snafu::ensure;
19
20use crate::error::{self, Result};
21
/// How often the GC ticker fires: once every five minutes.
// NOTE(review): the `allow` presumably exists because nothing references this
// constant in every build configuration yet — confirm, and drop it once the
// constant is always used.
#[allow(unused)]
pub(crate) const TICKER_INTERVAL: Duration = Duration::from_secs(5 * 60);
25
26/// Configuration for GC operations.
27///
28/// TODO(discord9): not expose most config to users for now, until GC scheduler is fully stable.
29#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
30#[serde(default)]
31pub struct GcSchedulerOptions {
32 /// Whether GC is enabled. Default to false.
33 /// If set to false, no GC will be performed, and potentially some
34 /// files from datanodes will never be deleted.
35 pub enable: bool,
36 /// Maximum number of tables to process concurrently.
37 pub max_concurrent_tables: usize,
38 /// Maximum number of retries per region when GC fails.
39 pub max_retries_per_region: usize,
40 /// Concurrency for region GC within a table.
41 pub region_gc_concurrency: usize,
42 /// Backoff duration between retries.
43 pub retry_backoff_duration: Duration,
44 /// Minimum region size threshold for GC (in bytes).
45 pub min_region_size_threshold: u64,
46 /// Weight for SST file count in GC scoring.
47 pub sst_count_weight: f64,
48 /// Weight for file removal rate in GC scoring.
49 pub file_removed_count_weight: f64,
50 /// Cooldown period between GC operations on the same region.
51 pub gc_cooldown_period: Duration,
52 /// Maximum number of regions to select for GC per table.
53 pub regions_per_table_threshold: usize,
54 /// Timeout duration for mailbox communication with datanodes.
55 pub mailbox_timeout: Duration,
56 /// Interval for performing full file listing during GC to find orphan files.
57 /// Full file listing is expensive but necessary to clean up orphan files.
58 /// Set to a larger value (e.g., 24 hours) to balance performance and cleanup.
59 /// Every Nth GC cycle will use full file listing, where N = full_file_listing_interval / TICKER_INTERVAL.
60 pub full_file_listing_interval: Duration,
61 /// Interval for cleaning up stale region entries from the GC tracker.
62 /// This removes entries for regions that no longer exist (e.g., after table drops).
63 /// Set to a larger value (e.g., 6 hours) since this is just for memory cleanup.
64 pub tracker_cleanup_interval: Duration,
65}
66
67impl Default for GcSchedulerOptions {
68 fn default() -> Self {
69 Self {
70 enable: false,
71 max_concurrent_tables: 10,
72 max_retries_per_region: 3,
73 retry_backoff_duration: Duration::from_secs(5),
74 region_gc_concurrency: 16,
75 min_region_size_threshold: 100 * 1024 * 1024, // 100MB
76 sst_count_weight: 1.0,
77 file_removed_count_weight: 0.5,
78 gc_cooldown_period: Duration::from_secs(60 * 5), // 5 minutes
79 regions_per_table_threshold: 20, // Select top 20 regions per table
80 mailbox_timeout: Duration::from_secs(60), // 60 seconds
81 // Perform full file listing every 24 hours to find orphan files
82 full_file_listing_interval: Duration::from_secs(60 * 60 * 24),
83 // Clean up stale tracker entries every 6 hours
84 tracker_cleanup_interval: Duration::from_secs(60 * 60 * 6),
85 }
86 }
87}
88
89impl GcSchedulerOptions {
90 /// Validates the configuration options.
91 pub fn validate(&self) -> Result<()> {
92 ensure!(
93 self.max_concurrent_tables > 0,
94 error::InvalidArgumentsSnafu {
95 err_msg: "max_concurrent_tables must be greater than 0",
96 }
97 );
98
99 ensure!(
100 self.max_retries_per_region > 0,
101 error::InvalidArgumentsSnafu {
102 err_msg: "max_retries_per_region must be greater than 0",
103 }
104 );
105
106 ensure!(
107 self.region_gc_concurrency > 0,
108 error::InvalidArgumentsSnafu {
109 err_msg: "region_gc_concurrency must be greater than 0",
110 }
111 );
112
113 ensure!(
114 !self.retry_backoff_duration.is_zero(),
115 error::InvalidArgumentsSnafu {
116 err_msg: "retry_backoff_duration must be greater than 0",
117 }
118 );
119
120 ensure!(
121 self.sst_count_weight >= 0.0,
122 error::InvalidArgumentsSnafu {
123 err_msg: "sst_count_weight must be non-negative",
124 }
125 );
126
127 ensure!(
128 self.file_removed_count_weight >= 0.0,
129 error::InvalidArgumentsSnafu {
130 err_msg: "file_removal_rate_weight must be non-negative",
131 }
132 );
133
134 ensure!(
135 !self.gc_cooldown_period.is_zero(),
136 error::InvalidArgumentsSnafu {
137 err_msg: "gc_cooldown_period must be greater than 0",
138 }
139 );
140
141 ensure!(
142 self.regions_per_table_threshold > 0,
143 error::InvalidArgumentsSnafu {
144 err_msg: "regions_per_table_threshold must be greater than 0",
145 }
146 );
147
148 ensure!(
149 !self.mailbox_timeout.is_zero(),
150 error::InvalidArgumentsSnafu {
151 err_msg: "mailbox_timeout must be greater than 0",
152 }
153 );
154
155 ensure!(
156 !self.full_file_listing_interval.is_zero(),
157 error::InvalidArgumentsSnafu {
158 err_msg: "full_file_listing_interval must be greater than 0",
159 }
160 );
161
162 ensure!(
163 !self.tracker_cleanup_interval.is_zero(),
164 error::InvalidArgumentsSnafu {
165 err_msg: "tracker_cleanup_interval must be greater than 0",
166 }
167 );
168
169 Ok(())
170 }
171}