1use std::collections::HashMap;
16use std::time::Instant;
17
18use common_meta::datanode::{RegionManifestInfo, RegionStat};
19use common_telemetry::{debug, info};
20use ordered_float::OrderedFloat;
21use store_api::region_engine::RegionRole;
22use store_api::storage::RegionId;
23use table::metadata::TableId;
24
25use crate::error::Result;
26use crate::gc::scheduler::GcScheduler;
27
28#[derive(Debug, Clone, PartialEq, Eq)]
30pub(crate) struct GcCandidate {
31 pub(crate) region_id: RegionId,
32 pub(crate) score: OrderedFloat<f64>,
33 pub(crate) region_stat: RegionStat,
34}
35
36impl GcCandidate {
37 fn new(region_id: RegionId, score: f64, region_stat: RegionStat) -> Self {
38 Self {
39 region_id,
40 score: OrderedFloat(score),
41 region_stat,
42 }
43 }
44
45 #[allow(unused)]
46 fn score_f64(&self) -> f64 {
47 self.score.into_inner()
48 }
49}
50
51impl GcScheduler {
52 fn calculate_gc_score(&self, region_stat: &RegionStat) -> f64 {
54 let sst_count_score = region_stat.sst_num as f64 * self.config.sst_count_weight;
55
56 let file_remove_cnt_score = match ®ion_stat.region_manifest {
57 RegionManifestInfo::Mito {
58 file_removed_cnt, ..
59 } => *file_removed_cnt as f64 * self.config.file_removed_count_weight,
60 RegionManifestInfo::Metric { .. } => 0.0,
62 };
63
64 sst_count_score + file_remove_cnt_score
65 }
66
67 pub(crate) async fn select_gc_candidates(
69 &self,
70 table_to_region_stats: &HashMap<TableId, Vec<RegionStat>>,
71 ) -> Result<HashMap<TableId, Vec<GcCandidate>>> {
72 let mut table_candidates: HashMap<TableId, Vec<GcCandidate>> = HashMap::new();
73 let now = Instant::now();
74
75 for (table_id, region_stats) in table_to_region_stats {
76 let mut candidates = Vec::new();
77 let tracker = self.region_gc_tracker.lock().await;
78
79 for region_stat in region_stats {
80 if region_stat.role != RegionRole::Leader {
81 continue;
82 }
83
84 if region_stat.approximate_bytes < self.config.min_region_size_threshold {
86 continue;
87 }
88
89 if let Some(gc_info) = tracker.get(®ion_stat.id)
91 && now.duration_since(gc_info.last_gc_time) < self.config.gc_cooldown_period
92 {
93 debug!("Skipping region {} due to cooldown", region_stat.id);
94 continue;
95 }
96
97 let score = self.calculate_gc_score(region_stat);
98
99 debug!(
100 "Region {} (table {}) has GC score {:.4}",
101 region_stat.id, table_id, score
102 );
103
104 if score > 0.0 {
106 candidates.push(GcCandidate::new(region_stat.id, score, region_stat.clone()));
107 }
108 }
109
110 candidates.sort_by(|a, b| b.score.cmp(&a.score));
112 let top_candidates: Vec<GcCandidate> = candidates
113 .into_iter()
114 .take(self.config.regions_per_table_threshold)
115 .collect();
116
117 if !top_candidates.is_empty() {
118 info!(
119 "Selected {} GC candidates for table {} (top {} out of all qualified)",
120 top_candidates.len(),
121 table_id,
122 self.config.regions_per_table_threshold
123 );
124 table_candidates.insert(*table_id, top_candidates);
125 }
126 }
127
128 info!(
129 "Selected GC candidates for {} tables",
130 table_candidates.len()
131 );
132 Ok(table_candidates)
133 }
134}