store_api/storage/
file.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{HashMap, HashSet};
16use std::fmt;
17use std::fmt::Debug;
18use std::str::FromStr;
19
20use serde::{Deserialize, Serialize};
21use snafu::{ResultExt, Snafu};
22use uuid::Uuid;
23
24use crate::ManifestVersion;
25use crate::storage::RegionId;
26
/// Version number of an index.
///
/// In this file it appears next to a [`FileId`]: as the optional
/// `index_version` of a [`FileRef`] and in the `(FileId, IndexVersion)` pairs
/// of [`GcReport::deleted_indexes`].
pub type IndexVersion = u64;
29
// Error returned by `FileId::parse_str` / `FileId::from_str` when the input
// string is not a valid UUID. It wraps the underlying `uuid::Error`.
//
// NOTE(review): documented with plain `//` comments on purpose. The `Snafu`
// derive may use a `///` doc comment as the default `Display` message, so
// adding one here could change the error text — confirm before converting.
#[derive(Debug, Snafu, PartialEq)]
pub struct ParseIdError {
    // The UUID parsing failure that caused this error.
    source: uuid::Error,
}
34
/// Unique id for an SST file.
///
/// A newtype around a [`Uuid`]. Its `Display` output and its serialized form
/// are those of the wrapped `Uuid` (the tests in this file check that the JSON
/// form is the quoted id string).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub struct FileId(Uuid);
38
39impl FileId {
40    /// Returns a new unique [FileId] randomly.
41    pub fn random() -> FileId {
42        FileId(Uuid::new_v4())
43    }
44
45    /// Parses id from string.
46    pub fn parse_str(input: &str) -> std::result::Result<FileId, ParseIdError> {
47        Uuid::parse_str(input).map(FileId).context(ParseIdSnafu)
48    }
49
50    /// Converts [FileId] as byte slice.
51    pub fn as_bytes(&self) -> &[u8] {
52        self.0.as_bytes()
53    }
54}
55
56impl From<FileId> for Uuid {
57    fn from(value: FileId) -> Self {
58        value.0
59    }
60}
61
62impl fmt::Display for FileId {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        write!(f, "{}", self.0)
65    }
66}
67
68impl FromStr for FileId {
69    type Err = ParseIdError;
70
71    fn from_str(s: &str) -> std::result::Result<FileId, ParseIdError> {
72        FileId::parse_str(s)
73    }
74}
75
/// A reference to a file, identified by region, file id and an optional index
/// version.
///
/// Two references are equal only if all three fields are equal; in particular
/// `Some(0)` and `None` index versions are different.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct FileRef {
    /// Region the referenced file is recorded under.
    pub region_id: RegionId,
    /// Id of the referenced file.
    pub file_id: FileId,
    /// Index version attached to this reference, if any.
    pub index_version: Option<IndexVersion>,
}
82
83impl FileRef {
84    pub fn new(region_id: RegionId, file_id: FileId, index_version: Option<IndexVersion>) -> Self {
85        Self {
86            region_id,
87            file_id,
88            index_version,
89        }
90    }
91}
92
/// The tmp file manifest, which records a table's file references.
/// It also records the manifest version at the time these tmp files were read.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileRefsManifest {
    /// File references, grouped by region.
    pub file_refs: HashMap<RegionId, HashSet<FileRef>>,
    /// Manifest version of each region at the time this manifest read its files.
    pub manifest_version: HashMap<RegionId, ManifestVersion>,
}
101
/// Report of what a GC round deleted and which regions have to be retried.
#[derive(Clone, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct GcReport {
    /// Deleted files per region.
    /// TODO(discord9): change to `RemovedFile`?
    pub deleted_files: HashMap<RegionId, Vec<FileId>>,
    /// Deleted indexes per region, each given as a `(file id, index version)` pair.
    pub deleted_indexes: HashMap<RegionId, Vec<(FileId, IndexVersion)>>,
    /// Regions that need a retry in the next GC round, usually because their tmp ref files are
    /// outdated.
    pub need_retry_regions: HashSet<RegionId>,
}
111
112impl GcReport {
113    pub fn new(
114        deleted_files: HashMap<RegionId, Vec<FileId>>,
115        deleted_indexes: HashMap<RegionId, Vec<(FileId, IndexVersion)>>,
116        need_retry_regions: HashSet<RegionId>,
117    ) -> Self {
118        Self {
119            deleted_files,
120            deleted_indexes,
121            need_retry_regions,
122        }
123    }
124
125    pub fn merge(&mut self, other: GcReport) {
126        for (region, files) in other.deleted_files {
127            let self_files = self.deleted_files.entry(region).or_default();
128            let dedup: HashSet<FileId> = HashSet::from_iter(
129                std::mem::take(self_files)
130                    .into_iter()
131                    .chain(files.iter().cloned()),
132            );
133            *self_files = dedup.into_iter().collect();
134        }
135        self.need_retry_regions.extend(other.need_retry_regions);
136        // Remove regions that have succeeded from need_retry_regions
137        self.need_retry_regions
138            .retain(|region| !self.deleted_files.contains_key(region));
139    }
140}
141
#[cfg(test)]
mod tests {

    use super::*;

    /// A [FileId] round-trips through its string form, both via `parse_str` and `str::parse`.
    #[test]
    fn test_file_id() {
        let id = FileId::random();
        let uuid_str = id.to_string();
        assert_eq!(id.0.to_string(), uuid_str);

        let parsed = FileId::parse_str(&uuid_str).unwrap();
        assert_eq!(id, parsed);
        let parsed = uuid_str.parse().unwrap();
        assert_eq!(id, parsed);
    }

    /// A [FileId] serializes to JSON as the quoted id string and deserializes back.
    #[test]
    fn test_file_id_serialization() {
        let id = FileId::random();
        let json = serde_json::to_string(&id).unwrap();
        assert_eq!(format!("\"{id}\""), json);

        let parsed = serde_json::from_str(&json).unwrap();
        assert_eq!(id, parsed);
    }

    /// A [FileRefsManifest] with several regions survives a JSON round trip.
    #[test]
    fn test_file_refs_manifest_serialization() {
        let mut manifest = FileRefsManifest::default();
        let r0 = RegionId::new(1024, 1);
        let r1 = RegionId::new(1024, 2);
        manifest
            .file_refs
            .insert(r0, [FileRef::new(r0, FileId::random(), None)].into());
        manifest
            .file_refs
            .insert(r1, [FileRef::new(r1, FileId::random(), None)].into());
        manifest.manifest_version.insert(r0, 10);
        manifest.manifest_version.insert(r1, 20);

        let json = serde_json::to_string(&manifest).unwrap();
        let parsed: FileRefsManifest = serde_json::from_str(&json).unwrap();
        assert_eq!(manifest, parsed);
    }

    /// `FileRef::new` stores its arguments unchanged, with and without an index version.
    #[test]
    fn test_file_ref_new() {
        let region_id = RegionId::new(1024, 1);
        let file_id = FileId::random();

        // Test with Some(index_version)
        let index_version: IndexVersion = 42;
        let file_ref = FileRef::new(region_id, file_id, Some(index_version));
        assert_eq!(file_ref.region_id, region_id);
        assert_eq!(file_ref.file_id, file_id);
        assert_eq!(file_ref.index_version, Some(index_version));

        // Test with None
        let file_ref_none = FileRef::new(region_id, file_id, None);
        assert_eq!(file_ref_none.region_id, region_id);
        assert_eq!(file_ref_none.file_id, file_id);
        assert_eq!(file_ref_none.index_version, None);
    }

    /// The index version takes part in [FileRef] equality.
    #[test]
    fn test_file_ref_equality() {
        let region_id = RegionId::new(1024, 1);
        let file_id = FileId::random();

        let file_ref1 = FileRef::new(region_id, file_id, Some(10));
        let file_ref2 = FileRef::new(region_id, file_id, Some(10));
        let file_ref3 = FileRef::new(region_id, file_id, Some(20));
        let file_ref4 = FileRef::new(region_id, file_id, None);

        assert_eq!(file_ref1, file_ref2);
        assert_ne!(file_ref1, file_ref3);
        assert_ne!(file_ref1, file_ref4);
        assert_ne!(file_ref3, file_ref4);

        // Test equality with Some(0) vs None
        let file_ref_zero = FileRef::new(region_id, file_id, Some(0));
        assert_ne!(file_ref_zero, file_ref4);
    }

    /// A [FileRef] survives a JSON round trip, with and without an index version.
    #[test]
    fn test_file_ref_serialization() {
        let region_id = RegionId::new(1024, 1);
        let file_id = FileId::random();

        // Test with Some(index_version)
        let index_version: IndexVersion = 12345;
        let file_ref = FileRef::new(region_id, file_id, Some(index_version));

        let json = serde_json::to_string(&file_ref).unwrap();
        let parsed: FileRef = serde_json::from_str(&json).unwrap();

        assert_eq!(file_ref, parsed);
        assert_eq!(parsed.index_version, Some(index_version));

        // Test with None
        let file_ref_none = FileRef::new(region_id, file_id, None);
        let json_none = serde_json::to_string(&file_ref_none).unwrap();
        let parsed_none: FileRef = serde_json::from_str(&json_none).unwrap();

        assert_eq!(file_ref_none, parsed_none);
        assert_eq!(parsed_none.index_version, None);
    }

    /// `GcReport::merge` unions deleted files per region without duplicates and drops regions
    /// that have deleted files from the retry set.
    #[test]
    fn test_gc_report_merge() {
        let r0 = RegionId::new(1024, 1);
        let r1 = RegionId::new(1024, 2);
        let r2 = RegionId::new(1024, 3);
        let shared = FileId::random();
        let only_left = FileId::random();
        let only_right = FileId::random();

        let mut report = GcReport::new(
            HashMap::from([(r0, vec![shared, only_left])]),
            HashMap::new(),
            HashSet::from([r1, r2]),
        );
        let other = GcReport::new(
            HashMap::from([
                (r0, vec![shared, only_right]),
                (r1, vec![FileId::random()]),
            ]),
            HashMap::new(),
            HashSet::new(),
        );
        report.merge(other);

        // The order of merged files is not asserted, only their set and count.
        let merged: HashSet<FileId> = report.deleted_files[&r0].iter().copied().collect();
        assert_eq!(report.deleted_files[&r0].len(), 3);
        assert_eq!(merged, HashSet::from([shared, only_left, only_right]));
        assert_eq!(report.deleted_files[&r1].len(), 1);

        // r1 now has deleted files, so only r2 still needs a retry.
        assert_eq!(report.need_retry_regions, HashSet::from([r2]));
    }
}