store_api/storage/
file.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{HashMap, HashSet};
16use std::fmt;
17use std::fmt::Debug;
18use std::str::FromStr;
19
20use serde::{Deserialize, Serialize};
21use snafu::{ResultExt, Snafu};
22use uuid::Uuid;
23
24use crate::ManifestVersion;
25use crate::storage::RegionId;
26
/// Version number of an index, stored as an unsigned 64-bit integer.
pub type IndexVersion = u64;
29
30#[derive(Debug, Snafu, PartialEq)]
31pub struct ParseIdError {
32    source: uuid::Error,
33}
34
35/// Unique id for [SST File].
36#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
37pub struct FileId(Uuid);
38
39impl FileId {
40    /// Returns a new unique [FileId] randomly.
41    pub fn random() -> FileId {
42        FileId(Uuid::new_v4())
43    }
44
45    /// Parses id from string.
46    pub fn parse_str(input: &str) -> std::result::Result<FileId, ParseIdError> {
47        Uuid::parse_str(input).map(FileId).context(ParseIdSnafu)
48    }
49
50    /// Converts [FileId] as byte slice.
51    pub fn as_bytes(&self) -> &[u8] {
52        self.0.as_bytes()
53    }
54}
55
56impl From<FileId> for Uuid {
57    fn from(value: FileId) -> Self {
58        value.0
59    }
60}
61
62impl fmt::Display for FileId {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        write!(f, "{}", self.0)
65    }
66}
67
68impl FromStr for FileId {
69    type Err = ParseIdError;
70
71    fn from_str(s: &str) -> std::result::Result<FileId, ParseIdError> {
72        FileId::parse_str(s)
73    }
74}
75
76#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
77pub struct FileRef {
78    pub region_id: RegionId,
79    pub file_id: FileId,
80}
81
82impl FileRef {
83    pub fn new(region_id: RegionId, file_id: FileId) -> Self {
84        Self { region_id, file_id }
85    }
86}
87
88/// The tmp file manifest which record a table's file references.
89/// Also record the manifest version when these tmp files are read.
90#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
91pub struct FileRefsManifest {
92    pub file_refs: HashMap<RegionId, HashSet<FileId>>,
93    /// Manifest version when this manifest is read for it's files
94    pub manifest_version: HashMap<RegionId, ManifestVersion>,
95}
96
97#[derive(Clone, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
98pub struct GcReport {
99    /// deleted files per region
100    pub deleted_files: HashMap<RegionId, Vec<FileId>>,
101    /// Regions that need retry in next gc round, usually because their tmp ref files are outdated
102    pub need_retry_regions: HashSet<RegionId>,
103}
104
105impl GcReport {
106    pub fn new(
107        deleted_files: HashMap<RegionId, Vec<FileId>>,
108        need_retry_regions: HashSet<RegionId>,
109    ) -> Self {
110        Self {
111            deleted_files,
112            need_retry_regions,
113        }
114    }
115
116    pub fn merge(&mut self, other: GcReport) {
117        for (region, files) in other.deleted_files {
118            let self_files = self.deleted_files.entry(region).or_default();
119            let dedup: HashSet<FileId> = HashSet::from_iter(
120                std::mem::take(self_files)
121                    .into_iter()
122                    .chain(files.iter().cloned()),
123            );
124            *self_files = dedup.into_iter().collect();
125        }
126        self.need_retry_regions.extend(other.need_retry_regions);
127        // Remove regions that have succeeded from need_retry_regions
128        self.need_retry_regions
129            .retain(|region| !self.deleted_files.contains_key(region));
130    }
131}
132
#[cfg(test)]
mod tests {

    use super::*;

    /// A [FileId] survives a round trip through its string form, both via
    /// `FileId::parse_str` and via `str::parse`.
    #[test]
    fn test_file_id() {
        let original = FileId::random();
        let text = original.to_string();
        assert_eq!(original.0.to_string(), text);

        let via_parse_str = FileId::parse_str(&text).unwrap();
        assert_eq!(original, via_parse_str);
        let via_from_str: FileId = text.parse().unwrap();
        assert_eq!(original, via_from_str);
    }

    /// A [FileId] serializes to a quoted JSON string and deserializes back to
    /// an equal value.
    #[test]
    fn test_file_id_serialization() {
        let id = FileId::random();
        let encoded = serde_json::to_string(&id).unwrap();
        assert_eq!(format!("\"{id}\""), encoded);

        let decoded: FileId = serde_json::from_str(&encoded).unwrap();
        assert_eq!(id, decoded);
    }

    /// A populated [FileRefsManifest] survives a JSON round trip unchanged.
    #[test]
    fn test_file_refs_manifest_serialization() {
        let regions = [
            (RegionId::new(1024, 1), 10),
            (RegionId::new(1024, 2), 20),
        ];

        let mut manifest = FileRefsManifest::default();
        for (region, _) in regions {
            manifest.file_refs.insert(region, [FileId::random()].into());
        }
        for (region, version) in regions {
            manifest.manifest_version.insert(region, version);
        }

        let encoded = serde_json::to_string(&manifest).unwrap();
        let decoded: FileRefsManifest = serde_json::from_str(&encoded).unwrap();
        assert_eq!(manifest, decoded);
    }
}