store_api/storage/
file.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15use std::collections::{HashMap, HashSet};
16use std::fmt;
17use std::fmt::Debug;
18use std::str::FromStr;
19
20use serde::{Deserialize, Serialize};
21use snafu::{ResultExt, Snafu};
22use uuid::Uuid;
23
24use crate::ManifestVersion;
25use crate::storage::RegionId;
26
27#[derive(Debug, Snafu, PartialEq)]
28pub struct ParseIdError {
29    source: uuid::Error,
30}
31
32/// Unique id for [SST File].
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
34pub struct FileId(Uuid);
35
36impl FileId {
37    /// Returns a new unique [FileId] randomly.
38    pub fn random() -> FileId {
39        FileId(Uuid::new_v4())
40    }
41
42    /// Parses id from string.
43    pub fn parse_str(input: &str) -> std::result::Result<FileId, ParseIdError> {
44        Uuid::parse_str(input).map(FileId).context(ParseIdSnafu)
45    }
46
47    /// Converts [FileId] as byte slice.
48    pub fn as_bytes(&self) -> &[u8] {
49        self.0.as_bytes()
50    }
51}
52
53impl From<FileId> for Uuid {
54    fn from(value: FileId) -> Self {
55        value.0
56    }
57}
58
59impl fmt::Display for FileId {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        write!(f, "{}", self.0)
62    }
63}
64
65impl FromStr for FileId {
66    type Err = ParseIdError;
67
68    fn from_str(s: &str) -> std::result::Result<FileId, ParseIdError> {
69        FileId::parse_str(s)
70    }
71}
72
73#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
74pub struct FileRef {
75    pub region_id: RegionId,
76    pub file_id: FileId,
77}
78
79impl FileRef {
80    pub fn new(region_id: RegionId, file_id: FileId) -> Self {
81        Self { region_id, file_id }
82    }
83}
84
85/// The tmp file manifest which record a table's file references.
86/// Also record the manifest version when these tmp files are read.
87#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
88pub struct FileRefsManifest {
89    pub file_refs: HashMap<RegionId, HashSet<FileId>>,
90    /// Manifest version when this manifest is read for it's files
91    pub manifest_version: HashMap<RegionId, ManifestVersion>,
92}
93
94#[derive(Clone, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
95pub struct GcReport {
96    /// deleted files per region
97    pub deleted_files: HashMap<RegionId, Vec<FileId>>,
98    /// Regions that need retry in next gc round, usually because their tmp ref files are outdated
99    pub need_retry_regions: HashSet<RegionId>,
100}
101
102impl GcReport {
103    pub fn new(
104        deleted_files: HashMap<RegionId, Vec<FileId>>,
105        need_retry_regions: HashSet<RegionId>,
106    ) -> Self {
107        Self {
108            deleted_files,
109            need_retry_regions,
110        }
111    }
112
113    pub fn merge(&mut self, other: GcReport) {
114        for (region, files) in other.deleted_files {
115            let self_files = self.deleted_files.entry(region).or_default();
116            let dedup: HashSet<FileId> = HashSet::from_iter(
117                std::mem::take(self_files)
118                    .into_iter()
119                    .chain(files.iter().cloned()),
120            );
121            *self_files = dedup.into_iter().collect();
122        }
123        self.need_retry_regions.extend(other.need_retry_regions);
124    }
125}
126
#[cfg(test)]
mod tests {

    use super::*;

    /// A random id survives a round trip through its string form, both via
    /// `FileId::parse_str` and via `str::parse`.
    #[test]
    fn test_file_id() {
        let id = FileId::random();
        let uuid_str = id.to_string();
        assert_eq!(id.0.to_string(), uuid_str);

        let parsed = FileId::parse_str(&uuid_str).unwrap();
        assert_eq!(id, parsed);
        let parsed = uuid_str.parse().unwrap();
        assert_eq!(id, parsed);
    }

    /// Strings that are not UUIDs are rejected by both parsing entry points.
    #[test]
    fn test_file_id_parse_error() {
        assert!(FileId::parse_str("not-a-uuid").is_err());
        assert!("".parse::<FileId>().is_err());
    }

    /// A `FileId` serializes to a JSON string holding its display form.
    #[test]
    fn test_file_id_serialization() {
        let id = FileId::random();
        let json = serde_json::to_string(&id).unwrap();
        assert_eq!(format!("\"{id}\""), json);

        let parsed = serde_json::from_str(&json).unwrap();
        assert_eq!(id, parsed);
    }

    /// A populated manifest survives a JSON round trip unchanged.
    #[test]
    fn test_file_refs_manifest_serialization() {
        let mut manifest = FileRefsManifest::default();
        let r0 = RegionId::new(1024, 1);
        let r1 = RegionId::new(1024, 2);
        manifest.file_refs.insert(r0, [FileId::random()].into());
        manifest.file_refs.insert(r1, [FileId::random()].into());
        manifest.manifest_version.insert(r0, 10);
        manifest.manifest_version.insert(r1, 20);

        let json = serde_json::to_string(&manifest).unwrap();
        let parsed: FileRefsManifest = serde_json::from_str(&json).unwrap();
        assert_eq!(manifest, parsed);
    }

    /// Merging unions the deleted files per region without duplicates and
    /// unions the retry regions. File lists are compared as sets so the test
    /// does not depend on the order `merge` produces.
    #[test]
    fn test_gc_report_merge() {
        let r0 = RegionId::new(1024, 1);
        let r1 = RegionId::new(1024, 2);
        let shared = FileId::random();
        let only_left = FileId::random();
        let only_right = FileId::random();

        let mut report = GcReport::new(
            HashMap::from([(r0, vec![shared, only_left])]),
            HashSet::from([r0]),
        );
        let other = GcReport::new(
            HashMap::from([(r0, vec![shared, only_right]), (r1, vec![only_right])]),
            HashSet::from([r1]),
        );
        report.merge(other);

        let files_r0 = &report.deleted_files[&r0];
        assert_eq!(files_r0.len(), 3);
        assert_eq!(
            files_r0.iter().copied().collect::<HashSet<_>>(),
            HashSet::from([shared, only_left, only_right])
        );
        assert_eq!(report.deleted_files[&r1], vec![only_right]);
        assert_eq!(report.need_retry_regions, HashSet::from([r0, r1]));
    }
}
168}