mito2/sst/
location.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use object_store::util;
16use snafu::OptionExt as _;
17use store_api::metric_engine_consts::{DATA_REGION_SUBDIR, METADATA_REGION_SUBDIR};
18use store_api::path_utils::region_name;
19use store_api::region_request::PathType;
20use store_api::storage::{FileId, RegionId};
21
22use crate::cache::file_cache::FileType;
23use crate::error::UnexpectedSnafu;
24use crate::sst::file::{RegionFileId, RegionIndexId};
25
26/// Generate region dir from table_dir, region_id and path_type
27pub fn region_dir_from_table_dir(
28    table_dir: &str,
29    region_id: RegionId,
30    path_type: PathType,
31) -> String {
32    let region_name = region_name(region_id.table_id(), region_id.region_sequence());
33    let base_region_dir = util::join_dir(table_dir, &region_name);
34
35    match path_type {
36        PathType::Bare => base_region_dir,
37        PathType::Data => util::join_dir(&base_region_dir, DATA_REGION_SUBDIR),
38        PathType::Metadata => util::join_dir(&base_region_dir, METADATA_REGION_SUBDIR),
39    }
40}
41
42pub fn sst_file_path(table_dir: &str, region_file_id: RegionFileId, path_type: PathType) -> String {
43    let region_dir = region_dir_from_table_dir(table_dir, region_file_id.region_id(), path_type);
44    util::join_path(
45        &region_dir,
46        &format!("{}.parquet", region_file_id.file_id()),
47    )
48}
49
50pub fn index_file_path(table_dir: &str, index_id: RegionIndexId, path_type: PathType) -> String {
51    let region_dir = region_dir_from_table_dir(table_dir, index_id.file_id.region_id(), path_type);
52    let index_dir = util::join_dir(&region_dir, "index");
53
54    let filename = if index_id.version == 0 {
55        format!("{}.puffin", index_id.file_id.file_id())
56    } else {
57        format!("{}.{}.puffin", index_id.file_id.file_id(), index_id.version)
58    };
59
60    util::join_path(&index_dir, &filename)
61}
62
63/// Legacy function for backward compatibility - creates index file path using RegionFileId with version 0
64pub fn index_file_path_legacy(
65    table_dir: &str,
66    region_file_id: RegionFileId,
67    path_type: PathType,
68) -> String {
69    let index_id = RegionIndexId::new(region_file_id, 0);
70    index_file_path(table_dir, index_id, path_type)
71}
72
73/// Parse file ID and version from index filename
74pub fn parse_index_file_info(filepath: &str) -> crate::error::Result<(FileId, u64)> {
75    let filename = filepath.rsplit('/').next().context(UnexpectedSnafu {
76        reason: format!("invalid file path: {}", filepath),
77    })?;
78    let parts: Vec<&str> = filename.split('.').collect();
79
80    if parts.len() == 2 && parts[1] == "puffin" {
81        // Legacy format: {file_id}.puffin (version 0)
82        let file_id = parts[0];
83        FileId::parse_str(file_id).map(|id| (id, 0)).map_err(|e| {
84            UnexpectedSnafu {
85                reason: format!("invalid file id: {}, err: {}", file_id, e),
86            }
87            .build()
88        })
89    } else if parts.len() == 3 && parts[2] == "puffin" {
90        // New format: {file_id}.{version}.puffin
91        let file_id = parts[0];
92        let version = parts[1].parse::<u64>().map_err(|_| {
93            UnexpectedSnafu {
94                reason: format!("invalid version in file name: {}", filename),
95            }
96            .build()
97        })?;
98        FileId::parse_str(file_id)
99            .map(|id| (id, version))
100            .map_err(|e| {
101                UnexpectedSnafu {
102                    reason: format!("invalid file id: {}, err: {}", file_id, e),
103                }
104                .build()
105            })
106    } else {
107        UnexpectedSnafu {
108            reason: format!("invalid index file name: {}", filename),
109        }
110        .fail()
111    }
112}
113
114pub fn parse_file_id_type_from_path(filepath: &str) -> crate::error::Result<(FileId, FileType)> {
115    let filename = filepath.rsplit('/').next().context(UnexpectedSnafu {
116        reason: format!("invalid file path: {}", filepath),
117    })?;
118    // get part before first '.'
119    let parts: Vec<&str> = filename.split('.').collect();
120    if parts.len() < 2 {
121        return UnexpectedSnafu {
122            reason: format!("invalid file name: {}", filename),
123        }
124        .fail();
125    }
126    let file_id = parts[0];
127    let file_id = FileId::parse_str(file_id).map_err(|e| {
128        UnexpectedSnafu {
129            reason: format!("invalid file id: {}, err: {}", file_id, e),
130        }
131        .build()
132    })?;
133    let file_type = FileType::parse(parts[1..].join(".").as_str()).context(UnexpectedSnafu {
134        reason: format!("invalid file type in file name: {}", filename),
135    })?;
136    Ok((file_id, file_type))
137}
138
#[cfg(test)]
mod tests {
    use store_api::storage::{FileId, RegionId};

    use super::*;

    /// SST paths for every path type.
    #[test]
    fn test_sst_file_path() {
        let file_id = FileId::random();
        let region_file_id = RegionFileId::new(RegionId::new(1, 2), file_id);
        assert_eq!(
            sst_file_path("table_dir", region_file_id, PathType::Bare),
            format!("table_dir/1_0000000002/{}.parquet", file_id)
        );
        assert_eq!(
            sst_file_path("table_dir", region_file_id, PathType::Data),
            format!("table_dir/1_0000000002/data/{}.parquet", file_id)
        );
        assert_eq!(
            sst_file_path("table_dir", region_file_id, PathType::Metadata),
            format!("table_dir/1_0000000002/metadata/{}.parquet", file_id)
        );
    }

    /// Version 0 index paths use the un-versioned file name for every path type.
    #[test]
    fn test_index_file_path() {
        let file_id = FileId::random();
        let region_file_id = RegionFileId::new(RegionId::new(1, 2), file_id);
        let index_id = RegionIndexId::new(region_file_id, 0);
        assert_eq!(
            index_file_path("table_dir", index_id, PathType::Bare),
            format!("table_dir/1_0000000002/index/{}.puffin", file_id)
        );
        assert_eq!(
            index_file_path("table_dir", index_id, PathType::Data),
            format!("table_dir/1_0000000002/data/index/{}.puffin", file_id)
        );
        assert_eq!(
            index_file_path("table_dir", index_id, PathType::Metadata),
            format!("table_dir/1_0000000002/metadata/index/{}.puffin", file_id)
        );
    }

    /// Non-zero versions are embedded in the index file name.
    #[test]
    fn test_index_file_path_versioned() {
        let file_id = FileId::random();
        let region_file_id = RegionFileId::new(RegionId::new(1, 2), file_id);
        let index_id_v1 = RegionIndexId::new(region_file_id, 1);
        let index_id_v2 = RegionIndexId::new(region_file_id, 2);

        assert_eq!(
            index_file_path("table_dir", index_id_v1, PathType::Bare),
            format!("table_dir/1_0000000002/index/{}.1.puffin", file_id)
        );
        assert_eq!(
            index_file_path("table_dir", index_id_v2, PathType::Bare),
            format!("table_dir/1_0000000002/index/{}.2.puffin", file_id)
        );
    }

    /// Both the legacy and the versioned index file names are parsed.
    #[test]
    fn test_parse_index_file_info() {
        // Test legacy format
        let file_id = FileId::random();
        let result =
            parse_index_file_info(&format!("table_dir/1_0000000002/index/{file_id}.puffin"))
                .unwrap();
        assert_eq!(result.0.to_string(), file_id.to_string());
        assert_eq!(result.1, 0);

        // Test versioned format
        let result =
            parse_index_file_info(&format!("table_dir/1_0000000002/index/{file_id}.1.puffin"))
                .unwrap();
        assert_eq!(result.0.to_string(), file_id.to_string());
        assert_eq!(result.1, 1);

        let result =
            parse_index_file_info(&format!("table_dir/1_0000000002/index/{file_id}.42.puffin"))
                .unwrap();
        assert_eq!(result.0.to_string(), file_id.to_string());
        assert_eq!(result.1, 42);
    }

    /// File names that match neither index layout, or carry invalid parts, are rejected.
    #[test]
    fn test_parse_index_file_info_invalid() {
        let file_id = FileId::random();

        // Wrong extension
        let result =
            parse_index_file_info(&format!("table_dir/1_0000000002/index/{file_id}.parquet"));
        assert!(result.is_err());

        // Version is not a number
        let result =
            parse_index_file_info(&format!("table_dir/1_0000000002/index/{file_id}.abc.puffin"));
        assert!(result.is_err());

        // Version is negative
        let result =
            parse_index_file_info(&format!("table_dir/1_0000000002/index/{file_id}.-1.puffin"));
        assert!(result.is_err());

        // Too many components
        let result =
            parse_index_file_info(&format!("table_dir/1_0000000002/index/{file_id}.1.2.puffin"));
        assert!(result.is_err());

        // Invalid file ID, legacy and versioned layout
        let result = parse_index_file_info("table_dir/1_0000000002/index/invalid-file-id.puffin");
        assert!(result.is_err());
        let result =
            parse_index_file_info("table_dir/1_0000000002/index/invalid-file-id.1.puffin");
        assert!(result.is_err());

        // No file extension
        let result = parse_index_file_info(&format!("table_dir/1_0000000002/index/{file_id}"));
        assert!(result.is_err());

        // Empty filename
        let result = parse_index_file_info("table_dir/1_0000000002/index/");
        assert!(result.is_err());
    }

    /// File id and file type are recovered from SST and index paths; bad names are rejected.
    #[test]
    fn test_parse_file_id_type_from_path() {
        use crate::cache::file_cache::FileType;

        // Test parquet file
        let file_id = FileId::random();
        let path = format!("table_dir/1_0000000002/data/{}.parquet", file_id);
        let result = parse_file_id_type_from_path(&path).unwrap();
        assert_eq!(result.0.to_string(), file_id.to_string());
        assert_eq!(result.1, FileType::Parquet);

        // Test puffin file (legacy format, version 0)
        let file_id = FileId::random();
        let path = format!("table_dir/1_0000000002/index/{}.puffin", file_id);
        let result = parse_file_id_type_from_path(&path).unwrap();
        assert_eq!(result.0.to_string(), file_id.to_string());
        assert_eq!(result.1, FileType::Puffin(0));

        // Test versioned puffin file
        let file_id = FileId::random();
        let path = format!("table_dir/1_0000000002/index/{}.1.puffin", file_id);
        let result = parse_file_id_type_from_path(&path).unwrap();
        assert_eq!(result.0.to_string(), file_id.to_string());
        assert_eq!(result.1, FileType::Puffin(1));

        // Test with different path types
        let file_id = FileId::random();
        let path = format!("table_dir/1_0000000002/metadata/{}.parquet", file_id);
        let result = parse_file_id_type_from_path(&path).unwrap();
        assert_eq!(result.0.to_string(), file_id.to_string());
        assert_eq!(result.1, FileType::Parquet);

        // Test with bare path type
        let file_id = FileId::random();
        let path = format!("table_dir/1_0000000002/{}.parquet", file_id);
        let result = parse_file_id_type_from_path(&path).unwrap();
        assert_eq!(result.0.to_string(), file_id.to_string());
        assert_eq!(result.1, FileType::Parquet);

        // Test error cases
        // Invalid file extension
        let result = parse_file_id_type_from_path("table_dir/1_0000000002/data/test.invalid");
        assert!(result.is_err());

        // Invalid file ID
        let result =
            parse_file_id_type_from_path("table_dir/1_0000000002/data/invalid-file-id.parquet");
        assert!(result.is_err());

        // No file extension
        let result = parse_file_id_type_from_path("table_dir/1_0000000002/data/test");
        assert!(result.is_err());

        // Empty filename
        let result = parse_file_id_type_from_path("table_dir/1_0000000002/data/");
        assert!(result.is_err());
    }
}