puffin/puffin_manager.rs
1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15pub mod cache;
16pub mod file_accessor;
17pub mod fs_puffin_manager;
18pub mod stager;
19
20#[cfg(test)]
21mod tests;
22
23use std::collections::HashMap;
24use std::path::PathBuf;
25use std::sync::Arc;
26
27use async_trait::async_trait;
28use common_base::range_read::RangeReader;
29use futures::AsyncRead;
30
31use crate::blob_metadata::{BlobMetadata, CompressionCodec};
32use crate::error::Result;
33use crate::file_metadata::FileMetadata;
34
35/// The `PuffinManager` trait provides a unified interface for creating `PuffinReader` and `PuffinWriter`.
36#[async_trait]
37pub trait PuffinManager {
38 type Reader: PuffinReader;
39 type Writer: PuffinWriter;
40 type FileHandle: ToString + Clone + Send + Sync;
41
42 /// Creates a `PuffinReader` for the specified `handle`.
43 async fn reader(&self, handle: &Self::FileHandle) -> Result<Self::Reader>;
44
45 /// Creates a `PuffinWriter` for the specified `handle`.
46 async fn writer(&self, handle: &Self::FileHandle) -> Result<Self::Writer>;
47}
48
49/// The `PuffinWriter` trait provides methods for writing blobs and directories to a Puffin file.
50#[async_trait]
51pub trait PuffinWriter {
52 /// Writes a blob associated with the specified `key` to the Puffin file.
53 /// Returns the number of bytes written.
54 async fn put_blob<R>(
55 &mut self,
56 key: &str,
57 raw_data: R,
58 options: PutOptions,
59 properties: HashMap<String, String>,
60 ) -> Result<u64>
61 where
62 R: AsyncRead + Send;
63
64 /// Writes a directory associated with the specified `key` to the Puffin file.
65 /// Returns the number of bytes written.
66 ///
67 /// The specified `dir` should be accessible from the filesystem.
68 async fn put_dir(
69 &mut self,
70 key: &str,
71 dir: PathBuf,
72 options: PutOptions,
73 properties: HashMap<String, String>,
74 ) -> Result<u64>;
75
76 /// Sets whether the footer should be LZ4 compressed.
77 fn set_footer_lz4_compressed(&mut self, lz4_compressed: bool);
78
79 /// Finalizes the Puffin file after writing.
80 async fn finish(self) -> Result<u64>;
81}
82
83/// Options available for `put_blob` and `put_dir` methods.
84#[derive(Debug, Clone, Default)]
85pub struct PutOptions {
86 /// The compression codec to use for blob data.
87 pub compression: Option<CompressionCodec>,
88}
89
90/// The `PuffinReader` trait provides methods for reading blobs and directories from a Puffin file.
91#[async_trait]
92pub trait PuffinReader {
93 type Blob: BlobGuard;
94 type Dir: DirGuard;
95
96 fn with_file_size_hint(self, file_size_hint: Option<u64>) -> Self;
97
98 /// Returns the metadata of the Puffin file.
99 async fn metadata(&self) -> Result<Arc<FileMetadata>>;
100
101 /// Reads a blob from the Puffin file.
102 ///
103 /// The returned `GuardWithMetadata` is used to access the blob data and its metadata.
104 /// Users should hold the `GuardWithMetadata` until they are done with the blob data.
105 async fn blob(&self, key: &str) -> Result<GuardWithMetadata<Self::Blob>>;
106
107 /// Reads a directory from the Puffin file.
108 ///
109 /// The returned `GuardWithMetadata` is used to access the directory data and its metadata.
110 /// Users should hold the `GuardWithMetadata` until they are done with the directory data.
111 async fn dir(&self, key: &str) -> Result<GuardWithMetadata<Self::Dir>>;
112}
113
114/// `BlobGuard` is provided by the `PuffinReader` to access the blob data.
115/// Users should hold the `BlobGuard` until they are done with the blob data.
116#[async_trait]
117#[auto_impl::auto_impl(Arc)]
118pub trait BlobGuard {
119 type Reader: RangeReader;
120 async fn reader(&self) -> Result<Self::Reader>;
121}
122
123/// `DirGuard` is provided by the `PuffinReader` to access the directory in the filesystem.
124/// Users should hold the `DirGuard` until they are done with the directory.
125#[auto_impl::auto_impl(Arc)]
126pub trait DirGuard {
127 fn path(&self) -> &PathBuf;
128}
129
130/// `GuardWithMetadata` provides access to the blob or directory data and its metadata.
131pub struct GuardWithMetadata<G> {
132 guard: G,
133 metadata: BlobMetadata,
134}
135
136impl<G> GuardWithMetadata<G> {
137 /// Creates a new `GuardWithMetadata` instance.
138 pub fn new(guard: G, metadata: BlobMetadata) -> Self {
139 Self { guard, metadata }
140 }
141
142 /// Returns the metadata of the directory.
143 pub fn metadata(&self) -> &BlobMetadata {
144 &self.metadata
145 }
146}
147
148impl<G: BlobGuard> GuardWithMetadata<G> {
149 /// Returns the reader for the blob data.
150 pub async fn reader(&self) -> Result<G::Reader> {
151 self.guard.reader().await
152 }
153}
154
155impl<G: DirGuard> GuardWithMetadata<G> {
156 /// Returns the path of the directory.
157 pub fn path(&self) -> &PathBuf {
158 self.guard.path()
159 }
160}