// puffin/puffin_manager.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15pub mod cache;
16pub mod file_accessor;
17pub mod fs_puffin_manager;
18pub mod stager;
19
20#[cfg(test)]
21mod tests;
22
23use std::collections::HashMap;
24use std::path::PathBuf;
25use std::sync::Arc;
26
27use async_trait::async_trait;
28use common_base::range_read::RangeReader;
29use futures::AsyncRead;
30
31use crate::blob_metadata::{BlobMetadata, CompressionCodec};
32use crate::error::Result;
33use crate::file_metadata::FileMetadata;
34
35/// The `PuffinManager` trait provides a unified interface for creating `PuffinReader` and `PuffinWriter`.
36#[async_trait]
37pub trait PuffinManager {
38    type Reader: PuffinReader;
39    type Writer: PuffinWriter;
40    type FileHandle: ToString + Clone + Send + Sync;
41
42    /// Creates a `PuffinReader` for the specified `handle`.
43    async fn reader(&self, handle: &Self::FileHandle) -> Result<Self::Reader>;
44
45    /// Creates a `PuffinWriter` for the specified `handle`.
46    async fn writer(&self, handle: &Self::FileHandle) -> Result<Self::Writer>;
47}
48
49/// The `PuffinWriter` trait provides methods for writing blobs and directories to a Puffin file.
50#[async_trait]
51pub trait PuffinWriter {
52    /// Writes a blob associated with the specified `key` to the Puffin file.
53    /// Returns the number of bytes written.
54    async fn put_blob<R>(
55        &mut self,
56        key: &str,
57        raw_data: R,
58        options: PutOptions,
59        properties: HashMap<String, String>,
60    ) -> Result<u64>
61    where
62        R: AsyncRead + Send;
63
64    /// Writes a directory associated with the specified `key` to the Puffin file.
65    /// Returns the number of bytes written.
66    ///
67    /// The specified `dir` should be accessible from the filesystem.
68    async fn put_dir(
69        &mut self,
70        key: &str,
71        dir: PathBuf,
72        options: PutOptions,
73        properties: HashMap<String, String>,
74    ) -> Result<u64>;
75
76    /// Sets whether the footer should be LZ4 compressed.
77    fn set_footer_lz4_compressed(&mut self, lz4_compressed: bool);
78
79    /// Finalizes the Puffin file after writing.
80    async fn finish(self) -> Result<u64>;
81}
82
83/// Options available for `put_blob` and `put_dir` methods.
84#[derive(Debug, Clone, Default)]
85pub struct PutOptions {
86    /// The compression codec to use for blob data.
87    pub compression: Option<CompressionCodec>,
88}
89
90/// The `PuffinReader` trait provides methods for reading blobs and directories from a Puffin file.
91#[async_trait]
92pub trait PuffinReader {
93    type Blob: BlobGuard;
94    type Dir: DirGuard;
95
96    fn with_file_size_hint(self, file_size_hint: Option<u64>) -> Self;
97
98    /// Returns the metadata of the Puffin file.
99    async fn metadata(&self) -> Result<Arc<FileMetadata>>;
100
101    /// Reads a blob from the Puffin file.
102    ///
103    /// The returned `GuardWithMetadata` is used to access the blob data and its metadata.
104    /// Users should hold the `GuardWithMetadata` until they are done with the blob data.
105    async fn blob(&self, key: &str) -> Result<GuardWithMetadata<Self::Blob>>;
106
107    /// Reads a directory from the Puffin file.
108    ///
109    /// The returned `GuardWithMetadata` is used to access the directory data and its metadata.
110    /// Users should hold the `GuardWithMetadata` until they are done with the directory data.
111    async fn dir(&self, key: &str) -> Result<GuardWithMetadata<Self::Dir>>;
112}
113
114/// `BlobGuard` is provided by the `PuffinReader` to access the blob data.
115/// Users should hold the `BlobGuard` until they are done with the blob data.
116#[async_trait]
117#[auto_impl::auto_impl(Arc)]
118pub trait BlobGuard {
119    type Reader: RangeReader;
120    async fn reader(&self) -> Result<Self::Reader>;
121}
122
123/// `DirGuard` is provided by the `PuffinReader` to access the directory in the filesystem.
124/// Users should hold the `DirGuard` until they are done with the directory.
125#[auto_impl::auto_impl(Arc)]
126pub trait DirGuard {
127    fn path(&self) -> &PathBuf;
128}
129
130/// `GuardWithMetadata` provides access to the blob or directory data and its metadata.
131pub struct GuardWithMetadata<G> {
132    guard: G,
133    metadata: BlobMetadata,
134}
135
136impl<G> GuardWithMetadata<G> {
137    /// Creates a new `GuardWithMetadata` instance.
138    pub fn new(guard: G, metadata: BlobMetadata) -> Self {
139        Self { guard, metadata }
140    }
141
142    /// Returns the metadata of the directory.
143    pub fn metadata(&self) -> &BlobMetadata {
144        &self.metadata
145    }
146}
147
148impl<G: BlobGuard> GuardWithMetadata<G> {
149    /// Returns the reader for the blob data.
150    pub async fn reader(&self) -> Result<G::Reader> {
151        self.guard.reader().await
152    }
153}
154
155impl<G: DirGuard> GuardWithMetadata<G> {
156    /// Returns the path of the directory.
157    pub fn path(&self) -> &PathBuf {
158        self.guard.path()
159    }
160}