index/inverted_index/format/
reader.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::VecDeque;
16use std::ops::Range;
17use std::sync::Arc;
18
19use async_trait::async_trait;
20use bytes::Bytes;
21use greptime_proto::v1::index::InvertedIndexMetas;
22use snafu::ResultExt;
23
24use crate::bitmap::{Bitmap, BitmapType};
25use crate::inverted_index::error::{DecodeBitmapSnafu, DecodeFstSnafu, Result};
26pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
27use crate::inverted_index::FstMap;
28
29mod blob;
30mod footer;
31
32/// InvertedIndexReader defines an asynchronous reader of inverted index data
33#[mockall::automock]
34#[async_trait]
35pub trait InvertedIndexReader: Send + Sync {
36    /// Seeks to given offset and reads data with exact size as provided.
37    async fn range_read(&self, offset: u64, size: u32) -> Result<Vec<u8>>;
38
39    /// Reads the bytes in the given ranges.
40    async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
41        let mut result = Vec::with_capacity(ranges.len());
42        for range in ranges {
43            let data = self
44                .range_read(range.start, (range.end - range.start) as u32)
45                .await?;
46            result.push(Bytes::from(data));
47        }
48        Ok(result)
49    }
50
51    /// Retrieves metadata of all inverted indices stored within the blob.
52    async fn metadata(&self) -> Result<Arc<InvertedIndexMetas>>;
53
54    /// Retrieves the finite state transducer (FST) map from the given offset and size.
55    async fn fst(&self, offset: u64, size: u32) -> Result<FstMap> {
56        let fst_data = self.range_read(offset, size).await?;
57        FstMap::new(fst_data).context(DecodeFstSnafu)
58    }
59
60    /// Retrieves the multiple finite state transducer (FST) maps from the given ranges.
61    async fn fst_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<FstMap>> {
62        self.read_vec(ranges)
63            .await?
64            .into_iter()
65            .map(|bytes| FstMap::new(bytes.to_vec()).context(DecodeFstSnafu))
66            .collect::<Result<Vec<_>>>()
67    }
68
69    /// Retrieves the bitmap from the given offset and size.
70    async fn bitmap(&self, offset: u64, size: u32, bitmap_type: BitmapType) -> Result<Bitmap> {
71        self.range_read(offset, size).await.and_then(|bytes| {
72            Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
73        })
74    }
75
76    /// Retrieves the multiple bitmaps from the given ranges.
77    async fn bitmap_deque(
78        &mut self,
79        ranges: &[(Range<u64>, BitmapType)],
80    ) -> Result<VecDeque<Bitmap>> {
81        let (ranges, types): (Vec<_>, Vec<_>) = ranges.iter().cloned().unzip();
82        let bytes = self.read_vec(&ranges).await?;
83        bytes
84            .into_iter()
85            .zip(types)
86            .map(|(bytes, bitmap_type)| {
87                Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
88            })
89            .collect::<Result<VecDeque<_>>>()
90    }
91}