index/inverted_index/format/
reader.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::VecDeque;
16use std::ops::Range;
17use std::sync::Arc;
18use std::time::Duration;
19
20use async_trait::async_trait;
21use bytes::Bytes;
22use greptime_proto::v1::index::InvertedIndexMetas;
23use snafu::ResultExt;
24
25use crate::bitmap::{Bitmap, BitmapType};
26use crate::inverted_index::FstMap;
27use crate::inverted_index::error::{DecodeBitmapSnafu, DecodeFstSnafu, Result};
28pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
29
30mod blob;
31mod footer;
32
/// Counters collected while reading inverted index data.
///
/// Every field starts at zero via [`Default`], so a fresh value represents "nothing read yet".
#[derive(Clone, Default)]
pub struct InvertedIndexReadMetrics {
    /// Sum of the byte sizes of all reads.
    pub total_bytes: u64,
    /// How many ranges were read in total.
    pub total_ranges: usize,
    /// Time spent fetching data.
    pub fetch_elapsed: Duration,
    /// How many lookups were served from the cache.
    pub cache_hit: usize,
    /// How many lookups missed the cache.
    pub cache_miss: usize,
}
47
48impl std::fmt::Debug for InvertedIndexReadMetrics {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        let Self {
51            total_bytes,
52            total_ranges,
53            fetch_elapsed,
54            cache_hit,
55            cache_miss,
56        } = self;
57
58        // If both total_bytes and cache_hit are 0, we didn't read anything.
59        if *total_bytes == 0 && *cache_hit == 0 {
60            return write!(f, "{{}}");
61        }
62        write!(f, "{{")?;
63
64        if *total_bytes > 0 {
65            write!(f, "\"total_bytes\":{}", total_bytes)?;
66        }
67        if *cache_hit > 0 {
68            if *total_bytes > 0 {
69                write!(f, ", ")?;
70            }
71            write!(f, "\"cache_hit\":{}", cache_hit)?;
72        }
73
74        if *total_ranges > 0 {
75            write!(f, ", \"total_ranges\":{}", total_ranges)?;
76        }
77        if !fetch_elapsed.is_zero() {
78            write!(f, ", \"fetch_elapsed\":\"{:?}\"", fetch_elapsed)?;
79        }
80        if *cache_miss > 0 {
81            write!(f, ", \"cache_miss\":{}", cache_miss)?;
82        }
83
84        write!(f, "}}")
85    }
86}
87
88impl InvertedIndexReadMetrics {
89    /// Merges another metrics into this one.
90    pub fn merge_from(&mut self, other: &Self) {
91        self.total_bytes += other.total_bytes;
92        self.total_ranges += other.total_ranges;
93        self.fetch_elapsed += other.fetch_elapsed;
94        self.cache_hit += other.cache_hit;
95        self.cache_miss += other.cache_miss;
96    }
97}
98
99/// InvertedIndexReader defines an asynchronous reader of inverted index data
100#[mockall::automock]
101#[async_trait]
102pub trait InvertedIndexReader: Send + Sync {
103    /// Seeks to given offset and reads data with exact size as provided.
104    async fn range_read<'a>(
105        &self,
106        offset: u64,
107        size: u32,
108        metrics: Option<&'a mut InvertedIndexReadMetrics>,
109    ) -> Result<Vec<u8>>;
110
111    /// Reads the bytes in the given ranges.
112    async fn read_vec<'a>(
113        &self,
114        ranges: &[Range<u64>],
115        metrics: Option<&'a mut InvertedIndexReadMetrics>,
116    ) -> Result<Vec<Bytes>>;
117
118    /// Retrieves metadata of all inverted indices stored within the blob.
119    async fn metadata<'a>(
120        &self,
121        metrics: Option<&'a mut InvertedIndexReadMetrics>,
122    ) -> Result<Arc<InvertedIndexMetas>>;
123
124    /// Retrieves the finite state transducer (FST) map from the given offset and size.
125    async fn fst<'a>(
126        &self,
127        offset: u64,
128        size: u32,
129        metrics: Option<&'a mut InvertedIndexReadMetrics>,
130    ) -> Result<FstMap> {
131        let fst_data = self.range_read(offset, size, metrics).await?;
132        FstMap::new(fst_data).context(DecodeFstSnafu)
133    }
134
135    /// Retrieves the multiple finite state transducer (FST) maps from the given ranges.
136    async fn fst_vec<'a>(
137        &mut self,
138        ranges: &[Range<u64>],
139        metrics: Option<&'a mut InvertedIndexReadMetrics>,
140    ) -> Result<Vec<FstMap>> {
141        self.read_vec(ranges, metrics)
142            .await?
143            .into_iter()
144            .map(|bytes| FstMap::new(bytes.to_vec()).context(DecodeFstSnafu))
145            .collect::<Result<Vec<_>>>()
146    }
147
148    /// Retrieves the bitmap from the given offset and size.
149    async fn bitmap<'a>(
150        &self,
151        offset: u64,
152        size: u32,
153        bitmap_type: BitmapType,
154        metrics: Option<&'a mut InvertedIndexReadMetrics>,
155    ) -> Result<Bitmap> {
156        self.range_read(offset, size, metrics)
157            .await
158            .and_then(|bytes| {
159                Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
160            })
161    }
162
163    /// Retrieves the multiple bitmaps from the given ranges.
164    async fn bitmap_deque<'a>(
165        &mut self,
166        ranges: &[(Range<u64>, BitmapType)],
167        metrics: Option<&'a mut InvertedIndexReadMetrics>,
168    ) -> Result<VecDeque<Bitmap>> {
169        let (ranges, types): (Vec<_>, Vec<_>) = ranges.iter().cloned().unzip();
170        let bytes = self.read_vec(&ranges, metrics).await?;
171        bytes
172            .into_iter()
173            .zip(types)
174            .map(|(bytes, bitmap_type)| {
175                Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
176            })
177            .collect::<Result<VecDeque<_>>>()
178    }
179}