index/inverted_index/search/fst_apply/
keys_apply.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashSet;
16use std::mem::size_of;
17
18use snafu::{ensure, ResultExt};
19
20use crate::inverted_index::error::{
21    EmptyPredicatesSnafu, KeysApplierUnexpectedPredicatesSnafu, KeysApplierWithoutInListSnafu,
22    ParseRegexSnafu, Result,
23};
24use crate::inverted_index::search::fst_apply::FstApplier;
25use crate::inverted_index::search::predicate::Predicate;
26use crate::inverted_index::FstMap;
27use crate::Bytes;
28
29/// `KeysFstApplier` is responsible for applying a search using a set of predefined keys
30/// against an FstMap to fetch associated values.
31pub struct KeysFstApplier {
32    /// A list of keys to be fetched directly from the FstMap.
33    keys: Vec<Bytes>,
34}
35
36impl FstApplier for KeysFstApplier {
37    fn apply(&self, fst: &FstMap) -> Vec<u64> {
38        self.keys.iter().filter_map(|k| fst.get(k)).collect()
39    }
40
41    fn memory_usage(&self) -> usize {
42        self.keys.capacity() * size_of::<Bytes>()
43            + self.keys.iter().map(|k| k.capacity()).sum::<usize>()
44    }
45}
46
47impl KeysFstApplier {
48    /// Tries to create a `KeysFstApplier` from a list of predicates.
49    ///
50    /// This function constructs the applier by intersecting keys from one or more `InList` predicates,
51    /// which are required. It then optionally refines this set using any additional `Range` and `RegexMatch`
52    /// predicates provided.
53    pub fn try_from(mut predicates: Vec<Predicate>) -> Result<Self> {
54        ensure!(!predicates.is_empty(), EmptyPredicatesSnafu);
55
56        let (in_lists, others) = Self::split_at_in_lists(&mut predicates);
57        let (ranges, regexes) = Self::split_at_ranges(others);
58        Self::ensure_all_regexes(regexes)?;
59
60        ensure!(!in_lists.is_empty(), KeysApplierWithoutInListSnafu);
61        let intersected_keys = Self::intersect_with_lists(in_lists);
62        let range_matched_keys = Self::filter_by_ranges(intersected_keys, ranges);
63        let regex_matched_keys = Self::filter_by_regexes(range_matched_keys, regexes)?;
64
65        Ok(Self {
66            keys: regex_matched_keys,
67        })
68    }
69
70    fn split_at_in_lists(predicates: &mut [Predicate]) -> (&mut [Predicate], &mut [Predicate]) {
71        let in_list_index = predicates
72            .iter_mut()
73            .partition_in_place(|p| matches!(p, Predicate::InList(_)));
74        predicates.split_at_mut(in_list_index)
75    }
76
77    fn split_at_ranges(predicates: &mut [Predicate]) -> (&mut [Predicate], &mut [Predicate]) {
78        let range_index = predicates
79            .iter_mut()
80            .partition_in_place(|p| matches!(p, Predicate::Range(_)));
81        predicates.split_at_mut(range_index)
82    }
83
84    fn ensure_all_regexes(ps: &[Predicate]) -> Result<()> {
85        ensure!(
86            ps.iter().all(|p| matches!(p, Predicate::RegexMatch(_))),
87            KeysApplierUnexpectedPredicatesSnafu {
88                predicates: ps.to_vec()
89            }
90        );
91        Ok(())
92    }
93
94    fn intersect_with_lists(in_lists: &mut [Predicate]) -> Vec<Bytes> {
95        #[inline]
96        fn get_list(p: &Predicate) -> &HashSet<Bytes> {
97            match p {
98                Predicate::InList(i) => &i.list,
99                _ => unreachable!(), // `in_lists` is filtered by `split_at_in_lists`
100            }
101        }
102
103        in_lists.sort_unstable_by_key(|p| get_list(p).len());
104        get_list(&in_lists[0])
105            .iter()
106            .filter(|c| in_lists[1..].iter().all(|s| get_list(s).contains(*c)))
107            .cloned()
108            .collect()
109    }
110
111    fn filter_by_ranges(mut keys: Vec<Bytes>, ranges: &[Predicate]) -> Vec<Bytes> {
112        #[inline]
113        fn range_contains(p: &Predicate, key: &Bytes) -> bool {
114            let (lower, upper) = match p {
115                Predicate::Range(r) => (&r.range.lower, &r.range.upper),
116                _ => unreachable!(), // `ranges` is filtered by `split_at_ranges`
117            };
118
119            match (lower, upper) {
120                (Some(lower), Some(upper)) => match (lower.inclusive, upper.inclusive) {
121                    (true, true) => &lower.value <= key && key <= &upper.value,
122                    (true, false) => &lower.value <= key && key < &upper.value,
123                    (false, true) => &lower.value < key && key <= &upper.value,
124                    (false, false) => &lower.value < key && key < &upper.value,
125                },
126                (Some(lower), None) => match lower.inclusive {
127                    true => &lower.value <= key,
128                    false => &lower.value < key,
129                },
130                (None, Some(upper)) => match upper.inclusive {
131                    true => key <= &upper.value,
132                    false => key < &upper.value,
133                },
134                (None, None) => true,
135            }
136        }
137
138        keys.retain(|k| ranges.iter().all(|r| range_contains(r, k)));
139        keys
140    }
141
142    fn filter_by_regexes(mut keys: Vec<Bytes>, regexes: &[Predicate]) -> Result<Vec<Bytes>> {
143        for p in regexes {
144            let pattern = match p {
145                Predicate::RegexMatch(r) => &r.pattern,
146                _ => unreachable!(), // checked by `ensure_all_regexes`
147            };
148
149            let regex = regex::Regex::new(pattern).with_context(|_| ParseRegexSnafu {
150                pattern: pattern.to_owned(),
151            })?;
152
153            keys.retain(|k| {
154                std::str::from_utf8(k)
155                    .map(|k| regex.is_match(k))
156                    .unwrap_or_default()
157            });
158            if keys.is_empty() {
159                return Ok(keys);
160            }
161        }
162
163        Ok(keys)
164    }
165}
166
167impl TryFrom<Vec<Predicate>> for KeysFstApplier {
168    type Error = crate::inverted_index::error::Error;
169    fn try_from(predicates: Vec<Predicate>) -> Result<Self> {
170        Self::try_from(predicates)
171    }
172}
173
174#[cfg(test)]
175mod tests {
176    use fst::Map as FstMap;
177
178    use super::*;
179    use crate::inverted_index::error::Error;
180    use crate::inverted_index::search::predicate::{
181        Bound, InListPredicate, Predicate, Range, RangePredicate, RegexMatchPredicate,
182    };
183
184    fn create_fst_map(items: &[(&[u8], u64)]) -> FstMap<Vec<u8>> {
185        let mut items = items
186            .iter()
187            .map(|(k, v)| (k.to_vec(), *v))
188            .collect::<Vec<_>>();
189        items.sort();
190        FstMap::from_iter(items).unwrap()
191    }
192
193    fn b(s: &str) -> Vec<u8> {
194        s.as_bytes().to_vec()
195    }
196
197    #[test]
198    fn test_keys_fst_applier_apply() {
199        let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]);
200        let applier = KeysFstApplier {
201            keys: vec![b("foo"), b("baz")],
202        };
203
204        let results = applier.apply(&test_fst);
205        assert_eq!(results, vec![1, 3]);
206    }
207
208    #[test]
209    fn test_keys_fst_applier_with_empty_keys() {
210        let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]);
211        let applier = KeysFstApplier { keys: vec![] };
212
213        let results = applier.apply(&test_fst);
214        assert!(results.is_empty());
215    }
216
217    #[test]
218    fn test_keys_fst_applier_with_unmatched_keys() {
219        let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]);
220        let applier = KeysFstApplier {
221            keys: vec![b("qux"), b("quux")],
222        };
223
224        let results = applier.apply(&test_fst);
225        assert!(results.is_empty());
226    }
227
228    #[test]
229    fn test_keys_fst_applier_try_from() {
230        let predicates = vec![
231            Predicate::InList(InListPredicate {
232                list: HashSet::from_iter(vec![b("foo"), b("bar")]),
233            }),
234            Predicate::Range(RangePredicate {
235                range: Range {
236                    lower: Some(Bound {
237                        value: b("bar"),
238                        inclusive: true,
239                    }),
240                    upper: None,
241                },
242            }),
243            Predicate::RegexMatch(RegexMatchPredicate {
244                pattern: ".*r".to_string(),
245            }),
246        ];
247        let applier = KeysFstApplier::try_from(predicates).unwrap();
248        assert_eq!(applier.keys, vec![b("bar")]);
249    }
250
251    #[test]
252    fn test_keys_fst_applier_try_from_filter_out_unmatched_keys() {
253        let predicates = vec![
254            Predicate::InList(InListPredicate {
255                list: HashSet::from_iter(vec![b("foo"), b("bar")]),
256            }),
257            Predicate::Range(RangePredicate {
258                range: Range {
259                    lower: Some(Bound {
260                        value: b("f"),
261                        inclusive: true,
262                    }),
263                    upper: None,
264                },
265            }),
266            Predicate::RegexMatch(RegexMatchPredicate {
267                pattern: ".*o".to_string(),
268            }),
269        ];
270        let applier = KeysFstApplier::try_from(predicates).unwrap();
271        assert_eq!(applier.keys, vec![b("foo")]);
272    }
273
274    #[test]
275    fn test_keys_fst_applier_try_from_empty_predicates() {
276        let predicates = vec![];
277        let result = KeysFstApplier::try_from(predicates);
278        assert!(matches!(result, Err(Error::EmptyPredicates { .. })));
279    }
280
281    #[test]
282    fn test_keys_fst_applier_try_from_without_in_list() {
283        let predicates = vec![Predicate::Range(RangePredicate {
284            range: Range {
285                lower: Some(Bound {
286                    value: b("bar"),
287                    inclusive: true,
288                }),
289                upper: None,
290            },
291        })];
292        let result = KeysFstApplier::try_from(predicates);
293        assert!(matches!(
294            result,
295            Err(Error::KeysApplierWithoutInList { .. })
296        ));
297    }
298
299    #[test]
300    fn test_keys_fst_applier_try_from_with_invalid_regex() {
301        let predicates = vec![
302            Predicate::InList(InListPredicate {
303                list: HashSet::from_iter(vec![b("foo"), b("bar")]),
304            }),
305            Predicate::RegexMatch(RegexMatchPredicate {
306                pattern: "*invalid regex".to_string(),
307            }),
308        ];
309        let result = KeysFstApplier::try_from(predicates);
310        assert!(matches!(result, Err(Error::ParseRegex { .. })));
311    }
312
313    #[test]
314    fn test_keys_fst_applier_memory_usage() {
315        let applier = KeysFstApplier { keys: vec![] };
316        assert_eq!(applier.memory_usage(), 0);
317
318        let applier = KeysFstApplier {
319            keys: vec![b("foo"), b("bar")],
320        };
321        assert_eq!(applier.memory_usage(), 2 * size_of::<Bytes>() + 6);
322    }
323}