index/inverted_index/search/fst_apply/
keys_apply.rs1use std::collections::HashSet;
16use std::mem::size_of;
17
18use snafu::{ensure, ResultExt};
19
20use crate::inverted_index::error::{
21 EmptyPredicatesSnafu, KeysApplierUnexpectedPredicatesSnafu, KeysApplierWithoutInListSnafu,
22 ParseRegexSnafu, Result,
23};
24use crate::inverted_index::search::fst_apply::FstApplier;
25use crate::inverted_index::search::predicate::Predicate;
26use crate::inverted_index::FstMap;
27use crate::Bytes;
28
29pub struct KeysFstApplier {
32 keys: Vec<Bytes>,
34}
35
36impl FstApplier for KeysFstApplier {
37 fn apply(&self, fst: &FstMap) -> Vec<u64> {
38 self.keys.iter().filter_map(|k| fst.get(k)).collect()
39 }
40
41 fn memory_usage(&self) -> usize {
42 self.keys.capacity() * size_of::<Bytes>()
43 + self.keys.iter().map(|k| k.capacity()).sum::<usize>()
44 }
45}
46
47impl KeysFstApplier {
48 pub fn try_from(mut predicates: Vec<Predicate>) -> Result<Self> {
54 ensure!(!predicates.is_empty(), EmptyPredicatesSnafu);
55
56 let (in_lists, others) = Self::split_at_in_lists(&mut predicates);
57 let (ranges, regexes) = Self::split_at_ranges(others);
58 Self::ensure_all_regexes(regexes)?;
59
60 ensure!(!in_lists.is_empty(), KeysApplierWithoutInListSnafu);
61 let intersected_keys = Self::intersect_with_lists(in_lists);
62 let range_matched_keys = Self::filter_by_ranges(intersected_keys, ranges);
63 let regex_matched_keys = Self::filter_by_regexes(range_matched_keys, regexes)?;
64
65 Ok(Self {
66 keys: regex_matched_keys,
67 })
68 }
69
70 fn split_at_in_lists(predicates: &mut [Predicate]) -> (&mut [Predicate], &mut [Predicate]) {
71 let in_list_index = predicates
72 .iter_mut()
73 .partition_in_place(|p| matches!(p, Predicate::InList(_)));
74 predicates.split_at_mut(in_list_index)
75 }
76
77 fn split_at_ranges(predicates: &mut [Predicate]) -> (&mut [Predicate], &mut [Predicate]) {
78 let range_index = predicates
79 .iter_mut()
80 .partition_in_place(|p| matches!(p, Predicate::Range(_)));
81 predicates.split_at_mut(range_index)
82 }
83
84 fn ensure_all_regexes(ps: &[Predicate]) -> Result<()> {
85 ensure!(
86 ps.iter().all(|p| matches!(p, Predicate::RegexMatch(_))),
87 KeysApplierUnexpectedPredicatesSnafu {
88 predicates: ps.to_vec()
89 }
90 );
91 Ok(())
92 }
93
94 fn intersect_with_lists(in_lists: &mut [Predicate]) -> Vec<Bytes> {
95 #[inline]
96 fn get_list(p: &Predicate) -> &HashSet<Bytes> {
97 match p {
98 Predicate::InList(i) => &i.list,
99 _ => unreachable!(), }
101 }
102
103 in_lists.sort_unstable_by_key(|p| get_list(p).len());
104 get_list(&in_lists[0])
105 .iter()
106 .filter(|c| in_lists[1..].iter().all(|s| get_list(s).contains(*c)))
107 .cloned()
108 .collect()
109 }
110
111 fn filter_by_ranges(mut keys: Vec<Bytes>, ranges: &[Predicate]) -> Vec<Bytes> {
112 #[inline]
113 fn range_contains(p: &Predicate, key: &Bytes) -> bool {
114 let (lower, upper) = match p {
115 Predicate::Range(r) => (&r.range.lower, &r.range.upper),
116 _ => unreachable!(), };
118
119 match (lower, upper) {
120 (Some(lower), Some(upper)) => match (lower.inclusive, upper.inclusive) {
121 (true, true) => &lower.value <= key && key <= &upper.value,
122 (true, false) => &lower.value <= key && key < &upper.value,
123 (false, true) => &lower.value < key && key <= &upper.value,
124 (false, false) => &lower.value < key && key < &upper.value,
125 },
126 (Some(lower), None) => match lower.inclusive {
127 true => &lower.value <= key,
128 false => &lower.value < key,
129 },
130 (None, Some(upper)) => match upper.inclusive {
131 true => key <= &upper.value,
132 false => key < &upper.value,
133 },
134 (None, None) => true,
135 }
136 }
137
138 keys.retain(|k| ranges.iter().all(|r| range_contains(r, k)));
139 keys
140 }
141
142 fn filter_by_regexes(mut keys: Vec<Bytes>, regexes: &[Predicate]) -> Result<Vec<Bytes>> {
143 for p in regexes {
144 let pattern = match p {
145 Predicate::RegexMatch(r) => &r.pattern,
146 _ => unreachable!(), };
148
149 let regex = regex::Regex::new(pattern).with_context(|_| ParseRegexSnafu {
150 pattern: pattern.to_owned(),
151 })?;
152
153 keys.retain(|k| {
154 std::str::from_utf8(k)
155 .map(|k| regex.is_match(k))
156 .unwrap_or_default()
157 });
158 if keys.is_empty() {
159 return Ok(keys);
160 }
161 }
162
163 Ok(keys)
164 }
165}
166
167impl TryFrom<Vec<Predicate>> for KeysFstApplier {
168 type Error = crate::inverted_index::error::Error;
169 fn try_from(predicates: Vec<Predicate>) -> Result<Self> {
170 Self::try_from(predicates)
171 }
172}
173
174#[cfg(test)]
175mod tests {
176 use fst::Map as FstMap;
177
178 use super::*;
179 use crate::inverted_index::error::Error;
180 use crate::inverted_index::search::predicate::{
181 Bound, InListPredicate, Predicate, Range, RangePredicate, RegexMatchPredicate,
182 };
183
184 fn create_fst_map(items: &[(&[u8], u64)]) -> FstMap<Vec<u8>> {
185 let mut items = items
186 .iter()
187 .map(|(k, v)| (k.to_vec(), *v))
188 .collect::<Vec<_>>();
189 items.sort();
190 FstMap::from_iter(items).unwrap()
191 }
192
193 fn b(s: &str) -> Vec<u8> {
194 s.as_bytes().to_vec()
195 }
196
197 #[test]
198 fn test_keys_fst_applier_apply() {
199 let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]);
200 let applier = KeysFstApplier {
201 keys: vec![b("foo"), b("baz")],
202 };
203
204 let results = applier.apply(&test_fst);
205 assert_eq!(results, vec![1, 3]);
206 }
207
208 #[test]
209 fn test_keys_fst_applier_with_empty_keys() {
210 let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]);
211 let applier = KeysFstApplier { keys: vec![] };
212
213 let results = applier.apply(&test_fst);
214 assert!(results.is_empty());
215 }
216
217 #[test]
218 fn test_keys_fst_applier_with_unmatched_keys() {
219 let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]);
220 let applier = KeysFstApplier {
221 keys: vec![b("qux"), b("quux")],
222 };
223
224 let results = applier.apply(&test_fst);
225 assert!(results.is_empty());
226 }
227
228 #[test]
229 fn test_keys_fst_applier_try_from() {
230 let predicates = vec![
231 Predicate::InList(InListPredicate {
232 list: HashSet::from_iter(vec![b("foo"), b("bar")]),
233 }),
234 Predicate::Range(RangePredicate {
235 range: Range {
236 lower: Some(Bound {
237 value: b("bar"),
238 inclusive: true,
239 }),
240 upper: None,
241 },
242 }),
243 Predicate::RegexMatch(RegexMatchPredicate {
244 pattern: ".*r".to_string(),
245 }),
246 ];
247 let applier = KeysFstApplier::try_from(predicates).unwrap();
248 assert_eq!(applier.keys, vec![b("bar")]);
249 }
250
251 #[test]
252 fn test_keys_fst_applier_try_from_filter_out_unmatched_keys() {
253 let predicates = vec![
254 Predicate::InList(InListPredicate {
255 list: HashSet::from_iter(vec![b("foo"), b("bar")]),
256 }),
257 Predicate::Range(RangePredicate {
258 range: Range {
259 lower: Some(Bound {
260 value: b("f"),
261 inclusive: true,
262 }),
263 upper: None,
264 },
265 }),
266 Predicate::RegexMatch(RegexMatchPredicate {
267 pattern: ".*o".to_string(),
268 }),
269 ];
270 let applier = KeysFstApplier::try_from(predicates).unwrap();
271 assert_eq!(applier.keys, vec![b("foo")]);
272 }
273
274 #[test]
275 fn test_keys_fst_applier_try_from_empty_predicates() {
276 let predicates = vec![];
277 let result = KeysFstApplier::try_from(predicates);
278 assert!(matches!(result, Err(Error::EmptyPredicates { .. })));
279 }
280
281 #[test]
282 fn test_keys_fst_applier_try_from_without_in_list() {
283 let predicates = vec![Predicate::Range(RangePredicate {
284 range: Range {
285 lower: Some(Bound {
286 value: b("bar"),
287 inclusive: true,
288 }),
289 upper: None,
290 },
291 })];
292 let result = KeysFstApplier::try_from(predicates);
293 assert!(matches!(
294 result,
295 Err(Error::KeysApplierWithoutInList { .. })
296 ));
297 }
298
299 #[test]
300 fn test_keys_fst_applier_try_from_with_invalid_regex() {
301 let predicates = vec![
302 Predicate::InList(InListPredicate {
303 list: HashSet::from_iter(vec![b("foo"), b("bar")]),
304 }),
305 Predicate::RegexMatch(RegexMatchPredicate {
306 pattern: "*invalid regex".to_string(),
307 }),
308 ];
309 let result = KeysFstApplier::try_from(predicates);
310 assert!(matches!(result, Err(Error::ParseRegex { .. })));
311 }
312
313 #[test]
314 fn test_keys_fst_applier_memory_usage() {
315 let applier = KeysFstApplier { keys: vec![] };
316 assert_eq!(applier.memory_usage(), 0);
317
318 let applier = KeysFstApplier {
319 keys: vec![b("foo"), b("bar")],
320 };
321 assert_eq!(applier.memory_usage(), 2 * size_of::<Bytes>() + 6);
322 }
323}