mito2/sst/index/inverted_index/applier/builder/
regex_match.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use datafusion_common::ScalarValue;
16use datafusion_expr::Expr as DfExpr;
17use index::inverted_index::search::predicate::{Predicate, RegexMatchPredicate};
18
19use crate::error::Result;
20use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
21
22impl InvertedIndexApplierBuilder<'_> {
23    /// Collects a regex match expression in the form of `column ~ pattern`.
24    pub(crate) fn collect_regex_match(&mut self, column: &DfExpr, pattern: &DfExpr) -> Result<()> {
25        let Some(column_name) = Self::column_name(column) else {
26            return Ok(());
27        };
28        let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else {
29            return Ok(());
30        };
31        if !data_type.is_string() {
32            return Ok(());
33        }
34        let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = pattern else {
35            return Ok(());
36        };
37
38        let predicate = Predicate::RegexMatch(RegexMatchPredicate {
39            pattern: pattern.clone(),
40        });
41        self.add_predicate(column_id, predicate);
42        Ok(())
43    }
44}
45
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::*;
    use crate::error::Error;
    use crate::sst::index::inverted_index::applier::builder::tests::{
        field_column, int64_lit, nonexistent_column, string_lit, tag_column, test_object_store,
        test_region_metadata,
    };
    use crate::sst::index::puffin_manager::PuffinManagerFactory;

    /// A string-literal pattern on the tag column produces exactly one regex predicate,
    /// which the test expects to find under column id 1.
    #[test]
    fn test_regex_match_basic() {
        // `_dir` is bound to a name so it lives until the end of the test.
        let (_dir, factory) = PuffinManagerFactory::new_for_test_block("test_regex_match_basic_");
        let region_metadata = test_region_metadata();
        let mut applier_builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            test_object_store(),
            &region_metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let outcome = applier_builder.collect_regex_match(&tag_column(), &string_lit("abc"));
        outcome.unwrap();

        let expected = Predicate::RegexMatch(RegexMatchPredicate {
            pattern: "abc".to_string(),
        });
        let collected = applier_builder.output.get(&1).unwrap();
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0], expected);
    }

    /// A string-literal pattern on the field column produces exactly one regex predicate,
    /// which the test expects to find under column id 3.
    #[test]
    fn test_regex_match_field_column() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_field_column_");
        let region_metadata = test_region_metadata();
        let mut applier_builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            test_object_store(),
            &region_metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let outcome = applier_builder.collect_regex_match(&field_column(), &string_lit("abc"));
        outcome.unwrap();

        let expected = Predicate::RegexMatch(RegexMatchPredicate {
            pattern: "abc".to_string(),
        });
        let collected = applier_builder.output.get(&3).unwrap();
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0], expected);
    }

    /// A pattern built with `int64_lit` is silently skipped: the call succeeds and
    /// nothing is collected.
    #[test]
    fn test_regex_match_type_mismatch() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_type_mismatch_");
        let region_metadata = test_region_metadata();
        let mut applier_builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            test_object_store(),
            &region_metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let outcome = applier_builder.collect_regex_match(&tag_column(), &int64_lit(123));
        outcome.unwrap();

        assert!(applier_builder.output.is_empty());
    }

    /// Referencing the nonexistent column fails with `Error::ColumnNotFound` and leaves
    /// the builder output empty.
    #[test]
    fn test_regex_match_type_nonexist_column() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_type_nonexist_column_");
        let region_metadata = test_region_metadata();
        let mut applier_builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            test_object_store(),
            &region_metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let outcome =
            applier_builder.collect_regex_match(&nonexistent_column(), &string_lit("abc"));

        assert!(matches!(outcome, Err(Error::ColumnNotFound { .. })));
        assert!(applier_builder.output.is_empty());
    }
}