mito2/sst/index/inverted_index/applier/builder/
regex_match.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use datafusion_common::ScalarValue;
16use datafusion_expr::Expr as DfExpr;
17use index::inverted_index::search::predicate::{Predicate, RegexMatchPredicate};
18
19use crate::error::Result;
20use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
21
22impl InvertedIndexApplierBuilder<'_> {
23    /// Collects a regex match expression in the form of `column ~ pattern`.
24    pub(crate) fn collect_regex_match(&mut self, column: &DfExpr, pattern: &DfExpr) -> Result<()> {
25        let Some(column_name) = Self::column_name(column) else {
26            return Ok(());
27        };
28        let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else {
29            return Ok(());
30        };
31        if !data_type.is_string() {
32            return Ok(());
33        }
34        let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = pattern else {
35            return Ok(());
36        };
37
38        let predicate = Predicate::RegexMatch(RegexMatchPredicate {
39            pattern: pattern.clone(),
40        });
41        self.add_predicate(column_id, predicate);
42        Ok(())
43    }
44}
45
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use store_api::region_request::PathType;

    use super::*;
    use crate::error::Error;
    use crate::sst::index::inverted_index::applier::builder::tests::{
        field_column, int64_lit, nonexistent_column, string_lit, tag_column, test_object_store,
        test_region_metadata,
    };
    use crate::sst::index::puffin_manager::PuffinManagerFactory;

    /// A string literal pattern on the tag column yields exactly one regex predicate
    /// stored under column id 1.
    #[test]
    fn test_regex_match_basic() {
        let (_dir, factory) = PuffinManagerFactory::new_for_test_block("test_regex_match_basic_");
        let region_metadata = test_region_metadata();
        let mut builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            PathType::Bare,
            test_object_store(),
            &region_metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let column = tag_column();
        let pattern = string_lit("abc");
        builder.collect_regex_match(&column, &pattern).unwrap();

        let expected = Predicate::RegexMatch(RegexMatchPredicate {
            pattern: "abc".to_string(),
        });
        let predicates = builder.output.get(&1).unwrap();
        assert_eq!(predicates.len(), 1);
        assert_eq!(predicates[0], expected);
    }

    /// A string literal pattern on the field column yields exactly one regex predicate
    /// stored under column id 3.
    #[test]
    fn test_regex_match_field_column() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_field_column_");
        let region_metadata = test_region_metadata();
        let mut builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            PathType::Bare,
            test_object_store(),
            &region_metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let column = field_column();
        let pattern = string_lit("abc");
        builder.collect_regex_match(&column, &pattern).unwrap();

        let expected = Predicate::RegexMatch(RegexMatchPredicate {
            pattern: "abc".to_string(),
        });
        let predicates = builder.output.get(&3).unwrap();
        assert_eq!(predicates.len(), 1);
        assert_eq!(predicates[0], expected);
    }

    /// A non-string literal pattern is skipped: the call succeeds and nothing is collected.
    #[test]
    fn test_regex_match_type_mismatch() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_type_mismatch_");
        let region_metadata = test_region_metadata();
        let mut builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            PathType::Bare,
            test_object_store(),
            &region_metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let column = tag_column();
        let pattern = int64_lit(123);
        builder.collect_regex_match(&column, &pattern).unwrap();

        assert!(builder.output.is_empty());
    }

    /// A column that does not exist produces `Error::ColumnNotFound` and collects nothing.
    #[test]
    fn test_regex_match_type_nonexist_column() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_type_nonexist_column_");
        let region_metadata = test_region_metadata();
        let mut builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            PathType::Bare,
            test_object_store(),
            &region_metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let column = nonexistent_column();
        let pattern = string_lit("abc");
        let outcome = builder.collect_regex_match(&column, &pattern);

        assert!(matches!(outcome, Err(Error::ColumnNotFound { .. })));
        assert!(builder.output.is_empty());
    }
}