// mito2/sst/index/inverted_index/applier/builder/regex_match.rs
use datafusion_common::ScalarValue;
16use datafusion_expr::Expr as DfExpr;
17use index::inverted_index::search::predicate::{Predicate, RegexMatchPredicate};
18
19use crate::error::Result;
20use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
21
22impl InvertedIndexApplierBuilder<'_> {
23 pub(crate) fn collect_regex_match(&mut self, column: &DfExpr, pattern: &DfExpr) -> Result<()> {
25 let Some(column_name) = Self::column_name(column) else {
26 return Ok(());
27 };
28 let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else {
29 return Ok(());
30 };
31 if !data_type.is_string() {
32 return Ok(());
33 }
34 let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = pattern else {
35 return Ok(());
36 };
37
38 let predicate = Predicate::RegexMatch(RegexMatchPredicate {
39 pattern: pattern.clone(),
40 });
41 self.add_predicate(column_id, predicate);
42 Ok(())
43 }
44}
45
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::*;
    use crate::error::Error;
    use crate::sst::index::inverted_index::applier::builder::tests::{
        field_column, int64_lit, nonexistent_column, string_lit, tag_column, test_object_store,
        test_region_metadata,
    };
    use crate::sst::index::puffin_manager::PuffinManagerFactory;

    /// A string pattern on the tag column yields exactly one regex predicate under column id 1.
    #[test]
    fn test_regex_match_basic() {
        let (_guard, factory) = PuffinManagerFactory::new_for_test_block("test_regex_match_basic_");
        let region_metadata = test_region_metadata();
        let indexed_ids = HashSet::from_iter([1, 2, 3]);
        let mut applier_builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            test_object_store(),
            &region_metadata,
            indexed_ids,
            factory,
        );

        let column = tag_column();
        let pattern = string_lit("abc");
        applier_builder
            .collect_regex_match(&column, &pattern)
            .unwrap();

        let expected = Predicate::RegexMatch(RegexMatchPredicate {
            pattern: "abc".to_string(),
        });
        let collected = applier_builder.output.get(&1).unwrap();
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0], expected);
    }

    /// A string pattern on the field column yields exactly one regex predicate under column id 3.
    #[test]
    fn test_regex_match_field_column() {
        let (_guard, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_field_column_");
        let region_metadata = test_region_metadata();
        let indexed_ids = HashSet::from_iter([1, 2, 3]);
        let mut applier_builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            test_object_store(),
            &region_metadata,
            indexed_ids,
            factory,
        );

        let column = field_column();
        let pattern = string_lit("abc");
        applier_builder
            .collect_regex_match(&column, &pattern)
            .unwrap();

        let expected = Predicate::RegexMatch(RegexMatchPredicate {
            pattern: "abc".to_string(),
        });
        let collected = applier_builder.output.get(&3).unwrap();
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0], expected);
    }

    /// A non-string literal as the pattern is ignored: the call succeeds and nothing is collected.
    #[test]
    fn test_regex_match_type_mismatch() {
        let (_guard, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_type_mismatch_");
        let region_metadata = test_region_metadata();
        let indexed_ids = HashSet::from_iter([1, 2, 3]);
        let mut applier_builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            test_object_store(),
            &region_metadata,
            indexed_ids,
            factory,
        );

        let column = tag_column();
        let pattern = int64_lit(123);
        applier_builder
            .collect_regex_match(&column, &pattern)
            .unwrap();

        assert!(applier_builder.output.is_empty());
    }

    /// An unknown column surfaces `Error::ColumnNotFound` and leaves the output untouched.
    #[test]
    fn test_regex_match_type_nonexist_column() {
        let (_guard, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_type_nonexist_column_");
        let region_metadata = test_region_metadata();
        let indexed_ids = HashSet::from_iter([1, 2, 3]);
        let mut applier_builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            test_object_store(),
            &region_metadata,
            indexed_ids,
            factory,
        );

        let column = nonexistent_column();
        let pattern = string_lit("abc");
        let outcome = applier_builder.collect_regex_match(&column, &pattern);

        assert!(matches!(outcome, Err(Error::ColumnNotFound { .. })));
        assert!(applier_builder.output.is_empty());
    }
}