mito2/sst/index/inverted_index/applier/builder/
regex_match.rs1use datafusion_common::ScalarValue;
16use datafusion_expr::Expr as DfExpr;
17use index::inverted_index::search::predicate::{Predicate, RegexMatchPredicate};
18
19use crate::error::Result;
20use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
21
22impl InvertedIndexApplierBuilder<'_> {
23 pub(crate) fn collect_regex_match(&mut self, column: &DfExpr, pattern: &DfExpr) -> Result<()> {
25 let Some(column_name) = Self::column_name(column) else {
26 return Ok(());
27 };
28 let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else {
29 return Ok(());
30 };
31 if !data_type.is_string() {
32 return Ok(());
33 }
34 let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = pattern else {
35 return Ok(());
36 };
37
38 let predicate = Predicate::RegexMatch(RegexMatchPredicate {
39 pattern: pattern.clone(),
40 });
41 self.add_predicate(column_id, predicate);
42 Ok(())
43 }
44}
45
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use store_api::region_request::PathType;

    use super::*;
    use crate::error::Error;
    use crate::sst::index::inverted_index::applier::builder::tests::{
        field_column, int64_lit, nonexistent_column, string_lit, tag_column, test_object_store,
        test_region_metadata,
    };
    use crate::sst::index::puffin_manager::PuffinManagerFactory;

    /// A string-literal pattern on the tag column yields exactly one regex predicate
    /// stored under column id 1.
    #[test]
    fn test_regex_match_basic() {
        let (_dir, factory) = PuffinManagerFactory::new_for_test_block("test_regex_match_basic_");
        let metadata = test_region_metadata();
        let mut builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            PathType::Bare,
            test_object_store(),
            &metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let column = tag_column();
        let pattern = string_lit("abc");
        builder.collect_regex_match(&column, &pattern).unwrap();

        let expected = Predicate::RegexMatch(RegexMatchPredicate {
            pattern: "abc".to_string(),
        });
        let collected = builder.output.get(&1).unwrap();
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0], expected);
    }

    /// A string-literal pattern on the field column yields exactly one regex predicate
    /// stored under column id 3.
    #[test]
    fn test_regex_match_field_column() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_field_column_");
        let metadata = test_region_metadata();
        let mut builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            PathType::Bare,
            test_object_store(),
            &metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let column = field_column();
        let pattern = string_lit("abc");
        builder.collect_regex_match(&column, &pattern).unwrap();

        let expected = Predicate::RegexMatch(RegexMatchPredicate {
            pattern: "abc".to_string(),
        });
        let collected = builder.output.get(&3).unwrap();
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0], expected);
    }

    /// An integer literal used as the pattern is skipped: the call succeeds and no
    /// predicate is collected.
    #[test]
    fn test_regex_match_type_mismatch() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_type_mismatch_");
        let metadata = test_region_metadata();
        let mut builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            PathType::Bare,
            test_object_store(),
            &metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let column = tag_column();
        let pattern = int64_lit(123);
        builder.collect_regex_match(&column, &pattern).unwrap();

        assert!(builder.output.is_empty());
    }

    /// Referencing a column that is absent from the metadata fails with
    /// `Error::ColumnNotFound` and leaves the output empty.
    #[test]
    fn test_regex_match_type_nonexist_column() {
        let (_dir, factory) =
            PuffinManagerFactory::new_for_test_block("test_regex_match_type_nonexist_column_");
        let metadata = test_region_metadata();
        let mut builder = InvertedIndexApplierBuilder::new(
            "test".to_string(),
            PathType::Bare,
            test_object_store(),
            &metadata,
            HashSet::from_iter([1, 2, 3]),
            factory,
        );

        let column = nonexistent_column();
        let pattern = string_lit("abc");
        let outcome = builder.collect_regex_match(&column, &pattern);

        assert!(matches!(outcome, Err(Error::ColumnNotFound { .. })));
        assert!(builder.output.is_empty());
    }
}