common_function/scalars/json/
json_path_exists.rs1use std::fmt::{self, Display};
16
17use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
18use datafusion_expr::{Signature, TypeSignature, Volatility};
19use datatypes::arrow::datatypes::DataType;
20use datatypes::data_type::ConcreteDataType;
21use datatypes::prelude::VectorRef;
22use datatypes::scalars::ScalarVectorBuilder;
23use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
24use snafu::ensure;
25
26use crate::function::{Function, FunctionContext};
27
/// Scalar function that reports whether a JSON path matches anything inside a
/// JSONB-encoded binary value.
///
/// The type carries no state; all behavior lives in its `Function` and
/// `Display` implementations.
#[derive(Debug, Default, Clone)]
pub struct JsonPathExistsFunction;

/// Name under which the function is registered; also used when reporting
/// unsupported argument types.
const NAME: &str = "json_path_exists";
33
34impl Function for JsonPathExistsFunction {
35 fn name(&self) -> &str {
36 NAME
37 }
38
39 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
40 Ok(DataType::Boolean)
41 }
42
43 fn signature(&self) -> Signature {
44 Signature::one_of(
46 vec![
47 TypeSignature::Exact(vec![DataType::Binary, DataType::Utf8]),
48 TypeSignature::Exact(vec![DataType::Null, DataType::Utf8]),
49 TypeSignature::Exact(vec![DataType::Binary, DataType::Null]),
50 TypeSignature::Exact(vec![DataType::Null, DataType::Null]),
51 ],
52 Volatility::Immutable,
53 )
54 }
55
56 fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
57 ensure!(
58 columns.len() == 2,
59 InvalidFuncArgsSnafu {
60 err_msg: format!(
61 "The length of the args is not correct, expect exactly two, have: {}",
62 columns.len()
63 ),
64 }
65 );
66 let jsons = &columns[0];
67 let paths = &columns[1];
68
69 let size = jsons.len();
70 let mut results = BooleanVectorBuilder::with_capacity(size);
71
72 match (jsons.data_type(), paths.data_type()) {
73 (ConcreteDataType::Binary(_), ConcreteDataType::String(_)) => {
74 for i in 0..size {
75 let result = match (jsons.get_ref(i).as_binary(), paths.get_ref(i).as_string())
76 {
77 (Ok(Some(json)), Ok(Some(path))) => {
78 let json_path = match jsonb::jsonpath::parse_json_path(path.as_bytes())
80 {
81 Ok(json_path) => json_path,
82 Err(_) => {
83 return InvalidFuncArgsSnafu {
84 err_msg: format!("Illegal json path: {:?}", path),
85 }
86 .fail();
87 }
88 };
89 jsonb::path_exists(json, json_path).ok()
90 }
91 _ => None,
92 };
93
94 results.push(result);
95 }
96 }
97
98 (ConcreteDataType::Null(_), ConcreteDataType::String(_)) => results.push_nulls(size),
100 (ConcreteDataType::Binary(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
101 (ConcreteDataType::Null(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
102
103 _ => {
104 return UnsupportedInputDataTypeSnafu {
105 function: NAME,
106 datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
107 }
108 .fail();
109 }
110 }
111
112 Ok(results.to_vector())
113 }
114}
115
116impl Display for JsonPathExistsFunction {
117 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
118 write!(f, "JSON_PATH_EXISTS")
119 }
120}
121
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::prelude::ScalarVector;
    use datatypes::vectors::{BinaryVector, NullVector, StringVector};

    use super::*;

    /// Parses `text` as JSON and returns its JSONB binary encoding.
    fn encode_jsonb(text: &str) -> Vec<u8> {
        jsonb::parse_value(text.as_bytes()).unwrap().to_vec()
    }

    #[test]
    fn test_json_path_exists_function() {
        let json_path_exists = JsonPathExistsFunction;
        let ctx = FunctionContext::default();

        // Metadata: name, return type and accepted signatures.
        assert_eq!("json_path_exists", json_path_exists.name());
        assert_eq!(
            DataType::Boolean,
            json_path_exists.return_type(&[DataType::Binary]).unwrap()
        );

        let signature = json_path_exists.signature();
        assert!(matches!(signature.volatility, Volatility::Immutable));
        assert_eq!(
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Binary, DataType::Utf8]),
                TypeSignature::Exact(vec![DataType::Null, DataType::Utf8]),
                TypeSignature::Exact(vec![DataType::Binary, DataType::Null]),
                TypeSignature::Exact(vec![DataType::Null, DataType::Null]),
            ]),
            signature.type_signature
        );

        // Row-wise evaluation: (json document, json path, expected result).
        let cases = [
            (r#"{"a": {"b": 2}, "b": 2, "c": 3}"#, "$.a.b.c", false),
            (r#"{"a": 4, "b": {"c": 6}, "c": 6}"#, "$.b", true),
            (r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, "$.c.a", true),
            (r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, ".d", false),
            (r#"[1, 2, 3]"#, "$[0]", true),
            (r#"null"#, "$.a", false),
            (r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, "null", false),
            (r#"null"#, "null", false),
        ];

        let jsonbs: Vec<Vec<u8>> = cases
            .iter()
            .map(|&(json, _, _)| encode_jsonb(json))
            .collect();
        let paths: Vec<&str> = cases.iter().map(|&(_, path, _)| path).collect();

        let args: [VectorRef; 2] = [
            Arc::new(BinaryVector::from_vec(jsonbs)),
            Arc::new(StringVector::from_vec(paths)),
        ];
        let vector = json_path_exists.eval(&ctx, &args).unwrap();

        assert_eq!(8, vector.len());
        for (row, &(_, _, want)) in cases.iter().enumerate() {
            let cell = vector.get_ref(row);
            assert!(!cell.is_null());
            let got = cell.as_boolean().unwrap().unwrap();
            assert_eq!(got, want);
        }

        // A path that cannot be parsed fails the whole evaluation.
        let args: [VectorRef; 2] = [
            Arc::new(BinaryVector::from_vec(vec![encode_jsonb("{}")])),
            Arc::new(StringVector::from_vec(vec!["$..a"])),
        ];
        let err = json_path_exists.eval(&ctx, &args);
        assert!(err.is_err());

        // A `Null`-typed argument on either side yields a null result.
        let null_json_args: [VectorRef; 2] = [
            Arc::new(NullVector::new(1)),
            Arc::new(StringVector::from_vec(vec!["$.a"])),
        ];
        let result1 = json_path_exists.eval(&ctx, &null_json_args).unwrap();

        let null_path_args: [VectorRef; 2] = [
            Arc::new(BinaryVector::from_vec(vec![encode_jsonb("{}")])),
            Arc::new(NullVector::new(1)),
        ];
        let result2 = json_path_exists.eval(&ctx, &null_path_args).unwrap();

        assert_eq!(result1.len(), 1);
        assert!(result1.get_ref(0).is_null());
        assert_eq!(result2.len(), 1);
        assert!(result2.get_ref(0).is_null());
    }
}