common_function/scalars/json/
json_path_match.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{self, Display};
16
17use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
18use common_query::prelude::Signature;
19use datafusion::logical_expr::Volatility;
20use datatypes::data_type::ConcreteDataType;
21use datatypes::prelude::VectorRef;
22use datatypes::scalars::ScalarVectorBuilder;
23use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
24use snafu::ensure;
25
26use crate::function::{Function, FunctionContext};
27
/// Check if the given JSON data match the given JSON path's predicate.
///
/// Exposed as the scalar function `json_path_match(json, path)`. It produces a
/// nullable boolean per row: rows whose predicate cannot be evaluated (for
/// example a JSON `null` document or a path that fails to parse) yield NULL
/// instead of `true`/`false`.
#[derive(Clone, Debug, Default)]
pub struct JsonPathMatchFunction;
31
// Function name returned by `Function::name` and reported in
// unsupported-input-type errors.
const NAME: &str = "json_path_match";
33
34impl Function for JsonPathMatchFunction {
35    fn name(&self) -> &str {
36        NAME
37    }
38
39    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
40        Ok(ConcreteDataType::boolean_datatype())
41    }
42
43    fn signature(&self) -> Signature {
44        Signature::exact(
45            vec![
46                ConcreteDataType::json_datatype(),
47                ConcreteDataType::string_datatype(),
48            ],
49            Volatility::Immutable,
50        )
51    }
52
53    fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
54        ensure!(
55            columns.len() == 2,
56            InvalidFuncArgsSnafu {
57                err_msg: format!(
58                    "The length of the args is not correct, expect exactly two, have: {}",
59                    columns.len()
60                ),
61            }
62        );
63        let jsons = &columns[0];
64        let paths = &columns[1];
65
66        let size = jsons.len();
67        let mut results = BooleanVectorBuilder::with_capacity(size);
68
69        for i in 0..size {
70            let json = jsons.get_ref(i);
71            let path = paths.get_ref(i);
72
73            match json.data_type() {
74                // JSON data type uses binary vector
75                ConcreteDataType::Binary(_) => {
76                    let json = json.as_binary();
77                    let path = path.as_string();
78                    let result = match (json, path) {
79                        (Ok(Some(json)), Ok(Some(path))) => {
80                            if !jsonb::is_null(json) {
81                                let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
82                                match json_path {
83                                    Ok(json_path) => jsonb::path_match(json, json_path).ok(),
84                                    Err(_) => None,
85                                }
86                            } else {
87                                None
88                            }
89                        }
90                        _ => None,
91                    };
92
93                    results.push(result);
94                }
95
96                _ => {
97                    return UnsupportedInputDataTypeSnafu {
98                        function: NAME,
99                        datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
100                    }
101                    .fail();
102                }
103            }
104        }
105
106        Ok(results.to_vector())
107    }
108}
109
110impl Display for JsonPathMatchFunction {
111    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
112        write!(f, "JSON_PATH_MATCH")
113    }
114}
115
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::vectors::{BinaryVector, StringVector};

    use super::*;

    #[test]
    fn test_json_path_match_function() {
        let func = JsonPathMatchFunction;

        // Static metadata: name, return type and signature.
        assert_eq!("json_path_match", func.name());
        assert_eq!(
            ConcreteDataType::boolean_datatype(),
            func.return_type(&[ConcreteDataType::json_datatype()])
                .unwrap()
        );

        assert!(matches!(func.signature(),
                         Signature {
                             type_signature: TypeSignature::Exact(valid_types),
                             volatility: Volatility::Immutable
                         } if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()],
        ));

        // Each case is (JSON text, JSON path predicate, expected result);
        // `None` means the row is expected to evaluate to NULL.
        let cases: [(&str, &str, Option<bool>); 7] = [
            (
                r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
                "$.a.b == 2",
                Some(true),
            ),
            (
                r#"{"a": 1, "b": [1,2,3]}"#,
                "$.b[1 to last] >= 2",
                Some(true),
            ),
            (r#"{"a": 1 ,"b": [1,2,3]}"#, "$.c > 0", Some(false)),
            (r#"[1,2,3]"#, "$[0 to last] > 0", Some(true)),
            (r#"{"a":1,"b":[1,2,3]}"#, r#"null"#, None),
            (r#"null"#, "$.c > 0", None),
            (r#"null"#, r#"null"#, None),
        ];

        // Encode the JSON text as JSONB bytes, matching the binary input column.
        let jsonbs = cases
            .iter()
            .map(|case| Some(jsonb::parse_value(case.0.as_bytes()).unwrap().to_vec()))
            .collect::<Vec<_>>();
        let paths = cases
            .iter()
            .map(|case| Some(case.1.to_owned()))
            .collect::<Vec<_>>();

        let args: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from(jsonbs)),
            Arc::new(StringVector::from(paths)),
        ];
        let vector = func.eval(&FunctionContext::default(), &args).unwrap();

        assert_eq!(7, vector.len());
        for (row, case) in cases.iter().enumerate() {
            let actual = vector.get_ref(row);

            if let Some(want) = case.2 {
                assert!(!actual.is_null());
                let got = actual.as_boolean().unwrap().unwrap();
                assert_eq!(want, got);
            } else {
                assert!(actual.is_null());
            }
        }
    }
}