common_function/scalars/json/
json_path_match.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{self, Display};
16
17use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
18use datafusion_expr::{Signature, Volatility};
19use datatypes::arrow::datatypes::DataType;
20use datatypes::data_type::ConcreteDataType;
21use datatypes::prelude::VectorRef;
22use datatypes::scalars::ScalarVectorBuilder;
23use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
24use snafu::ensure;
25
26use crate::function::{Function, FunctionContext};
27
/// Scalar function that checks whether the given JSON data matches the
/// predicate expressed by the given JSON path.
///
/// The type is a stateless marker; all behavior lives in its trait impls.
#[derive(Debug, Default, Clone)]
pub struct JsonPathMatchFunction;

/// Lower-case name of the function, returned by `name()` and quoted in the
/// unsupported-input-type error raised by `eval()`.
const NAME: &str = "json_path_match";
33
34impl Function for JsonPathMatchFunction {
35    fn name(&self) -> &str {
36        NAME
37    }
38
39    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
40        Ok(DataType::Boolean)
41    }
42
43    fn signature(&self) -> Signature {
44        // TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
45        Signature::exact(
46            vec![DataType::Binary, DataType::Utf8],
47            Volatility::Immutable,
48        )
49    }
50
51    fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
52        ensure!(
53            columns.len() == 2,
54            InvalidFuncArgsSnafu {
55                err_msg: format!(
56                    "The length of the args is not correct, expect exactly two, have: {}",
57                    columns.len()
58                ),
59            }
60        );
61        let jsons = &columns[0];
62        let paths = &columns[1];
63
64        let size = jsons.len();
65        let mut results = BooleanVectorBuilder::with_capacity(size);
66
67        for i in 0..size {
68            let json = jsons.get_ref(i);
69            let path = paths.get_ref(i);
70
71            match json.data_type() {
72                // JSON data type uses binary vector
73                ConcreteDataType::Binary(_) => {
74                    let json = json.as_binary();
75                    let path = path.as_string();
76                    let result = match (json, path) {
77                        (Ok(Some(json)), Ok(Some(path))) => {
78                            if !jsonb::is_null(json) {
79                                let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
80                                match json_path {
81                                    Ok(json_path) => jsonb::path_match(json, json_path).ok(),
82                                    Err(_) => None,
83                                }
84                            } else {
85                                None
86                            }
87                        }
88                        _ => None,
89                    };
90
91                    results.push(result);
92                }
93
94                _ => {
95                    return UnsupportedInputDataTypeSnafu {
96                        function: NAME,
97                        datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
98                    }
99                    .fail();
100                }
101            }
102        }
103
104        Ok(results.to_vector())
105    }
106}
107
108impl Display for JsonPathMatchFunction {
109    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
110        write!(f, "JSON_PATH_MATCH")
111    }
112}
113
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datafusion_expr::TypeSignature;
    use datatypes::vectors::{BinaryVector, StringVector};

    use super::*;

    /// Checks the function's metadata (name, return type, signature) and then
    /// evaluates it over a small table of JSON / path pairs.
    #[test]
    fn test_json_path_match_function() {
        let func = JsonPathMatchFunction;

        assert_eq!("json_path_match", func.name());
        assert_eq!(
            DataType::Boolean,
            func.return_type(&[DataType::Binary]).unwrap()
        );

        let signature = func.signature();
        assert!(matches!(signature.volatility, Volatility::Immutable));
        assert!(matches!(
            &signature.type_signature,
            TypeSignature::Exact(valid_types)
                if *valid_types == vec![DataType::Binary, DataType::Utf8]
        ));

        // Each case is (JSON text, JSON path predicate, expected result);
        // `None` means the function is expected to produce NULL for that row.
        let cases: [(&str, &str, Option<bool>); 7] = [
            (r#"{"a": {"b": 2}, "b": 2, "c": 3}"#, "$.a.b == 2", Some(true)),
            (r#"{"a": 1, "b": [1,2,3]}"#, "$.b[1 to last] >= 2", Some(true)),
            (r#"{"a": 1 ,"b": [1,2,3]}"#, "$.c > 0", Some(false)),
            (r#"[1,2,3]"#, "$[0 to last] > 0", Some(true)),
            (r#"{"a":1,"b":[1,2,3]}"#, r#"null"#, None),
            (r#"null"#, "$.c > 0", None),
            (r#"null"#, r#"null"#, None),
        ];

        // Encode every JSON text into its jsonb binary form.
        let jsonbs: Vec<Option<Vec<u8>>> = cases
            .iter()
            .map(|&(json, _, _)| Some(jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
            .collect();
        let paths: Vec<Option<String>> = cases
            .iter()
            .map(|&(_, path, _)| Some(path.to_string()))
            .collect();

        let args: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from(jsonbs)),
            Arc::new(StringVector::from(paths)),
        ];
        let vector = func.eval(&FunctionContext::default(), &args).unwrap();

        assert_eq!(7, vector.len());
        for (i, &(_, _, expected)) in cases.iter().enumerate() {
            let result = vector.get_ref(i);

            if let Some(expected_value) = expected {
                assert!(!result.is_null());
                let result_value = result.as_boolean().unwrap().unwrap();
                assert_eq!(expected_value, result_value);
            } else {
                assert!(result.is_null());
            }
        }
    }
}