common_function/scalars/json/
json_path_exists.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{self, Display};
16
17use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
18use datafusion_expr::{Signature, TypeSignature, Volatility};
19use datatypes::arrow::datatypes::DataType;
20use datatypes::data_type::ConcreteDataType;
21use datatypes::prelude::VectorRef;
22use datatypes::scalars::ScalarVectorBuilder;
23use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
24use snafu::ensure;
25
26use crate::function::{Function, FunctionContext};
27
/// Scalar function that reports whether a JSON path is present in a JSON value.
///
/// The type is stateless; all behavior lives in its `Function` implementation.
#[derive(Debug, Default, Clone)]
pub struct JsonPathExistsFunction;
31
/// Name returned by `Function::name` and reported in unsupported-type errors.
const NAME: &str = "json_path_exists";
33
34impl Function for JsonPathExistsFunction {
35    fn name(&self) -> &str {
36        NAME
37    }
38
39    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
40        Ok(DataType::Boolean)
41    }
42
43    fn signature(&self) -> Signature {
44        // TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
45        Signature::one_of(
46            vec![
47                TypeSignature::Exact(vec![DataType::Binary, DataType::Utf8]),
48                TypeSignature::Exact(vec![DataType::Null, DataType::Utf8]),
49                TypeSignature::Exact(vec![DataType::Binary, DataType::Null]),
50                TypeSignature::Exact(vec![DataType::Null, DataType::Null]),
51            ],
52            Volatility::Immutable,
53        )
54    }
55
56    fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
57        ensure!(
58            columns.len() == 2,
59            InvalidFuncArgsSnafu {
60                err_msg: format!(
61                    "The length of the args is not correct, expect exactly two, have: {}",
62                    columns.len()
63                ),
64            }
65        );
66        let jsons = &columns[0];
67        let paths = &columns[1];
68
69        let size = jsons.len();
70        let mut results = BooleanVectorBuilder::with_capacity(size);
71
72        match (jsons.data_type(), paths.data_type()) {
73            (ConcreteDataType::Binary(_), ConcreteDataType::String(_)) => {
74                for i in 0..size {
75                    let result = match (jsons.get_ref(i).as_binary(), paths.get_ref(i).as_string())
76                    {
77                        (Ok(Some(json)), Ok(Some(path))) => {
78                            // Get `JsonPath`.
79                            let json_path = match jsonb::jsonpath::parse_json_path(path.as_bytes())
80                            {
81                                Ok(json_path) => json_path,
82                                Err(_) => {
83                                    return InvalidFuncArgsSnafu {
84                                        err_msg: format!("Illegal json path: {:?}", path),
85                                    }
86                                    .fail();
87                                }
88                            };
89                            jsonb::path_exists(json, json_path).ok()
90                        }
91                        _ => None,
92                    };
93
94                    results.push(result);
95                }
96            }
97
98            // Any null args existence causes the result to be NULL.
99            (ConcreteDataType::Null(_), ConcreteDataType::String(_)) => results.push_nulls(size),
100            (ConcreteDataType::Binary(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
101            (ConcreteDataType::Null(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
102
103            _ => {
104                return UnsupportedInputDataTypeSnafu {
105                    function: NAME,
106                    datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
107                }
108                .fail();
109            }
110        }
111
112        Ok(results.to_vector())
113    }
114}
115
116impl Display for JsonPathExistsFunction {
117    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
118        write!(f, "JSON_PATH_EXISTS")
119    }
120}
121
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::prelude::ScalarVector;
    use datatypes::vectors::{BinaryVector, NullVector, StringVector};

    use super::*;

    /// Encodes JSON text into the binary form stored in the JSON column.
    fn to_jsonb(text: &str) -> Vec<u8> {
        jsonb::parse_value(text.as_bytes()).unwrap().to_vec()
    }

    /// Evaluates the function over `args` with a default context.
    fn run(func: &JsonPathExistsFunction, args: &[VectorRef]) -> Result<VectorRef> {
        func.eval(&FunctionContext::default(), args)
    }

    #[test]
    fn test_json_path_exists_function() {
        let json_path_exists = JsonPathExistsFunction;

        // Metadata: name, return type and accepted signatures.
        assert_eq!("json_path_exists", json_path_exists.name());
        assert_eq!(
            DataType::Boolean,
            json_path_exists.return_type(&[DataType::Binary]).unwrap()
        );

        let signature = json_path_exists.signature();
        assert_eq!(Volatility::Immutable, signature.volatility);
        assert_eq!(
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Binary, DataType::Utf8]),
                TypeSignature::Exact(vec![DataType::Null, DataType::Utf8]),
                TypeSignature::Exact(vec![DataType::Binary, DataType::Null]),
                TypeSignature::Exact(vec![DataType::Null, DataType::Null]),
            ]),
            signature.type_signature
        );

        // Each row is (json text, json path, expected result).
        let cases = [
            (r#"{"a": {"b": 2}, "b": 2, "c": 3}"#, "$.a.b.c", false),
            (r#"{"a": 4, "b": {"c": 6}, "c": 6}"#, "$.b", true),
            (r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, "$.c.a", true),
            (r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, ".d", false),
            (r#"[1, 2, 3]"#, "$[0]", true),
            (r#"null"#, "$.a", false),
            (r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, "null", false),
            (r#"null"#, "null", false),
        ];

        let jsonbs: Vec<Vec<u8>> = cases.iter().map(|(json, _, _)| to_jsonb(json)).collect();
        let paths: Vec<&str> = cases.iter().map(|(_, path, _)| *path).collect();

        let args: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from_vec(jsonbs)),
            Arc::new(StringVector::from_vec(paths)),
        ];
        let vector = run(&json_path_exists, &args).unwrap();

        // Test for non-nulls.
        assert_eq!(8, vector.len());
        for (row, (_, _, expected)) in cases.iter().enumerate() {
            let value = vector.get_ref(row);
            assert!(!value.is_null());
            assert_eq!(value.as_boolean().unwrap().unwrap(), *expected);
        }

        // Test for path error.
        let args: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from_vec(vec![to_jsonb("{}")])),
            Arc::new(StringVector::from_vec(vec!["$..a"])),
        ];
        assert!(run(&json_path_exists, &args).is_err());

        // Test for nulls.
        let null_json_args: Vec<VectorRef> = vec![
            Arc::new(NullVector::new(1)),
            Arc::new(StringVector::from_vec(vec!["$.a"])),
        ];
        let null_path_args: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from_vec(vec![to_jsonb("{}")])),
            Arc::new(NullVector::new(1)),
        ];

        for args in [null_json_args, null_path_args] {
            let result = run(&json_path_exists, &args).unwrap();
            assert_eq!(result.len(), 1);
            assert!(result.get_ref(0).is_null());
        }
    }
}