Skip to main content

datatypes/vectors/json/
builder.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16
17use crate::data_type::ConcreteDataType;
18use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
19use crate::json::value::{JsonValue, JsonVariant};
20use crate::prelude::{ValueRef, Vector, VectorRef};
21use crate::types::JsonType;
22use crate::types::json_type::{JsonFormat, JsonNativeType};
23use crate::vectors::{MutableVector, StructVectorBuilder};
24
25#[derive(Clone)]
26pub(crate) struct JsonVectorBuilder {
27    merged_type: JsonType,
28    values: Vec<JsonValue>,
29}
30
31impl JsonVectorBuilder {
32    pub(crate) fn new(initial_native_type: JsonNativeType, capacity: usize) -> Self {
33        debug_assert!(matches!(
34            initial_native_type,
35            JsonNativeType::Object(_) | JsonNativeType::Null
36        ));
37        Self {
38            merged_type: JsonType::new_json2(initial_native_type),
39            values: Vec::with_capacity(capacity),
40        }
41    }
42
43    fn try_build(&mut self) -> Result<VectorRef> {
44        let mut builder = StructVectorBuilder::with_type_and_capacity(
45            self.merged_type.as_struct_type(),
46            self.values.len(),
47        );
48        for value in self.values.iter_mut() {
49            value.try_align(&self.merged_type)?;
50            if value.is_null() {
51                builder.push_null();
52                continue;
53            }
54            builder.try_push_value_ref(&value.as_ref().as_value_ref())?;
55        }
56        Ok(builder.to_vector())
57    }
58}
59
60impl MutableVector for JsonVectorBuilder {
61    fn data_type(&self) -> ConcreteDataType {
62        ConcreteDataType::Json(self.merged_type.clone())
63    }
64
65    fn len(&self) -> usize {
66        self.values.len()
67    }
68
69    fn as_any(&self) -> &dyn Any {
70        self
71    }
72
73    fn as_mut_any(&mut self) -> &mut dyn Any {
74        self
75    }
76
77    fn to_vector(&mut self) -> VectorRef {
78        self.try_build().unwrap_or_else(|e| panic!("{}", e))
79    }
80
81    fn to_vector_cloned(&self) -> VectorRef {
82        self.clone().to_vector()
83    }
84
85    fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
86        let ValueRef::Json(value) = value else {
87            return TryFromValueSnafu {
88                reason: format!("expected json value, got {value:?}"),
89            }
90            .fail();
91        };
92        let json_type = value.json_type();
93        if !matches!(
94            json_type.format,
95            JsonFormat::Json2(ref native_type)
96                if matches!(native_type.as_ref(), JsonNativeType::Object(_) | JsonNativeType::Null)
97        ) {
98            return TryFromValueSnafu {
99                reason: format!("expected json object value, got {value:?}"),
100            }
101            .fail();
102        }
103        if !self.merged_type.is_include(json_type) {
104            self.merged_type.merge(json_type)?;
105        }
106
107        let value = JsonValue::new(JsonVariant::from(value.variant().clone()));
108        self.values.push(value);
109        Ok(())
110    }
111
112    fn push_null(&mut self) {
113        self.values.push(JsonValue::null())
114    }
115
116    fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> {
117        UnsupportedOperationSnafu {
118            op: "extend_slice_of",
119            vector_type: "JsonVector",
120        }
121        .fail()
122    }
123}
124
#[cfg(test)]
mod tests {
    use common_base::bytes::Bytes;

    use super::*;
    use crate::data_type::ConcreteDataType;
    use crate::types::json_type::JsonObjectType;
    use crate::value::{StructValue, Value, ValueRef};

    /// Exercises type merging, null rows, type inference from a `Null` initial
    /// type, and rejection of invalid inputs.
    #[test]
    fn test_json_vector_builder() -> Result<()> {
        /// Parses `json` text into a JSON `Value`; panics on malformed input.
        fn parse_json_value(json: &str) -> Value {
            let value: serde_json::Value = serde_json::from_str(json).unwrap();
            Value::Json(Box::new(value.into()))
        }

        // Object inputs should merge into a superset schema, preserve null rows,
        // and align conflicting nested values into Variant payloads.
        let mut builder = JsonVectorBuilder::new(JsonNativeType::Object(Default::default()), 3);
        let first = parse_json_value(r#"{"id":1,"payload":{"name":"foo"}}"#);
        let second = parse_json_value(r#"{"id":2,"extra":true,"payload":"raw"}"#);
        builder.try_push_value_ref(&first.as_value_ref())?;
        builder.push_null();
        builder.try_push_value_ref(&second.as_value_ref())?;

        let merged_type = JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([
            ("extra".to_string(), JsonNativeType::Bool),
            ("id".to_string(), JsonNativeType::i64()),
            ("payload".to_string(), JsonNativeType::Variant),
        ])));
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::Json(merged_type.clone())
        );

        let merged_struct_type = merged_type.as_struct_type();
        let vector = builder.to_vector();
        assert_eq!(vector.len(), 3);
        assert_eq!(
            vector.get(0),
            Value::Struct(StructValue::new(
                vec![
                    Value::Null,
                    Value::Int64(1),
                    Value::Binary(Bytes::from(br#"{"name":"foo"}"#.to_vec())),
                ],
                merged_struct_type.clone(),
            ))
        );
        assert_eq!(vector.get(1), Value::Null);
        assert_eq!(
            vector.get(2),
            Value::Struct(StructValue::new(
                vec![
                    Value::Boolean(true),
                    Value::Int64(2),
                    Value::Binary(Bytes::from(br#""raw""#.to_vec())),
                ],
                merged_struct_type,
            ))
        );

        // A Null initial type represents an unknown JSON2 runtime type. The first
        // non-null value should set the concrete type instead of aligning all rows to Null.
        let mut inferred_builder = JsonVectorBuilder::new(JsonNativeType::Null, 2);
        let inferred_value = parse_json_value(r#"{"id":3}"#);
        inferred_builder.push_null();
        inferred_builder.try_push_value_ref(&inferred_value.as_value_ref())?;

        let inferred_type = JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
            "id".to_string(),
            JsonNativeType::i64(),
        )])));
        assert_eq!(
            inferred_builder.data_type(),
            ConcreteDataType::Json(inferred_type.clone())
        );

        let inferred_struct_type = inferred_type.as_struct_type();
        let vector = inferred_builder.to_vector();
        assert_eq!(vector.get(0), Value::Null);
        assert_eq!(
            vector.get(1),
            Value::Struct(StructValue::new(
                vec![Value::Int64(3)],
                inferred_struct_type,
            ))
        );

        // Non-object initial types are rejected by the builder invariant. That
        // invariant is a `debug_assert!`, so the panic can only be observed when
        // debug assertions are compiled in (it is absent in e.g. `--release` runs).
        if cfg!(debug_assertions) {
            let result =
                std::panic::catch_unwind(|| JsonVectorBuilder::new(JsonNativeType::Bool, 2));
            assert!(result.is_err());
        }

        // Non-object root values should be rejected at push time.
        let mut object_builder =
            JsonVectorBuilder::new(JsonNativeType::Object(Default::default()), 2);
        let object = parse_json_value(r#"{"k":1}"#);
        let boolean = parse_json_value("true");
        let err = object_builder
            .try_push_value_ref(&boolean.as_value_ref())
            .unwrap_err();
        assert!(err.to_string().contains("expected json object value"));
        object_builder.try_push_value_ref(&object.as_value_ref())?;

        // Non-JSON values should be rejected at push time.
        let mut invalid_builder =
            JsonVectorBuilder::new(JsonNativeType::Object(Default::default()), 1);
        let err = invalid_builder
            .try_push_value_ref(&ValueRef::Boolean(true))
            .unwrap_err();
        assert!(err.to_string().contains("expected json value"));

        Ok(())
    }
}