Skip to main content

datatypes/vectors/json/
builder.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16
17use crate::data_type::ConcreteDataType;
18use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
19use crate::json::value::{JsonValue, JsonVariant};
20use crate::prelude::{ValueRef, Vector, VectorRef};
21use crate::types::JsonType;
22use crate::types::json_type::JsonNativeType;
23use crate::vectors::{MutableVector, StructVectorBuilder};
24
25#[derive(Clone)]
26pub(crate) struct JsonVectorBuilder {
27    merged_type: JsonType,
28    values: Vec<JsonValue>,
29}
30
31impl JsonVectorBuilder {
32    pub(crate) fn new(json_type: JsonNativeType, capacity: usize) -> Self {
33        Self {
34            merged_type: JsonType::new_json2(json_type),
35            values: Vec::with_capacity(capacity),
36        }
37    }
38
39    fn try_build(&mut self) -> Result<VectorRef> {
40        let mut builder = StructVectorBuilder::with_type_and_capacity(
41            self.merged_type.as_struct_type(),
42            self.values.len(),
43        );
44        for value in self.values.iter_mut() {
45            value.try_align(&self.merged_type)?;
46
47            if value.is_null() {
48                builder.push_null();
49                continue;
50            }
51
52            let value = value.as_ref();
53            builder.try_push_value_ref(&value.as_struct_value())?;
54        }
55        Ok(builder.to_vector())
56    }
57}
58
59impl MutableVector for JsonVectorBuilder {
60    fn data_type(&self) -> ConcreteDataType {
61        ConcreteDataType::Json(self.merged_type.clone())
62    }
63
64    fn len(&self) -> usize {
65        self.values.len()
66    }
67
68    fn as_any(&self) -> &dyn Any {
69        self
70    }
71
72    fn as_mut_any(&mut self) -> &mut dyn Any {
73        self
74    }
75
76    fn to_vector(&mut self) -> VectorRef {
77        self.try_build().unwrap_or_else(|e| panic!("{}", e))
78    }
79
80    fn to_vector_cloned(&self) -> VectorRef {
81        self.clone().to_vector()
82    }
83
84    fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
85        let ValueRef::Json(value) = value else {
86            return TryFromValueSnafu {
87                reason: format!("expected json value, got {value:?}"),
88            }
89            .fail();
90        };
91        let json_type = value.json_type();
92        if !self.merged_type.is_include(json_type) {
93            self.merged_type.merge(json_type)?;
94        }
95
96        let value = JsonValue::new(JsonVariant::from(value.variant().clone()));
97        self.values.push(value);
98        Ok(())
99    }
100
101    fn push_null(&mut self) {
102        self.values.push(JsonValue::null())
103    }
104
105    fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> {
106        UnsupportedOperationSnafu {
107            op: "extend_slice_of",
108            vector_type: "JsonVector",
109        }
110        .fail()
111    }
112}
113
#[cfg(test)]
mod tests {
    use common_base::bytes::Bytes;

    use super::*;
    use crate::data_type::ConcreteDataType;
    use crate::types::json_type::JsonObjectType;
    use crate::value::{StructValue, Value, ValueRef};

    /// Parses JSON text into a `Value::Json`; panics on malformed input.
    fn json_value(text: &str) -> Value {
        let parsed: serde_json::Value = serde_json::from_str(text).unwrap();
        Value::Json(Box::new(parsed.into()))
    }

    /// Wraps raw bytes into a `Value::Binary`.
    fn binary(payload: &[u8]) -> Value {
        Value::Binary(Bytes::from(payload.to_vec()))
    }

    #[test]
    fn test_json_vector_builder() -> Result<()> {
        // Scenario 1: object inputs merge into a superset schema, null rows are
        // preserved, and conflicting nested values end up as Variant payloads.
        let mut object_builder =
            JsonVectorBuilder::new(JsonNativeType::Object(Default::default()), 3);
        let nested_payload = json_value(r#"{"id":1,"payload":{"name":"foo"}}"#);
        let raw_payload = json_value(r#"{"id":2,"extra":true,"payload":"raw"}"#);
        object_builder.try_push_value_ref(&nested_payload.as_value_ref())?;
        object_builder.push_null();
        object_builder.try_push_value_ref(&raw_payload.as_value_ref())?;

        let expected_fields = JsonObjectType::from([
            ("extra".to_string(), JsonNativeType::Bool),
            ("id".to_string(), JsonNativeType::i64()),
            ("payload".to_string(), JsonNativeType::Variant),
        ]);
        let expected_type = JsonType::new_json2(JsonNativeType::Object(expected_fields));
        assert_eq!(
            object_builder.data_type(),
            ConcreteDataType::Json(expected_type.clone())
        );

        let expected_struct_type = expected_type.as_struct_type();
        let object_row = |fields: Vec<Value>| {
            Value::Struct(StructValue::new(fields, expected_struct_type.clone()))
        };
        let built = object_builder.to_vector();
        assert_eq!(built.len(), 3);
        assert_eq!(
            built.get(0),
            object_row(vec![
                Value::Null,
                Value::Int64(1),
                binary(br#"{"name":"foo"}"#),
            ])
        );
        assert_eq!(built.get(1), Value::Null);
        assert_eq!(
            built.get(2),
            object_row(vec![
                Value::Boolean(true),
                Value::Int64(2),
                binary(br#""raw""#),
            ])
        );

        // Scenario 2: a root-level conflict lifts the type to a plain Variant
        // field that keeps each original JSON payload.
        let mut root_conflict_builder = JsonVectorBuilder::new(JsonNativeType::Bool, 2);
        let object_input = json_value(r#"{"k":1}"#);
        let bool_input = json_value("true");
        root_conflict_builder.try_push_value_ref(&bool_input.as_value_ref())?;
        root_conflict_builder.try_push_value_ref(&object_input.as_value_ref())?;

        let variant_type = JsonType::new_json2(JsonNativeType::Variant);
        assert_eq!(
            root_conflict_builder.data_type(),
            ConcreteDataType::Json(variant_type.clone())
        );

        let variant_struct_type = variant_type.as_struct_type();
        let variant_row = |fields: Vec<Value>| {
            Value::Struct(StructValue::new(fields, variant_struct_type.clone()))
        };
        let built = root_conflict_builder.to_vector();
        assert_eq!(built.get(0), variant_row(vec![binary(b"true")]));
        assert_eq!(built.get(1), variant_row(vec![binary(br#"{"k":1}"#)]));

        // Scenario 3: non-JSON values are rejected at push time.
        let mut rejecting_builder = JsonVectorBuilder::new(JsonNativeType::Bool, 1);
        let rejection = rejecting_builder
            .try_push_value_ref(&ValueRef::Boolean(true))
            .unwrap_err();
        assert!(rejection.to_string().contains("expected json value"));

        Ok(())
    }
}