Skip to main content

datatypes/vectors/json/
builder.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16
17use crate::data_type::ConcreteDataType;
18use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
19use crate::json::value::{JsonValue, JsonVariant};
20use crate::prelude::{ValueRef, Vector, VectorRef};
21use crate::types::JsonType;
22use crate::types::json_type::JsonNativeType;
23use crate::vectors::{MutableVector, StructVectorBuilder};
24
/// Mutable builder that buffers JSON values row by row and tracks the JSON type
/// obtained by merging the types of every value pushed so far.
///
/// Values are stored as pushed; they are only aligned to the merged type when the
/// final vector is built.
#[derive(Clone)]
pub(crate) struct JsonVectorBuilder {
    /// JSON type accumulated by merging the type of each pushed value into the
    /// type the builder was created with.
    merged_type: JsonType,
    /// Buffered rows in insertion order; null rows are stored as null JSON values.
    values: Vec<JsonValue>,
}
30
31impl JsonVectorBuilder {
32    pub(crate) fn new(json_type: JsonNativeType, capacity: usize) -> Self {
33        Self {
34            merged_type: JsonType::new_json2(json_type),
35            values: Vec::with_capacity(capacity),
36        }
37    }
38
39    fn try_build(&mut self) -> Result<VectorRef> {
40        let mut builder = StructVectorBuilder::with_type_and_capacity(
41            self.merged_type.as_struct_type(),
42            self.values.len(),
43        );
44        for value in self.values.iter_mut() {
45            value.try_align(&self.merged_type)?;
46
47            if value.is_null() {
48                builder.push_null();
49                continue;
50            }
51
52            let value = value.as_ref();
53            builder.try_push_value_ref(&value.as_struct_value())?;
54        }
55        Ok(builder.to_vector())
56    }
57}
58
59impl MutableVector for JsonVectorBuilder {
60    fn data_type(&self) -> ConcreteDataType {
61        ConcreteDataType::Json(self.merged_type.clone())
62    }
63
64    fn len(&self) -> usize {
65        self.values.len()
66    }
67
68    fn as_any(&self) -> &dyn Any {
69        self
70    }
71
72    fn as_mut_any(&mut self) -> &mut dyn Any {
73        self
74    }
75
76    fn to_vector(&mut self) -> VectorRef {
77        self.try_build().unwrap_or_else(|e| panic!("{}", e))
78    }
79
80    fn to_vector_cloned(&self) -> VectorRef {
81        self.clone().to_vector()
82    }
83
84    fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
85        let ValueRef::Json(value) = value else {
86            return TryFromValueSnafu {
87                reason: format!("expected json value, got {value:?}"),
88            }
89            .fail();
90        };
91        let json_type = value.json_type();
92        self.merged_type.merge(json_type)?;
93
94        let value = JsonValue::new(JsonVariant::from(value.variant().clone()));
95        self.values.push(value);
96        Ok(())
97    }
98
99    fn push_null(&mut self) {
100        self.values.push(JsonValue::null())
101    }
102
103    fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> {
104        UnsupportedOperationSnafu {
105            op: "extend_slice_of",
106            vector_type: "JsonVector",
107        }
108        .fail()
109    }
110}
111
#[cfg(test)]
mod tests {
    use common_base::bytes::Bytes;

    use super::*;
    use crate::data_type::ConcreteDataType;
    use crate::types::json_type::JsonObjectType;
    use crate::value::{StructValue, Value, ValueRef};

    /// Parses JSON text into a `Value::Json`; panics if the text is not valid JSON.
    fn parse_json_value(json: &str) -> Value {
        let parsed: serde_json::Value = serde_json::from_str(json).unwrap();
        Value::Json(Box::new(parsed.into()))
    }

    /// Wraps raw bytes in a `Value::Binary`.
    fn binary(payload: &[u8]) -> Value {
        Value::Binary(Bytes::from(payload.to_vec()))
    }

    #[test]
    fn test_json_vector_builder() -> Result<()> {
        // Scenario 1: objects merge into a superset schema, the null row survives,
        // and the field with conflicting nested types is expected as a Variant
        // carrying the original JSON text.
        let mut object_builder =
            JsonVectorBuilder::new(JsonNativeType::Object(Default::default()), 3);
        let first = parse_json_value(r#"{"id":1,"payload":{"name":"foo"}}"#);
        let second = parse_json_value(r#"{"id":2,"extra":true,"payload":"raw"}"#);
        object_builder.try_push_value_ref(&first.as_value_ref())?;
        object_builder.push_null();
        object_builder.try_push_value_ref(&second.as_value_ref())?;

        let expected_fields = JsonObjectType::from([
            ("extra".to_string(), JsonNativeType::Bool),
            ("id".to_string(), JsonNativeType::i64()),
            ("payload".to_string(), JsonNativeType::Variant),
        ]);
        let merged_type = JsonType::new_json2(JsonNativeType::Object(expected_fields));
        assert_eq!(
            object_builder.data_type(),
            ConcreteDataType::Json(merged_type.clone())
        );

        let merged_struct_type = merged_type.as_struct_type();
        let object_row =
            |fields: Vec<Value>| Value::Struct(StructValue::new(fields, merged_struct_type.clone()));

        let rows = object_builder.to_vector();
        assert_eq!(rows.len(), 3);
        let expected_rows = [
            object_row(vec![
                Value::Null,
                Value::Int64(1),
                binary(br#"{"name":"foo"}"#),
            ]),
            Value::Null,
            object_row(vec![
                Value::Boolean(true),
                Value::Int64(2),
                binary(br#""raw""#),
            ]),
        ];
        for (index, expected) in expected_rows.iter().enumerate() {
            assert_eq!(&rows.get(index), expected);
        }

        // Scenario 2: a conflict at the root is expected to collapse the whole type
        // into a single Variant field that keeps each original JSON payload.
        let mut variant_builder = JsonVectorBuilder::new(JsonNativeType::Bool, 2);
        let object = parse_json_value(r#"{"k":1}"#);
        let boolean = parse_json_value("true");
        variant_builder.try_push_value_ref(&boolean.as_value_ref())?;
        variant_builder.try_push_value_ref(&object.as_value_ref())?;

        let variant_type = JsonType::new_json2(JsonNativeType::Variant);
        assert_eq!(
            variant_builder.data_type(),
            ConcreteDataType::Json(variant_type.clone())
        );

        let variant_struct_type = variant_type.as_struct_type();
        let variant_row = |payload: Value| {
            Value::Struct(StructValue::new(vec![payload], variant_struct_type.clone()))
        };

        let rows = variant_builder.to_vector();
        assert_eq!(rows.get(0), variant_row(binary(b"true")));
        assert_eq!(rows.get(1), variant_row(binary(br#"{"k":1}"#)));

        // Scenario 3: pushing a non-JSON value is rejected immediately.
        let mut invalid_builder = JsonVectorBuilder::new(JsonNativeType::Bool, 1);
        let rejection = invalid_builder.try_push_value_ref(&ValueRef::Boolean(true));
        let err = rejection.unwrap_err();
        assert!(err.to_string().contains("expected json value"));

        Ok(())
    }
}