Skip to main content

datatypes/types/
json_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::BTreeMap;
16use std::fmt::{Debug, Display, Formatter};
17use std::str::FromStr;
18use std::sync::{Arc, LazyLock};
19
20use arrow::datatypes::DataType as ArrowDataType;
21use arrow_schema::{Field, Fields};
22use common_base::bytes::Bytes;
23use regex::{Captures, Regex};
24use serde::{Deserialize, Serialize};
25use snafu::ResultExt;
26
27use crate::Error;
28use crate::data_type::DataType;
29use crate::error::{
30    DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
31    UnsupportedArrowTypeSnafu,
32};
33use crate::prelude::ConcreteDataType;
34use crate::scalars::ScalarVectorBuilder;
35use crate::type_id::LogicalTypeId;
36use crate::types::{StructField, StructType};
37use crate::value::Value;
38use crate::vectors::json::builder::JsonVectorBuilder;
39use crate::vectors::{BinaryVectorBuilder, MutableVector};
40
/// Name reported by a JSON type that uses the jsonb binary format.
pub const JSON_TYPE_NAME: &str = "Json";
/// Name prefix reported by a JSON type that uses the "Json2" format; the textual form of the
/// native type is appended to it unless that native type is null.
const JSON2_TYPE_NAME: &str = "Json2";
/// Name of the single struct field used to wrap a "plain" (non-object) JSON value.
const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
/// Metadata key set on the wrapper field of a "plain" JSON value.
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";
45
46pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
47
48#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
49pub enum JsonNumberType {
50    U64,
51    I64,
52    F64,
53}
54
55#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
56pub enum JsonNativeType {
57    #[default]
58    Null,
59    Bool,
60    Number(JsonNumberType),
61    String,
62    Array(Box<JsonNativeType>),
63    Object(JsonObjectType),
64    /// A special (not in the JSON official specification) JSON type to indicate the "resolved" or
65    /// "lifted" type of two conflicting JSON types. For example, when merging JSON types of "Bool"
66    /// and "Number".
67    Variant,
68}
69
70impl JsonNativeType {
71    pub fn is_null(&self) -> bool {
72        matches!(self, JsonNativeType::Null)
73    }
74
75    pub fn u64() -> Self {
76        Self::Number(JsonNumberType::U64)
77    }
78
79    pub fn i64() -> Self {
80        Self::Number(JsonNumberType::I64)
81    }
82
83    pub fn f64() -> Self {
84        Self::Number(JsonNumberType::F64)
85    }
86
87    /// Merge other [JsonNativeType] into this.
88    /// Conflicting fields will be resolved to the "Variant" type.
89    pub fn merge(&mut self, other: &JsonNativeType) {
90        if self == other {
91            return;
92        }
93
94        fn merge_object(this: &mut JsonObjectType, that: &JsonObjectType) {
95            // merge "that" into "this" directly:
96            for (type_name, that_type) in that {
97                if let Some(this_type) = this.get_mut(type_name) {
98                    this_type.merge(that_type);
99                } else {
100                    this.insert(type_name.clone(), that_type.clone());
101                }
102            }
103        }
104
105        let zelf = std::mem::take(self);
106        *self = match (zelf, other) {
107            (JsonNativeType::Object(mut this), JsonNativeType::Object(that)) => {
108                merge_object(&mut this, that);
109                JsonNativeType::Object(this)
110            }
111            (JsonNativeType::Array(mut this), JsonNativeType::Array(that)) => {
112                this.merge(that);
113                JsonNativeType::Array(this)
114            }
115            (JsonNativeType::Null, that) => that.clone(),
116            (this, JsonNativeType::Null) => this,
117            (this, that) if this == *that => this,
118
119            (JsonNativeType::Number(x), JsonNativeType::Number(y)) => {
120                JsonNativeType::Number(match (x, y) {
121                    (x, y) if x == *y => x,
122                    (JsonNumberType::F64, _) | (_, JsonNumberType::F64) => JsonNumberType::F64,
123                    _ => JsonNumberType::I64,
124                })
125            }
126            _ => JsonNativeType::Variant,
127        };
128    }
129
130    pub fn as_arrow_type(&self) -> ArrowDataType {
131        match self {
132            JsonNativeType::Null => ArrowDataType::Null,
133            JsonNativeType::Bool => ArrowDataType::Boolean,
134            JsonNativeType::Number(n) => match n {
135                JsonNumberType::U64 => ArrowDataType::UInt64,
136                JsonNumberType::I64 => ArrowDataType::Int64,
137                JsonNumberType::F64 => ArrowDataType::Float64,
138            },
139            JsonNativeType::String => ArrowDataType::Utf8View,
140            JsonNativeType::Array(array) => {
141                ArrowDataType::List(Arc::new(Field::new("item", array.as_arrow_type(), true)))
142            }
143            JsonNativeType::Object(object) => {
144                let fields = object
145                    .iter()
146                    .map(|(k, v)| Arc::new(Field::new(k, v.as_arrow_type(), true)))
147                    .collect::<Vec<_>>();
148                ArrowDataType::Struct(Fields::from(fields))
149            }
150            JsonNativeType::Variant => ArrowDataType::Binary,
151        }
152    }
153}
154
155impl From<&ConcreteDataType> for JsonNativeType {
156    fn from(value: &ConcreteDataType) -> Self {
157        match value {
158            ConcreteDataType::Null(_) => JsonNativeType::Null,
159            ConcreteDataType::Boolean(_) => JsonNativeType::Bool,
160            ConcreteDataType::UInt64(_)
161            | ConcreteDataType::UInt32(_)
162            | ConcreteDataType::UInt16(_)
163            | ConcreteDataType::UInt8(_) => JsonNativeType::u64(),
164            ConcreteDataType::Int64(_)
165            | ConcreteDataType::Int32(_)
166            | ConcreteDataType::Int16(_)
167            | ConcreteDataType::Int8(_) => JsonNativeType::i64(),
168            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(),
169            ConcreteDataType::String(_) => JsonNativeType::String,
170            ConcreteDataType::List(list_type) => {
171                JsonNativeType::Array(Box::new(list_type.item_type().into()))
172            }
173            ConcreteDataType::Struct(struct_type) => JsonNativeType::Object(
174                struct_type
175                    .fields()
176                    .iter()
177                    .map(|field| (field.name().to_string(), field.data_type().into()))
178                    .collect(),
179            ),
180            ConcreteDataType::Json(json_type) => json_type.native_type().clone(),
181            ConcreteDataType::Binary(_) => JsonNativeType::Variant,
182            _ => unreachable!(),
183        }
184    }
185}
186
187impl TryFrom<&ArrowDataType> for JsonNativeType {
188    type Error = Error;
189
190    fn try_from(t: &ArrowDataType) -> Result<Self> {
191        let t = match t {
192            ArrowDataType::Null => JsonNativeType::Null,
193            ArrowDataType::Boolean => JsonNativeType::Bool,
194            ArrowDataType::Int8
195            | ArrowDataType::Int16
196            | ArrowDataType::Int32
197            | ArrowDataType::Int64 => JsonNativeType::i64(),
198            ArrowDataType::UInt8
199            | ArrowDataType::UInt16
200            | ArrowDataType::UInt32
201            | ArrowDataType::UInt64 => JsonNativeType::u64(),
202            ArrowDataType::Float16 | ArrowDataType::Float32 | ArrowDataType::Float64 => {
203                JsonNativeType::f64()
204            }
205            ArrowDataType::Binary
206            | ArrowDataType::FixedSizeBinary(_)
207            | ArrowDataType::LargeBinary
208            | ArrowDataType::BinaryView => JsonNativeType::Variant,
209            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
210                JsonNativeType::String
211            }
212            ArrowDataType::List(field)
213            | ArrowDataType::ListView(field)
214            | ArrowDataType::FixedSizeList(field, _)
215            | ArrowDataType::LargeList(field)
216            | ArrowDataType::LargeListView(field) => {
217                JsonNativeType::Array(Box::new(Self::try_from(field.data_type())?))
218            }
219            ArrowDataType::Struct(fields) => {
220                let mut object = JsonObjectType::new();
221                for field in fields {
222                    object.insert(field.name().clone(), Self::try_from(field.data_type())?);
223                }
224                JsonNativeType::Object(object)
225            }
226            t => {
227                return UnsupportedArrowTypeSnafu {
228                    arrow_type: t.clone(),
229                }
230                .fail();
231            }
232        };
233        Ok(t)
234    }
235}
236
237impl Display for JsonNativeType {
238    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
239        match self {
240            JsonNativeType::Null => write!(f, r#""<Null>""#),
241            JsonNativeType::Bool => write!(f, r#""<Bool>""#),
242            JsonNativeType::Number(_) => {
243                write!(f, r#""<Number>""#)
244            }
245            JsonNativeType::String => write!(f, r#""<String>""#),
246            JsonNativeType::Array(item_type) => {
247                write!(f, "[{}]", item_type)
248            }
249            JsonNativeType::Object(object) => {
250                write!(
251                    f,
252                    "{{{}}}",
253                    object
254                        .iter()
255                        .map(|(k, v)| format!(r#""{k}":{v}"#))
256                        .collect::<Vec<_>>()
257                        .join(",")
258                )
259            }
260            JsonNativeType::Variant => write!(f, r#""<Variant>""#),
261        }
262    }
263}
264
265#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
266pub enum JsonFormat {
267    #[default]
268    Jsonb,
269    Json2(Box<JsonNativeType>),
270}
271
272/// JsonType is a data type for JSON data. It is stored as binary data of jsonb format.
273/// It utilizes current binary value and vector implementation.
274#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
275pub struct JsonType {
276    pub format: JsonFormat,
277}
278
279impl JsonType {
280    pub fn new(format: JsonFormat) -> Self {
281        Self { format }
282    }
283
284    pub(crate) fn new_json2(native: JsonNativeType) -> Self {
285        Self {
286            format: JsonFormat::Json2(Box::new(native)),
287        }
288    }
289
290    pub fn is_json2(&self) -> bool {
291        matches!(self.format, JsonFormat::Json2(_))
292    }
293
294    pub(crate) fn native_type(&self) -> &JsonNativeType {
295        match &self.format {
296            JsonFormat::Jsonb => &JsonNativeType::String,
297            JsonFormat::Json2(x) => x.as_ref(),
298        }
299    }
300
301    pub fn null() -> Self {
302        Self {
303            format: JsonFormat::Json2(Box::new(JsonNativeType::Null)),
304        }
305    }
306
307    /// Make json type a struct type, by:
308    /// - if the json is an object, its entries are mapped to struct fields, obviously;
309    /// - if not, the json is one of bool, number, string or array, make it a special field
310    ///   (see [plain_json_struct_type]).
311    pub(crate) fn as_struct_type(&self) -> StructType {
312        match &self.format {
313            JsonFormat::Jsonb => StructType::default(),
314            JsonFormat::Json2(native_type) => match native_type.as_arrow_type() {
315                // TODO(LFC): Direct use Arrow's Struct datatype here.
316                ArrowDataType::Struct(fields) => StructType::from(&fields),
317                data_type => plain_json_struct_type(&data_type),
318            },
319        }
320    }
321
322    /// Try to merge this json type with others, error on datatype conflict.
323    pub fn merge(&mut self, other: &JsonType) -> Result<()> {
324        if self == other {
325            return Ok(());
326        }
327
328        match (&mut self.format, &other.format) {
329            (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
330            (JsonFormat::Json2(this), JsonFormat::Json2(that)) => {
331                this.merge(that);
332                Ok(())
333            }
334            _ => MergeJsonDatatypeSnafu {
335                reason: "json format not match",
336            }
337            .fail(),
338        }
339    }
340
341    /// Check if it includes all fields in `other` json type.
342    pub fn is_include(&self, other: &JsonType) -> bool {
343        match (&self.format, &other.format) {
344            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
345            (JsonFormat::Json2(this), JsonFormat::Json2(that)) => {
346                is_include(this.as_ref(), that.as_ref())
347            }
348            _ => false,
349        }
350    }
351}
352
353fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
354    fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
355        for (type_name, that_type) in that {
356            let Some(this_type) = this.get(type_name) else {
357                return false;
358            };
359            if !is_include(this_type, that_type) {
360                return false;
361            }
362        }
363        true
364    }
365
366    match (this, that) {
367        (this, that) if this == that => true,
368        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
369            is_include(this.as_ref(), that.as_ref())
370        }
371        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
372            is_include_object(this, that)
373        }
374        (_, JsonNativeType::Null) => true,
375        _ => false,
376    }
377}
378
379/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
380/// fixed name [JSON_PLAIN_FIELD_NAME] and with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"`.
381fn plain_json_struct_type(data_type: &ArrowDataType) -> StructType {
382    let mut field = StructField::new(
383        JSON_PLAIN_FIELD_NAME.to_string(),
384        ConcreteDataType::from_arrow_type(data_type),
385        true,
386    );
387    field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
388    StructType::new(Arc::new(vec![field]))
389}
390
391impl From<&ArrowDataType> for JsonType {
392    fn from(t: &ArrowDataType) -> Self {
393        JsonType::new_json2(JsonNativeType::from(&ConcreteDataType::from_arrow_type(t)))
394    }
395}
396
397impl DataType for JsonType {
398    fn name(&self) -> String {
399        match &self.format {
400            JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
401            JsonFormat::Json2(x) => format!(
402                "{JSON2_TYPE_NAME}{}",
403                if x.is_null() {
404                    "".to_string()
405                } else {
406                    x.to_string()
407                }
408            ),
409        }
410    }
411
412    fn logical_type_id(&self) -> LogicalTypeId {
413        LogicalTypeId::Json
414    }
415
416    fn default_value(&self) -> Value {
417        Bytes::default().into()
418    }
419
420    fn as_arrow_type(&self) -> ArrowDataType {
421        match self.format {
422            JsonFormat::Jsonb => ArrowDataType::Binary,
423            JsonFormat::Json2(_) => self.as_struct_type().as_arrow_type(),
424        }
425    }
426
427    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
428        match &self.format {
429            JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
430            JsonFormat::Json2(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
431        }
432    }
433
434    fn try_cast(&self, from: Value) -> Option<Value> {
435        match from {
436            Value::Binary(v) => Some(Value::Binary(v)),
437            _ => None,
438        }
439    }
440}
441
442impl Display for JsonType {
443    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
444        write!(f, "{}", self.name())
445    }
446}
447
448/// Converts a json type value to string
449pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
450    if val.is_empty() {
451        return Ok("".to_string());
452    }
453    match jsonb::from_slice(val) {
454        Ok(jsonb_value) => {
455            let serialized = jsonb_value.to_string();
456            fix_unicode_point(&serialized)
457        }
458        Err(e) => InvalidJsonbSnafu { error: e }.fail(),
459    }
460}
461
462/// Converts a json type value to serde_json::Value
463pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
464    let json_string = jsonb_to_string(val)?;
465    serde_json::Value::from_str(&json_string).context(DeserializeSnafu { json: json_string })
466}
467
468/// Normalizes a JSON string by converting Rust-style Unicode escape sequences to JSON-compatible format.
469///
470/// The input is scanned for Rust-style Unicode code
471/// point escapes of the form `\\u{H...}` (a backslash, `u`, an opening brace,
472/// followed by 1–6 hexadecimal digits, and a closing brace). Each such escape is
473/// converted into JSON-compatible UTF‑16 escape sequences:
474///
475/// - For code points in the Basic Multilingual Plane (≤ `0xFFFF`), the escape is
476///   converted to a single JSON `\\uXXXX` sequence with four uppercase hex digits.
477/// - For code points above `0xFFFF` and less than Unicode max code point `0x10FFFF`,
478///   the code point is encoded as a UTF‑16 surrogate pair and emitted as two consecutive
479///   `\\uXXXX` sequences (as JSON format required).
480///
481/// After this normalization, the function returns the normalized string
482fn fix_unicode_point(json: &str) -> Result<String> {
483    static UNICODE_CODE_POINT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
484        // Match literal "\u{...}" sequences, capturing 1–6 (code point range) hex digits
485        // inside braces.
486        Regex::new(r"\\u\{([0-9a-fA-F]{1,6})}").unwrap_or_else(|e| panic!("{}", e))
487    });
488
489    let v = UNICODE_CODE_POINT_PATTERN.replace_all(json, |caps: &Captures| {
490        // Extract the hex payload (without braces) and parse to a code point.
491        let hex = &caps[1];
492        let Ok(code) = u32::from_str_radix(hex, 16) else {
493            // On parse failure, leave the original escape sequence unchanged.
494            return caps[0].to_string();
495        };
496
497        if code <= 0xFFFF {
498            // Basic Multilingual Plane: JSON can represent this directly as \uXXXX.
499            format!("\\u{:04X}", code)
500        } else if code > 0x10FFFF {
501            // Beyond max Unicode code point
502            caps[0].to_string()
503        } else {
504            // Supplementary planes: JSON needs UTF-16 surrogate pairs.
505            // Convert the code point to a 20-bit value.
506            let code = code - 0x10000;
507
508            // High surrogate: top 10 bits, offset by 0xD800.
509            let high = 0xD800 + ((code >> 10) & 0x3FF);
510
511            // Low surrogate: bottom 10 bits, offset by 0xDC00.
512            let low = 0xDC00 + (code & 0x3FF);
513
514            // Emit two \uXXXX escapes in sequence.
515            format!("\\u{:04X}\\u{:04X}", high, low)
516        }
517    });
518    Ok(v.to_string())
519}
520
521/// Parses a string to a json type value
522pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
523    jsonb::parse_value(s.as_bytes())
524        .map_err(|_| InvalidJsonSnafu { value: s }.build())
525        .map(|json| json.to_vec())
526}
527
528#[cfg(test)]
529mod tests {
530    use super::*;
531    use crate::json::JsonStructureSettings;
532
533    #[test]
534    fn test_fix_unicode_point() -> Result<()> {
535        let valid_cases = vec![
536            (r#"{"data": "simple ascii"}"#, r#"{"data": "simple ascii"}"#),
537            (
538                r#"{"data":"Greek sigma: \u{03a3}"}"#,
539                r#"{"data":"Greek sigma: \u03A3"}"#,
540            ),
541            (
542                r#"{"data":"Joker card: \u{1f0df}"}"#,
543                r#"{"data":"Joker card: \uD83C\uDCDF"}"#,
544            ),
545            (
546                r#"{"data":"BMP boundary: \u{ffff}"}"#,
547                r#"{"data":"BMP boundary: \uFFFF"}"#,
548            ),
549            (
550                r#"{"data":"Supplementary min: \u{10000}"}"#,
551                r#"{"data":"Supplementary min: \uD800\uDC00"}"#,
552            ),
553            (
554                r#"{"data":"Supplementary max: \u{10ffff}"}"#,
555                r#"{"data":"Supplementary max: \uDBFF\uDFFF"}"#,
556            ),
557        ];
558        for (input, expect) in valid_cases {
559            let v = fix_unicode_point(input)?;
560            assert_eq!(v, expect);
561        }
562
563        let invalid_escape_cases = vec![
564            (
565                r#"{"data": "Invalid hex: \u{gggg}"}"#,
566                r#"{"data": "Invalid hex: \u{gggg}"}"#,
567            ),
568            (
569                r#"{"data": "Empty braces: \u{}"}"#,
570                r#"{"data": "Empty braces: \u{}"}"#,
571            ),
572            (
573                r#"{"data": "Out of range: \u{1100000}"}"#,
574                r#"{"data": "Out of range: \u{1100000}"}"#,
575            ),
576        ];
577        for (input, expect) in invalid_escape_cases {
578            let v = fix_unicode_point(input)?;
579            assert_eq!(v, expect);
580        }
581
582        Ok(())
583    }
584
585    #[test]
586    fn test_json_type_include() {
587        fn test(this: &JsonNativeType, that: &JsonNativeType, expected: bool) {
588            assert_eq!(is_include(this, that), expected);
589        }
590
591        test(&JsonNativeType::Null, &JsonNativeType::Null, true);
592        test(&JsonNativeType::Null, &JsonNativeType::Bool, false);
593
594        test(&JsonNativeType::Bool, &JsonNativeType::Null, true);
595        test(&JsonNativeType::Bool, &JsonNativeType::Bool, true);
596        test(&JsonNativeType::Bool, &JsonNativeType::u64(), false);
597
598        test(&JsonNativeType::u64(), &JsonNativeType::Null, true);
599        test(&JsonNativeType::u64(), &JsonNativeType::u64(), true);
600        test(&JsonNativeType::u64(), &JsonNativeType::String, false);
601
602        test(&JsonNativeType::String, &JsonNativeType::Null, true);
603        test(&JsonNativeType::String, &JsonNativeType::String, true);
604        test(
605            &JsonNativeType::String,
606            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
607            false,
608        );
609
610        test(
611            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
612            &JsonNativeType::Null,
613            true,
614        );
615        test(
616            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
617            &JsonNativeType::Array(Box::new(JsonNativeType::Null)),
618            true,
619        );
620        test(
621            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
622            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
623            true,
624        );
625        test(
626            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
627            &JsonNativeType::String,
628            false,
629        );
630        test(
631            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
632            &JsonNativeType::Object(JsonObjectType::new()),
633            false,
634        );
635
636        let simple_json_object = &JsonNativeType::Object(JsonObjectType::from([(
637            "foo".to_string(),
638            JsonNativeType::String,
639        )]));
640        test(simple_json_object, &JsonNativeType::Null, true);
641        test(simple_json_object, simple_json_object, true);
642        test(simple_json_object, &JsonNativeType::i64(), false);
643        test(
644            simple_json_object,
645            &JsonNativeType::Object(JsonObjectType::from([(
646                "bar".to_string(),
647                JsonNativeType::i64(),
648            )])),
649            false,
650        );
651
652        let complex_json_object = &JsonNativeType::Object(JsonObjectType::from([
653            (
654                "nested".to_string(),
655                JsonNativeType::Object(JsonObjectType::from([(
656                    "a".to_string(),
657                    JsonNativeType::Object(JsonObjectType::from([(
658                        "b".to_string(),
659                        JsonNativeType::Object(JsonObjectType::from([(
660                            "c".to_string(),
661                            JsonNativeType::String,
662                        )])),
663                    )])),
664                )])),
665            ),
666            ("bar".to_string(), JsonNativeType::i64()),
667        ]));
668        test(complex_json_object, &JsonNativeType::Null, true);
669        test(complex_json_object, &JsonNativeType::String, false);
670        test(complex_json_object, complex_json_object, true);
671        test(
672            complex_json_object,
673            &JsonNativeType::Object(JsonObjectType::from([(
674                "bar".to_string(),
675                JsonNativeType::i64(),
676            )])),
677            true,
678        );
679        test(
680            complex_json_object,
681            &JsonNativeType::Object(JsonObjectType::from([
682                (
683                    "nested".to_string(),
684                    JsonNativeType::Object(JsonObjectType::from([(
685                        "a".to_string(),
686                        JsonNativeType::Null,
687                    )])),
688                ),
689                ("bar".to_string(), JsonNativeType::i64()),
690            ])),
691            true,
692        );
693        test(
694            complex_json_object,
695            &JsonNativeType::Object(JsonObjectType::from([
696                (
697                    "nested".to_string(),
698                    JsonNativeType::Object(JsonObjectType::from([(
699                        "a".to_string(),
700                        JsonNativeType::String,
701                    )])),
702                ),
703                ("bar".to_string(), JsonNativeType::i64()),
704            ])),
705            false,
706        );
707        test(
708            complex_json_object,
709            &JsonNativeType::Object(JsonObjectType::from([
710                (
711                    "nested".to_string(),
712                    JsonNativeType::Object(JsonObjectType::from([(
713                        "a".to_string(),
714                        JsonNativeType::Object(JsonObjectType::from([(
715                            "b".to_string(),
716                            JsonNativeType::String,
717                        )])),
718                    )])),
719                ),
720                ("bar".to_string(), JsonNativeType::i64()),
721            ])),
722            false,
723        );
724        test(
725            complex_json_object,
726            &JsonNativeType::Object(JsonObjectType::from([
727                (
728                    "nested".to_string(),
729                    JsonNativeType::Object(JsonObjectType::from([(
730                        "a".to_string(),
731                        JsonNativeType::Object(JsonObjectType::from([(
732                            "b".to_string(),
733                            JsonNativeType::Object(JsonObjectType::from([(
734                                "c".to_string(),
735                                JsonNativeType::Null,
736                            )])),
737                        )])),
738                    )])),
739                ),
740                ("bar".to_string(), JsonNativeType::i64()),
741            ])),
742            true,
743        );
744        test(
745            complex_json_object,
746            &JsonNativeType::Object(JsonObjectType::from([
747                (
748                    "nested".to_string(),
749                    JsonNativeType::Object(JsonObjectType::from([(
750                        "a".to_string(),
751                        JsonNativeType::Object(JsonObjectType::from([(
752                            "b".to_string(),
753                            JsonNativeType::Object(JsonObjectType::from([(
754                                "c".to_string(),
755                                JsonNativeType::Bool,
756                            )])),
757                        )])),
758                    )])),
759                ),
760                ("bar".to_string(), JsonNativeType::i64()),
761            ])),
762            false,
763        );
764        test(
765            complex_json_object,
766            &JsonNativeType::Object(JsonObjectType::from([(
767                "nested".to_string(),
768                JsonNativeType::Object(JsonObjectType::from([(
769                    "a".to_string(),
770                    JsonNativeType::Object(JsonObjectType::from([(
771                        "b".to_string(),
772                        JsonNativeType::Object(JsonObjectType::from([(
773                            "c".to_string(),
774                            JsonNativeType::String,
775                        )])),
776                    )])),
777                )])),
778            )])),
779            true,
780        );
781    }
782
783    #[test]
784    fn test_merge_json_type() -> Result<()> {
785        fn test(
786            json: &str,
787            json_type: &mut JsonType,
788            expected: std::result::Result<&str, &str>,
789        ) -> Result<()> {
790            let json: serde_json::Value = serde_json::from_str(json).unwrap();
791
792            let settings = JsonStructureSettings::Structured(None);
793            let value = settings.encode(json)?;
794            let value_type = value.data_type();
795            let Some(other) = value_type.as_json() else {
796                unreachable!()
797            };
798
799            let result = json_type.merge(other);
800            match (result, expected) {
801                (Ok(()), Ok(expected)) => {
802                    assert_eq!(json_type.native_type().to_string(), expected);
803                }
804                (Err(err), Err(expected)) => {
805                    assert_eq!(err.to_string(), expected);
806                }
807                _ => unreachable!(),
808            }
809            Ok(())
810        }
811
812        // Null should be absorbed by a concrete scalar type.
813        test("true", &mut JsonType::null(), Ok(r#""<Bool>""#))?;
814
815        // Merging a null value into an existing concrete type should keep the type unchanged.
816        test(
817            "null",
818            &mut JsonType::new_json2(JsonNativeType::Bool),
819            Ok(r#""<Bool>""#),
820        )?;
821
822        // Identical number categories should stay as Number.
823        test(
824            "1",
825            &mut JsonType::new_json2(JsonNativeType::i64()),
826            Ok(r#""<Number>""#),
827        )?;
828
829        // Conflicting number categories should be lifted to Variant.
830        test(
831            "1.5",
832            &mut JsonType::new_json2(JsonNativeType::i64()),
833            Ok(r#""<Number>""#),
834        )?;
835
836        // Object merge should preserve existing fields and append missing fields.
837        test(
838            r#"{"foo":"x"}"#,
839            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
840                "bar".to_string(),
841                JsonNativeType::i64(),
842            )]))),
843            Ok(r#"{"bar":"<Number>","foo":"<String>"}"#),
844        )?;
845
846        // Conflicting object field types should only lift that field to Variant.
847        test(
848            r#"{"foo":1}"#,
849            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
850                "foo".to_string(),
851                JsonNativeType::Bool,
852            )]))),
853            Ok(r#"{"foo":"<Variant>"}"#),
854        )?;
855
856        // Nested objects should merge recursively.
857        test(
858            r#"{"nested":{"foo":"bar"}}"#,
859            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
860                "nested".to_string(),
861                JsonNativeType::Object(JsonObjectType::from([(
862                    "bar".to_string(),
863                    JsonNativeType::Bool,
864                )])),
865            )]))),
866            Ok(r#"{"nested":{"bar":"<Bool>","foo":"<String>"}}"#),
867        )?;
868
869        // Arrays should merge their element types recursively.
870        test(
871            r#"["foo"]"#,
872            &mut JsonType::new_json2(JsonNativeType::Array(Box::new(JsonNativeType::u64()))),
873            Ok(r#"["<Variant>"]"#),
874        )?;
875
876        // Root-level incompatible types should be lifted to Variant.
877        test(
878            r#"{"foo":"bar"}"#,
879            &mut JsonType::new_json2(JsonNativeType::Bool),
880            Ok(r#""<Variant>""#),
881        )?;
882
883        // Jsonb and Json2 should not be mergeable.
884        test(
885            "true",
886            &mut JsonType::new(JsonFormat::Jsonb),
887            Err("Failed to merge JSON datatype: json format not match"),
888        )?;
889
890        Ok(())
891    }
892}