Skip to main content

datatypes/types/
json_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::BTreeMap;
16use std::fmt::{Debug, Display, Formatter};
17use std::str::FromStr;
18use std::sync::{Arc, LazyLock};
19
20use arrow::datatypes::DataType as ArrowDataType;
21use arrow_schema::{Field, Fields};
22use common_base::bytes::Bytes;
23use regex::{Captures, Regex};
24use serde::{Deserialize, Serialize};
25use snafu::ResultExt;
26
27use crate::Error;
28use crate::data_type::DataType;
29use crate::error::{
30    DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
31    UnsupportedArrowTypeSnafu,
32};
33use crate::prelude::ConcreteDataType;
34use crate::scalars::ScalarVectorBuilder;
35use crate::type_id::LogicalTypeId;
36use crate::types::{StructField, StructType};
37use crate::value::Value;
38use crate::vectors::json::builder::JsonVectorBuilder;
39use crate::vectors::{BinaryVectorBuilder, MutableVector};
40
/// Type name reported by [`JsonType`] when its format is [`JsonFormat::Jsonb`].
pub const JSON_TYPE_NAME: &str = "Json";
/// Type name prefix reported by [`JsonType`] when its format is [`JsonFormat::Json2`]; the
/// rendered native type is appended to it unless that type is `Null`.
const JSON2_TYPE_NAME: &str = "Json2";
/// Name of the single struct field that wraps a "plain" (non-object) JSON value.
const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
/// Metadata key set (to `true`) on the field named [`JSON_PLAIN_FIELD_NAME`].
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";

/// The type of a JSON object: each field name mapped to the type of that field.
/// Being a [`BTreeMap`], its entries are always iterated in key order.
pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
47
/// The numeric representation of a JSON number.
///
/// Each variant corresponds to the 64-bit Arrow type of the same kind (`UInt64`, `Int64`,
/// `Float64`) when converted with [`JsonNativeType::as_arrow_type`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JsonNumberType {
    /// Unsigned 64-bit integer.
    U64,
    /// Signed 64-bit integer.
    I64,
    /// 64-bit floating point number.
    F64,
}
54
/// The (possibly nested) type of a JSON value.
///
/// The default is [`JsonNativeType::Null`]. Note that the variant order is significant: it
/// determines the derived `PartialOrd`/`Ord` ordering.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
pub enum JsonNativeType {
    /// The JSON `null` type; also the "no type known yet" starting point for merging.
    #[default]
    Null,
    /// A JSON boolean.
    Bool,
    /// A JSON number, with its concrete numeric representation.
    Number(JsonNumberType),
    /// A JSON string.
    String,
    /// A JSON array; the boxed type is the type of its items.
    Array(Box<JsonNativeType>),
    /// A JSON object; see [`JsonObjectType`].
    Object(JsonObjectType),
    /// A special (not in the JSON official specification) JSON type to indicate the "resolved" or
    /// "lifted" type of two conflicting JSON types. For example, when merging JSON types of "Bool"
    /// and "Number".
    Variant,
}
69
70impl JsonNativeType {
71    pub fn is_null(&self) -> bool {
72        matches!(self, JsonNativeType::Null)
73    }
74
75    pub fn u64() -> Self {
76        Self::Number(JsonNumberType::U64)
77    }
78
79    pub fn i64() -> Self {
80        Self::Number(JsonNumberType::I64)
81    }
82
83    pub fn f64() -> Self {
84        Self::Number(JsonNumberType::F64)
85    }
86
87    /// Merge other [JsonNativeType] into this.
88    /// Conflicting fields will be resolved to the "Variant" type.
89    pub fn merge(&mut self, other: &JsonNativeType) {
90        if self == other {
91            return;
92        }
93
94        fn merge_object(this: &mut JsonObjectType, that: &JsonObjectType) {
95            // merge "that" into "this" directly:
96            for (type_name, that_type) in that {
97                if let Some(this_type) = this.get_mut(type_name) {
98                    this_type.merge(that_type);
99                } else {
100                    this.insert(type_name.clone(), that_type.clone());
101                }
102            }
103        }
104
105        let zelf = std::mem::take(self);
106        *self = match (zelf, other) {
107            (JsonNativeType::Object(mut this), JsonNativeType::Object(that)) => {
108                merge_object(&mut this, that);
109                JsonNativeType::Object(this)
110            }
111            (JsonNativeType::Array(mut this), JsonNativeType::Array(that)) => {
112                this.merge(that);
113                JsonNativeType::Array(this)
114            }
115            (JsonNativeType::Null, that) => that.clone(),
116            (this, JsonNativeType::Null) => this,
117            (this, that) if this == *that => this,
118            _ => JsonNativeType::Variant,
119        };
120    }
121
122    pub fn as_arrow_type(&self) -> ArrowDataType {
123        match self {
124            JsonNativeType::Null => ArrowDataType::Null,
125            JsonNativeType::Bool => ArrowDataType::Boolean,
126            JsonNativeType::Number(n) => match n {
127                JsonNumberType::U64 => ArrowDataType::UInt64,
128                JsonNumberType::I64 => ArrowDataType::Int64,
129                JsonNumberType::F64 => ArrowDataType::Float64,
130            },
131            JsonNativeType::String => ArrowDataType::Utf8,
132            JsonNativeType::Array(array) => {
133                ArrowDataType::List(Arc::new(Field::new("item", array.as_arrow_type(), true)))
134            }
135            JsonNativeType::Object(object) => {
136                let fields = object
137                    .iter()
138                    .map(|(k, v)| Arc::new(Field::new(k, v.as_arrow_type(), true)))
139                    .collect::<Vec<_>>();
140                ArrowDataType::Struct(Fields::from(fields))
141            }
142            JsonNativeType::Variant => ArrowDataType::Binary,
143        }
144    }
145}
146
147impl From<&ConcreteDataType> for JsonNativeType {
148    fn from(value: &ConcreteDataType) -> Self {
149        match value {
150            ConcreteDataType::Null(_) => JsonNativeType::Null,
151            ConcreteDataType::Boolean(_) => JsonNativeType::Bool,
152            ConcreteDataType::UInt64(_)
153            | ConcreteDataType::UInt32(_)
154            | ConcreteDataType::UInt16(_)
155            | ConcreteDataType::UInt8(_) => JsonNativeType::u64(),
156            ConcreteDataType::Int64(_)
157            | ConcreteDataType::Int32(_)
158            | ConcreteDataType::Int16(_)
159            | ConcreteDataType::Int8(_) => JsonNativeType::i64(),
160            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(),
161            ConcreteDataType::String(_) => JsonNativeType::String,
162            ConcreteDataType::List(list_type) => {
163                JsonNativeType::Array(Box::new(list_type.item_type().into()))
164            }
165            ConcreteDataType::Struct(struct_type) => JsonNativeType::Object(
166                struct_type
167                    .fields()
168                    .iter()
169                    .map(|field| (field.name().to_string(), field.data_type().into()))
170                    .collect(),
171            ),
172            ConcreteDataType::Json(json_type) => json_type.native_type().clone(),
173            ConcreteDataType::Binary(_) => JsonNativeType::Variant,
174            _ => unreachable!(),
175        }
176    }
177}
178
179impl TryFrom<&ArrowDataType> for JsonNativeType {
180    type Error = Error;
181
182    fn try_from(t: &ArrowDataType) -> Result<Self> {
183        let t = match t {
184            ArrowDataType::Null => JsonNativeType::Null,
185            ArrowDataType::Boolean => JsonNativeType::Bool,
186            ArrowDataType::Int8
187            | ArrowDataType::Int16
188            | ArrowDataType::Int32
189            | ArrowDataType::Int64 => JsonNativeType::i64(),
190            ArrowDataType::UInt8
191            | ArrowDataType::UInt16
192            | ArrowDataType::UInt32
193            | ArrowDataType::UInt64 => JsonNativeType::u64(),
194            ArrowDataType::Float16 | ArrowDataType::Float32 | ArrowDataType::Float64 => {
195                JsonNativeType::f64()
196            }
197            ArrowDataType::Binary
198            | ArrowDataType::FixedSizeBinary(_)
199            | ArrowDataType::LargeBinary
200            | ArrowDataType::BinaryView => JsonNativeType::Variant,
201            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
202                JsonNativeType::String
203            }
204            ArrowDataType::List(field)
205            | ArrowDataType::ListView(field)
206            | ArrowDataType::FixedSizeList(field, _)
207            | ArrowDataType::LargeList(field)
208            | ArrowDataType::LargeListView(field) => {
209                JsonNativeType::Array(Box::new(Self::try_from(field.data_type())?))
210            }
211            ArrowDataType::Struct(fields) => {
212                let mut object = JsonObjectType::new();
213                for field in fields {
214                    object.insert(field.name().clone(), Self::try_from(field.data_type())?);
215                }
216                JsonNativeType::Object(object)
217            }
218            t => {
219                return UnsupportedArrowTypeSnafu {
220                    arrow_type: t.clone(),
221                }
222                .fail();
223            }
224        };
225        Ok(t)
226    }
227}
228
229impl Display for JsonNativeType {
230    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
231        match self {
232            JsonNativeType::Null => write!(f, r#""<Null>""#),
233            JsonNativeType::Bool => write!(f, r#""<Bool>""#),
234            JsonNativeType::Number(_) => {
235                write!(f, r#""<Number>""#)
236            }
237            JsonNativeType::String => write!(f, r#""<String>""#),
238            JsonNativeType::Array(item_type) => {
239                write!(f, "[{}]", item_type)
240            }
241            JsonNativeType::Object(object) => {
242                write!(
243                    f,
244                    "{{{}}}",
245                    object
246                        .iter()
247                        .map(|(k, v)| format!(r#""{k}":{v}"#))
248                        .collect::<Vec<_>>()
249                        .join(",")
250                )
251            }
252            JsonNativeType::Variant => write!(f, r#""<Variant>""#),
253        }
254    }
255}
256
/// The storage format of a [`JsonType`]. Defaults to [`JsonFormat::Jsonb`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
pub enum JsonFormat {
    /// JSON encoded as jsonb binary data.
    #[default]
    Jsonb,
    /// JSON with a known native type, carried in the box.
    Json2(Box<JsonNativeType>),
}
263
/// JsonType is a data type for JSON data.
///
/// With the [`JsonFormat::Jsonb`] format it is stored as binary data of jsonb format, utilizing
/// the current binary value and vector implementation. With the [`JsonFormat::Json2`] format
/// it carries a [`JsonNativeType`] and is represented as an Arrow struct instead.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct JsonType {
    /// How the JSON data is stored.
    pub format: JsonFormat,
}
270
271impl JsonType {
272    pub fn new(format: JsonFormat) -> Self {
273        Self { format }
274    }
275
276    pub(crate) fn new_json2(native: JsonNativeType) -> Self {
277        Self {
278            format: JsonFormat::Json2(Box::new(native)),
279        }
280    }
281
282    pub fn is_json2(&self) -> bool {
283        matches!(self.format, JsonFormat::Json2(_))
284    }
285
286    pub(crate) fn native_type(&self) -> &JsonNativeType {
287        match &self.format {
288            JsonFormat::Jsonb => &JsonNativeType::String,
289            JsonFormat::Json2(x) => x.as_ref(),
290        }
291    }
292
293    pub fn null() -> Self {
294        Self {
295            format: JsonFormat::Json2(Box::new(JsonNativeType::Null)),
296        }
297    }
298
299    /// Make json type a struct type, by:
300    /// - if the json is an object, its entries are mapped to struct fields, obviously;
301    /// - if not, the json is one of bool, number, string or array, make it a special field
302    ///   (see [plain_json_struct_type]).
303    pub(crate) fn as_struct_type(&self) -> StructType {
304        match &self.format {
305            JsonFormat::Jsonb => StructType::default(),
306            JsonFormat::Json2(native_type) => match native_type.as_arrow_type() {
307                // TODO(LFC): Direct use Arrow's Struct datatype here.
308                ArrowDataType::Struct(fields) => StructType::from(&fields),
309                data_type => plain_json_struct_type(&data_type),
310            },
311        }
312    }
313
314    /// Try to merge this json type with others, error on datatype conflict.
315    pub fn merge(&mut self, other: &JsonType) -> Result<()> {
316        if self == other {
317            return Ok(());
318        }
319
320        match (&mut self.format, &other.format) {
321            (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
322            (JsonFormat::Json2(this), JsonFormat::Json2(that)) => {
323                this.merge(that);
324                Ok(())
325            }
326            _ => MergeJsonDatatypeSnafu {
327                reason: "json format not match",
328            }
329            .fail(),
330        }
331    }
332
333    /// Check if it includes all fields in `other` json type.
334    pub fn is_include(&self, other: &JsonType) -> bool {
335        match (&self.format, &other.format) {
336            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
337            (JsonFormat::Json2(this), JsonFormat::Json2(that)) => {
338                is_include(this.as_ref(), that.as_ref())
339            }
340            _ => false,
341        }
342    }
343}
344
345fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
346    fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
347        for (type_name, that_type) in that {
348            let Some(this_type) = this.get(type_name) else {
349                return false;
350            };
351            if !is_include(this_type, that_type) {
352                return false;
353            }
354        }
355        true
356    }
357
358    match (this, that) {
359        (this, that) if this == that => true,
360        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
361            is_include(this.as_ref(), that.as_ref())
362        }
363        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
364            is_include_object(this, that)
365        }
366        (_, JsonNativeType::Null) => true,
367        _ => false,
368    }
369}
370
371/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
372/// fixed name [JSON_PLAIN_FIELD_NAME] and with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"`.
373fn plain_json_struct_type(data_type: &ArrowDataType) -> StructType {
374    let mut field = StructField::new(
375        JSON_PLAIN_FIELD_NAME.to_string(),
376        ConcreteDataType::from_arrow_type(data_type),
377        true,
378    );
379    field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
380    StructType::new(Arc::new(vec![field]))
381}
382
383impl From<&ArrowDataType> for JsonType {
384    fn from(t: &ArrowDataType) -> Self {
385        JsonType::new_json2(JsonNativeType::from(&ConcreteDataType::from_arrow_type(t)))
386    }
387}
388
389impl DataType for JsonType {
390    fn name(&self) -> String {
391        match &self.format {
392            JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
393            JsonFormat::Json2(x) => format!(
394                "{JSON2_TYPE_NAME}{}",
395                if x.is_null() {
396                    "".to_string()
397                } else {
398                    x.to_string()
399                }
400            ),
401        }
402    }
403
404    fn logical_type_id(&self) -> LogicalTypeId {
405        LogicalTypeId::Json
406    }
407
408    fn default_value(&self) -> Value {
409        Bytes::default().into()
410    }
411
412    fn as_arrow_type(&self) -> ArrowDataType {
413        match self.format {
414            JsonFormat::Jsonb => ArrowDataType::Binary,
415            JsonFormat::Json2(_) => self.as_struct_type().as_arrow_type(),
416        }
417    }
418
419    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
420        match &self.format {
421            JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
422            JsonFormat::Json2(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
423        }
424    }
425
426    fn try_cast(&self, from: Value) -> Option<Value> {
427        match from {
428            Value::Binary(v) => Some(Value::Binary(v)),
429            _ => None,
430        }
431    }
432}
433
434impl Display for JsonType {
435    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
436        write!(f, "{}", self.name())
437    }
438}
439
440/// Converts a json type value to string
441pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
442    if val.is_empty() {
443        return Ok("".to_string());
444    }
445    match jsonb::from_slice(val) {
446        Ok(jsonb_value) => {
447            let serialized = jsonb_value.to_string();
448            fix_unicode_point(&serialized)
449        }
450        Err(e) => InvalidJsonbSnafu { error: e }.fail(),
451    }
452}
453
454/// Converts a json type value to serde_json::Value
455pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
456    let json_string = jsonb_to_string(val)?;
457    serde_json::Value::from_str(&json_string).context(DeserializeSnafu { json: json_string })
458}
459
460/// Normalizes a JSON string by converting Rust-style Unicode escape sequences to JSON-compatible format.
461///
462/// The input is scanned for Rust-style Unicode code
463/// point escapes of the form `\\u{H...}` (a backslash, `u`, an opening brace,
464/// followed by 1–6 hexadecimal digits, and a closing brace). Each such escape is
465/// converted into JSON-compatible UTF‑16 escape sequences:
466///
467/// - For code points in the Basic Multilingual Plane (≤ `0xFFFF`), the escape is
468///   converted to a single JSON `\\uXXXX` sequence with four uppercase hex digits.
469/// - For code points above `0xFFFF` and less than Unicode max code point `0x10FFFF`,
470///   the code point is encoded as a UTF‑16 surrogate pair and emitted as two consecutive
471///   `\\uXXXX` sequences (as JSON format required).
472///
473/// After this normalization, the function returns the normalized string
474fn fix_unicode_point(json: &str) -> Result<String> {
475    static UNICODE_CODE_POINT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
476        // Match literal "\u{...}" sequences, capturing 1–6 (code point range) hex digits
477        // inside braces.
478        Regex::new(r"\\u\{([0-9a-fA-F]{1,6})}").unwrap_or_else(|e| panic!("{}", e))
479    });
480
481    let v = UNICODE_CODE_POINT_PATTERN.replace_all(json, |caps: &Captures| {
482        // Extract the hex payload (without braces) and parse to a code point.
483        let hex = &caps[1];
484        let Ok(code) = u32::from_str_radix(hex, 16) else {
485            // On parse failure, leave the original escape sequence unchanged.
486            return caps[0].to_string();
487        };
488
489        if code <= 0xFFFF {
490            // Basic Multilingual Plane: JSON can represent this directly as \uXXXX.
491            format!("\\u{:04X}", code)
492        } else if code > 0x10FFFF {
493            // Beyond max Unicode code point
494            caps[0].to_string()
495        } else {
496            // Supplementary planes: JSON needs UTF-16 surrogate pairs.
497            // Convert the code point to a 20-bit value.
498            let code = code - 0x10000;
499
500            // High surrogate: top 10 bits, offset by 0xD800.
501            let high = 0xD800 + ((code >> 10) & 0x3FF);
502
503            // Low surrogate: bottom 10 bits, offset by 0xDC00.
504            let low = 0xDC00 + (code & 0x3FF);
505
506            // Emit two \uXXXX escapes in sequence.
507            format!("\\u{:04X}\\u{:04X}", high, low)
508        }
509    });
510    Ok(v.to_string())
511}
512
513/// Parses a string to a json type value
514pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
515    jsonb::parse_value(s.as_bytes())
516        .map_err(|_| InvalidJsonSnafu { value: s }.build())
517        .map(|json| json.to_vec())
518}
519
520#[cfg(test)]
521mod tests {
522    use super::*;
523    use crate::json::JsonStructureSettings;
524
525    #[test]
526    fn test_fix_unicode_point() -> Result<()> {
527        let valid_cases = vec![
528            (r#"{"data": "simple ascii"}"#, r#"{"data": "simple ascii"}"#),
529            (
530                r#"{"data":"Greek sigma: \u{03a3}"}"#,
531                r#"{"data":"Greek sigma: \u03A3"}"#,
532            ),
533            (
534                r#"{"data":"Joker card: \u{1f0df}"}"#,
535                r#"{"data":"Joker card: \uD83C\uDCDF"}"#,
536            ),
537            (
538                r#"{"data":"BMP boundary: \u{ffff}"}"#,
539                r#"{"data":"BMP boundary: \uFFFF"}"#,
540            ),
541            (
542                r#"{"data":"Supplementary min: \u{10000}"}"#,
543                r#"{"data":"Supplementary min: \uD800\uDC00"}"#,
544            ),
545            (
546                r#"{"data":"Supplementary max: \u{10ffff}"}"#,
547                r#"{"data":"Supplementary max: \uDBFF\uDFFF"}"#,
548            ),
549        ];
550        for (input, expect) in valid_cases {
551            let v = fix_unicode_point(input)?;
552            assert_eq!(v, expect);
553        }
554
555        let invalid_escape_cases = vec![
556            (
557                r#"{"data": "Invalid hex: \u{gggg}"}"#,
558                r#"{"data": "Invalid hex: \u{gggg}"}"#,
559            ),
560            (
561                r#"{"data": "Empty braces: \u{}"}"#,
562                r#"{"data": "Empty braces: \u{}"}"#,
563            ),
564            (
565                r#"{"data": "Out of range: \u{1100000}"}"#,
566                r#"{"data": "Out of range: \u{1100000}"}"#,
567            ),
568        ];
569        for (input, expect) in invalid_escape_cases {
570            let v = fix_unicode_point(input)?;
571            assert_eq!(v, expect);
572        }
573
574        Ok(())
575    }
576
577    #[test]
578    fn test_json_type_include() {
579        fn test(this: &JsonNativeType, that: &JsonNativeType, expected: bool) {
580            assert_eq!(is_include(this, that), expected);
581        }
582
583        test(&JsonNativeType::Null, &JsonNativeType::Null, true);
584        test(&JsonNativeType::Null, &JsonNativeType::Bool, false);
585
586        test(&JsonNativeType::Bool, &JsonNativeType::Null, true);
587        test(&JsonNativeType::Bool, &JsonNativeType::Bool, true);
588        test(&JsonNativeType::Bool, &JsonNativeType::u64(), false);
589
590        test(&JsonNativeType::u64(), &JsonNativeType::Null, true);
591        test(&JsonNativeType::u64(), &JsonNativeType::u64(), true);
592        test(&JsonNativeType::u64(), &JsonNativeType::String, false);
593
594        test(&JsonNativeType::String, &JsonNativeType::Null, true);
595        test(&JsonNativeType::String, &JsonNativeType::String, true);
596        test(
597            &JsonNativeType::String,
598            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
599            false,
600        );
601
602        test(
603            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
604            &JsonNativeType::Null,
605            true,
606        );
607        test(
608            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
609            &JsonNativeType::Array(Box::new(JsonNativeType::Null)),
610            true,
611        );
612        test(
613            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
614            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
615            true,
616        );
617        test(
618            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
619            &JsonNativeType::String,
620            false,
621        );
622        test(
623            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
624            &JsonNativeType::Object(JsonObjectType::new()),
625            false,
626        );
627
628        let simple_json_object = &JsonNativeType::Object(JsonObjectType::from([(
629            "foo".to_string(),
630            JsonNativeType::String,
631        )]));
632        test(simple_json_object, &JsonNativeType::Null, true);
633        test(simple_json_object, simple_json_object, true);
634        test(simple_json_object, &JsonNativeType::i64(), false);
635        test(
636            simple_json_object,
637            &JsonNativeType::Object(JsonObjectType::from([(
638                "bar".to_string(),
639                JsonNativeType::i64(),
640            )])),
641            false,
642        );
643
644        let complex_json_object = &JsonNativeType::Object(JsonObjectType::from([
645            (
646                "nested".to_string(),
647                JsonNativeType::Object(JsonObjectType::from([(
648                    "a".to_string(),
649                    JsonNativeType::Object(JsonObjectType::from([(
650                        "b".to_string(),
651                        JsonNativeType::Object(JsonObjectType::from([(
652                            "c".to_string(),
653                            JsonNativeType::String,
654                        )])),
655                    )])),
656                )])),
657            ),
658            ("bar".to_string(), JsonNativeType::i64()),
659        ]));
660        test(complex_json_object, &JsonNativeType::Null, true);
661        test(complex_json_object, &JsonNativeType::String, false);
662        test(complex_json_object, complex_json_object, true);
663        test(
664            complex_json_object,
665            &JsonNativeType::Object(JsonObjectType::from([(
666                "bar".to_string(),
667                JsonNativeType::i64(),
668            )])),
669            true,
670        );
671        test(
672            complex_json_object,
673            &JsonNativeType::Object(JsonObjectType::from([
674                (
675                    "nested".to_string(),
676                    JsonNativeType::Object(JsonObjectType::from([(
677                        "a".to_string(),
678                        JsonNativeType::Null,
679                    )])),
680                ),
681                ("bar".to_string(), JsonNativeType::i64()),
682            ])),
683            true,
684        );
685        test(
686            complex_json_object,
687            &JsonNativeType::Object(JsonObjectType::from([
688                (
689                    "nested".to_string(),
690                    JsonNativeType::Object(JsonObjectType::from([(
691                        "a".to_string(),
692                        JsonNativeType::String,
693                    )])),
694                ),
695                ("bar".to_string(), JsonNativeType::i64()),
696            ])),
697            false,
698        );
699        test(
700            complex_json_object,
701            &JsonNativeType::Object(JsonObjectType::from([
702                (
703                    "nested".to_string(),
704                    JsonNativeType::Object(JsonObjectType::from([(
705                        "a".to_string(),
706                        JsonNativeType::Object(JsonObjectType::from([(
707                            "b".to_string(),
708                            JsonNativeType::String,
709                        )])),
710                    )])),
711                ),
712                ("bar".to_string(), JsonNativeType::i64()),
713            ])),
714            false,
715        );
716        test(
717            complex_json_object,
718            &JsonNativeType::Object(JsonObjectType::from([
719                (
720                    "nested".to_string(),
721                    JsonNativeType::Object(JsonObjectType::from([(
722                        "a".to_string(),
723                        JsonNativeType::Object(JsonObjectType::from([(
724                            "b".to_string(),
725                            JsonNativeType::Object(JsonObjectType::from([(
726                                "c".to_string(),
727                                JsonNativeType::Null,
728                            )])),
729                        )])),
730                    )])),
731                ),
732                ("bar".to_string(), JsonNativeType::i64()),
733            ])),
734            true,
735        );
736        test(
737            complex_json_object,
738            &JsonNativeType::Object(JsonObjectType::from([
739                (
740                    "nested".to_string(),
741                    JsonNativeType::Object(JsonObjectType::from([(
742                        "a".to_string(),
743                        JsonNativeType::Object(JsonObjectType::from([(
744                            "b".to_string(),
745                            JsonNativeType::Object(JsonObjectType::from([(
746                                "c".to_string(),
747                                JsonNativeType::Bool,
748                            )])),
749                        )])),
750                    )])),
751                ),
752                ("bar".to_string(), JsonNativeType::i64()),
753            ])),
754            false,
755        );
756        test(
757            complex_json_object,
758            &JsonNativeType::Object(JsonObjectType::from([(
759                "nested".to_string(),
760                JsonNativeType::Object(JsonObjectType::from([(
761                    "a".to_string(),
762                    JsonNativeType::Object(JsonObjectType::from([(
763                        "b".to_string(),
764                        JsonNativeType::Object(JsonObjectType::from([(
765                            "c".to_string(),
766                            JsonNativeType::String,
767                        )])),
768                    )])),
769                )])),
770            )])),
771            true,
772        );
773    }
774
775    #[test]
776    fn test_merge_json_type() -> Result<()> {
777        fn test(
778            json: &str,
779            json_type: &mut JsonType,
780            expected: std::result::Result<&str, &str>,
781        ) -> Result<()> {
782            let json: serde_json::Value = serde_json::from_str(json).unwrap();
783
784            let settings = JsonStructureSettings::Structured(None);
785            let value = settings.encode(json)?;
786            let value_type = value.data_type();
787            let Some(other) = value_type.as_json() else {
788                unreachable!()
789            };
790
791            let result = json_type.merge(other);
792            match (result, expected) {
793                (Ok(()), Ok(expected)) => {
794                    assert_eq!(json_type.native_type().to_string(), expected);
795                }
796                (Err(err), Err(expected)) => {
797                    assert_eq!(err.to_string(), expected);
798                }
799                _ => unreachable!(),
800            }
801            Ok(())
802        }
803
804        // Null should be absorbed by a concrete scalar type.
805        test("true", &mut JsonType::null(), Ok(r#""<Bool>""#))?;
806
807        // Merging a null value into an existing concrete type should keep the type unchanged.
808        test(
809            "null",
810            &mut JsonType::new_json2(JsonNativeType::Bool),
811            Ok(r#""<Bool>""#),
812        )?;
813
814        // Identical number categories should stay as Number.
815        test(
816            "1",
817            &mut JsonType::new_json2(JsonNativeType::i64()),
818            Ok(r#""<Number>""#),
819        )?;
820
821        // Conflicting number categories should be lifted to Variant.
822        test(
823            "1.5",
824            &mut JsonType::new_json2(JsonNativeType::i64()),
825            Ok(r#""<Variant>""#),
826        )?;
827
828        // Object merge should preserve existing fields and append missing fields.
829        test(
830            r#"{"foo":"x"}"#,
831            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
832                "bar".to_string(),
833                JsonNativeType::i64(),
834            )]))),
835            Ok(r#"{"bar":"<Number>","foo":"<String>"}"#),
836        )?;
837
838        // Conflicting object field types should only lift that field to Variant.
839        test(
840            r#"{"foo":1}"#,
841            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
842                "foo".to_string(),
843                JsonNativeType::Bool,
844            )]))),
845            Ok(r#"{"foo":"<Variant>"}"#),
846        )?;
847
848        // Nested objects should merge recursively.
849        test(
850            r#"{"nested":{"foo":"bar"}}"#,
851            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
852                "nested".to_string(),
853                JsonNativeType::Object(JsonObjectType::from([(
854                    "bar".to_string(),
855                    JsonNativeType::Bool,
856                )])),
857            )]))),
858            Ok(r#"{"nested":{"bar":"<Bool>","foo":"<String>"}}"#),
859        )?;
860
861        // Arrays should merge their element types recursively.
862        test(
863            r#"["foo"]"#,
864            &mut JsonType::new_json2(JsonNativeType::Array(Box::new(JsonNativeType::u64()))),
865            Ok(r#"["<Variant>"]"#),
866        )?;
867
868        // Root-level incompatible types should be lifted to Variant.
869        test(
870            r#"{"foo":"bar"}"#,
871            &mut JsonType::new_json2(JsonNativeType::Bool),
872            Ok(r#""<Variant>""#),
873        )?;
874
875        // Jsonb and Json2 should not be mergeable.
876        test(
877            "true",
878            &mut JsonType::new(JsonFormat::Jsonb),
879            Err("Failed to merge JSON datatype: json format not match"),
880        )?;
881
882        Ok(())
883    }
884}