Skip to main content

datatypes/types/
json_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::BTreeMap;
16use std::fmt::{Debug, Display, Formatter};
17use std::str::FromStr;
18use std::sync::{Arc, LazyLock};
19
20use arrow::datatypes::DataType as ArrowDataType;
21use common_base::bytes::Bytes;
22use regex::{Captures, Regex};
23use serde::{Deserialize, Serialize};
24use snafu::ResultExt;
25
26use crate::data_type::DataType;
27use crate::error::{
28    DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
29};
30use crate::prelude::ConcreteDataType;
31use crate::scalars::ScalarVectorBuilder;
32use crate::type_id::LogicalTypeId;
33use crate::types::{ListType, StructField, StructType};
34use crate::value::Value;
35use crate::vectors::json::builder::JsonVectorBuilder;
36use crate::vectors::{BinaryVectorBuilder, MutableVector};
37
/// Type name reported by [`JsonType`] when its format is [`JsonFormat::Jsonb`].
pub const JSON_TYPE_NAME: &str = "Json";
/// Prefix of the type name reported by [`JsonType`] when its format is [`JsonFormat::Json2`].
const JSON2_TYPE_NAME: &str = "Json2";
/// Name of the single struct field that wraps a "plain" (non-object) JSON value.
const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
/// Metadata key attached to the plain JSON field to mark it as such.
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";

/// The shape of a JSON object: field names mapped to the native type of each field.
pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
44
/// The numeric categories a JSON number can be classified into.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JsonNumberType {
    /// Unsigned integer; maps to the `UInt64` concrete datatype.
    U64,
    /// Signed integer; maps to the `Int64` concrete datatype.
    I64,
    /// Floating point number; maps to the `Float64` concrete datatype.
    F64,
}
51
/// The structural type of a JSON value, described recursively for arrays and objects.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JsonNativeType {
    /// The JSON `null` type.
    Null,
    /// The JSON boolean type.
    Bool,
    /// A JSON number, refined by its numeric category.
    Number(JsonNumberType),
    /// The JSON string type.
    String,
    /// A JSON array, carrying the type of its items.
    Array(Box<JsonNativeType>),
    /// A JSON object, carrying the type of each of its fields.
    Object(JsonObjectType),
    /// A special (not in the JSON official specification) JSON type to indicate the "resolved" or
    /// "lifted" type of two conflicting JSON types. For example, when merging JSON types of "Bool"
    /// and "Number".
    Variant,
}
65
66impl JsonNativeType {
67    pub fn is_null(&self) -> bool {
68        matches!(self, JsonNativeType::Null)
69    }
70
71    pub fn u64() -> Self {
72        Self::Number(JsonNumberType::U64)
73    }
74
75    pub fn i64() -> Self {
76        Self::Number(JsonNumberType::I64)
77    }
78
79    pub fn f64() -> Self {
80        Self::Number(JsonNumberType::F64)
81    }
82}
83
84impl From<&JsonNativeType> for ConcreteDataType {
85    fn from(value: &JsonNativeType) -> Self {
86        match value {
87            JsonNativeType::Null => ConcreteDataType::null_datatype(),
88            JsonNativeType::Bool => ConcreteDataType::boolean_datatype(),
89            JsonNativeType::Number(JsonNumberType::U64) => ConcreteDataType::uint64_datatype(),
90            JsonNativeType::Number(JsonNumberType::I64) => ConcreteDataType::int64_datatype(),
91            JsonNativeType::Number(JsonNumberType::F64) => ConcreteDataType::float64_datatype(),
92            JsonNativeType::String => ConcreteDataType::string_datatype(),
93            JsonNativeType::Array(item_type) => {
94                ConcreteDataType::List(ListType::new(Arc::new(item_type.as_ref().into())))
95            }
96            JsonNativeType::Object(object) => {
97                let fields = object
98                    .iter()
99                    .map(|(type_name, field_type)| {
100                        StructField::new(type_name.clone(), field_type.into(), true)
101                    })
102                    .collect();
103                ConcreteDataType::Struct(StructType::new(Arc::new(fields)))
104            }
105            JsonNativeType::Variant => ConcreteDataType::binary_datatype(),
106        }
107    }
108}
109
110impl From<&ConcreteDataType> for JsonNativeType {
111    fn from(value: &ConcreteDataType) -> Self {
112        match value {
113            ConcreteDataType::Null(_) => JsonNativeType::Null,
114            ConcreteDataType::Boolean(_) => JsonNativeType::Bool,
115            ConcreteDataType::UInt64(_)
116            | ConcreteDataType::UInt32(_)
117            | ConcreteDataType::UInt16(_)
118            | ConcreteDataType::UInt8(_) => JsonNativeType::u64(),
119            ConcreteDataType::Int64(_)
120            | ConcreteDataType::Int32(_)
121            | ConcreteDataType::Int16(_)
122            | ConcreteDataType::Int8(_) => JsonNativeType::i64(),
123            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(),
124            ConcreteDataType::String(_) => JsonNativeType::String,
125            ConcreteDataType::List(list_type) => {
126                JsonNativeType::Array(Box::new(list_type.item_type().into()))
127            }
128            ConcreteDataType::Struct(struct_type) => JsonNativeType::Object(
129                struct_type
130                    .fields()
131                    .iter()
132                    .map(|field| (field.name().to_string(), field.data_type().into()))
133                    .collect(),
134            ),
135            ConcreteDataType::Json(json_type) => json_type.native_type().clone(),
136            ConcreteDataType::Binary(_) => JsonNativeType::Variant,
137            _ => unreachable!(),
138        }
139    }
140}
141
142impl Display for JsonNativeType {
143    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
144        match self {
145            JsonNativeType::Null => write!(f, r#""<Null>""#),
146            JsonNativeType::Bool => write!(f, r#""<Bool>""#),
147            JsonNativeType::Number(_) => {
148                write!(f, r#""<Number>""#)
149            }
150            JsonNativeType::String => write!(f, r#""<String>""#),
151            JsonNativeType::Array(item_type) => {
152                write!(f, "[{}]", item_type)
153            }
154            JsonNativeType::Object(object) => {
155                write!(
156                    f,
157                    "{{{}}}",
158                    object
159                        .iter()
160                        .map(|(k, v)| format!(r#""{k}":{v}"#))
161                        .collect::<Vec<_>>()
162                        .join(",")
163                )
164            }
165            JsonNativeType::Variant => write!(f, r#""<Variant>""#),
166        }
167    }
168}
169
/// The representation used by a [`JsonType`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
pub enum JsonFormat {
    /// JSON kept as binary data; this is the default format.
    #[default]
    Jsonb,
    /// JSON described by a native type, which determines its struct representation.
    Json2(Box<JsonNativeType>),
}
176
/// JsonType is a data type for JSON data.
///
/// In the [`JsonFormat::Jsonb`] format it is stored as binary data of jsonb format and utilizes
/// the current binary value and vector implementation. In the [`JsonFormat::Json2`] format it
/// carries a [`JsonNativeType`] and is represented as a struct type.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct JsonType {
    /// The representation of this JSON type.
    pub format: JsonFormat,
}
183
184impl JsonType {
185    pub fn new(format: JsonFormat) -> Self {
186        Self { format }
187    }
188
189    pub(crate) fn new_json2(native: JsonNativeType) -> Self {
190        Self {
191            format: JsonFormat::Json2(Box::new(native)),
192        }
193    }
194
195    pub fn is_json2(&self) -> bool {
196        matches!(self.format, JsonFormat::Json2(_))
197    }
198
199    pub(crate) fn native_type(&self) -> &JsonNativeType {
200        match &self.format {
201            JsonFormat::Jsonb => &JsonNativeType::String,
202            JsonFormat::Json2(x) => x.as_ref(),
203        }
204    }
205
206    pub fn null() -> Self {
207        Self {
208            format: JsonFormat::Json2(Box::new(JsonNativeType::Null)),
209        }
210    }
211
212    /// Make json type a struct type, by:
213    /// - if the json is an object, its entries are mapped to struct fields, obviously;
214    /// - if not, the json is one of bool, number, string or array, make it a special field
215    ///   (see [plain_json_struct_type]).
216    pub(crate) fn as_struct_type(&self) -> StructType {
217        match &self.format {
218            JsonFormat::Jsonb => StructType::default(),
219            JsonFormat::Json2(inner) => match ConcreteDataType::from(inner.as_ref()) {
220                ConcreteDataType::Struct(t) => t.clone(),
221                x => plain_json_struct_type(x),
222            },
223        }
224    }
225
226    /// Try to merge this json type with others, error on datatype conflict.
227    pub fn merge(&mut self, other: &JsonType) -> Result<()> {
228        if self == other {
229            return Ok(());
230        }
231
232        match (&self.format, &other.format) {
233            (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
234            (JsonFormat::Json2(this), JsonFormat::Json2(that)) => {
235                let merged = merge(this.as_ref(), that.as_ref());
236                self.format = JsonFormat::Json2(Box::new(merged));
237                Ok(())
238            }
239            _ => MergeJsonDatatypeSnafu {
240                reason: "json format not match",
241            }
242            .fail(),
243        }
244    }
245
246    /// Check if it includes all fields in `other` json type.
247    pub fn is_include(&self, other: &JsonType) -> bool {
248        match (&self.format, &other.format) {
249            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
250            (JsonFormat::Json2(this), JsonFormat::Json2(that)) => {
251                is_include(this.as_ref(), that.as_ref())
252            }
253            _ => false,
254        }
255    }
256}
257
258fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
259    fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
260        for (type_name, that_type) in that {
261            let Some(this_type) = this.get(type_name) else {
262                return false;
263            };
264            if !is_include(this_type, that_type) {
265                return false;
266            }
267        }
268        true
269    }
270
271    match (this, that) {
272        (this, that) if this == that => true,
273        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
274            is_include(this.as_ref(), that.as_ref())
275        }
276        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
277            is_include_object(this, that)
278        }
279        (_, JsonNativeType::Null) => true,
280        _ => false,
281    }
282}
283
284/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
285/// fixed name [JSON_PLAIN_FIELD_NAME] and with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"`.
286pub(crate) fn plain_json_struct_type(item_type: ConcreteDataType) -> StructType {
287    let mut field = StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), item_type, true);
288    field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
289    StructType::new(Arc::new(vec![field]))
290}
291
292fn merge(this: &JsonNativeType, that: &JsonNativeType) -> JsonNativeType {
293    fn merge_object(this: &JsonObjectType, that: &JsonObjectType) -> JsonObjectType {
294        let mut this = this.clone();
295        // merge "that" into "this" directly:
296        for (type_name, that_type) in that {
297            if let Some(this_type) = this.get_mut(type_name) {
298                let merged_type = merge(this_type, that_type);
299                *this_type = merged_type;
300            } else {
301                this.insert(type_name.clone(), that_type.clone());
302            }
303        }
304        this
305    }
306
307    match (this, that) {
308        (this, that) if this == that => this.clone(),
309        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
310            JsonNativeType::Array(Box::new(merge(this.as_ref(), that.as_ref())))
311        }
312        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
313            JsonNativeType::Object(merge_object(this, that))
314        }
315        (JsonNativeType::Null, x) | (x, JsonNativeType::Null) => x.clone(),
316        _ => JsonNativeType::Variant,
317    }
318}
319
320impl From<&ArrowDataType> for JsonType {
321    fn from(t: &ArrowDataType) -> Self {
322        JsonType::new_json2(JsonNativeType::from(&ConcreteDataType::from_arrow_type(t)))
323    }
324}
325
326impl DataType for JsonType {
327    fn name(&self) -> String {
328        match &self.format {
329            JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
330            JsonFormat::Json2(x) => format!(
331                "{JSON2_TYPE_NAME}{}",
332                if x.is_null() {
333                    "".to_string()
334                } else {
335                    x.to_string()
336                }
337            ),
338        }
339    }
340
341    fn logical_type_id(&self) -> LogicalTypeId {
342        LogicalTypeId::Json
343    }
344
345    fn default_value(&self) -> Value {
346        Bytes::default().into()
347    }
348
349    fn as_arrow_type(&self) -> ArrowDataType {
350        match self.format {
351            JsonFormat::Jsonb => ArrowDataType::Binary,
352            JsonFormat::Json2(_) => self.as_struct_type().as_arrow_type(),
353        }
354    }
355
356    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
357        match &self.format {
358            JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
359            JsonFormat::Json2(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
360        }
361    }
362
363    fn try_cast(&self, from: Value) -> Option<Value> {
364        match from {
365            Value::Binary(v) => Some(Value::Binary(v)),
366            _ => None,
367        }
368    }
369}
370
371impl Display for JsonType {
372    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
373        write!(f, "{}", self.name())
374    }
375}
376
377/// Converts a json type value to string
378pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
379    if val.is_empty() {
380        return Ok("".to_string());
381    }
382    match jsonb::from_slice(val) {
383        Ok(jsonb_value) => {
384            let serialized = jsonb_value.to_string();
385            fix_unicode_point(&serialized)
386        }
387        Err(e) => InvalidJsonbSnafu { error: e }.fail(),
388    }
389}
390
391/// Converts a json type value to serde_json::Value
392pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
393    let json_string = jsonb_to_string(val)?;
394    serde_json::Value::from_str(&json_string).context(DeserializeSnafu { json: json_string })
395}
396
397/// Normalizes a JSON string by converting Rust-style Unicode escape sequences to JSON-compatible format.
398///
399/// The input is scanned for Rust-style Unicode code
400/// point escapes of the form `\\u{H...}` (a backslash, `u`, an opening brace,
401/// followed by 1–6 hexadecimal digits, and a closing brace). Each such escape is
402/// converted into JSON-compatible UTF‑16 escape sequences:
403///
404/// - For code points in the Basic Multilingual Plane (≤ `0xFFFF`), the escape is
405///   converted to a single JSON `\\uXXXX` sequence with four uppercase hex digits.
406/// - For code points above `0xFFFF` and less than Unicode max code point `0x10FFFF`,
407///   the code point is encoded as a UTF‑16 surrogate pair and emitted as two consecutive
408///   `\\uXXXX` sequences (as JSON format required).
409///
410/// After this normalization, the function returns the normalized string
411fn fix_unicode_point(json: &str) -> Result<String> {
412    static UNICODE_CODE_POINT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
413        // Match literal "\u{...}" sequences, capturing 1–6 (code point range) hex digits
414        // inside braces.
415        Regex::new(r"\\u\{([0-9a-fA-F]{1,6})}").unwrap_or_else(|e| panic!("{}", e))
416    });
417
418    let v = UNICODE_CODE_POINT_PATTERN.replace_all(json, |caps: &Captures| {
419        // Extract the hex payload (without braces) and parse to a code point.
420        let hex = &caps[1];
421        let Ok(code) = u32::from_str_radix(hex, 16) else {
422            // On parse failure, leave the original escape sequence unchanged.
423            return caps[0].to_string();
424        };
425
426        if code <= 0xFFFF {
427            // Basic Multilingual Plane: JSON can represent this directly as \uXXXX.
428            format!("\\u{:04X}", code)
429        } else if code > 0x10FFFF {
430            // Beyond max Unicode code point
431            caps[0].to_string()
432        } else {
433            // Supplementary planes: JSON needs UTF-16 surrogate pairs.
434            // Convert the code point to a 20-bit value.
435            let code = code - 0x10000;
436
437            // High surrogate: top 10 bits, offset by 0xD800.
438            let high = 0xD800 + ((code >> 10) & 0x3FF);
439
440            // Low surrogate: bottom 10 bits, offset by 0xDC00.
441            let low = 0xDC00 + (code & 0x3FF);
442
443            // Emit two \uXXXX escapes in sequence.
444            format!("\\u{:04X}\\u{:04X}", high, low)
445        }
446    });
447    Ok(v.to_string())
448}
449
450/// Parses a string to a json type value
451pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
452    jsonb::parse_value(s.as_bytes())
453        .map_err(|_| InvalidJsonSnafu { value: s }.build())
454        .map(|json| json.to_vec())
455}
456
#[cfg(test)]
mod tests {
    use super::*;
    use crate::json::JsonStructureSettings;

    /// Builds an object type from `(field name, field type)` pairs.
    fn object_of<const N: usize>(fields: [(&str, JsonNativeType); N]) -> JsonNativeType {
        let entries = fields
            .into_iter()
            .map(|(name, field_type)| (name.to_string(), field_type));
        JsonNativeType::Object(entries.collect())
    }

    /// Builds an array type with the given item type.
    fn array_of(item: JsonNativeType) -> JsonNativeType {
        JsonNativeType::Array(Box::new(item))
    }

    #[test]
    fn test_fix_unicode_point() -> Result<()> {
        let cases = [
            // Valid escapes are converted; plain text is untouched.
            (r#"{"data": "simple ascii"}"#, r#"{"data": "simple ascii"}"#),
            (
                r#"{"data":"Greek sigma: \u{03a3}"}"#,
                r#"{"data":"Greek sigma: \u03A3"}"#,
            ),
            (
                r#"{"data":"Joker card: \u{1f0df}"}"#,
                r#"{"data":"Joker card: \uD83C\uDCDF"}"#,
            ),
            (
                r#"{"data":"BMP boundary: \u{ffff}"}"#,
                r#"{"data":"BMP boundary: \uFFFF"}"#,
            ),
            (
                r#"{"data":"Supplementary min: \u{10000}"}"#,
                r#"{"data":"Supplementary min: \uD800\uDC00"}"#,
            ),
            (
                r#"{"data":"Supplementary max: \u{10ffff}"}"#,
                r#"{"data":"Supplementary max: \uDBFF\uDFFF"}"#,
            ),
            // Invalid escapes are left exactly as they were.
            (
                r#"{"data": "Invalid hex: \u{gggg}"}"#,
                r#"{"data": "Invalid hex: \u{gggg}"}"#,
            ),
            (
                r#"{"data": "Empty braces: \u{}"}"#,
                r#"{"data": "Empty braces: \u{}"}"#,
            ),
            (
                r#"{"data": "Out of range: \u{1100000}"}"#,
                r#"{"data": "Out of range: \u{1100000}"}"#,
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(fix_unicode_point(input)?, expected);
        }

        Ok(())
    }

    #[test]
    fn test_json_type_include() {
        fn check(this: &JsonNativeType, that: &JsonNativeType, expected: bool) {
            assert_eq!(is_include(this, that), expected);
        }

        /// `{"nested": {"a": <a_type>}, "bar": i64}`
        fn nested_with_bar(a_type: JsonNativeType) -> JsonNativeType {
            object_of([
                ("nested", object_of([("a", a_type)])),
                ("bar", JsonNativeType::i64()),
            ])
        }

        /// `{"b": {"c": <c_type>}}`
        fn b_c(c_type: JsonNativeType) -> JsonNativeType {
            object_of([("b", object_of([("c", c_type)]))])
        }

        let null = JsonNativeType::Null;
        let boolean = JsonNativeType::Bool;
        let string = JsonNativeType::String;
        let unsigned = JsonNativeType::u64();
        let float_array = array_of(JsonNativeType::f64());

        check(&null, &null, true);
        check(&null, &boolean, false);

        check(&boolean, &null, true);
        check(&boolean, &boolean, true);
        check(&boolean, &unsigned, false);

        check(&unsigned, &null, true);
        check(&unsigned, &unsigned, true);
        check(&unsigned, &string, false);

        check(&string, &null, true);
        check(&string, &string, true);
        check(&string, &float_array, false);

        check(&float_array, &null, true);
        check(&float_array, &array_of(JsonNativeType::Null), true);
        check(&float_array, &array_of(JsonNativeType::f64()), true);
        check(&float_array, &string, false);
        check(&float_array, &object_of([]), false);

        let simple_json_object = object_of([("foo", JsonNativeType::String)]);
        check(&simple_json_object, &null, true);
        check(&simple_json_object, &simple_json_object, true);
        check(&simple_json_object, &JsonNativeType::i64(), false);
        check(
            &simple_json_object,
            &object_of([("bar", JsonNativeType::i64())]),
            false,
        );

        // {"nested": {"a": {"b": {"c": String}}}, "bar": i64}
        let complex_json_object = nested_with_bar(b_c(JsonNativeType::String));
        check(&complex_json_object, &null, true);
        check(&complex_json_object, &string, false);
        check(&complex_json_object, &complex_json_object, true);
        check(
            &complex_json_object,
            &object_of([("bar", JsonNativeType::i64())]),
            true,
        );
        check(
            &complex_json_object,
            &nested_with_bar(JsonNativeType::Null),
            true,
        );
        check(
            &complex_json_object,
            &nested_with_bar(JsonNativeType::String),
            false,
        );
        check(
            &complex_json_object,
            &nested_with_bar(object_of([("b", JsonNativeType::String)])),
            false,
        );
        check(
            &complex_json_object,
            &nested_with_bar(b_c(JsonNativeType::Null)),
            true,
        );
        check(
            &complex_json_object,
            &nested_with_bar(b_c(JsonNativeType::Bool)),
            false,
        );
        check(
            &complex_json_object,
            &object_of([(
                "nested",
                object_of([("a", b_c(JsonNativeType::String))]),
            )]),
            true,
        );
    }

    #[test]
    fn test_merge_json_type() -> Result<()> {
        /// Encodes `json`, merges its type into `json_type` and compares the outcome with
        /// `expected`: the rendered native type on success, the error message on failure.
        fn check_merge(
            json: &str,
            json_type: &mut JsonType,
            expected: std::result::Result<&str, &str>,
        ) -> Result<()> {
            let json: serde_json::Value = serde_json::from_str(json).unwrap();

            let value = JsonStructureSettings::Structured(None).encode(json)?;
            let value_type = value.data_type();
            let Some(other) = value_type.as_json() else {
                unreachable!()
            };

            match (json_type.merge(other), expected) {
                (Ok(()), Ok(expected)) => {
                    assert_eq!(json_type.native_type().to_string(), expected);
                }
                (Err(err), Err(expected)) => {
                    assert_eq!(err.to_string(), expected);
                }
                _ => unreachable!(),
            }
            Ok(())
        }

        // Null should be absorbed by a concrete scalar type.
        check_merge("true", &mut JsonType::null(), Ok(r#""<Bool>""#))?;

        // Merging a null value into an existing concrete type should keep the type unchanged.
        check_merge(
            "null",
            &mut JsonType::new_json2(JsonNativeType::Bool),
            Ok(r#""<Bool>""#),
        )?;

        // Identical number categories should stay as Number.
        check_merge(
            "1",
            &mut JsonType::new_json2(JsonNativeType::i64()),
            Ok(r#""<Number>""#),
        )?;

        // Conflicting number categories should be lifted to Variant.
        check_merge(
            "1.5",
            &mut JsonType::new_json2(JsonNativeType::i64()),
            Ok(r#""<Variant>""#),
        )?;

        // Object merge should preserve existing fields and append missing fields.
        check_merge(
            r#"{"foo":"x"}"#,
            &mut JsonType::new_json2(object_of([("bar", JsonNativeType::i64())])),
            Ok(r#"{"bar":"<Number>","foo":"<String>"}"#),
        )?;

        // Conflicting object field types should only lift that field to Variant.
        check_merge(
            r#"{"foo":1}"#,
            &mut JsonType::new_json2(object_of([("foo", JsonNativeType::Bool)])),
            Ok(r#"{"foo":"<Variant>"}"#),
        )?;

        // Nested objects should merge recursively.
        check_merge(
            r#"{"nested":{"foo":"bar"}}"#,
            &mut JsonType::new_json2(object_of([(
                "nested",
                object_of([("bar", JsonNativeType::Bool)]),
            )])),
            Ok(r#"{"nested":{"bar":"<Bool>","foo":"<String>"}}"#),
        )?;

        // Arrays should merge their element types recursively.
        check_merge(
            r#"["foo"]"#,
            &mut JsonType::new_json2(array_of(JsonNativeType::u64())),
            Ok(r#"["<Variant>"]"#),
        )?;

        // Root-level incompatible types should be lifted to Variant.
        check_merge(
            r#"{"foo":"bar"}"#,
            &mut JsonType::new_json2(JsonNativeType::Bool),
            Ok(r#""<Variant>""#),
        )?;

        // Jsonb and Json2 should not be mergeable.
        check_merge(
            "true",
            &mut JsonType::new(JsonFormat::Jsonb),
            Err("Failed to merge JSON datatype: json format not match"),
        )?;

        Ok(())
    }
}
821}