Skip to main content

datatypes/types/
json_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::BTreeMap;
16use std::fmt::{Debug, Display, Formatter};
17use std::str::FromStr;
18use std::sync::{Arc, LazyLock};
19
20use arrow::datatypes::DataType as ArrowDataType;
21use arrow_schema::{Field, Fields};
22use common_base::bytes::Bytes;
23use regex::{Captures, Regex};
24use serde::{Deserialize, Serialize};
25use snafu::ResultExt;
26
27use crate::Error;
28use crate::data_type::DataType;
29use crate::error::{
30    DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
31    UnsupportedArrowTypeSnafu,
32};
33use crate::prelude::ConcreteDataType;
34use crate::scalars::ScalarVectorBuilder;
35use crate::type_id::LogicalTypeId;
36use crate::types::StructType;
37use crate::value::Value;
38use crate::vectors::json::builder::JsonVectorBuilder;
39use crate::vectors::{BinaryVectorBuilder, MutableVector};
40
/// Type name reported by [`JsonType`] when it uses the [`JsonFormat::Jsonb`] format.
pub const JSON_TYPE_NAME: &str = "Json";
/// Prefix of the type name reported by [`JsonType`] when it uses the [`JsonFormat::Json2`]
/// format; the rendered native type is appended right after it.
const JSON2_TYPE_NAME: &str = "Json2";

/// The type of a JSON object: a mapping from field name to that field's [`JsonNativeType`].
/// Being a [`BTreeMap`], it always iterates its fields in ascending key order.
pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
45
/// The numeric representation used for a JSON number.
///
/// The declaration order of the variants determines the derived `PartialOrd`/`Ord`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JsonNumberType {
    /// Unsigned 64-bit integer.
    U64,
    /// Signed 64-bit integer.
    I64,
    /// 64-bit floating point number.
    F64,
}
52
/// The "native" (structural) type of a JSON value.
///
/// The default is [`JsonNativeType::Null`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
pub enum JsonNativeType {
    /// The JSON `null` type; also the default value of this enum.
    #[default]
    Null,
    /// The JSON boolean type.
    Bool,
    /// A JSON number, together with the numeric representation in use.
    Number(JsonNumberType),
    /// The JSON string type.
    String,
    /// A JSON array; the payload is the type of its items.
    Array(Box<JsonNativeType>),
    /// A JSON object; the payload maps each field name to the field's type.
    Object(JsonObjectType),
    /// A special (not in the JSON official specification) JSON type to indicate the "resolved" or
    /// "lifted" type of two conflicting JSON types. For example, when merging JSON types of "Bool"
    /// and "Number".
    Variant,
}
67
68impl JsonNativeType {
69    pub fn is_null(&self) -> bool {
70        matches!(self, JsonNativeType::Null)
71    }
72
73    pub fn u64() -> Self {
74        Self::Number(JsonNumberType::U64)
75    }
76
77    pub fn i64() -> Self {
78        Self::Number(JsonNumberType::I64)
79    }
80
81    pub fn f64() -> Self {
82        Self::Number(JsonNumberType::F64)
83    }
84
85    /// Merge other [JsonNativeType] into this.
86    /// Conflicting fields will be resolved to the "Variant" type.
87    pub fn merge(&mut self, other: &JsonNativeType) {
88        if self == other {
89            return;
90        }
91
92        fn merge_object(this: &mut JsonObjectType, that: &JsonObjectType) {
93            // merge "that" into "this" directly:
94            for (type_name, that_type) in that {
95                if let Some(this_type) = this.get_mut(type_name) {
96                    this_type.merge(that_type);
97                } else {
98                    this.insert(type_name.clone(), that_type.clone());
99                }
100            }
101        }
102
103        let zelf = std::mem::take(self);
104        *self = match (zelf, other) {
105            (JsonNativeType::Object(mut this), JsonNativeType::Object(that)) => {
106                merge_object(&mut this, that);
107                JsonNativeType::Object(this)
108            }
109            (JsonNativeType::Array(mut this), JsonNativeType::Array(that)) => {
110                this.merge(that);
111                JsonNativeType::Array(this)
112            }
113            (JsonNativeType::Null, that) => that.clone(),
114            (this, JsonNativeType::Null) => this,
115            (this, that) if this == *that => this,
116
117            (JsonNativeType::Number(x), JsonNativeType::Number(y)) => {
118                JsonNativeType::Number(match (x, y) {
119                    (x, y) if x == *y => x,
120                    (JsonNumberType::F64, _) | (_, JsonNumberType::F64) => JsonNumberType::F64,
121                    _ => JsonNumberType::I64,
122                })
123            }
124            _ => JsonNativeType::Variant,
125        };
126    }
127
128    pub fn as_arrow_type(&self) -> ArrowDataType {
129        match self {
130            JsonNativeType::Null => ArrowDataType::Null,
131            JsonNativeType::Bool => ArrowDataType::Boolean,
132            JsonNativeType::Number(n) => match n {
133                JsonNumberType::U64 => ArrowDataType::UInt64,
134                JsonNumberType::I64 => ArrowDataType::Int64,
135                JsonNumberType::F64 => ArrowDataType::Float64,
136            },
137            JsonNativeType::String => ArrowDataType::Utf8View,
138            JsonNativeType::Array(array) => {
139                ArrowDataType::List(Arc::new(Field::new("item", array.as_arrow_type(), true)))
140            }
141            JsonNativeType::Object(object) => {
142                let fields = object
143                    .iter()
144                    .map(|(k, v)| Arc::new(Field::new(k, v.as_arrow_type(), true)))
145                    .collect::<Vec<_>>();
146                ArrowDataType::Struct(Fields::from(fields))
147            }
148            JsonNativeType::Variant => ArrowDataType::Binary,
149        }
150    }
151}
152
153impl From<&ConcreteDataType> for JsonNativeType {
154    fn from(value: &ConcreteDataType) -> Self {
155        match value {
156            ConcreteDataType::Null(_) => JsonNativeType::Null,
157            ConcreteDataType::Boolean(_) => JsonNativeType::Bool,
158            ConcreteDataType::UInt64(_)
159            | ConcreteDataType::UInt32(_)
160            | ConcreteDataType::UInt16(_)
161            | ConcreteDataType::UInt8(_) => JsonNativeType::u64(),
162            ConcreteDataType::Int64(_)
163            | ConcreteDataType::Int32(_)
164            | ConcreteDataType::Int16(_)
165            | ConcreteDataType::Int8(_) => JsonNativeType::i64(),
166            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(),
167            ConcreteDataType::String(_) => JsonNativeType::String,
168            ConcreteDataType::List(list_type) => {
169                JsonNativeType::Array(Box::new(JsonNativeType::from(list_type.item_type())))
170            }
171            ConcreteDataType::Struct(struct_type) => JsonNativeType::Object(
172                struct_type
173                    .fields()
174                    .iter()
175                    .map(|field| (field.name().to_string(), field.data_type().into()))
176                    .collect(),
177            ),
178            ConcreteDataType::Json(json_type) => json_type.native_type().clone(),
179            ConcreteDataType::Binary(_) => JsonNativeType::Variant,
180            _ => unreachable!(),
181        }
182    }
183}
184
185impl TryFrom<&ArrowDataType> for JsonNativeType {
186    type Error = Error;
187
188    fn try_from(t: &ArrowDataType) -> Result<Self> {
189        let t = match t {
190            ArrowDataType::Null => JsonNativeType::Null,
191            ArrowDataType::Boolean => JsonNativeType::Bool,
192            ArrowDataType::Int8
193            | ArrowDataType::Int16
194            | ArrowDataType::Int32
195            | ArrowDataType::Int64 => JsonNativeType::i64(),
196            ArrowDataType::UInt8
197            | ArrowDataType::UInt16
198            | ArrowDataType::UInt32
199            | ArrowDataType::UInt64 => JsonNativeType::u64(),
200            ArrowDataType::Float16 | ArrowDataType::Float32 | ArrowDataType::Float64 => {
201                JsonNativeType::f64()
202            }
203            ArrowDataType::Binary
204            | ArrowDataType::FixedSizeBinary(_)
205            | ArrowDataType::LargeBinary
206            | ArrowDataType::BinaryView => JsonNativeType::Variant,
207            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
208                JsonNativeType::String
209            }
210            ArrowDataType::List(field)
211            | ArrowDataType::ListView(field)
212            | ArrowDataType::FixedSizeList(field, _)
213            | ArrowDataType::LargeList(field)
214            | ArrowDataType::LargeListView(field) => {
215                JsonNativeType::Array(Box::new(Self::try_from(field.data_type())?))
216            }
217            ArrowDataType::Struct(fields) => {
218                let mut object = JsonObjectType::new();
219                for field in fields {
220                    object.insert(field.name().clone(), Self::try_from(field.data_type())?);
221                }
222                JsonNativeType::Object(object)
223            }
224            t => {
225                return UnsupportedArrowTypeSnafu {
226                    arrow_type: t.clone(),
227                }
228                .fail();
229            }
230        };
231        Ok(t)
232    }
233}
234
235impl Display for JsonNativeType {
236    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
237        match self {
238            JsonNativeType::Null => write!(f, r#""<Null>""#),
239            JsonNativeType::Bool => write!(f, r#""<Bool>""#),
240            JsonNativeType::Number(_) => {
241                write!(f, r#""<Number>""#)
242            }
243            JsonNativeType::String => write!(f, r#""<String>""#),
244            JsonNativeType::Array(item_type) => write!(f, "[{}]", item_type),
245            JsonNativeType::Object(object) => {
246                write!(
247                    f,
248                    "{{{}}}",
249                    object
250                        .iter()
251                        .map(|(k, v)| format!(r#""{k}":{v}"#))
252                        .collect::<Vec<_>>()
253                        .join(",")
254                )
255            }
256            JsonNativeType::Variant => write!(f, r#""<Variant>""#),
257        }
258    }
259}
260
/// The storage format of a [`JsonType`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
pub enum JsonFormat {
    /// The jsonb format; this is the default.
    #[default]
    Jsonb,
    /// The "Json2" format, which carries the native type of the JSON data.
    Json2(Box<JsonNativeType>),
}
267
/// JsonType is a data type for JSON data.
///
/// With the (default) [`JsonFormat::Jsonb`] format it is stored as binary data of jsonb
/// format and utilizes the current binary value and vector implementation. With the
/// [`JsonFormat::Json2`] format it additionally carries the native type of the JSON data.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct JsonType {
    /// The storage format of this JSON type.
    pub format: JsonFormat,
}
274
275impl JsonType {
276    pub fn new(format: JsonFormat) -> Self {
277        Self { format }
278    }
279
280    pub(crate) fn new_json2(native: JsonNativeType) -> Self {
281        Self {
282            format: JsonFormat::Json2(Box::new(native)),
283        }
284    }
285
286    pub fn is_json2(&self) -> bool {
287        matches!(self.format, JsonFormat::Json2(_))
288    }
289
290    pub(crate) fn native_type(&self) -> &JsonNativeType {
291        match &self.format {
292            JsonFormat::Jsonb => &JsonNativeType::String,
293            JsonFormat::Json2(x) => x.as_ref(),
294        }
295    }
296
297    pub fn null() -> Self {
298        Self {
299            format: JsonFormat::Json2(Box::new(JsonNativeType::Null)),
300        }
301    }
302
303    pub(crate) fn as_struct_type(&self) -> StructType {
304        match &self.format {
305            JsonFormat::Json2(native_type) => match native_type.as_arrow_type() {
306                // TODO(LFC): Direct use Arrow's Struct datatype here.
307                ArrowDataType::Struct(fields) => StructType::from(&fields),
308                // FIXME(fys): Since writing with a non-object root is currently
309                // not supported, this temporarily returns default.
310                _ => StructType::default(),
311            },
312            JsonFormat::Jsonb => StructType::default(),
313        }
314    }
315
316    /// Try to merge this json type with others, error on datatype conflict.
317    pub fn merge(&mut self, other: &JsonType) -> Result<()> {
318        if self == other {
319            return Ok(());
320        }
321
322        match (&mut self.format, &other.format) {
323            (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
324            (JsonFormat::Json2(this), JsonFormat::Json2(that)) => {
325                this.merge(that);
326                Ok(())
327            }
328            _ => MergeJsonDatatypeSnafu {
329                reason: "json format not match",
330            }
331            .fail(),
332        }
333    }
334
335    /// Check if it includes all fields in `other` json type.
336    pub fn is_include(&self, other: &JsonType) -> bool {
337        match (&self.format, &other.format) {
338            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
339            (JsonFormat::Json2(this), JsonFormat::Json2(that)) => is_include(this, that),
340            _ => false,
341        }
342    }
343}
344
345fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
346    fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
347        for (type_name, that_type) in that {
348            let Some(this_type) = this.get(type_name) else {
349                return false;
350            };
351            if !is_include(this_type, that_type) {
352                return false;
353            }
354        }
355        true
356    }
357
358    match (this, that) {
359        (this, that) if this == that => true,
360        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => is_include(this, that),
361        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
362            is_include_object(this, that)
363        }
364        (_, JsonNativeType::Null) => true,
365        _ => false,
366    }
367}
368
369impl From<&ArrowDataType> for JsonType {
370    fn from(t: &ArrowDataType) -> Self {
371        JsonType::new_json2(JsonNativeType::from(&ConcreteDataType::from_arrow_type(t)))
372    }
373}
374
375impl DataType for JsonType {
376    fn name(&self) -> String {
377        match &self.format {
378            JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
379            JsonFormat::Json2(ty) => {
380                format!("{JSON2_TYPE_NAME}{}", ty)
381            }
382        }
383    }
384
385    fn logical_type_id(&self) -> LogicalTypeId {
386        LogicalTypeId::Json
387    }
388
389    fn default_value(&self) -> Value {
390        Bytes::default().into()
391    }
392
393    fn as_arrow_type(&self) -> ArrowDataType {
394        match self.format {
395            JsonFormat::Jsonb => ArrowDataType::Binary,
396            JsonFormat::Json2(_) => self.as_struct_type().as_arrow_type(),
397        }
398    }
399
400    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
401        match &self.format {
402            JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
403            JsonFormat::Json2(x) => Box::new(JsonVectorBuilder::new(x.as_ref().clone(), capacity)),
404        }
405    }
406
407    fn try_cast(&self, from: Value) -> Option<Value> {
408        match from {
409            Value::Binary(v) => Some(Value::Binary(v)),
410            _ => None,
411        }
412    }
413}
414
415impl Display for JsonType {
416    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
417        write!(f, "{}", self.name())
418    }
419}
420
421/// Converts a json type value to string
422pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
423    if val.is_empty() {
424        return Ok("".to_string());
425    }
426    match jsonb::from_slice(val) {
427        Ok(jsonb_value) => {
428            let serialized = jsonb_value.to_string();
429            fix_unicode_point(&serialized)
430        }
431        Err(e) => InvalidJsonbSnafu { error: e }.fail(),
432    }
433}
434
435/// Converts a json type value to serde_json::Value
436pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
437    let json_string = jsonb_to_string(val)?;
438    serde_json::Value::from_str(&json_string).context(DeserializeSnafu { json: json_string })
439}
440
441/// Normalizes a JSON string by converting Rust-style Unicode escape sequences to JSON-compatible format.
442///
443/// The input is scanned for Rust-style Unicode code
444/// point escapes of the form `\\u{H...}` (a backslash, `u`, an opening brace,
445/// followed by 1–6 hexadecimal digits, and a closing brace). Each such escape is
446/// converted into JSON-compatible UTF‑16 escape sequences:
447///
448/// - For code points in the Basic Multilingual Plane (≤ `0xFFFF`), the escape is
449///   converted to a single JSON `\\uXXXX` sequence with four uppercase hex digits.
450/// - For code points above `0xFFFF` and less than Unicode max code point `0x10FFFF`,
451///   the code point is encoded as a UTF‑16 surrogate pair and emitted as two consecutive
452///   `\\uXXXX` sequences (as JSON format required).
453///
454/// After this normalization, the function returns the normalized string
455fn fix_unicode_point(json: &str) -> Result<String> {
456    static UNICODE_CODE_POINT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
457        // Match literal "\u{...}" sequences, capturing 1–6 (code point range) hex digits
458        // inside braces.
459        Regex::new(r"\\u\{([0-9a-fA-F]{1,6})}").unwrap_or_else(|e| panic!("{}", e))
460    });
461
462    let v = UNICODE_CODE_POINT_PATTERN.replace_all(json, |caps: &Captures| {
463        // Extract the hex payload (without braces) and parse to a code point.
464        let hex = &caps[1];
465        let Ok(code) = u32::from_str_radix(hex, 16) else {
466            // On parse failure, leave the original escape sequence unchanged.
467            return caps[0].to_string();
468        };
469
470        if code <= 0xFFFF {
471            // Basic Multilingual Plane: JSON can represent this directly as \uXXXX.
472            format!("\\u{:04X}", code)
473        } else if code > 0x10FFFF {
474            // Beyond max Unicode code point
475            caps[0].to_string()
476        } else {
477            // Supplementary planes: JSON needs UTF-16 surrogate pairs.
478            // Convert the code point to a 20-bit value.
479            let code = code - 0x10000;
480
481            // High surrogate: top 10 bits, offset by 0xD800.
482            let high = 0xD800 + ((code >> 10) & 0x3FF);
483
484            // Low surrogate: bottom 10 bits, offset by 0xDC00.
485            let low = 0xDC00 + (code & 0x3FF);
486
487            // Emit two \uXXXX escapes in sequence.
488            format!("\\u{:04X}\\u{:04X}", high, low)
489        }
490    });
491    Ok(v.to_string())
492}
493
494/// Parses a string to a json type value
495pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
496    jsonb::parse_value(s.as_bytes())
497        .map_err(|_| InvalidJsonSnafu { value: s }.build())
498        .map(|json| json.to_vec())
499}
500
501#[cfg(test)]
502mod tests {
503    use super::*;
504
505    #[test]
506    fn test_fix_unicode_point() -> Result<()> {
507        let valid_cases = vec![
508            (r#"{"data": "simple ascii"}"#, r#"{"data": "simple ascii"}"#),
509            (
510                r#"{"data":"Greek sigma: \u{03a3}"}"#,
511                r#"{"data":"Greek sigma: \u03A3"}"#,
512            ),
513            (
514                r#"{"data":"Joker card: \u{1f0df}"}"#,
515                r#"{"data":"Joker card: \uD83C\uDCDF"}"#,
516            ),
517            (
518                r#"{"data":"BMP boundary: \u{ffff}"}"#,
519                r#"{"data":"BMP boundary: \uFFFF"}"#,
520            ),
521            (
522                r#"{"data":"Supplementary min: \u{10000}"}"#,
523                r#"{"data":"Supplementary min: \uD800\uDC00"}"#,
524            ),
525            (
526                r#"{"data":"Supplementary max: \u{10ffff}"}"#,
527                r#"{"data":"Supplementary max: \uDBFF\uDFFF"}"#,
528            ),
529        ];
530        for (input, expect) in valid_cases {
531            let v = fix_unicode_point(input)?;
532            assert_eq!(v, expect);
533        }
534
535        let invalid_escape_cases = vec![
536            (
537                r#"{"data": "Invalid hex: \u{gggg}"}"#,
538                r#"{"data": "Invalid hex: \u{gggg}"}"#,
539            ),
540            (
541                r#"{"data": "Empty braces: \u{}"}"#,
542                r#"{"data": "Empty braces: \u{}"}"#,
543            ),
544            (
545                r#"{"data": "Out of range: \u{1100000}"}"#,
546                r#"{"data": "Out of range: \u{1100000}"}"#,
547            ),
548        ];
549        for (input, expect) in invalid_escape_cases {
550            let v = fix_unicode_point(input)?;
551            assert_eq!(v, expect);
552        }
553
554        Ok(())
555    }
556
557    #[test]
558    fn test_json_type_include() {
559        fn test(this: &JsonNativeType, that: &JsonNativeType, expected: bool) {
560            assert_eq!(is_include(this, that), expected, "this={this}, that={that}");
561        }
562
563        test(&JsonNativeType::Null, &JsonNativeType::Null, true);
564        test(&JsonNativeType::Null, &JsonNativeType::Bool, false);
565        test(&JsonNativeType::Bool, &JsonNativeType::Null, true);
566
567        test(&JsonNativeType::Bool, &JsonNativeType::Bool, true);
568        test(&JsonNativeType::Bool, &JsonNativeType::u64(), false);
569
570        test(&JsonNativeType::u64(), &JsonNativeType::Null, true);
571        test(&JsonNativeType::u64(), &JsonNativeType::u64(), true);
572        test(&JsonNativeType::u64(), &JsonNativeType::String, false);
573
574        test(&JsonNativeType::String, &JsonNativeType::Null, true);
575        test(&JsonNativeType::String, &JsonNativeType::String, true);
576        test(
577            &JsonNativeType::String,
578            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
579            false,
580        );
581
582        test(
583            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
584            &JsonNativeType::Null,
585            true,
586        );
587        test(
588            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
589            &JsonNativeType::Array(Box::new(JsonNativeType::Null)),
590            true,
591        );
592        test(
593            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
594            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
595            true,
596        );
597        test(
598            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
599            &JsonNativeType::String,
600            false,
601        );
602        test(
603            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
604            &JsonNativeType::Object(JsonObjectType::new()),
605            false,
606        );
607
608        let simple_json_object = &JsonNativeType::Object(JsonObjectType::from([(
609            "foo".to_string(),
610            JsonNativeType::String,
611        )]));
612        test(simple_json_object, simple_json_object, true);
613        test(simple_json_object, &JsonNativeType::i64(), false);
614        test(
615            simple_json_object,
616            &JsonNativeType::Object(JsonObjectType::from([(
617                "bar".to_string(),
618                JsonNativeType::i64(),
619            )])),
620            false,
621        );
622
623        let complex_json_object = &JsonNativeType::Object(JsonObjectType::from([
624            (
625                "nested".to_string(),
626                JsonNativeType::Object(JsonObjectType::from([(
627                    "a".to_string(),
628                    JsonNativeType::Object(JsonObjectType::from([(
629                        "b".to_string(),
630                        JsonNativeType::Object(JsonObjectType::from([(
631                            "c".to_string(),
632                            JsonNativeType::String,
633                        )])),
634                    )])),
635                )])),
636            ),
637            ("bar".to_string(), JsonNativeType::i64()),
638        ]));
639        test(simple_json_object, &JsonNativeType::Null, true);
640        test(complex_json_object, &JsonNativeType::String, false);
641        test(complex_json_object, complex_json_object, true);
642        test(
643            complex_json_object,
644            &JsonNativeType::Object(JsonObjectType::from([(
645                "bar".to_string(),
646                JsonNativeType::i64(),
647            )])),
648            true,
649        );
650        test(
651            complex_json_object,
652            &JsonNativeType::Object(JsonObjectType::from([
653                (
654                    "nested".to_string(),
655                    JsonNativeType::Object(JsonObjectType::from([(
656                        "a".to_string(),
657                        JsonNativeType::Null,
658                    )])),
659                ),
660                ("bar".to_string(), JsonNativeType::i64()),
661            ])),
662            true,
663        );
664        test(
665            complex_json_object,
666            &JsonNativeType::Object(JsonObjectType::from([
667                (
668                    "nested".to_string(),
669                    JsonNativeType::Object(JsonObjectType::from([(
670                        "a".to_string(),
671                        JsonNativeType::String,
672                    )])),
673                ),
674                ("bar".to_string(), JsonNativeType::i64()),
675            ])),
676            false,
677        );
678        test(
679            complex_json_object,
680            &JsonNativeType::Object(JsonObjectType::from([
681                (
682                    "nested".to_string(),
683                    JsonNativeType::Object(JsonObjectType::from([(
684                        "a".to_string(),
685                        JsonNativeType::Object(JsonObjectType::from([(
686                            "b".to_string(),
687                            JsonNativeType::String,
688                        )])),
689                    )])),
690                ),
691                ("bar".to_string(), JsonNativeType::i64()),
692            ])),
693            false,
694        );
695        test(
696            complex_json_object,
697            &JsonNativeType::Object(JsonObjectType::from([
698                (
699                    "nested".to_string(),
700                    JsonNativeType::Object(JsonObjectType::from([(
701                        "a".to_string(),
702                        JsonNativeType::Object(JsonObjectType::from([(
703                            "b".to_string(),
704                            JsonNativeType::Object(JsonObjectType::from([(
705                                "c".to_string(),
706                                JsonNativeType::Null,
707                            )])),
708                        )])),
709                    )])),
710                ),
711                ("bar".to_string(), JsonNativeType::i64()),
712            ])),
713            true,
714        );
715        test(
716            complex_json_object,
717            &JsonNativeType::Object(JsonObjectType::from([
718                (
719                    "nested".to_string(),
720                    JsonNativeType::Object(JsonObjectType::from([(
721                        "a".to_string(),
722                        JsonNativeType::Object(JsonObjectType::from([(
723                            "b".to_string(),
724                            JsonNativeType::Object(JsonObjectType::from([(
725                                "c".to_string(),
726                                JsonNativeType::Bool,
727                            )])),
728                        )])),
729                    )])),
730                ),
731                ("bar".to_string(), JsonNativeType::i64()),
732            ])),
733            false,
734        );
735        test(
736            complex_json_object,
737            &JsonNativeType::Object(JsonObjectType::from([(
738                "nested".to_string(),
739                JsonNativeType::Object(JsonObjectType::from([(
740                    "a".to_string(),
741                    JsonNativeType::Object(JsonObjectType::from([(
742                        "b".to_string(),
743                        JsonNativeType::Object(JsonObjectType::from([(
744                            "c".to_string(),
745                            JsonNativeType::String,
746                        )])),
747                    )])),
748                )])),
749            )])),
750            true,
751        );
752    }
753
754    #[test]
755    fn test_merge_json_type() -> Result<()> {
756        fn test(
757            other: JsonType,
758            json_type: &mut JsonType,
759            expected: std::result::Result<&str, &str>,
760        ) -> Result<()> {
761            let result = json_type.merge(&other);
762            match (result, expected) {
763                (Ok(()), Ok(expected)) => {
764                    assert_eq!(json_type.native_type().to_string(), expected);
765                }
766                (Err(err), Err(expected)) => {
767                    assert_eq!(err.to_string(), expected);
768                }
769                _ => unreachable!(),
770            }
771            Ok(())
772        }
773
774        // Null should be absorbed by a concrete scalar type.
775        test(
776            JsonType::new_json2(JsonNativeType::Bool),
777            &mut JsonType::null(),
778            Ok(r#""<Bool>""#),
779        )?;
780
781        // Merging a null value into an existing concrete type should keep the type unchanged.
782        test(
783            JsonType::null(),
784            &mut JsonType::new_json2(JsonNativeType::Bool),
785            Ok(r#""<Bool>""#),
786        )?;
787
788        // Identical number categories should stay as Number.
789        test(
790            JsonType::new_json2(JsonNativeType::i64()),
791            &mut JsonType::new_json2(JsonNativeType::i64()),
792            Ok(r#""<Number>""#),
793        )?;
794
795        // Conflicting number categories should be lifted to Variant.
796        test(
797            JsonType::new_json2(JsonNativeType::f64()),
798            &mut JsonType::new_json2(JsonNativeType::i64()),
799            Ok(r#""<Number>""#),
800        )?;
801
802        // Object merge should preserve existing fields and append missing fields.
803        test(
804            JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
805                "foo".to_string(),
806                JsonNativeType::String,
807            )]))),
808            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
809                "bar".to_string(),
810                JsonNativeType::i64(),
811            )]))),
812            Ok(r#"{"bar":"<Number>","foo":"<String>"}"#),
813        )?;
814
815        // Conflicting object field types should only lift that field to Variant.
816        test(
817            JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
818                "foo".to_string(),
819                JsonNativeType::i64(),
820            )]))),
821            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
822                "foo".to_string(),
823                JsonNativeType::Bool,
824            )]))),
825            Ok(r#"{"foo":"<Variant>"}"#),
826        )?;
827
828        // Nested objects should merge recursively.
829        test(
830            JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
831                "nested".to_string(),
832                JsonNativeType::Object(JsonObjectType::from([(
833                    "foo".to_string(),
834                    JsonNativeType::String,
835                )])),
836            )]))),
837            &mut JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
838                "nested".to_string(),
839                JsonNativeType::Object(JsonObjectType::from([(
840                    "bar".to_string(),
841                    JsonNativeType::Bool,
842                )])),
843            )]))),
844            Ok(r#"{"nested":{"bar":"<Bool>","foo":"<String>"}}"#),
845        )?;
846
847        // Arrays should merge their element types recursively.
848        test(
849            JsonType::new_json2(JsonNativeType::Array(Box::new(JsonNativeType::String))),
850            &mut JsonType::new_json2(JsonNativeType::Array(Box::new(JsonNativeType::u64()))),
851            Ok(r#"["<Variant>"]"#),
852        )?;
853
854        // Root-level incompatible types should be lifted to Variant.
855        test(
856            JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(
857                "foo".to_string(),
858                JsonNativeType::String,
859            )]))),
860            &mut JsonType::new_json2(JsonNativeType::Bool),
861            Ok(r#""<Variant>""#),
862        )?;
863
864        // Jsonb and Json2 should not be mergeable.
865        test(
866            JsonType::new_json2(JsonNativeType::Bool),
867            &mut JsonType::new(JsonFormat::Jsonb),
868            Err("Failed to merge JSON datatype: json format not match"),
869        )?;
870
871        Ok(())
872    }
873}