datatypes/types/
json_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::BTreeMap;
16use std::fmt::{Debug, Display, Formatter};
17use std::str::FromStr;
18use std::sync::{Arc, LazyLock};
19
20use arrow::datatypes::DataType as ArrowDataType;
21use common_base::bytes::Bytes;
22use regex::{Captures, Regex};
23use serde::{Deserialize, Serialize};
24use snafu::ResultExt;
25
26use crate::data_type::DataType;
27use crate::error::{
28    DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
29};
30use crate::prelude::ConcreteDataType;
31use crate::scalars::ScalarVectorBuilder;
32use crate::type_id::LogicalTypeId;
33use crate::types::{ListType, StructField, StructType};
34use crate::value::Value;
35use crate::vectors::json::builder::JsonVectorBuilder;
36use crate::vectors::{BinaryVectorBuilder, MutableVector};
37
/// Name of the JSON data type; returned as-is by `JsonType::name()` for the JSONB format.
pub const JSON_TYPE_NAME: &str = "Json";
/// Name of the single field of the struct type built by [`plain_json_struct_type`].
const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
/// Metadata key that [`plain_json_struct_type`] sets on that single field.
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";

/// Fields of a JSON object type, keyed by field name. Being a `BTreeMap`, it iterates in key
/// order.
pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
43
/// The numeric flavors a JSON number can take in the native JSON type.
///
/// The variants convert to the 64-bit unsigned, signed and floating-point concrete datatypes
/// respectively (see `From<&JsonNativeType> for ConcreteDataType`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JsonNumberType {
    /// Unsigned 64-bit integer.
    U64,
    /// Signed 64-bit integer.
    I64,
    /// 64-bit floating point number.
    F64,
}
50
/// The structural ("native") type of a JSON value.
///
/// The type is recursive: arrays carry the type of their items and objects carry the type of
/// each of their fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JsonNativeType {
    /// JSON `null`.
    Null,
    /// JSON boolean.
    Bool,
    /// JSON number, refined by its numeric flavor.
    Number(JsonNumberType),
    /// JSON string.
    String,
    /// JSON array whose items have the boxed type.
    Array(Box<JsonNativeType>),
    /// JSON object, described by the types of its fields.
    Object(JsonObjectType),
}
60
61impl JsonNativeType {
62    pub fn is_null(&self) -> bool {
63        matches!(self, JsonNativeType::Null)
64    }
65
66    pub fn u64() -> Self {
67        Self::Number(JsonNumberType::U64)
68    }
69
70    pub fn i64() -> Self {
71        Self::Number(JsonNumberType::I64)
72    }
73
74    pub fn f64() -> Self {
75        Self::Number(JsonNumberType::F64)
76    }
77}
78
79impl From<&JsonNativeType> for ConcreteDataType {
80    fn from(value: &JsonNativeType) -> Self {
81        match value {
82            JsonNativeType::Null => ConcreteDataType::null_datatype(),
83            JsonNativeType::Bool => ConcreteDataType::boolean_datatype(),
84            JsonNativeType::Number(JsonNumberType::U64) => ConcreteDataType::uint64_datatype(),
85            JsonNativeType::Number(JsonNumberType::I64) => ConcreteDataType::int64_datatype(),
86            JsonNativeType::Number(JsonNumberType::F64) => ConcreteDataType::float64_datatype(),
87            JsonNativeType::String => ConcreteDataType::string_datatype(),
88            JsonNativeType::Array(item_type) => {
89                ConcreteDataType::List(ListType::new(Arc::new(item_type.as_ref().into())))
90            }
91            JsonNativeType::Object(object) => {
92                let fields = object
93                    .iter()
94                    .map(|(type_name, field_type)| {
95                        StructField::new(type_name.clone(), field_type.into(), true)
96                    })
97                    .collect();
98                ConcreteDataType::Struct(StructType::new(Arc::new(fields)))
99            }
100        }
101    }
102}
103
104impl From<&ConcreteDataType> for JsonNativeType {
105    fn from(value: &ConcreteDataType) -> Self {
106        match value {
107            ConcreteDataType::Null(_) => JsonNativeType::Null,
108            ConcreteDataType::Boolean(_) => JsonNativeType::Bool,
109            ConcreteDataType::UInt64(_)
110            | ConcreteDataType::UInt32(_)
111            | ConcreteDataType::UInt16(_)
112            | ConcreteDataType::UInt8(_) => JsonNativeType::u64(),
113            ConcreteDataType::Int64(_)
114            | ConcreteDataType::Int32(_)
115            | ConcreteDataType::Int16(_)
116            | ConcreteDataType::Int8(_) => JsonNativeType::i64(),
117            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(),
118            ConcreteDataType::String(_) => JsonNativeType::String,
119            ConcreteDataType::List(list_type) => {
120                JsonNativeType::Array(Box::new(list_type.item_type().into()))
121            }
122            ConcreteDataType::Struct(struct_type) => JsonNativeType::Object(
123                struct_type
124                    .fields()
125                    .iter()
126                    .map(|field| (field.name().to_string(), field.data_type().into()))
127                    .collect(),
128            ),
129            ConcreteDataType::Json(json_type) => json_type.native_type().clone(),
130            _ => unreachable!(),
131        }
132    }
133}
134
135impl Display for JsonNativeType {
136    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
137        match self {
138            JsonNativeType::Null => write!(f, r#""<Null>""#),
139            JsonNativeType::Bool => write!(f, r#""<Bool>""#),
140            JsonNativeType::Number(_) => {
141                write!(f, r#""<Number>""#)
142            }
143            JsonNativeType::String => write!(f, r#""<String>""#),
144            JsonNativeType::Array(item_type) => {
145                write!(f, "[{}]", item_type)
146            }
147            JsonNativeType::Object(object) => {
148                write!(
149                    f,
150                    "{{{}}}",
151                    object
152                        .iter()
153                        .map(|(k, v)| format!(r#""{k}":{v}"#))
154                        .collect::<Vec<_>>()
155                        .join(",")
156                )
157            }
158        }
159    }
160}
161
/// How a JSON column represents its values.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
pub enum JsonFormat {
    /// Binary JSONB encoding; this is the default format.
    #[default]
    Jsonb,
    /// Structured representation described by the boxed [`JsonNativeType`].
    Native(Box<JsonNativeType>),
}
168
/// JsonType is a data type for JSON data.
///
/// Its representation depends on [`JsonFormat`]:
/// - [`JsonFormat::Jsonb`] (the default): stored as binary data of jsonb format, utilizing the
///   current binary value and vector implementation.
/// - [`JsonFormat::Native`]: represented through the struct type derived from the native type,
///   and built with a dedicated JSON vector builder.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct JsonType {
    /// The representation used by this JSON type.
    pub format: JsonFormat,
}
175
176impl JsonType {
177    pub fn new(format: JsonFormat) -> Self {
178        Self { format }
179    }
180
181    pub(crate) fn new_native(native: JsonNativeType) -> Self {
182        Self {
183            format: JsonFormat::Native(Box::new(native)),
184        }
185    }
186
187    pub fn is_native_type(&self) -> bool {
188        matches!(self.format, JsonFormat::Native(_))
189    }
190
191    pub fn native_type(&self) -> &JsonNativeType {
192        match &self.format {
193            JsonFormat::Jsonb => &JsonNativeType::String,
194            JsonFormat::Native(x) => x.as_ref(),
195        }
196    }
197
198    pub fn null() -> Self {
199        Self {
200            format: JsonFormat::Native(Box::new(JsonNativeType::Null)),
201        }
202    }
203
204    /// Make json type a struct type, by:
205    /// - if the json is an object, its entries are mapped to struct fields, obviously;
206    /// - if not, the json is one of bool, number, string or array, make it a special field
207    ///   (see [plain_json_struct_type]).
208    pub(crate) fn as_struct_type(&self) -> StructType {
209        match &self.format {
210            JsonFormat::Jsonb => StructType::default(),
211            JsonFormat::Native(inner) => match ConcreteDataType::from(inner.as_ref()) {
212                ConcreteDataType::Struct(t) => t.clone(),
213                x => plain_json_struct_type(x),
214            },
215        }
216    }
217
218    /// Try to merge this json type with others, error on datatype conflict.
219    pub fn merge(&mut self, other: &JsonType) -> Result<()> {
220        match (&self.format, &other.format) {
221            (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
222            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
223                let merged = merge(this.as_ref(), that.as_ref())?;
224                self.format = JsonFormat::Native(Box::new(merged));
225                Ok(())
226            }
227            _ => MergeJsonDatatypeSnafu {
228                reason: "json format not match",
229            }
230            .fail(),
231        }
232    }
233
234    /// Check if it can merge with `other` json type.
235    pub fn is_mergeable(&self, other: &JsonType) -> bool {
236        match (&self.format, &other.format) {
237            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
238            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
239                is_mergeable(this.as_ref(), that.as_ref())
240            }
241            _ => false,
242        }
243    }
244
245    /// Check if it includes all fields in `other` json type.
246    pub fn is_include(&self, other: &JsonType) -> bool {
247        match (&self.format, &other.format) {
248            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
249            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
250                is_include(this.as_ref(), that.as_ref())
251            }
252            _ => false,
253        }
254    }
255}
256
257fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
258    fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
259        for (type_name, that_type) in that {
260            let Some(this_type) = this.get(type_name) else {
261                return false;
262            };
263            if !is_include(this_type, that_type) {
264                return false;
265            }
266        }
267        true
268    }
269
270    match (this, that) {
271        (this, that) if this == that => true,
272        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
273            is_include(this.as_ref(), that.as_ref())
274        }
275        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
276            is_include_object(this, that)
277        }
278        (_, JsonNativeType::Null) => true,
279        _ => false,
280    }
281}
282
283/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
284/// fixed name [JSON_PLAIN_FIELD_NAME] and with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"`.
285pub(crate) fn plain_json_struct_type(item_type: ConcreteDataType) -> StructType {
286    let mut field = StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), item_type, true);
287    field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
288    StructType::new(Arc::new(vec![field]))
289}
290
291fn is_mergeable(this: &JsonNativeType, that: &JsonNativeType) -> bool {
292    fn is_mergeable_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
293        for (type_name, that_type) in that {
294            if let Some(this_type) = this.get(type_name)
295                && !is_mergeable(this_type, that_type)
296            {
297                return false;
298            }
299        }
300        true
301    }
302
303    match (this, that) {
304        (this, that) if this == that => true,
305        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
306            is_mergeable(this.as_ref(), that.as_ref())
307        }
308        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
309            is_mergeable_object(this, that)
310        }
311        (JsonNativeType::Null, _) | (_, JsonNativeType::Null) => true,
312        _ => false,
313    }
314}
315
316fn merge(this: &JsonNativeType, that: &JsonNativeType) -> Result<JsonNativeType> {
317    fn merge_object(this: &JsonObjectType, that: &JsonObjectType) -> Result<JsonObjectType> {
318        let mut this = this.clone();
319        // merge "that" into "this" directly:
320        for (type_name, that_type) in that {
321            if let Some(this_type) = this.get_mut(type_name) {
322                let merged_type = merge(this_type, that_type)?;
323                *this_type = merged_type;
324            } else {
325                this.insert(type_name.clone(), that_type.clone());
326            }
327        }
328        Ok(this)
329    }
330
331    match (this, that) {
332        (this, that) if this == that => Ok(this.clone()),
333        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
334            merge(this.as_ref(), that.as_ref()).map(|x| JsonNativeType::Array(Box::new(x)))
335        }
336        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
337            merge_object(this, that).map(JsonNativeType::Object)
338        }
339        (JsonNativeType::Null, x) | (x, JsonNativeType::Null) => Ok(x.clone()),
340        _ => MergeJsonDatatypeSnafu {
341            reason: format!("datatypes have conflict, this: {this}, that: {that}"),
342        }
343        .fail(),
344    }
345}
346
347impl DataType for JsonType {
348    fn name(&self) -> String {
349        match &self.format {
350            JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
351            JsonFormat::Native(x) => format!("Json<{x}>"),
352        }
353    }
354
355    fn logical_type_id(&self) -> LogicalTypeId {
356        LogicalTypeId::Json
357    }
358
359    fn default_value(&self) -> Value {
360        Bytes::default().into()
361    }
362
363    fn as_arrow_type(&self) -> ArrowDataType {
364        match self.format {
365            JsonFormat::Jsonb => ArrowDataType::Binary,
366            JsonFormat::Native(_) => self.as_struct_type().as_arrow_type(),
367        }
368    }
369
370    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
371        match &self.format {
372            JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
373            JsonFormat::Native(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
374        }
375    }
376
377    fn try_cast(&self, from: Value) -> Option<Value> {
378        match from {
379            Value::Binary(v) => Some(Value::Binary(v)),
380            _ => None,
381        }
382    }
383}
384
385impl Display for JsonType {
386    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
387        write!(f, "{}", self.name())
388    }
389}
390
391/// Converts a json type value to string
392pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
393    if val.is_empty() {
394        return Ok("".to_string());
395    }
396    match jsonb::from_slice(val) {
397        Ok(jsonb_value) => {
398            let serialized = jsonb_value.to_string();
399            Ok(serialized)
400        }
401        Err(e) => InvalidJsonbSnafu { error: e }.fail(),
402    }
403}
404
405/// Converts a json type value to serde_json::Value
406pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
407    let json_string = jsonb_to_string(val)?;
408    jsonb_string_to_serde_value(&json_string)
409}
410
411/// Attempts to deserialize a JSON text into `serde_json::Value`, with a best-effort
412/// fallback for Rust-style Unicode escape sequences.
413///
414/// This function is intended to be used on JSON strings produced from the internal
415/// JSONB representation (e.g. via [`jsonb_to_string`]). It first calls
416/// `serde_json::Value::from_str` directly. If that succeeds, the parsed value is
417/// returned as-is.
418///
419/// If the initial parse fails, the input is scanned for Rust-style Unicode code
420/// point escapes of the form `\\u{H...}` (a backslash, `u`, an opening brace,
421/// followed by 1–6 hexadecimal digits, and a closing brace). Each such escape is
422/// converted into JSON-compatible UTF‑16 escape sequences:
423///
424/// - For code points in the Basic Multilingual Plane (≤ `0xFFFF`), the escape is
425///   converted to a single JSON `\\uXXXX` sequence with four uppercase hex digits.
426/// - For code points above `0xFFFF` and less than Unicode max code point `0x10FFFF`,
427///   the code point is encoded as a UTF‑16 surrogate pair and emitted as two consecutive
428///   `\\uXXXX` sequences (as JSON format required).
429///
430/// After this normalization, the function retries parsing the resulting string as
431/// JSON and returns the deserialized value or a `DeserializeSnafu` error if it
432/// still cannot be parsed.
433fn jsonb_string_to_serde_value(json: &str) -> Result<serde_json::Value> {
434    match serde_json::Value::from_str(json) {
435        Ok(v) => Ok(v),
436        Err(e) => {
437            // If above deserialization is failed, the JSON string might contain some Rust chars
438            // that are somehow incorrectly represented as Unicode code point literal. For example,
439            // "\u{fe0f}". We have to convert them to JSON compatible format, like "\uFE0F", then
440            // try to deserialize the JSON string again.
441            if !e.is_syntax() || !e.to_string().contains("invalid escape") {
442                return Err(e).context(DeserializeSnafu { json });
443            }
444
445            static UNICODE_CODE_POINT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
446                // Match literal "\u{...}" sequences, capturing 1–6 (code point range) hex digits
447                // inside braces.
448                Regex::new(r"\\u\{([0-9a-fA-F]{1,6})}").unwrap_or_else(|e| panic!("{}", e))
449            });
450
451            let v = UNICODE_CODE_POINT_PATTERN.replace_all(json, |caps: &Captures| {
452                // Extract the hex payload (without braces) and parse to a code point.
453                let hex = &caps[1];
454                let Ok(code) = u32::from_str_radix(hex, 16) else {
455                    // On parse failure, leave the original escape sequence unchanged.
456                    return caps[0].to_string();
457                };
458
459                if code <= 0xFFFF {
460                    // Basic Multilingual Plane: JSON can represent this directly as \uXXXX.
461                    format!("\\u{:04X}", code)
462                } else if code > 0x10FFFF {
463                    // Beyond max Unicode code point
464                    caps[0].to_string()
465                } else {
466                    // Supplementary planes: JSON needs UTF-16 surrogate pairs.
467                    // Convert the code point to a 20-bit value.
468                    let code = code - 0x10000;
469
470                    // High surrogate: top 10 bits, offset by 0xD800.
471                    let high = 0xD800 + ((code >> 10) & 0x3FF);
472
473                    // Low surrogate: bottom 10 bits, offset by 0xDC00.
474                    let low = 0xDC00 + (code & 0x3FF);
475
476                    // Emit two \uXXXX escapes in sequence.
477                    format!("\\u{:04X}\\u{:04X}", high, low)
478                }
479            });
480            serde_json::Value::from_str(&v).context(DeserializeSnafu { json })
481        }
482    }
483}
484
485/// Parses a string to a json type value
486pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
487    jsonb::parse_value(s.as_bytes())
488        .map_err(|_| InvalidJsonSnafu { value: s }.build())
489        .map(|json| json.to_vec())
490}
491
492#[cfg(test)]
493mod tests {
494    use super::*;
495    use crate::json::JsonStructureSettings;
496
497    #[test]
498    fn test_jsonb_string_to_serde_value() -> Result<()> {
499        let valid_cases = vec![
500            (r#"{"data": "simple ascii"}"#, r#"{"data":"simple ascii"}"#),
501            (
502                r#"{"data": "Greek sigma: \u{03a3}"}"#,
503                r#"{"data":"Greek sigma: Σ"}"#,
504            ),
505            (
506                r#"{"data": "Joker card: \u{1f0df}"}"#,
507                r#"{"data":"Joker card: 🃟"}"#,
508            ),
509            (
510                r#"{"data": "BMP boundary: \u{ffff}"}"#,
511                r#"{"data":"BMP boundary: ￿"}"#,
512            ),
513            (
514                r#"{"data": "Supplementary min: \u{10000}"}"#,
515                r#"{"data":"Supplementary min: 𐀀"}"#,
516            ),
517            (
518                r#"{"data": "Supplementary max: \u{10ffff}"}"#,
519                r#"{"data":"Supplementary max: 􏿿"}"#,
520            ),
521        ];
522        for (input, expect) in valid_cases {
523            let v = jsonb_string_to_serde_value(input)?;
524            assert_eq!(v.to_string(), expect);
525        }
526
527        let invalid_cases = vec![
528            r#"{"data": "Invalid hex: \u{gggg}"}"#,
529            r#"{"data": "Beyond max Unicode code point: \u{110000}"}"#,
530            r#"{"data": "Out of range: \u{1100000}"}"#, // 7 digit
531            r#"{"data": "Empty braces: \u{}"}"#,
532        ];
533        for input in invalid_cases {
534            let result = jsonb_string_to_serde_value(input);
535            assert!(result.is_err());
536        }
537        Ok(())
538    }
539
540    #[test]
541    fn test_json_type_include() {
542        fn test(this: &JsonNativeType, that: &JsonNativeType, expected: bool) {
543            assert_eq!(is_include(this, that), expected);
544        }
545
546        test(&JsonNativeType::Null, &JsonNativeType::Null, true);
547        test(&JsonNativeType::Null, &JsonNativeType::Bool, false);
548
549        test(&JsonNativeType::Bool, &JsonNativeType::Null, true);
550        test(&JsonNativeType::Bool, &JsonNativeType::Bool, true);
551        test(&JsonNativeType::Bool, &JsonNativeType::u64(), false);
552
553        test(&JsonNativeType::u64(), &JsonNativeType::Null, true);
554        test(&JsonNativeType::u64(), &JsonNativeType::u64(), true);
555        test(&JsonNativeType::u64(), &JsonNativeType::String, false);
556
557        test(&JsonNativeType::String, &JsonNativeType::Null, true);
558        test(&JsonNativeType::String, &JsonNativeType::String, true);
559        test(
560            &JsonNativeType::String,
561            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
562            false,
563        );
564
565        test(
566            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
567            &JsonNativeType::Null,
568            true,
569        );
570        test(
571            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
572            &JsonNativeType::Array(Box::new(JsonNativeType::Null)),
573            true,
574        );
575        test(
576            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
577            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
578            true,
579        );
580        test(
581            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
582            &JsonNativeType::String,
583            false,
584        );
585        test(
586            &JsonNativeType::Array(Box::new(JsonNativeType::f64())),
587            &JsonNativeType::Object(JsonObjectType::new()),
588            false,
589        );
590
591        let simple_json_object = &JsonNativeType::Object(JsonObjectType::from([(
592            "foo".to_string(),
593            JsonNativeType::String,
594        )]));
595        test(simple_json_object, &JsonNativeType::Null, true);
596        test(simple_json_object, simple_json_object, true);
597        test(simple_json_object, &JsonNativeType::i64(), false);
598        test(
599            simple_json_object,
600            &JsonNativeType::Object(JsonObjectType::from([(
601                "bar".to_string(),
602                JsonNativeType::i64(),
603            )])),
604            false,
605        );
606
607        let complex_json_object = &JsonNativeType::Object(JsonObjectType::from([
608            (
609                "nested".to_string(),
610                JsonNativeType::Object(JsonObjectType::from([(
611                    "a".to_string(),
612                    JsonNativeType::Object(JsonObjectType::from([(
613                        "b".to_string(),
614                        JsonNativeType::Object(JsonObjectType::from([(
615                            "c".to_string(),
616                            JsonNativeType::String,
617                        )])),
618                    )])),
619                )])),
620            ),
621            ("bar".to_string(), JsonNativeType::i64()),
622        ]));
623        test(complex_json_object, &JsonNativeType::Null, true);
624        test(complex_json_object, &JsonNativeType::String, false);
625        test(complex_json_object, complex_json_object, true);
626        test(
627            complex_json_object,
628            &JsonNativeType::Object(JsonObjectType::from([(
629                "bar".to_string(),
630                JsonNativeType::i64(),
631            )])),
632            true,
633        );
634        test(
635            complex_json_object,
636            &JsonNativeType::Object(JsonObjectType::from([
637                (
638                    "nested".to_string(),
639                    JsonNativeType::Object(JsonObjectType::from([(
640                        "a".to_string(),
641                        JsonNativeType::Null,
642                    )])),
643                ),
644                ("bar".to_string(), JsonNativeType::i64()),
645            ])),
646            true,
647        );
648        test(
649            complex_json_object,
650            &JsonNativeType::Object(JsonObjectType::from([
651                (
652                    "nested".to_string(),
653                    JsonNativeType::Object(JsonObjectType::from([(
654                        "a".to_string(),
655                        JsonNativeType::String,
656                    )])),
657                ),
658                ("bar".to_string(), JsonNativeType::i64()),
659            ])),
660            false,
661        );
662        test(
663            complex_json_object,
664            &JsonNativeType::Object(JsonObjectType::from([
665                (
666                    "nested".to_string(),
667                    JsonNativeType::Object(JsonObjectType::from([(
668                        "a".to_string(),
669                        JsonNativeType::Object(JsonObjectType::from([(
670                            "b".to_string(),
671                            JsonNativeType::String,
672                        )])),
673                    )])),
674                ),
675                ("bar".to_string(), JsonNativeType::i64()),
676            ])),
677            false,
678        );
679        test(
680            complex_json_object,
681            &JsonNativeType::Object(JsonObjectType::from([
682                (
683                    "nested".to_string(),
684                    JsonNativeType::Object(JsonObjectType::from([(
685                        "a".to_string(),
686                        JsonNativeType::Object(JsonObjectType::from([(
687                            "b".to_string(),
688                            JsonNativeType::Object(JsonObjectType::from([(
689                                "c".to_string(),
690                                JsonNativeType::Null,
691                            )])),
692                        )])),
693                    )])),
694                ),
695                ("bar".to_string(), JsonNativeType::i64()),
696            ])),
697            true,
698        );
699        test(
700            complex_json_object,
701            &JsonNativeType::Object(JsonObjectType::from([
702                (
703                    "nested".to_string(),
704                    JsonNativeType::Object(JsonObjectType::from([(
705                        "a".to_string(),
706                        JsonNativeType::Object(JsonObjectType::from([(
707                            "b".to_string(),
708                            JsonNativeType::Object(JsonObjectType::from([(
709                                "c".to_string(),
710                                JsonNativeType::Bool,
711                            )])),
712                        )])),
713                    )])),
714                ),
715                ("bar".to_string(), JsonNativeType::i64()),
716            ])),
717            false,
718        );
719        test(
720            complex_json_object,
721            &JsonNativeType::Object(JsonObjectType::from([(
722                "nested".to_string(),
723                JsonNativeType::Object(JsonObjectType::from([(
724                    "a".to_string(),
725                    JsonNativeType::Object(JsonObjectType::from([(
726                        "b".to_string(),
727                        JsonNativeType::Object(JsonObjectType::from([(
728                            "c".to_string(),
729                            JsonNativeType::String,
730                        )])),
731                    )])),
732                )])),
733            )])),
734            true,
735        );
736    }
737
738    #[test]
739    fn test_merge_json_type() -> Result<()> {
740        fn test(
741            json: &str,
742            json_type: &mut JsonType,
743            expected: std::result::Result<&str, &str>,
744        ) -> Result<()> {
745            let json: serde_json::Value = serde_json::from_str(json).unwrap();
746
747            let settings = JsonStructureSettings::Structured(None);
748            let value = settings.encode(json)?;
749            let value_type = value.data_type();
750            let Some(other) = value_type.as_json() else {
751                unreachable!()
752            };
753
754            let result = json_type.merge(other);
755            match (result, expected) {
756                (Ok(()), Ok(expected)) => {
757                    assert_eq!(json_type.name(), expected);
758                    assert!(json_type.is_mergeable(other));
759                }
760                (Err(err), Err(expected)) => {
761                    assert_eq!(err.to_string(), expected);
762                    assert!(!json_type.is_mergeable(other));
763                }
764                _ => unreachable!(),
765            }
766            Ok(())
767        }
768
769        let json_type = &mut JsonType::new_native(JsonNativeType::Null);
770
771        // can merge with json object:
772        let json = r#"{
773            "hello": "world",
774            "list": [1, 2, 3],
775            "object": {"a": 1}
776        }"#;
777        let expected =
778            r#"Json<{"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}>"#;
779        test(json, json_type, Ok(expected))?;
780
781        // cannot merge with other non-object json values:
782        let jsons = [r#""s""#, "1", "[1]"];
783        let expects = [
784            r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: "<String>""#,
785            r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: "<Number>""#,
786            r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: ["<Number>"]"#,
787        ];
788        for (json, expect) in jsons.into_iter().zip(expects.into_iter()) {
789            test(json, json_type, Err(expect))?;
790        }
791
792        // cannot merge with other json object with conflict field datatype:
793        let json = r#"{
794            "hello": 1,
795            "float": 0.123,
796            "no": 42
797        }"#;
798        let expected = r#"Failed to merge JSON datatype: datatypes have conflict, this: "<String>", that: "<Number>""#;
799        test(json, json_type, Err(expected))?;
800
801        // can merge with another json object:
802        let json = r#"{
803            "hello": "greptime",
804            "float": 0.123,
805            "int": 42
806        }"#;
807        let expected = r#"Json<{"float":"<Number>","hello":"<String>","int":"<Number>","list":["<Number>"],"object":{"a":"<Number>"}}>"#;
808        test(json, json_type, Ok(expected))?;
809
810        // can merge with some complex nested json object:
811        let json = r#"{
812            "list": [4],
813            "object": {"foo": "bar", "l": ["x"], "o": {"key": "value"}},
814            "float": 0.456,
815            "int": 0
816        }"#;
817        let expected = r#"Json<{"float":"<Number>","hello":"<String>","int":"<Number>","list":["<Number>"],"object":{"a":"<Number>","foo":"<String>","l":["<String>"],"o":{"key":"<String>"}}}>"#;
818        test(json, json_type, Ok(expected))?;
819
820        Ok(())
821    }
822}