datatypes/types/
json_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::BTreeMap;
16use std::fmt::{Debug, Display, Formatter};
17use std::str::FromStr;
18use std::sync::{Arc, LazyLock};
19
20use arrow::datatypes::DataType as ArrowDataType;
21use common_base::bytes::Bytes;
22use regex::{Captures, Regex};
23use serde::{Deserialize, Serialize};
24use snafu::ResultExt;
25
26use crate::data_type::DataType;
27use crate::error::{
28    DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
29};
30use crate::prelude::ConcreteDataType;
31use crate::scalars::ScalarVectorBuilder;
32use crate::type_id::LogicalTypeId;
33use crate::types::{ListType, StructField, StructType};
34use crate::value::Value;
35use crate::vectors::json::builder::JsonVectorBuilder;
36use crate::vectors::{BinaryVectorBuilder, MutableVector};
37
/// Type name of the JSON data type; the name reported by [`JsonType`] in JSONB format.
pub const JSON_TYPE_NAME: &str = "Json";
/// Name of the single struct field that wraps a "plain" (non-object) JSON value,
/// see [`plain_json_struct_type`].
const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
/// Metadata key set on the wrapping field created by [`plain_json_struct_type`].
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";

/// Fields of a JSON object type: field name mapped to the field's type, ordered by name.
pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
43
/// The representation of a JSON number in the native JSON format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JsonNumberType {
    /// Unsigned 64-bit integer.
    U64,
    /// Signed 64-bit integer.
    I64,
    /// 64-bit floating point number.
    F64,
}
50
/// The type of a JSON value in the native JSON format.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JsonNativeType {
    /// JSON `null`.
    Null,
    /// JSON boolean.
    Bool,
    /// JSON number, with its concrete numeric representation.
    Number(JsonNumberType),
    /// JSON string.
    String,
    /// JSON array; all items share the boxed item type.
    Array(Box<JsonNativeType>),
    /// JSON object, described by the types of its fields.
    Object(JsonObjectType),
}
60
61impl JsonNativeType {
62    pub fn is_null(&self) -> bool {
63        matches!(self, JsonNativeType::Null)
64    }
65
66    pub fn u64() -> Self {
67        Self::Number(JsonNumberType::U64)
68    }
69
70    pub fn i64() -> Self {
71        Self::Number(JsonNumberType::I64)
72    }
73
74    pub fn f64() -> Self {
75        Self::Number(JsonNumberType::F64)
76    }
77}
78
79impl From<&JsonNativeType> for ConcreteDataType {
80    fn from(value: &JsonNativeType) -> Self {
81        match value {
82            JsonNativeType::Null => ConcreteDataType::null_datatype(),
83            JsonNativeType::Bool => ConcreteDataType::boolean_datatype(),
84            JsonNativeType::Number(JsonNumberType::U64) => ConcreteDataType::uint64_datatype(),
85            JsonNativeType::Number(JsonNumberType::I64) => ConcreteDataType::int64_datatype(),
86            JsonNativeType::Number(JsonNumberType::F64) => ConcreteDataType::float64_datatype(),
87            JsonNativeType::String => ConcreteDataType::string_datatype(),
88            JsonNativeType::Array(item_type) => {
89                ConcreteDataType::List(ListType::new(Arc::new(item_type.as_ref().into())))
90            }
91            JsonNativeType::Object(object) => {
92                let fields = object
93                    .iter()
94                    .map(|(type_name, field_type)| {
95                        StructField::new(type_name.clone(), field_type.into(), true)
96                    })
97                    .collect();
98                ConcreteDataType::Struct(StructType::new(Arc::new(fields)))
99            }
100        }
101    }
102}
103
104impl From<&ConcreteDataType> for JsonNativeType {
105    fn from(value: &ConcreteDataType) -> Self {
106        match value {
107            ConcreteDataType::Null(_) => JsonNativeType::Null,
108            ConcreteDataType::Boolean(_) => JsonNativeType::Bool,
109            ConcreteDataType::UInt64(_)
110            | ConcreteDataType::UInt32(_)
111            | ConcreteDataType::UInt16(_)
112            | ConcreteDataType::UInt8(_) => JsonNativeType::u64(),
113            ConcreteDataType::Int64(_)
114            | ConcreteDataType::Int32(_)
115            | ConcreteDataType::Int16(_)
116            | ConcreteDataType::Int8(_) => JsonNativeType::i64(),
117            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(),
118            ConcreteDataType::String(_) => JsonNativeType::String,
119            ConcreteDataType::List(list_type) => {
120                JsonNativeType::Array(Box::new(list_type.item_type().into()))
121            }
122            ConcreteDataType::Struct(struct_type) => JsonNativeType::Object(
123                struct_type
124                    .fields()
125                    .iter()
126                    .map(|field| (field.name().to_string(), field.data_type().into()))
127                    .collect(),
128            ),
129            ConcreteDataType::Json(json_type) => json_type.native_type().clone(),
130            _ => unreachable!(),
131        }
132    }
133}
134
135impl Display for JsonNativeType {
136    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
137        match self {
138            JsonNativeType::Null => write!(f, r#""<Null>""#),
139            JsonNativeType::Bool => write!(f, r#""<Bool>""#),
140            JsonNativeType::Number(_) => {
141                write!(f, r#""<Number>""#)
142            }
143            JsonNativeType::String => write!(f, r#""<String>""#),
144            JsonNativeType::Array(item_type) => {
145                write!(f, "[{}]", item_type)
146            }
147            JsonNativeType::Object(object) => {
148                write!(
149                    f,
150                    "{{{}}}",
151                    object
152                        .iter()
153                        .map(|(k, v)| format!(r#""{k}":{v}"#))
154                        .collect::<Vec<_>>()
155                        .join(",")
156                )
157            }
158        }
159    }
160}
161
/// Storage format of a [`JsonType`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
pub enum JsonFormat {
    /// JSON encoded as JSONB binary data; this is the default format.
    #[default]
    Jsonb,
    /// JSON stored natively, with its structure described by the boxed [`JsonNativeType`].
    Native(Box<JsonNativeType>),
}
168
/// JsonType is a data type for JSON data. Depending on its [`JsonFormat`], the data is either
/// stored as binary data of jsonb format (utilizing the binary value and vector
/// implementation), or natively with a structure described by a [`JsonNativeType`].
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct JsonType {
    /// How the JSON data is stored.
    pub format: JsonFormat,
}
175
176impl JsonType {
177    pub fn new(format: JsonFormat) -> Self {
178        Self { format }
179    }
180
181    pub(crate) fn new_native(native: JsonNativeType) -> Self {
182        Self {
183            format: JsonFormat::Native(Box::new(native)),
184        }
185    }
186
187    pub fn is_native_type(&self) -> bool {
188        matches!(self.format, JsonFormat::Native(_))
189    }
190
191    pub fn native_type(&self) -> &JsonNativeType {
192        match &self.format {
193            JsonFormat::Jsonb => &JsonNativeType::String,
194            JsonFormat::Native(x) => x.as_ref(),
195        }
196    }
197
198    pub fn null() -> Self {
199        Self {
200            format: JsonFormat::Native(Box::new(JsonNativeType::Null)),
201        }
202    }
203
204    /// Make json type a struct type, by:
205    /// - if the json is an object, its entries are mapped to struct fields, obviously;
206    /// - if not, the json is one of bool, number, string or array, make it a special field
207    ///   (see [plain_json_struct_type]).
208    pub(crate) fn as_struct_type(&self) -> StructType {
209        match &self.format {
210            JsonFormat::Jsonb => StructType::default(),
211            JsonFormat::Native(inner) => match ConcreteDataType::from(inner.as_ref()) {
212                ConcreteDataType::Struct(t) => t.clone(),
213                x => plain_json_struct_type(x),
214            },
215        }
216    }
217
218    /// Try to merge this json type with others, error on datatype conflict.
219    pub fn merge(&mut self, other: &JsonType) -> Result<()> {
220        match (&self.format, &other.format) {
221            (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
222            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
223                let merged = merge(this.as_ref(), that.as_ref())?;
224                self.format = JsonFormat::Native(Box::new(merged));
225                Ok(())
226            }
227            _ => MergeJsonDatatypeSnafu {
228                reason: "json format not match",
229            }
230            .fail(),
231        }
232    }
233
234    /// Check if it can merge with `other` json type.
235    pub fn is_mergeable(&self, other: &JsonType) -> bool {
236        match (&self.format, &other.format) {
237            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
238            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
239                is_mergeable(this.as_ref(), that.as_ref())
240            }
241            _ => false,
242        }
243    }
244
245    /// Check if it includes all fields in `other` json type.
246    pub fn is_include(&self, other: &JsonType) -> bool {
247        match (&self.format, &other.format) {
248            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
249            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
250                is_include(this.as_ref(), that.as_ref())
251            }
252            _ => false,
253        }
254    }
255}
256
257fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
258    fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
259        for (type_name, that_type) in that {
260            let Some(this_type) = this.get(type_name) else {
261                return false;
262            };
263            if !is_include(this_type, that_type) {
264                return false;
265            }
266        }
267        true
268    }
269
270    match (this, that) {
271        (this, that) if this == that => true,
272        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
273            is_include(this.as_ref(), that.as_ref())
274        }
275        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
276            is_include_object(this, that)
277        }
278        (_, JsonNativeType::Null) => true,
279        _ => false,
280    }
281}
282
283/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
284/// fixed name [JSON_PLAIN_FIELD_NAME] and with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"`.
285pub(crate) fn plain_json_struct_type(item_type: ConcreteDataType) -> StructType {
286    let mut field = StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), item_type, true);
287    field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
288    StructType::new(Arc::new(vec![field]))
289}
290
291fn is_mergeable(this: &JsonNativeType, that: &JsonNativeType) -> bool {
292    fn is_mergeable_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
293        for (type_name, that_type) in that {
294            if let Some(this_type) = this.get(type_name)
295                && !is_mergeable(this_type, that_type)
296            {
297                return false;
298            }
299        }
300        true
301    }
302
303    match (this, that) {
304        (this, that) if this == that => true,
305        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
306            is_mergeable(this.as_ref(), that.as_ref())
307        }
308        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
309            is_mergeable_object(this, that)
310        }
311        (JsonNativeType::Null, _) | (_, JsonNativeType::Null) => true,
312        _ => false,
313    }
314}
315
316fn merge(this: &JsonNativeType, that: &JsonNativeType) -> Result<JsonNativeType> {
317    fn merge_object(this: &JsonObjectType, that: &JsonObjectType) -> Result<JsonObjectType> {
318        let mut this = this.clone();
319        // merge "that" into "this" directly:
320        for (type_name, that_type) in that {
321            if let Some(this_type) = this.get_mut(type_name) {
322                let merged_type = merge(this_type, that_type)?;
323                *this_type = merged_type;
324            } else {
325                this.insert(type_name.clone(), that_type.clone());
326            }
327        }
328        Ok(this)
329    }
330
331    match (this, that) {
332        (this, that) if this == that => Ok(this.clone()),
333        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
334            merge(this.as_ref(), that.as_ref()).map(|x| JsonNativeType::Array(Box::new(x)))
335        }
336        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
337            merge_object(this, that).map(JsonNativeType::Object)
338        }
339        (JsonNativeType::Null, x) | (x, JsonNativeType::Null) => Ok(x.clone()),
340        _ => MergeJsonDatatypeSnafu {
341            reason: format!("datatypes have conflict, this: {this}, that: {that}"),
342        }
343        .fail(),
344    }
345}
346
347impl DataType for JsonType {
348    fn name(&self) -> String {
349        match &self.format {
350            JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
351            JsonFormat::Native(x) => format!("Json<{x}>"),
352        }
353    }
354
355    fn logical_type_id(&self) -> LogicalTypeId {
356        LogicalTypeId::Json
357    }
358
359    fn default_value(&self) -> Value {
360        Bytes::default().into()
361    }
362
363    fn as_arrow_type(&self) -> ArrowDataType {
364        match self.format {
365            JsonFormat::Jsonb => ArrowDataType::Binary,
366            JsonFormat::Native(_) => self.as_struct_type().as_arrow_type(),
367        }
368    }
369
370    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
371        match &self.format {
372            JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
373            JsonFormat::Native(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
374        }
375    }
376
377    fn try_cast(&self, from: Value) -> Option<Value> {
378        match from {
379            Value::Binary(v) => Some(Value::Binary(v)),
380            _ => None,
381        }
382    }
383}
384
385impl Display for JsonType {
386    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
387        write!(f, "{}", self.name())
388    }
389}
390
391/// Converts a json type value to string
392pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
393    match jsonb::from_slice(val) {
394        Ok(jsonb_value) => {
395            let serialized = jsonb_value.to_string();
396            Ok(serialized)
397        }
398        Err(e) => InvalidJsonbSnafu { error: e }.fail(),
399    }
400}
401
402/// Converts a json type value to serde_json::Value
403pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
404    let json_string = jsonb_to_string(val)?;
405    jsonb_string_to_serde_value(&json_string)
406}
407
408/// Attempts to deserialize a JSON text into `serde_json::Value`, with a best-effort
409/// fallback for Rust-style Unicode escape sequences.
410///
411/// This function is intended to be used on JSON strings produced from the internal
412/// JSONB representation (e.g. via [`jsonb_to_string`]). It first calls
413/// `serde_json::Value::from_str` directly. If that succeeds, the parsed value is
414/// returned as-is.
415///
416/// If the initial parse fails, the input is scanned for Rust-style Unicode code
417/// point escapes of the form `\\u{H...}` (a backslash, `u`, an opening brace,
418/// followed by 1–6 hexadecimal digits, and a closing brace). Each such escape is
419/// converted into JSON-compatible UTF‑16 escape sequences:
420///
421/// - For code points in the Basic Multilingual Plane (≤ `0xFFFF`), the escape is
422///   converted to a single JSON `\\uXXXX` sequence with four uppercase hex digits.
423/// - For code points above `0xFFFF` and less than Unicode max code point `0x10FFFF`,
424///   the code point is encoded as a UTF‑16 surrogate pair and emitted as two consecutive
425///   `\\uXXXX` sequences (as JSON format required).
426///
427/// After this normalization, the function retries parsing the resulting string as
428/// JSON and returns the deserialized value or a `DeserializeSnafu` error if it
429/// still cannot be parsed.
430fn jsonb_string_to_serde_value(json: &str) -> Result<serde_json::Value> {
431    match serde_json::Value::from_str(json) {
432        Ok(v) => Ok(v),
433        Err(e) => {
434            // If above deserialization is failed, the JSON string might contain some Rust chars
435            // that are somehow incorrectly represented as Unicode code point literal. For example,
436            // "\u{fe0f}". We have to convert them to JSON compatible format, like "\uFE0F", then
437            // try to deserialize the JSON string again.
438            if !e.is_syntax() || !e.to_string().contains("invalid escape") {
439                return Err(e).context(DeserializeSnafu { json });
440            }
441
442            static UNICODE_CODE_POINT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
443                // Match literal "\u{...}" sequences, capturing 1–6 (code point range) hex digits
444                // inside braces.
445                Regex::new(r"\\u\{([0-9a-fA-F]{1,6})}").unwrap_or_else(|e| panic!("{}", e))
446            });
447
448            let v = UNICODE_CODE_POINT_PATTERN.replace_all(json, |caps: &Captures| {
449                // Extract the hex payload (without braces) and parse to a code point.
450                let hex = &caps[1];
451                let Ok(code) = u32::from_str_radix(hex, 16) else {
452                    // On parse failure, leave the original escape sequence unchanged.
453                    return caps[0].to_string();
454                };
455
456                if code <= 0xFFFF {
457                    // Basic Multilingual Plane: JSON can represent this directly as \uXXXX.
458                    format!("\\u{:04X}", code)
459                } else if code > 0x10FFFF {
460                    // Beyond max Unicode code point
461                    caps[0].to_string()
462                } else {
463                    // Supplementary planes: JSON needs UTF-16 surrogate pairs.
464                    // Convert the code point to a 20-bit value.
465                    let code = code - 0x10000;
466
467                    // High surrogate: top 10 bits, offset by 0xD800.
468                    let high = 0xD800 + ((code >> 10) & 0x3FF);
469
470                    // Low surrogate: bottom 10 bits, offset by 0xDC00.
471                    let low = 0xDC00 + (code & 0x3FF);
472
473                    // Emit two \uXXXX escapes in sequence.
474                    format!("\\u{:04X}\\u{:04X}", high, low)
475                }
476            });
477            serde_json::Value::from_str(&v).context(DeserializeSnafu { json })
478        }
479    }
480}
481
482/// Parses a string to a json type value
483pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
484    jsonb::parse_value(s.as_bytes())
485        .map_err(|_| InvalidJsonSnafu { value: s }.build())
486        .map(|json| json.to_vec())
487}
488
#[cfg(test)]
mod tests {
    use super::*;
    use crate::json::JsonStructureSettings;

    /// Shorthand for an array type with the given item type.
    fn array_of(item_type: JsonNativeType) -> JsonNativeType {
        JsonNativeType::Array(Box::new(item_type))
    }

    /// Shorthand for an object type with the given fields.
    fn object_of<const N: usize>(fields: [(&str, JsonNativeType); N]) -> JsonNativeType {
        JsonNativeType::Object(
            fields
                .into_iter()
                .map(|(name, field_type)| (name.to_string(), field_type))
                .collect(),
        )
    }

    #[test]
    fn test_jsonb_string_to_serde_value() -> Result<()> {
        // Pairs of (input text, expected compact serialization). The inputs are raw strings
        // holding a literal `\u{...}`; the expectations are normal strings in which the very
        // same escape denotes the actual character.
        let accepted = [
            (
                r#"{"data": "simple ascii"}"#,
                "{\"data\":\"simple ascii\"}",
            ),
            (
                r#"{"data": "Greek sigma: \u{03a3}"}"#,
                "{\"data\":\"Greek sigma: \u{03a3}\"}",
            ),
            (
                r#"{"data": "Joker card: \u{1f0df}"}"#,
                "{\"data\":\"Joker card: \u{1f0df}\"}",
            ),
            (
                r#"{"data": "BMP boundary: \u{ffff}"}"#,
                "{\"data\":\"BMP boundary: \u{ffff}\"}",
            ),
            (
                r#"{"data": "Supplementary min: \u{10000}"}"#,
                "{\"data\":\"Supplementary min: \u{10000}\"}",
            ),
            (
                r#"{"data": "Supplementary max: \u{10ffff}"}"#,
                "{\"data\":\"Supplementary max: \u{10ffff}\"}",
            ),
        ];
        for (input, expected) in accepted {
            let parsed = jsonb_string_to_serde_value(input)?;
            assert_eq!(parsed.to_string(), expected);
        }

        let rejected = [
            r#"{"data": "Invalid hex: \u{gggg}"}"#,
            r#"{"data": "Beyond max Unicode code point: \u{110000}"}"#,
            r#"{"data": "Out of range: \u{1100000}"}"#, // 7 digit
            r#"{"data": "Empty braces: \u{}"}"#,
        ];
        for input in rejected {
            assert!(jsonb_string_to_serde_value(input).is_err());
        }
        Ok(())
    }

    #[test]
    fn test_json_type_include() {
        fn check(this: &JsonNativeType, that: &JsonNativeType, expected: bool) {
            assert_eq!(is_include(this, that), expected);
        }

        let null = JsonNativeType::Null;
        let boolean = JsonNativeType::Bool;
        let string = JsonNativeType::String;
        let unsigned = JsonNativeType::u64();
        let signed = JsonNativeType::i64();

        check(&null, &null, true);
        check(&null, &boolean, false);

        check(&boolean, &null, true);
        check(&boolean, &boolean, true);
        check(&boolean, &unsigned, false);

        check(&unsigned, &null, true);
        check(&unsigned, &unsigned, true);
        check(&unsigned, &string, false);

        check(&string, &null, true);
        check(&string, &string, true);
        check(&string, &array_of(JsonNativeType::f64()), false);

        let float_array = array_of(JsonNativeType::f64());
        check(&float_array, &null, true);
        check(&float_array, &array_of(JsonNativeType::Null), true);
        check(&float_array, &array_of(JsonNativeType::f64()), true);
        check(&float_array, &string, false);
        check(
            &float_array,
            &JsonNativeType::Object(JsonObjectType::new()),
            false,
        );

        // `{"foo": String}`
        let simple_json_object = object_of([("foo", JsonNativeType::String)]);
        check(&simple_json_object, &null, true);
        check(&simple_json_object, &simple_json_object, true);
        check(&simple_json_object, &signed, false);
        check(
            &simple_json_object,
            &object_of([("bar", JsonNativeType::i64())]),
            false,
        );

        // `{"b": {"c": <leaf>}}`
        let b_c = |leaf: JsonNativeType| object_of([("b", object_of([("c", leaf)]))]);
        // `{"nested": {"a": <a_type>}, "bar": i64}`
        let nested_a_and_bar = |a_type: JsonNativeType| {
            object_of([
                ("nested", object_of([("a", a_type)])),
                ("bar", JsonNativeType::i64()),
            ])
        };

        // `{"nested": {"a": {"b": {"c": String}}}, "bar": i64}`
        let complex_json_object = nested_a_and_bar(b_c(JsonNativeType::String));
        check(&complex_json_object, &null, true);
        check(&complex_json_object, &string, false);
        check(&complex_json_object, &complex_json_object, true);
        // a subset of the top level fields is included:
        check(
            &complex_json_object,
            &object_of([("bar", JsonNativeType::i64())]),
            true,
        );
        // a null somewhere along the nested path is included ...
        check(
            &complex_json_object,
            &nested_a_and_bar(JsonNativeType::Null),
            true,
        );
        // ... while a conflicting type at the same place is not:
        check(
            &complex_json_object,
            &nested_a_and_bar(JsonNativeType::String),
            false,
        );
        check(
            &complex_json_object,
            &nested_a_and_bar(object_of([("b", JsonNativeType::String)])),
            false,
        );
        // the same holds for the innermost field:
        check(
            &complex_json_object,
            &nested_a_and_bar(b_c(JsonNativeType::Null)),
            true,
        );
        check(
            &complex_json_object,
            &nested_a_and_bar(b_c(JsonNativeType::Bool)),
            false,
        );
        // only the nested part, without the "bar" field:
        check(
            &complex_json_object,
            &object_of([(
                "nested",
                object_of([("a", b_c(JsonNativeType::String))]),
            )]),
            true,
        );
    }

    #[test]
    fn test_merge_json_type() -> Result<()> {
        /// Encodes `json` as a structured json value, merges its type into `json_type`, and
        /// checks the outcome: either the expected name of the merged type, or the expected
        /// error message.
        fn check(
            json: &str,
            json_type: &mut JsonType,
            expected: std::result::Result<&str, &str>,
        ) -> Result<()> {
            let parsed: serde_json::Value = serde_json::from_str(json).unwrap();

            let encoded = JsonStructureSettings::Structured(None).encode(parsed)?;
            let encoded_type = encoded.data_type();
            let Some(other) = encoded_type.as_json() else {
                unreachable!()
            };

            match (json_type.merge(other), expected) {
                (Ok(()), Ok(expected_name)) => {
                    assert_eq!(json_type.name(), expected_name);
                    assert!(json_type.is_mergeable(other));
                }
                (Err(err), Err(expected_message)) => {
                    assert_eq!(err.to_string(), expected_message);
                    assert!(!json_type.is_mergeable(other));
                }
                _ => unreachable!(),
            }
            Ok(())
        }

        let mut json_type = JsonType::new_native(JsonNativeType::Null);

        // can merge with json object:
        let json = r#"{
            "hello": "world",
            "list": [1, 2, 3],
            "object": {"a": 1}
        }"#;
        let expected =
            r#"Json<{"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}>"#;
        check(json, &mut json_type, Ok(expected))?;

        // cannot merge with other non-object json values:
        let conflicts = [
            (
                r#""s""#,
                r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: "<String>""#,
            ),
            (
                "1",
                r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: "<Number>""#,
            ),
            (
                "[1]",
                r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: ["<Number>"]"#,
            ),
        ];
        for (json, expected) in conflicts {
            check(json, &mut json_type, Err(expected))?;
        }

        // cannot merge with other json object with conflict field datatype:
        let json = r#"{
            "hello": 1,
            "float": 0.123,
            "no": 42
        }"#;
        let expected = r#"Failed to merge JSON datatype: datatypes have conflict, this: "<String>", that: "<Number>""#;
        check(json, &mut json_type, Err(expected))?;

        // can merge with another json object:
        let json = r#"{
            "hello": "greptime",
            "float": 0.123,
            "int": 42
        }"#;
        let expected = r#"Json<{"float":"<Number>","hello":"<String>","int":"<Number>","list":["<Number>"],"object":{"a":"<Number>"}}>"#;
        check(json, &mut json_type, Ok(expected))?;

        // can merge with some complex nested json object:
        let json = r#"{
            "list": [4],
            "object": {"foo": "bar", "l": ["x"], "o": {"key": "value"}},
            "float": 0.456,
            "int": 0
        }"#;
        let expected = r#"Json<{"float":"<Number>","hello":"<String>","int":"<Number>","list":["<Number>"],"object":{"a":"<Number>","foo":"<String>","l":["<String>"],"o":{"key":"<String>"}}}>"#;
        check(json, &mut json_type, Ok(expected))?;

        Ok(())
    }
}