datatypes/types/
json_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::BTreeMap;
16use std::fmt::{Debug, Display, Formatter};
17use std::str::FromStr;
18use std::sync::{Arc, LazyLock};
19
20use arrow::datatypes::DataType as ArrowDataType;
21use common_base::bytes::Bytes;
22use regex::{Captures, Regex};
23use serde::{Deserialize, Serialize};
24use snafu::ResultExt;
25
26use crate::data_type::DataType;
27use crate::error::{
28    DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
29};
30use crate::prelude::ConcreteDataType;
31use crate::scalars::ScalarVectorBuilder;
32use crate::type_id::LogicalTypeId;
33use crate::types::{ListType, StructField, StructType};
34use crate::value::Value;
35use crate::vectors::json::builder::JsonVectorBuilder;
36use crate::vectors::{BinaryVectorBuilder, MutableVector};
37
/// Type name reported by `JsonType::name` when the format is `JsonFormat::Jsonb`.
38pub const JSON_TYPE_NAME: &str = "Json";
/// Name of the single field in the struct type built by `plain_json_struct_type`.
39const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
/// Metadata key that `plain_json_struct_type` sets to `true` on that single field.
40const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";
41
/// Field name to field type mapping of a JSON object. Being a `BTreeMap`, it iterates in
/// key order.
42pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
43
/// Numeric flavour of a JSON number.
///
/// `U64`, `I64` and `F64` are converted to the `uint64`, `int64` and `float64` concrete data
/// types respectively when a [JsonNativeType] is turned into a `ConcreteDataType`.
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
45pub enum JsonNumberType {
46    U64,
47    I64,
48    F64,
49}
50
/// Shape of a JSON value in the "native" (non-jsonb) representation.
///
/// - `Null`, `Bool`, `String`: scalar values;
/// - `Number`: a number, together with its [JsonNumberType];
/// - `Array`: an array whose items all share the boxed item type;
/// - `Object`: an object, described by the types of its named fields.
51#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
52pub enum JsonNativeType {
53    Null,
54    Bool,
55    Number(JsonNumberType),
56    String,
57    Array(Box<JsonNativeType>),
58    Object(JsonObjectType),
59}
60
61impl JsonNativeType {
62    pub fn is_null(&self) -> bool {
63        matches!(self, JsonNativeType::Null)
64    }
65
66    pub fn u64() -> Self {
67        Self::Number(JsonNumberType::U64)
68    }
69
70    pub fn i64() -> Self {
71        Self::Number(JsonNumberType::I64)
72    }
73
74    pub fn f64() -> Self {
75        Self::Number(JsonNumberType::F64)
76    }
77}
78
79impl From<&JsonNativeType> for ConcreteDataType {
80    fn from(value: &JsonNativeType) -> Self {
81        match value {
82            JsonNativeType::Null => ConcreteDataType::null_datatype(),
83            JsonNativeType::Bool => ConcreteDataType::boolean_datatype(),
84            JsonNativeType::Number(JsonNumberType::U64) => ConcreteDataType::uint64_datatype(),
85            JsonNativeType::Number(JsonNumberType::I64) => ConcreteDataType::int64_datatype(),
86            JsonNativeType::Number(JsonNumberType::F64) => ConcreteDataType::float64_datatype(),
87            JsonNativeType::String => ConcreteDataType::string_datatype(),
88            JsonNativeType::Array(item_type) => {
89                ConcreteDataType::List(ListType::new(Arc::new(item_type.as_ref().into())))
90            }
91            JsonNativeType::Object(object) => {
92                let fields = object
93                    .iter()
94                    .map(|(type_name, field_type)| {
95                        StructField::new(type_name.clone(), field_type.into(), true)
96                    })
97                    .collect();
98                ConcreteDataType::Struct(StructType::new(Arc::new(fields)))
99            }
100        }
101    }
102}
103
104impl From<&ConcreteDataType> for JsonNativeType {
105    fn from(value: &ConcreteDataType) -> Self {
106        match value {
107            ConcreteDataType::Null(_) => JsonNativeType::Null,
108            ConcreteDataType::Boolean(_) => JsonNativeType::Bool,
109            ConcreteDataType::UInt64(_)
110            | ConcreteDataType::UInt32(_)
111            | ConcreteDataType::UInt16(_)
112            | ConcreteDataType::UInt8(_) => JsonNativeType::u64(),
113            ConcreteDataType::Int64(_)
114            | ConcreteDataType::Int32(_)
115            | ConcreteDataType::Int16(_)
116            | ConcreteDataType::Int8(_) => JsonNativeType::i64(),
117            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(),
118            ConcreteDataType::String(_) => JsonNativeType::String,
119            ConcreteDataType::List(list_type) => {
120                JsonNativeType::Array(Box::new(list_type.item_type().into()))
121            }
122            ConcreteDataType::Struct(struct_type) => JsonNativeType::Object(
123                struct_type
124                    .fields()
125                    .iter()
126                    .map(|field| (field.name().to_string(), field.data_type().into()))
127                    .collect(),
128            ),
129            ConcreteDataType::Json(json_type) => json_type.native_type().clone(),
130            _ => unreachable!(),
131        }
132    }
133}
134
135impl Display for JsonNativeType {
136    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
137        match self {
138            JsonNativeType::Null => write!(f, r#""<Null>""#),
139            JsonNativeType::Bool => write!(f, r#""<Bool>""#),
140            JsonNativeType::Number(_) => {
141                write!(f, r#""<Number>""#)
142            }
143            JsonNativeType::String => write!(f, r#""<String>""#),
144            JsonNativeType::Array(item_type) => {
145                write!(f, "[{}]", item_type)
146            }
147            JsonNativeType::Object(object) => {
148                write!(
149                    f,
150                    "{{{}}}",
151                    object
152                        .iter()
153                        .map(|(k, v)| format!(r#""{k}":{v}"#))
154                        .collect::<Vec<_>>()
155                        .join(",")
156                )
157            }
158        }
159    }
160}
161
/// How a JSON column is represented.
///
/// - `Jsonb` (the default): no type information is carried here;
/// - `Native`: the value is described by the boxed [JsonNativeType].
162#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
163pub enum JsonFormat {
164    #[default]
165    Jsonb,
166    Native(Box<JsonNativeType>),
167}
168
/// JsonType is a data type for JSON data.
///
/// How it is represented depends on `format`:
/// - with `JsonFormat::Jsonb` (the default) its Arrow type is `Binary` and its vectors are
///   built with the binary vector builder;
/// - with `JsonFormat::Native` its Arrow type is derived from the struct type of the native
///   JSON type, and its vectors are built with the JSON vector builder.
171#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
172pub struct JsonType {
173    pub format: JsonFormat,
174}
175
176impl JsonType {
177    pub fn new(format: JsonFormat) -> Self {
178        Self { format }
179    }
180
181    pub(crate) fn new_native(native: JsonNativeType) -> Self {
182        Self {
183            format: JsonFormat::Native(Box::new(native)),
184        }
185    }
186
187    pub fn is_native_type(&self) -> bool {
188        matches!(self.format, JsonFormat::Native(_))
189    }
190
191    pub fn native_type(&self) -> &JsonNativeType {
192        match &self.format {
193            JsonFormat::Jsonb => &JsonNativeType::String,
194            JsonFormat::Native(x) => x.as_ref(),
195        }
196    }
197
198    pub fn null() -> Self {
199        Self {
200            format: JsonFormat::Native(Box::new(JsonNativeType::Null)),
201        }
202    }
203
204    /// Make json type a struct type, by:
205    /// - if the json is an object, its entries are mapped to struct fields, obviously;
206    /// - if not, the json is one of bool, number, string or array, make it a special field
207    ///   (see [plain_json_struct_type]).
208    pub(crate) fn as_struct_type(&self) -> StructType {
209        match &self.format {
210            JsonFormat::Jsonb => StructType::default(),
211            JsonFormat::Native(inner) => match ConcreteDataType::from(inner.as_ref()) {
212                ConcreteDataType::Struct(t) => t.clone(),
213                x => plain_json_struct_type(x),
214            },
215        }
216    }
217
218    /// Try to merge this json type with others, error on datatype conflict.
219    pub fn merge(&mut self, other: &JsonType) -> Result<()> {
220        match (&self.format, &other.format) {
221            (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
222            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
223                let merged = merge(this.as_ref(), that.as_ref())?;
224                self.format = JsonFormat::Native(Box::new(merged));
225                Ok(())
226            }
227            _ => MergeJsonDatatypeSnafu {
228                reason: "json format not match",
229            }
230            .fail(),
231        }
232    }
233
234    /// Check if it can merge with `other` json type.
235    pub fn is_mergeable(&self, other: &JsonType) -> bool {
236        match (&self.format, &other.format) {
237            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
238            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
239                is_mergeable(this.as_ref(), that.as_ref())
240            }
241            _ => false,
242        }
243    }
244
245    /// Check if it includes all fields in `other` json type.
246    pub fn is_include(&self, other: &JsonType) -> bool {
247        match (&self.format, &other.format) {
248            (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
249            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
250                is_include(this.as_ref(), that.as_ref())
251            }
252            _ => false,
253        }
254    }
255}
256
257fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
258    fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
259        for (type_name, that_type) in that {
260            let Some(this_type) = this.get(type_name) else {
261                return false;
262            };
263            if !is_include(this_type, that_type) {
264                return false;
265            }
266        }
267        true
268    }
269
270    match (this, that) {
271        (this, that) if this == that => true,
272        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
273            is_include(this.as_ref(), that.as_ref())
274        }
275        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
276            is_include_object(this, that)
277        }
278        (_, JsonNativeType::Null) => true,
279        _ => false,
280    }
281}
282
283/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
284/// fixed name [JSON_PLAIN_FIELD_NAME] and with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"`.
285pub(crate) fn plain_json_struct_type(item_type: ConcreteDataType) -> StructType {
286    let mut field = StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), item_type, true);
287    field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
288    StructType::new(Arc::new(vec![field]))
289}
290
291fn is_mergeable(this: &JsonNativeType, that: &JsonNativeType) -> bool {
292    fn is_mergeable_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
293        for (type_name, that_type) in that {
294            if let Some(this_type) = this.get(type_name)
295                && !is_mergeable(this_type, that_type)
296            {
297                return false;
298            }
299        }
300        true
301    }
302
303    match (this, that) {
304        (this, that) if this == that => true,
305        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
306            is_mergeable(this.as_ref(), that.as_ref())
307        }
308        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
309            is_mergeable_object(this, that)
310        }
311        (JsonNativeType::Null, _) | (_, JsonNativeType::Null) => true,
312        _ => false,
313    }
314}
315
316fn merge(this: &JsonNativeType, that: &JsonNativeType) -> Result<JsonNativeType> {
317    fn merge_object(this: &JsonObjectType, that: &JsonObjectType) -> Result<JsonObjectType> {
318        let mut this = this.clone();
319        // merge "that" into "this" directly:
320        for (type_name, that_type) in that {
321            if let Some(this_type) = this.get_mut(type_name) {
322                let merged_type = merge(this_type, that_type)?;
323                *this_type = merged_type;
324            } else {
325                this.insert(type_name.clone(), that_type.clone());
326            }
327        }
328        Ok(this)
329    }
330
331    match (this, that) {
332        (this, that) if this == that => Ok(this.clone()),
333        (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
334            merge(this.as_ref(), that.as_ref()).map(|x| JsonNativeType::Array(Box::new(x)))
335        }
336        (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
337            merge_object(this, that).map(JsonNativeType::Object)
338        }
339        (JsonNativeType::Null, x) | (x, JsonNativeType::Null) => Ok(x.clone()),
340        _ => MergeJsonDatatypeSnafu {
341            reason: format!("datatypes have conflict, this: {this}, that: {that}"),
342        }
343        .fail(),
344    }
345}
346
347impl DataType for JsonType {
348    fn name(&self) -> String {
349        match &self.format {
350            JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
351            JsonFormat::Native(x) => format!("Json<{x}>"),
352        }
353    }
354
355    fn logical_type_id(&self) -> LogicalTypeId {
356        LogicalTypeId::Json
357    }
358
359    fn default_value(&self) -> Value {
360        Bytes::default().into()
361    }
362
363    fn as_arrow_type(&self) -> ArrowDataType {
364        match self.format {
365            JsonFormat::Jsonb => ArrowDataType::Binary,
366            JsonFormat::Native(_) => self.as_struct_type().as_arrow_type(),
367        }
368    }
369
370    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
371        match &self.format {
372            JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
373            JsonFormat::Native(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
374        }
375    }
376
377    fn try_cast(&self, from: Value) -> Option<Value> {
378        match from {
379            Value::Binary(v) => Some(Value::Binary(v)),
380            _ => None,
381        }
382    }
383}
384
385impl Display for JsonType {
386    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
387        write!(f, "{}", self.name())
388    }
389}
390
391/// Converts a json type value to string
392pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
393    if val.is_empty() {
394        return Ok("".to_string());
395    }
396    match jsonb::from_slice(val) {
397        Ok(jsonb_value) => {
398            let serialized = jsonb_value.to_string();
399            fix_unicode_point(&serialized)
400        }
401        Err(e) => InvalidJsonbSnafu { error: e }.fail(),
402    }
403}
404
405/// Converts a json type value to serde_json::Value
406pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
407    let json_string = jsonb_to_string(val)?;
408    serde_json::Value::from_str(&json_string).context(DeserializeSnafu { json: json_string })
409}
410
411/// Normalizes a JSON string by converting Rust-style Unicode escape sequences to JSON-compatible format.
412///
413/// The input is scanned for Rust-style Unicode code
414/// point escapes of the form `\\u{H...}` (a backslash, `u`, an opening brace,
415/// followed by 1–6 hexadecimal digits, and a closing brace). Each such escape is
416/// converted into JSON-compatible UTF‑16 escape sequences:
417///
418/// - For code points in the Basic Multilingual Plane (≤ `0xFFFF`), the escape is
419///   converted to a single JSON `\\uXXXX` sequence with four uppercase hex digits.
420/// - For code points above `0xFFFF` and less than Unicode max code point `0x10FFFF`,
421///   the code point is encoded as a UTF‑16 surrogate pair and emitted as two consecutive
422///   `\\uXXXX` sequences (as JSON format required).
423///
424/// After this normalization, the function returns the normalized string
425fn fix_unicode_point(json: &str) -> Result<String> {
426    static UNICODE_CODE_POINT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
427        // Match literal "\u{...}" sequences, capturing 1–6 (code point range) hex digits
428        // inside braces.
429        Regex::new(r"\\u\{([0-9a-fA-F]{1,6})}").unwrap_or_else(|e| panic!("{}", e))
430    });
431
432    let v = UNICODE_CODE_POINT_PATTERN.replace_all(json, |caps: &Captures| {
433        // Extract the hex payload (without braces) and parse to a code point.
434        let hex = &caps[1];
435        let Ok(code) = u32::from_str_radix(hex, 16) else {
436            // On parse failure, leave the original escape sequence unchanged.
437            return caps[0].to_string();
438        };
439
440        if code <= 0xFFFF {
441            // Basic Multilingual Plane: JSON can represent this directly as \uXXXX.
442            format!("\\u{:04X}", code)
443        } else if code > 0x10FFFF {
444            // Beyond max Unicode code point
445            caps[0].to_string()
446        } else {
447            // Supplementary planes: JSON needs UTF-16 surrogate pairs.
448            // Convert the code point to a 20-bit value.
449            let code = code - 0x10000;
450
451            // High surrogate: top 10 bits, offset by 0xD800.
452            let high = 0xD800 + ((code >> 10) & 0x3FF);
453
454            // Low surrogate: bottom 10 bits, offset by 0xDC00.
455            let low = 0xDC00 + (code & 0x3FF);
456
457            // Emit two \uXXXX escapes in sequence.
458            format!("\\u{:04X}\\u{:04X}", high, low)
459        }
460    });
461    Ok(v.to_string())
462}
463
464/// Parses a string to a json type value
465pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
466    jsonb::parse_value(s.as_bytes())
467        .map_err(|_| InvalidJsonSnafu { value: s }.build())
468        .map(|json| json.to_vec())
469}
470
#[cfg(test)]
mod tests {
    use super::*;
    use crate::json::JsonStructureSettings;

    /// Shorthand for an object type built from `(field name, field type)` pairs.
    fn object<const N: usize>(fields: [(&str, JsonNativeType); N]) -> JsonNativeType {
        let entries = fields
            .into_iter()
            .map(|(name, field_type)| (name.to_string(), field_type));
        JsonNativeType::Object(entries.collect())
    }

    /// Shorthand for an array type with the given item type.
    fn array(item_type: JsonNativeType) -> JsonNativeType {
        JsonNativeType::Array(Box::new(item_type))
    }

    #[test]
    fn test_fix_unicode_point() -> Result<()> {
        // Inputs paired with the output expected after normalization.
        let converted = [
            (r#"{"data": "simple ascii"}"#, r#"{"data": "simple ascii"}"#),
            (
                r#"{"data":"Greek sigma: \u{03a3}"}"#,
                r#"{"data":"Greek sigma: \u03A3"}"#,
            ),
            (
                r#"{"data":"Joker card: \u{1f0df}"}"#,
                r#"{"data":"Joker card: \uD83C\uDCDF"}"#,
            ),
            (
                r#"{"data":"BMP boundary: \u{ffff}"}"#,
                r#"{"data":"BMP boundary: \uFFFF"}"#,
            ),
            (
                r#"{"data":"Supplementary min: \u{10000}"}"#,
                r#"{"data":"Supplementary min: \uD800\uDC00"}"#,
            ),
            (
                r#"{"data":"Supplementary max: \u{10ffff}"}"#,
                r#"{"data":"Supplementary max: \uDBFF\uDFFF"}"#,
            ),
        ];
        // Invalid escapes must come back exactly as they went in.
        let untouched = [
            r#"{"data": "Invalid hex: \u{gggg}"}"#,
            r#"{"data": "Empty braces: \u{}"}"#,
            r#"{"data": "Out of range: \u{1100000}"}"#,
        ];

        let cases = converted
            .into_iter()
            .chain(untouched.into_iter().map(|input| (input, input)));
        for (input, expect) in cases {
            let v = fix_unicode_point(input)?;
            assert_eq!(v, expect);
        }

        Ok(())
    }

    #[test]
    fn test_json_type_include() {
        /// Checks `is_include(this, that)` against the expectation of every case.
        fn assert_includes(this: &JsonNativeType, cases: Vec<(JsonNativeType, bool)>) {
            for (that, expected) in cases {
                assert_eq!(is_include(this, &that), expected);
            }
        }

        let f64_array = array(JsonNativeType::f64());
        let bar_only = object([("bar", JsonNativeType::i64())]);
        // `{"nested": {"a": <a>}, "bar": i64}`
        let nested = |a: JsonNativeType| {
            object([
                ("nested", object([("a", a)])),
                ("bar", JsonNativeType::i64()),
            ])
        };
        // `{"b": {"c": <leaf>}}`
        let b_c = |leaf: JsonNativeType| object([("b", object([("c", leaf)]))]);

        assert_includes(
            &JsonNativeType::Null,
            vec![(JsonNativeType::Null, true), (JsonNativeType::Bool, false)],
        );

        assert_includes(
            &JsonNativeType::Bool,
            vec![
                (JsonNativeType::Null, true),
                (JsonNativeType::Bool, true),
                (JsonNativeType::u64(), false),
            ],
        );

        assert_includes(
            &JsonNativeType::u64(),
            vec![
                (JsonNativeType::Null, true),
                (JsonNativeType::u64(), true),
                (JsonNativeType::String, false),
            ],
        );

        assert_includes(
            &JsonNativeType::String,
            vec![
                (JsonNativeType::Null, true),
                (JsonNativeType::String, true),
                (f64_array.clone(), false),
            ],
        );

        assert_includes(
            &f64_array,
            vec![
                (JsonNativeType::Null, true),
                (array(JsonNativeType::Null), true),
                (f64_array.clone(), true),
                (JsonNativeType::String, false),
                (JsonNativeType::Object(JsonObjectType::new()), false),
            ],
        );

        let simple_json_object = object([("foo", JsonNativeType::String)]);
        assert_includes(
            &simple_json_object,
            vec![
                (JsonNativeType::Null, true),
                (simple_json_object.clone(), true),
                (JsonNativeType::i64(), false),
                (bar_only.clone(), false),
            ],
        );

        // `{"nested": {"a": {"b": {"c": String}}}, "bar": i64}`
        let complex_json_object = nested(b_c(JsonNativeType::String));
        assert_includes(
            &complex_json_object,
            vec![
                (JsonNativeType::Null, true),
                (JsonNativeType::String, false),
                (complex_json_object.clone(), true),
                (bar_only.clone(), true),
                (nested(JsonNativeType::Null), true),
                (nested(JsonNativeType::String), false),
                (nested(object([("b", JsonNativeType::String)])), false),
                (nested(b_c(JsonNativeType::Null)), true),
                (nested(b_c(JsonNativeType::Bool)), false),
                (
                    object([("nested", object([("a", b_c(JsonNativeType::String))]))]),
                    true,
                ),
            ],
        );
    }

    #[test]
    fn test_merge_json_type() -> Result<()> {
        /// Encodes `json` as a structured value, merges its type into `json_type`, and checks
        /// either the resulting type name (`Ok`) or the error message (`Err`).
        fn merge_and_check(
            json: &str,
            json_type: &mut JsonType,
            expected: std::result::Result<&str, &str>,
        ) -> Result<()> {
            let parsed: serde_json::Value = serde_json::from_str(json).unwrap();

            let settings = JsonStructureSettings::Structured(None);
            let encoded = settings.encode(parsed)?;
            let encoded_type = encoded.data_type();
            let Some(incoming) = encoded_type.as_json() else {
                unreachable!()
            };

            match (json_type.merge(incoming), expected) {
                (Ok(()), Ok(type_name)) => {
                    assert_eq!(json_type.name(), type_name);
                    assert!(json_type.is_mergeable(incoming));
                }
                (Err(err), Err(message)) => {
                    assert_eq!(err.to_string(), message);
                    assert!(!json_type.is_mergeable(incoming));
                }
                _ => unreachable!(),
            }
            Ok(())
        }

        let mut json_type = JsonType::new_native(JsonNativeType::Null);

        // The steps run in order against the same, accumulating json type.
        let steps: [(&str, std::result::Result<&str, &str>); 7] = [
            // can merge with json object:
            (
                r#"{"hello": "world", "list": [1, 2, 3], "object": {"a": 1}}"#,
                Ok(r#"Json<{"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}>"#),
            ),
            // cannot merge with other non-object json values:
            (
                r#""s""#,
                Err(
                    r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: "<String>""#,
                ),
            ),
            (
                "1",
                Err(
                    r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: "<Number>""#,
                ),
            ),
            (
                "[1]",
                Err(
                    r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: ["<Number>"]"#,
                ),
            ),
            // cannot merge with other json object with conflict field datatype:
            (
                r#"{"hello": 1, "float": 0.123, "no": 42}"#,
                Err(
                    r#"Failed to merge JSON datatype: datatypes have conflict, this: "<String>", that: "<Number>""#,
                ),
            ),
            // can merge with another json object:
            (
                r#"{"hello": "greptime", "float": 0.123, "int": 42}"#,
                Ok(
                    r#"Json<{"float":"<Number>","hello":"<String>","int":"<Number>","list":["<Number>"],"object":{"a":"<Number>"}}>"#,
                ),
            ),
            // can merge with some complex nested json object:
            (
                r#"{"list": [4], "object": {"foo": "bar", "l": ["x"], "o": {"key": "value"}}, "float": 0.456, "int": 0}"#,
                Ok(
                    r#"Json<{"float":"<Number>","hello":"<String>","int":"<Number>","list":["<Number>"],"object":{"a":"<Number>","foo":"<String>","l":["<String>"],"o":{"key":"<String>"}}}>"#,
                ),
            ),
        ];
        for (json, expected) in steps {
            merge_and_check(json, &mut json_type, expected)?;
        }

        Ok(())
    }
}