1use std::collections::BTreeMap;
16use std::fmt::{Debug, Display, Formatter};
17use std::str::FromStr;
18use std::sync::Arc;
19
20use arrow::datatypes::DataType as ArrowDataType;
21use common_base::bytes::Bytes;
22use serde::{Deserialize, Serialize};
23use snafu::ResultExt;
24
25use crate::data_type::DataType;
26use crate::error::{
27 DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
28};
29use crate::prelude::ConcreteDataType;
30use crate::scalars::ScalarVectorBuilder;
31use crate::type_id::LogicalTypeId;
32use crate::types::{ListType, StructField, StructType};
33use crate::value::Value;
34use crate::vectors::json::builder::JsonVectorBuilder;
35use crate::vectors::{BinaryVectorBuilder, MutableVector};
36
/// Type name reported for JSON values stored in the binary JSONB format.
pub const JSON_TYPE_NAME: &str = "Json";
/// Name of the single struct field that wraps a native JSON value which is not an object.
const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
/// Metadata key set on the wrapper field named by `JSON_PLAIN_FIELD_NAME`.
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";
40
41pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
42
43#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
44pub enum JsonNumberType {
45 U64,
46 I64,
47 F64,
48}
49
50#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
51pub enum JsonNativeType {
52 Null,
53 Bool,
54 Number(JsonNumberType),
55 String,
56 Array(Box<JsonNativeType>),
57 Object(JsonObjectType),
58}
59
60impl JsonNativeType {
61 pub fn is_null(&self) -> bool {
62 matches!(self, JsonNativeType::Null)
63 }
64
65 pub fn u64() -> Self {
66 Self::Number(JsonNumberType::U64)
67 }
68
69 pub fn i64() -> Self {
70 Self::Number(JsonNumberType::I64)
71 }
72
73 pub fn f64() -> Self {
74 Self::Number(JsonNumberType::F64)
75 }
76}
77
78impl From<&JsonNativeType> for ConcreteDataType {
79 fn from(value: &JsonNativeType) -> Self {
80 match value {
81 JsonNativeType::Null => ConcreteDataType::null_datatype(),
82 JsonNativeType::Bool => ConcreteDataType::boolean_datatype(),
83 JsonNativeType::Number(JsonNumberType::U64) => ConcreteDataType::uint64_datatype(),
84 JsonNativeType::Number(JsonNumberType::I64) => ConcreteDataType::int64_datatype(),
85 JsonNativeType::Number(JsonNumberType::F64) => ConcreteDataType::float64_datatype(),
86 JsonNativeType::String => ConcreteDataType::string_datatype(),
87 JsonNativeType::Array(item_type) => {
88 ConcreteDataType::List(ListType::new(Arc::new(item_type.as_ref().into())))
89 }
90 JsonNativeType::Object(object) => {
91 let fields = object
92 .iter()
93 .map(|(type_name, field_type)| {
94 StructField::new(type_name.clone(), field_type.into(), true)
95 })
96 .collect();
97 ConcreteDataType::Struct(StructType::new(Arc::new(fields)))
98 }
99 }
100 }
101}
102
103impl From<&ConcreteDataType> for JsonNativeType {
104 fn from(value: &ConcreteDataType) -> Self {
105 match value {
106 ConcreteDataType::Null(_) => JsonNativeType::Null,
107 ConcreteDataType::Boolean(_) => JsonNativeType::Bool,
108 ConcreteDataType::UInt64(_)
109 | ConcreteDataType::UInt32(_)
110 | ConcreteDataType::UInt16(_)
111 | ConcreteDataType::UInt8(_) => JsonNativeType::u64(),
112 ConcreteDataType::Int64(_)
113 | ConcreteDataType::Int32(_)
114 | ConcreteDataType::Int16(_)
115 | ConcreteDataType::Int8(_) => JsonNativeType::i64(),
116 ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(),
117 ConcreteDataType::String(_) => JsonNativeType::String,
118 ConcreteDataType::List(list_type) => {
119 JsonNativeType::Array(Box::new(list_type.item_type().into()))
120 }
121 ConcreteDataType::Struct(struct_type) => JsonNativeType::Object(
122 struct_type
123 .fields()
124 .iter()
125 .map(|field| (field.name().to_string(), field.data_type().into()))
126 .collect(),
127 ),
128 ConcreteDataType::Json(json_type) => json_type.native_type().clone(),
129 _ => unreachable!(),
130 }
131 }
132}
133
134impl Display for JsonNativeType {
135 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
136 match self {
137 JsonNativeType::Null => write!(f, r#""<Null>""#),
138 JsonNativeType::Bool => write!(f, r#""<Bool>""#),
139 JsonNativeType::Number(_) => {
140 write!(f, r#""<Number>""#)
141 }
142 JsonNativeType::String => write!(f, r#""<String>""#),
143 JsonNativeType::Array(item_type) => {
144 write!(f, "[{}]", item_type)
145 }
146 JsonNativeType::Object(object) => {
147 write!(
148 f,
149 "{{{}}}",
150 object
151 .iter()
152 .map(|(k, v)| format!(r#""{k}":{v}"#))
153 .collect::<Vec<_>>()
154 .join(",")
155 )
156 }
157 }
158 }
159}
160
161#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
162pub enum JsonFormat {
163 #[default]
164 Jsonb,
165 Native(Box<JsonNativeType>),
166}
167
168#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
171pub struct JsonType {
172 pub format: JsonFormat,
173}
174
175impl JsonType {
176 pub fn new(format: JsonFormat) -> Self {
177 Self { format }
178 }
179
180 pub(crate) fn new_native(native: JsonNativeType) -> Self {
181 Self {
182 format: JsonFormat::Native(Box::new(native)),
183 }
184 }
185
186 pub fn is_native_type(&self) -> bool {
187 matches!(self.format, JsonFormat::Native(_))
188 }
189
190 pub fn native_type(&self) -> &JsonNativeType {
191 match &self.format {
192 JsonFormat::Jsonb => &JsonNativeType::String,
193 JsonFormat::Native(x) => x.as_ref(),
194 }
195 }
196
197 pub fn null() -> Self {
198 Self {
199 format: JsonFormat::Native(Box::new(JsonNativeType::Null)),
200 }
201 }
202
203 pub(crate) fn as_struct_type(&self) -> StructType {
208 match &self.format {
209 JsonFormat::Jsonb => StructType::default(),
210 JsonFormat::Native(inner) => match ConcreteDataType::from(inner.as_ref()) {
211 ConcreteDataType::Struct(t) => t.clone(),
212 x => plain_json_struct_type(x),
213 },
214 }
215 }
216
217 pub fn merge(&mut self, other: &JsonType) -> Result<()> {
219 match (&self.format, &other.format) {
220 (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
221 (JsonFormat::Native(this), JsonFormat::Native(that)) => {
222 let merged = merge(this.as_ref(), that.as_ref())?;
223 self.format = JsonFormat::Native(Box::new(merged));
224 Ok(())
225 }
226 _ => MergeJsonDatatypeSnafu {
227 reason: "json format not match",
228 }
229 .fail(),
230 }
231 }
232
233 pub fn is_mergeable(&self, other: &JsonType) -> bool {
235 match (&self.format, &other.format) {
236 (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
237 (JsonFormat::Native(this), JsonFormat::Native(that)) => {
238 is_mergeable(this.as_ref(), that.as_ref())
239 }
240 _ => false,
241 }
242 }
243
244 pub fn is_include(&self, other: &JsonType) -> bool {
246 match (&self.format, &other.format) {
247 (JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
248 (JsonFormat::Native(this), JsonFormat::Native(that)) => {
249 is_include(this.as_ref(), that.as_ref())
250 }
251 _ => false,
252 }
253 }
254}
255
256fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
257 fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
258 for (type_name, that_type) in that {
259 let Some(this_type) = this.get(type_name) else {
260 return false;
261 };
262 if !is_include(this_type, that_type) {
263 return false;
264 }
265 }
266 true
267 }
268
269 match (this, that) {
270 (this, that) if this == that => true,
271 (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
272 is_include(this.as_ref(), that.as_ref())
273 }
274 (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
275 is_include_object(this, that)
276 }
277 (_, JsonNativeType::Null) => true,
278 _ => false,
279 }
280}
281
282pub(crate) fn plain_json_struct_type(item_type: ConcreteDataType) -> StructType {
285 let mut field = StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), item_type, true);
286 field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
287 StructType::new(Arc::new(vec![field]))
288}
289
290fn is_mergeable(this: &JsonNativeType, that: &JsonNativeType) -> bool {
291 fn is_mergeable_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
292 for (type_name, that_type) in that {
293 if let Some(this_type) = this.get(type_name)
294 && !is_mergeable(this_type, that_type)
295 {
296 return false;
297 }
298 }
299 true
300 }
301
302 match (this, that) {
303 (this, that) if this == that => true,
304 (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
305 is_mergeable(this.as_ref(), that.as_ref())
306 }
307 (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
308 is_mergeable_object(this, that)
309 }
310 (JsonNativeType::Null, _) | (_, JsonNativeType::Null) => true,
311 _ => false,
312 }
313}
314
315fn merge(this: &JsonNativeType, that: &JsonNativeType) -> Result<JsonNativeType> {
316 fn merge_object(this: &JsonObjectType, that: &JsonObjectType) -> Result<JsonObjectType> {
317 let mut this = this.clone();
318 for (type_name, that_type) in that {
320 if let Some(this_type) = this.get_mut(type_name) {
321 let merged_type = merge(this_type, that_type)?;
322 *this_type = merged_type;
323 } else {
324 this.insert(type_name.clone(), that_type.clone());
325 }
326 }
327 Ok(this)
328 }
329
330 match (this, that) {
331 (this, that) if this == that => Ok(this.clone()),
332 (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
333 merge(this.as_ref(), that.as_ref()).map(|x| JsonNativeType::Array(Box::new(x)))
334 }
335 (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
336 merge_object(this, that).map(JsonNativeType::Object)
337 }
338 (JsonNativeType::Null, x) | (x, JsonNativeType::Null) => Ok(x.clone()),
339 _ => MergeJsonDatatypeSnafu {
340 reason: format!("datatypes have conflict, this: {this}, that: {that}"),
341 }
342 .fail(),
343 }
344}
345
346impl DataType for JsonType {
347 fn name(&self) -> String {
348 match &self.format {
349 JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
350 JsonFormat::Native(x) => format!("Json<{x}>"),
351 }
352 }
353
354 fn logical_type_id(&self) -> LogicalTypeId {
355 LogicalTypeId::Json
356 }
357
358 fn default_value(&self) -> Value {
359 Bytes::default().into()
360 }
361
362 fn as_arrow_type(&self) -> ArrowDataType {
363 match self.format {
364 JsonFormat::Jsonb => ArrowDataType::Binary,
365 JsonFormat::Native(_) => self.as_struct_type().as_arrow_type(),
366 }
367 }
368
369 fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
370 match &self.format {
371 JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
372 JsonFormat::Native(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
373 }
374 }
375
376 fn try_cast(&self, from: Value) -> Option<Value> {
377 match from {
378 Value::Binary(v) => Some(Value::Binary(v)),
379 _ => None,
380 }
381 }
382}
383
384impl Display for JsonType {
385 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
386 write!(f, "{}", self.name())
387 }
388}
389
390pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
392 match jsonb::from_slice(val) {
393 Ok(jsonb_value) => {
394 let serialized = jsonb_value.to_string();
395 Ok(serialized)
396 }
397 Err(e) => InvalidJsonbSnafu { error: e }.fail(),
398 }
399}
400
401pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
403 let json_string = jsonb_to_string(val)?;
404 serde_json::Value::from_str(json_string.as_str())
405 .context(DeserializeSnafu { json: json_string })
406}
407
408pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
410 jsonb::parse_value(s.as_bytes())
411 .map_err(|_| InvalidJsonSnafu { value: s }.build())
412 .map(|json| json.to_vec())
413}
414
#[cfg(test)]
mod tests {
    use super::*;
    use crate::json::JsonStructureSettings;

    /// Builds an array type with the given item type.
    fn array_of(item: JsonNativeType) -> JsonNativeType {
        JsonNativeType::Array(Box::new(item))
    }

    /// Builds an object type from `(field name, field type)` pairs.
    fn object_of<const N: usize>(fields: [(&str, JsonNativeType); N]) -> JsonNativeType {
        JsonNativeType::Object(
            fields
                .into_iter()
                .map(|(name, field_type)| (name.to_string(), field_type))
                .collect(),
        )
    }

    /// Builds `{"nested": {"a": <a_type>}, "bar": i64}`.
    fn nested_with_bar(a_type: JsonNativeType) -> JsonNativeType {
        object_of([
            ("nested", object_of([("a", a_type)])),
            ("bar", JsonNativeType::i64()),
        ])
    }

    #[test]
    fn test_json_type_include() {
        let float_array = array_of(JsonNativeType::f64());
        let simple = object_of([("foo", JsonNativeType::String)]);
        let complex = nested_with_bar(object_of([(
            "b",
            object_of([("c", JsonNativeType::String)]),
        )]));

        // Each entry is `(this, that, whether this is expected to include that)`.
        let cases = [
            (JsonNativeType::Null, JsonNativeType::Null, true),
            (JsonNativeType::Null, JsonNativeType::Bool, false),
            (JsonNativeType::Bool, JsonNativeType::Null, true),
            (JsonNativeType::Bool, JsonNativeType::Bool, true),
            (JsonNativeType::Bool, JsonNativeType::u64(), false),
            (JsonNativeType::u64(), JsonNativeType::Null, true),
            (JsonNativeType::u64(), JsonNativeType::u64(), true),
            (JsonNativeType::u64(), JsonNativeType::String, false),
            (JsonNativeType::String, JsonNativeType::Null, true),
            (JsonNativeType::String, JsonNativeType::String, true),
            (JsonNativeType::String, float_array.clone(), false),
            (float_array.clone(), JsonNativeType::Null, true),
            (float_array.clone(), array_of(JsonNativeType::Null), true),
            (float_array.clone(), float_array.clone(), true),
            (float_array.clone(), JsonNativeType::String, false),
            (float_array.clone(), object_of([]), false),
            (simple.clone(), JsonNativeType::Null, true),
            (simple.clone(), simple.clone(), true),
            (simple.clone(), JsonNativeType::i64(), false),
            (
                simple.clone(),
                object_of([("bar", JsonNativeType::i64())]),
                false,
            ),
            (complex.clone(), JsonNativeType::Null, true),
            (complex.clone(), JsonNativeType::String, false),
            (complex.clone(), complex.clone(), true),
            (
                complex.clone(),
                object_of([("bar", JsonNativeType::i64())]),
                true,
            ),
            (complex.clone(), nested_with_bar(JsonNativeType::Null), true),
            (
                complex.clone(),
                nested_with_bar(JsonNativeType::String),
                false,
            ),
            (
                complex.clone(),
                nested_with_bar(object_of([("b", JsonNativeType::String)])),
                false,
            ),
            (
                complex.clone(),
                nested_with_bar(object_of([(
                    "b",
                    object_of([("c", JsonNativeType::Null)]),
                )])),
                true,
            ),
            (
                complex.clone(),
                nested_with_bar(object_of([(
                    "b",
                    object_of([("c", JsonNativeType::Bool)]),
                )])),
                false,
            ),
            (
                complex.clone(),
                object_of([(
                    "nested",
                    object_of([(
                        "a",
                        object_of([("b", object_of([("c", JsonNativeType::String)]))]),
                    )]),
                )]),
                true,
            ),
        ];

        for (index, (this, that, expected)) in cases.iter().enumerate() {
            assert_eq!(
                is_include(this, that),
                *expected,
                "case #{index}: this = {this}, that = {that}"
            );
        }
    }

    #[test]
    fn test_merge_json_type() -> Result<()> {
        /// Encodes `json`, merges its type into `json_type`, and checks the
        /// outcome: on success the merged type name must equal the expected
        /// name, on failure the error text must equal the expected message.
        /// `is_mergeable` is checked against the same outcome.
        fn check_merge(
            json: &str,
            json_type: &mut JsonType,
            expected: std::result::Result<&str, &str>,
        ) -> Result<()> {
            let parsed: serde_json::Value = serde_json::from_str(json).unwrap();

            let encoded = JsonStructureSettings::Structured(None).encode(parsed)?;
            let encoded_type = encoded.data_type();
            let Some(other) = encoded_type.as_json() else {
                unreachable!()
            };

            match (json_type.merge(other), expected) {
                (Ok(()), Ok(expected_name)) => {
                    assert_eq!(json_type.name(), expected_name);
                    assert!(json_type.is_mergeable(other));
                }
                (Err(err), Err(expected_message)) => {
                    assert_eq!(err.to_string(), expected_message);
                    assert!(!json_type.is_mergeable(other));
                }
                _ => unreachable!(),
            }
            Ok(())
        }

        let json_type = &mut JsonType::new_native(JsonNativeType::Null);

        // A null type can merge with a JSON object.
        check_merge(
            r#"{
                "hello": "world",
                "list": [1, 2, 3],
                "object": {"a": 1}
            }"#,
            json_type,
            Ok(r#"Json<{"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}>"#),
        )?;

        // An object type cannot merge with a string, a number, or an array.
        let conflicts = [
            (
                r#""s""#,
                r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: "<String>""#,
            ),
            (
                "1",
                r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: "<Number>""#,
            ),
            (
                "[1]",
                r#"Failed to merge JSON datatype: datatypes have conflict, this: {"hello":"<String>","list":["<Number>"],"object":{"a":"<Number>"}}, that: ["<Number>"]"#,
            ),
        ];
        for (json, expected_message) in conflicts {
            check_merge(json, json_type, Err(expected_message))?;
        }

        // A shared field with a conflicting type makes the whole merge fail.
        check_merge(
            r#"{
                "hello": 1,
                "float": 0.123,
                "no": 42
            }"#,
            json_type,
            Err(
                r#"Failed to merge JSON datatype: datatypes have conflict, this: "<String>", that: "<Number>""#,
            ),
        )?;

        // New fields are added when the shared fields agree.
        check_merge(
            r#"{
                "hello": "greptime",
                "float": 0.123,
                "int": 42
            }"#,
            json_type,
            Ok(
                r#"Json<{"float":"<Number>","hello":"<String>","int":"<Number>","list":["<Number>"],"object":{"a":"<Number>"}}>"#,
            ),
        )?;

        // Nested objects and arrays are merged recursively.
        check_merge(
            r#"{
                "list": [4],
                "object": {"foo": "bar", "l": ["x"], "o": {"key": "value"}},
                "float": 0.456,
                "int": 0
            }"#,
            json_type,
            Ok(
                r#"Json<{"float":"<Number>","hello":"<String>","int":"<Number>","list":["<Number>"],"object":{"a":"<Number>","foo":"<String>","l":["<String>"],"o":{"key":"<String>"}}}>"#,
            ),
        )?;

        Ok(())
    }
}