Skip to main content

datatypes/extension/
json.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow_schema::extension::ExtensionType;
18use arrow_schema::{ArrowError, DataType, FieldRef};
19use serde::{Deserialize, Serialize};
20
21use crate::json::JsonSettings;
22
23#[derive(Debug, Clone, Serialize, Deserialize, Default)]
24pub struct JsonMetadata {
25    /// JSON2 settings stored in column schema metadata and represented through
26    /// Arrow extension metadata.
27    pub json_settings: Option<JsonSettings>,
28}
29
30#[derive(Debug, Clone)]
31pub struct JsonExtensionType(Arc<JsonMetadata>);
32
33impl JsonExtensionType {
34    pub fn new(metadata: Arc<JsonMetadata>) -> Self {
35        JsonExtensionType(metadata)
36    }
37}
38
39impl ExtensionType for JsonExtensionType {
40    const NAME: &'static str = "greptime.json";
41    type Metadata = Arc<JsonMetadata>;
42
43    fn metadata(&self) -> &Self::Metadata {
44        &self.0
45    }
46
47    fn serialize_metadata(&self) -> Option<String> {
48        serde_json::to_string(self.metadata()).ok()
49    }
50
51    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
52        if let Some(metadata) = metadata {
53            let metadata = serde_json::from_str(metadata).map_err(|e| {
54                ArrowError::ParseError(format!("Failed to deserialize JSON metadata: {}", e))
55            })?;
56            Ok(Arc::new(metadata))
57        } else {
58            Ok(Arc::new(JsonMetadata::default()))
59        }
60    }
61
62    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
63        match data_type {
64            // object
65            DataType::Struct(_)
66            // array
67            | DataType::List(_)
68            | DataType::ListView(_)
69            | DataType::LargeList(_)
70            | DataType::LargeListView(_)
71            // string
72            | DataType::Utf8
73            | DataType::Utf8View
74            | DataType::LargeUtf8
75            // number
76            | DataType::Int8
77            | DataType::Int16
78            | DataType::Int32
79            | DataType::Int64
80            | DataType::UInt8
81            | DataType::UInt16
82            | DataType::UInt32
83            | DataType::UInt64
84            | DataType::Float32
85            | DataType::Float64
86            // boolean
87            | DataType::Boolean
88            // null
89            | DataType::Null
90            // legacy json type
91            | DataType::Binary => Ok(()),
92            dt => Err(ArrowError::SchemaError(format!(
93                "Unexpected data type {dt}"
94            ))),
95        }
96    }
97
98    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
99        let json = Self(metadata);
100        json.supports_data_type(data_type)?;
101        Ok(json)
102    }
103}
104
105/// Check if this field is to be treated as json extension type.
106pub fn is_json_extension_type(field: &FieldRef) -> bool {
107    field.extension_type_name() == Some(JsonExtensionType::NAME)
108}
109
110/// Check if this field is a structured JSON field.
111///
112/// Legacy JSONB columns may carry JSON extension metadata due to old metadata versions, but their
113/// physical Arrow type is still Binary. They must not enter structured JSON alignment paths.
114pub fn is_structured_json_field(field: &FieldRef) -> bool {
115    is_json_extension_type(field) && matches!(field.data_type(), DataType::Struct(_))
116}