common_macro/row/
utils.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16
17use greptime_proto::v1::column_data_type_extension::TypeExt;
18use greptime_proto::v1::{ColumnDataType, ColumnDataTypeExtension, JsonTypeExtension};
19use once_cell::sync::Lazy;
20use quote::format_ident;
21use syn::{
22    AngleBracketedGenericArguments, Data, DataStruct, Fields, FieldsNamed, GenericArgument, Ident,
23    Path, PathArguments, PathSegment, Result, Type, TypePath, TypeReference,
24};
25
26use crate::row::attribute::{ColumnAttribute, find_column_attribute, parse_column_attribute};
27
28static SEMANTIC_TYPES: Lazy<HashMap<&'static str, SemanticType>> = Lazy::new(|| {
29    HashMap::from([
30        ("field", SemanticType::Field),
31        ("tag", SemanticType::Tag),
32        ("timestamp", SemanticType::Timestamp),
33    ])
34});
35
36static DATATYPE_TO_COLUMN_DATA_TYPE: Lazy<HashMap<&'static str, ColumnDataTypeWithExtension>> =
37    Lazy::new(|| {
38        HashMap::from([
39            // Timestamp
40            ("timestampsecond", ColumnDataType::TimestampSecond.into()),
41            (
42                "timestampmillisecond",
43                ColumnDataType::TimestampMillisecond.into(),
44            ),
45            (
46                "timestampmicrosecond",
47                ColumnDataType::TimestampMicrosecond.into(),
48            ),
49            (
50                "timestampnanosecond",
51                ColumnDataType::TimestampNanosecond.into(),
52            ),
53            // Date
54            ("date", ColumnDataType::Date.into()),
55            ("datetime", ColumnDataType::Datetime.into()),
56            // Time
57            ("timesecond", ColumnDataType::TimeSecond.into()),
58            ("timemillisecond", ColumnDataType::TimeMillisecond.into()),
59            ("timemicrosecond", ColumnDataType::TimeMicrosecond.into()),
60            ("timenanosecond", ColumnDataType::TimeNanosecond.into()),
61            // Others
62            ("string", ColumnDataType::String.into()),
63            ("json", ColumnDataTypeWithExtension::json()),
64            // TODO(weny): support vector and decimal128.
65        ])
66    });
67
68static PRIMITIVE_TYPE_TO_COLUMN_DATA_TYPE: Lazy<HashMap<&'static str, ColumnDataType>> =
69    Lazy::new(|| {
70        HashMap::from([
71            ("i8", ColumnDataType::Int8),
72            ("i16", ColumnDataType::Int16),
73            ("i32", ColumnDataType::Int32),
74            ("i64", ColumnDataType::Int64),
75            ("u8", ColumnDataType::Uint8),
76            ("u16", ColumnDataType::Uint16),
77            ("u32", ColumnDataType::Uint32),
78            ("u64", ColumnDataType::Uint64),
79            ("f32", ColumnDataType::Float32),
80            ("f64", ColumnDataType::Float64),
81            ("bool", ColumnDataType::Boolean),
82        ])
83    });
84
85/// Extract the fields of a struct.
86pub(crate) fn extract_struct_fields(data: &Data) -> Option<&FieldsNamed> {
87    let Data::Struct(DataStruct {
88        fields: Fields::Named(named),
89        ..
90    }) = &data
91    else {
92        return None;
93    };
94
95    Some(named)
96}
97
98/// Convert an identifier to a semantic type.
99pub(crate) fn semantic_type_from_str(ident: &str) -> Option<SemanticType> {
100    // Ignores the case of the identifier.
101    let lowercase = ident.to_lowercase();
102    let lowercase_str = lowercase.as_str();
103    SEMANTIC_TYPES.get(lowercase_str).cloned()
104}
105
106/// Convert a field type to a column data type.
107pub(crate) fn column_data_type_from_str(ident: &str) -> Option<ColumnDataTypeWithExtension> {
108    // Ignores the case of the identifier.
109    let lowercase = ident.to_lowercase();
110    let lowercase_str = lowercase.as_str();
111    DATATYPE_TO_COLUMN_DATA_TYPE.get(lowercase_str).cloned()
112}
113
/// Semantic role of a column, mirroring the variants of `greptime_proto::v1::SemanticType`
/// handled by `convert_semantic_type_to_proto_semantic_type`.
#[derive(Default, Clone, Copy)]
pub(crate) enum SemanticType {
    /// The default role.
    #[default]
    Field,
    /// Selected by the name `"tag"`.
    Tag,
    /// Selected by the name `"timestamp"`.
    Timestamp,
}
121
122pub(crate) enum FieldType<'a> {
123    Required(&'a Type),
124    Optional(&'a Type),
125}
126
127impl FieldType<'_> {
128    pub(crate) fn is_optional(&self) -> bool {
129        matches!(self, FieldType::Optional(_))
130    }
131
132    pub(crate) fn extract_ident(&self) -> Option<&Ident> {
133        match self {
134            FieldType::Required(ty) => extract_ident_from_type(ty),
135            FieldType::Optional(ty) => extract_ident_from_type(ty),
136        }
137    }
138}
139
140fn field_type(ty: &Type) -> FieldType<'_> {
141    if let Type::Reference(TypeReference { elem, .. }) = ty {
142        return field_type(elem);
143    }
144
145    if let Type::Path(TypePath {
146        qself: _,
147        path: Path {
148            leading_colon,
149            segments,
150        },
151    }) = ty
152        && leading_colon.is_none()
153        && segments.len() == 1
154        && let Some(PathSegment {
155            ident,
156            arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments { args, .. }),
157        }) = segments.first()
158        && let (1, Some(GenericArgument::Type(t))) = (args.len(), args.first())
159        && ident == "Option"
160    {
161        return FieldType::Optional(t);
162    }
163
164    FieldType::Required(ty)
165}
166
167fn extract_ident_from_type(ty: &Type) -> Option<&Ident> {
168    match ty {
169        Type::Path(TypePath { qself: None, path }) => path.get_ident(),
170        Type::Reference(type_ref) => extract_ident_from_type(&type_ref.elem),
171        Type::Group(type_group) => extract_ident_from_type(&type_group.elem),
172        _ => None,
173    }
174}
175
176/// Convert a semantic type to a proto semantic type.
177pub(crate) fn convert_semantic_type_to_proto_semantic_type(
178    semantic_type: SemanticType,
179) -> greptime_proto::v1::SemanticType {
180    match semantic_type {
181        SemanticType::Field => greptime_proto::v1::SemanticType::Field,
182        SemanticType::Tag => greptime_proto::v1::SemanticType::Tag,
183        SemanticType::Timestamp => greptime_proto::v1::SemanticType::Timestamp,
184    }
185}
186
187#[derive(Debug, Clone, Copy)]
188pub(crate) struct ColumnDataTypeWithExtension {
189    pub(crate) data_type: ColumnDataType,
190    pub(crate) extension: Option<ColumnDataTypeExtension>,
191}
192
193impl ColumnDataTypeWithExtension {
194    pub(crate) fn json() -> Self {
195        Self {
196            data_type: ColumnDataType::Json,
197            extension: Some(ColumnDataTypeExtension {
198                type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
199            }),
200        }
201    }
202}
203
204impl From<ColumnDataType> for ColumnDataTypeWithExtension {
205    fn from(data_type: ColumnDataType) -> Self {
206        Self {
207            data_type,
208            extension: None,
209        }
210    }
211}
212
213pub(crate) struct ParsedField<'a> {
214    pub(crate) ident: &'a Ident,
215    pub(crate) field_type: FieldType<'a>,
216    pub(crate) column_data_type: Option<ColumnDataTypeWithExtension>,
217    pub(crate) column_attribute: ColumnAttribute,
218}
219
220/// Parse fields from fields named.
221pub(crate) fn parse_fields_from_fields_named(named: &FieldsNamed) -> Result<Vec<ParsedField<'_>>> {
222    Ok(named
223        .named
224        .iter()
225        .map(|field| {
226            let ident = field.ident.as_ref().expect("field must have an ident");
227            let field_type = field_type(&field.ty);
228            let column_data_type = field_type
229                .extract_ident()
230                .and_then(convert_primitive_type_to_column_data_type);
231            let column_attribute = find_column_attribute(&field.attrs)
232                .map(parse_column_attribute)
233                .transpose()?
234                .unwrap_or_default();
235
236            Ok(ParsedField {
237                ident,
238                field_type,
239                column_data_type,
240                column_attribute,
241            })
242        })
243        .collect::<Result<Vec<ParsedField<'_>>>>()?
244        .into_iter()
245        .filter(|field| !field.column_attribute.skip)
246        .collect::<Vec<_>>())
247}
248
249fn convert_primitive_type_to_column_data_type(
250    ident: &Ident,
251) -> Option<ColumnDataTypeWithExtension> {
252    PRIMITIVE_TYPE_TO_COLUMN_DATA_TYPE
253        .get(ident.to_string().as_str())
254        .cloned()
255        .map(ColumnDataTypeWithExtension::from)
256}
257
258/// Get the column data type from the attribute or the inferred column data type.
259pub(crate) fn get_column_data_type(
260    infer_column_data_type: &Option<ColumnDataTypeWithExtension>,
261    attribute: &ColumnAttribute,
262) -> Option<ColumnDataTypeWithExtension> {
263    attribute.datatype.or(*infer_column_data_type)
264}
265
266/// Convert a column data type to a value data ident.
267pub(crate) fn convert_column_data_type_to_value_data_ident(
268    column_data_type: &ColumnDataType,
269) -> Ident {
270    match column_data_type {
271        ColumnDataType::Boolean => format_ident!("BoolValue"),
272        ColumnDataType::Int8 => format_ident!("I8Value"),
273        ColumnDataType::Int16 => format_ident!("I16Value"),
274        ColumnDataType::Int32 => format_ident!("I32Value"),
275        ColumnDataType::Int64 => format_ident!("I64Value"),
276        ColumnDataType::Uint8 => format_ident!("U8Value"),
277        ColumnDataType::Uint16 => format_ident!("U16Value"),
278        ColumnDataType::Uint32 => format_ident!("U32Value"),
279        ColumnDataType::Uint64 => format_ident!("U64Value"),
280        ColumnDataType::Float32 => format_ident!("F32Value"),
281        ColumnDataType::Float64 => format_ident!("F64Value"),
282        ColumnDataType::Binary => format_ident!("BinaryValue"),
283        ColumnDataType::String => format_ident!("StringValue"),
284        ColumnDataType::Date => format_ident!("DateValue"),
285        ColumnDataType::Datetime => format_ident!("DatetimeValue"),
286        ColumnDataType::TimestampSecond => format_ident!("TimestampSecondValue"),
287        ColumnDataType::TimestampMillisecond => {
288            format_ident!("TimestampMillisecondValue")
289        }
290        ColumnDataType::TimestampMicrosecond => {
291            format_ident!("TimestampMicrosecondValue")
292        }
293        ColumnDataType::TimestampNanosecond => format_ident!("TimestampNanosecondValue"),
294        ColumnDataType::TimeSecond => format_ident!("TimeSecondValue"),
295        ColumnDataType::TimeMillisecond => format_ident!("TimeMillisecondValue"),
296        ColumnDataType::TimeMicrosecond => format_ident!("TimeMicrosecondValue"),
297        ColumnDataType::TimeNanosecond => format_ident!("TimeNanosecondValue"),
298        ColumnDataType::IntervalYearMonth => format_ident!("IntervalYearMonthValue"),
299        ColumnDataType::IntervalDayTime => format_ident!("IntervalDayTimeValue"),
300        ColumnDataType::IntervalMonthDayNano => {
301            format_ident!("IntervalMonthDayNanoValue")
302        }
303        ColumnDataType::Decimal128 => format_ident!("Decimal128Value"),
304        // Json is a special case, it is actually a string column.
305        ColumnDataType::Json => format_ident!("StringValue"),
306        ColumnDataType::Vector => format_ident!("VectorValue"),
307    }
308}