datatypes/
data_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt;
16use std::sync::Arc;
17
18use arrow::datatypes::{
19    DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
20};
21use arrow_schema::DECIMAL_DEFAULT_SCALE;
22use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
23use common_time::interval::IntervalUnit;
24use common_time::timestamp::TimeUnit;
25use enum_dispatch::enum_dispatch;
26use paste::paste;
27use serde::{Deserialize, Serialize};
28
29use crate::error::{self, Error, Result};
30use crate::type_id::LogicalTypeId;
31use crate::types::{
32    BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
33    DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
34    Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType,
35    IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
36    StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
37    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
38    UInt8Type, UInt16Type, UInt32Type, UInt64Type, VectorType,
39};
40use crate::value::Value;
41use crate::vectors::MutableVector;
42
/// The set of concrete data types, one variant per kind of type.
///
/// Every variant wraps the type descriptor for that kind (for example
/// [`Int32Type`] or [`ListType`]). The `enum_dispatch(DataType)` attribute ties
/// this enum to the [`DataType`] trait.
///
/// NOTE: `PartialOrd` and `Ord` are derived, so the declaration order of the
/// variants below determines how values of different variants compare.
/// Reordering variants changes that ordering.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    Null(NullType),
    Boolean(BooleanType),

    // Numeric types:
    Int8(Int8Type),
    Int16(Int16Type),
    Int32(Int32Type),
    Int64(Int64Type),
    UInt8(UInt8Type),
    UInt16(UInt16Type),
    UInt32(UInt32Type),
    UInt64(UInt64Type),
    Float32(Float32Type),
    Float64(Float64Type),

    // Decimal128 type:
    Decimal128(Decimal128Type),

    // String types:
    Binary(BinaryType),
    String(StringType),

    // Date and time types:
    Date(DateType),
    Timestamp(TimestampType),
    Time(TimeType),

    // Duration type:
    Duration(DurationType),

    // Interval type:
    Interval(IntervalType),

    // Compound types:
    List(ListType),
    Dictionary(DictionaryType),
    Struct(StructType),

    // JSON type:
    Json(JsonType),

    // Vector type:
    Vector(VectorType),
}
90
91impl fmt::Display for ConcreteDataType {
92    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93        match self {
94            ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
95            ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
96            ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
97            ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
98            ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
99            ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
100            ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
101            ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
102            ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
103            ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
104            ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
105            ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
106            ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
107            ConcreteDataType::String(v) => write!(f, "{}", v.name()),
108            ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
109            ConcreteDataType::Timestamp(t) => match t {
110                TimestampType::Second(v) => write!(f, "{}", v.name()),
111                TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
112                TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
113                TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
114            },
115            ConcreteDataType::Time(t) => match t {
116                TimeType::Second(v) => write!(f, "{}", v.name()),
117                TimeType::Millisecond(v) => write!(f, "{}", v.name()),
118                TimeType::Microsecond(v) => write!(f, "{}", v.name()),
119                TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
120            },
121            ConcreteDataType::Interval(i) => match i {
122                IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
123                IntervalType::DayTime(v) => write!(f, "{}", v.name()),
124                IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
125            },
126            ConcreteDataType::Duration(d) => match d {
127                DurationType::Second(v) => write!(f, "{}", v.name()),
128                DurationType::Millisecond(v) => write!(f, "{}", v.name()),
129                DurationType::Microsecond(v) => write!(f, "{}", v.name()),
130                DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
131            },
132            ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
133            ConcreteDataType::List(v) => write!(f, "{}", v.name()),
134            ConcreteDataType::Struct(v) => write!(f, "{}", v.name()),
135            ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
136            ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
137            ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
138        }
139    }
140}
141
142// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
143// returning all these properties to the `DataType` trait
144impl ConcreteDataType {
145    pub fn is_float(&self) -> bool {
146        matches!(
147            self,
148            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
149        )
150    }
151
152    pub fn is_boolean(&self) -> bool {
153        matches!(self, ConcreteDataType::Boolean(_))
154    }
155
156    pub fn is_string(&self) -> bool {
157        matches!(self, ConcreteDataType::String(_))
158    }
159
160    pub fn is_stringifiable(&self) -> bool {
161        matches!(
162            self,
163            ConcreteDataType::String(_)
164                | ConcreteDataType::Date(_)
165                | ConcreteDataType::Timestamp(_)
166                | ConcreteDataType::Time(_)
167                | ConcreteDataType::Interval(_)
168                | ConcreteDataType::Duration(_)
169                | ConcreteDataType::Decimal128(_)
170                | ConcreteDataType::Binary(_)
171                | ConcreteDataType::Json(_)
172                | ConcreteDataType::Vector(_)
173        )
174    }
175
176    pub fn is_signed(&self) -> bool {
177        matches!(
178            self,
179            ConcreteDataType::Int8(_)
180                | ConcreteDataType::Int16(_)
181                | ConcreteDataType::Int32(_)
182                | ConcreteDataType::Int64(_)
183                | ConcreteDataType::Date(_)
184                | ConcreteDataType::Timestamp(_)
185                | ConcreteDataType::Time(_)
186                | ConcreteDataType::Interval(_)
187                | ConcreteDataType::Duration(_)
188                | ConcreteDataType::Decimal128(_)
189        )
190    }
191
192    pub fn is_unsigned(&self) -> bool {
193        matches!(
194            self,
195            ConcreteDataType::UInt8(_)
196                | ConcreteDataType::UInt16(_)
197                | ConcreteDataType::UInt32(_)
198                | ConcreteDataType::UInt64(_)
199        )
200    }
201
202    pub fn is_numeric(&self) -> bool {
203        matches!(
204            self,
205            ConcreteDataType::Int8(_)
206                | ConcreteDataType::Int16(_)
207                | ConcreteDataType::Int32(_)
208                | ConcreteDataType::Int64(_)
209                | ConcreteDataType::UInt8(_)
210                | ConcreteDataType::UInt16(_)
211                | ConcreteDataType::UInt32(_)
212                | ConcreteDataType::UInt64(_)
213                | ConcreteDataType::Float32(_)
214                | ConcreteDataType::Float64(_)
215        )
216    }
217
218    pub fn is_timestamp(&self) -> bool {
219        matches!(self, ConcreteDataType::Timestamp(_))
220    }
221
222    pub fn is_decimal(&self) -> bool {
223        matches!(self, ConcreteDataType::Decimal128(_))
224    }
225
226    pub fn is_json(&self) -> bool {
227        matches!(self, ConcreteDataType::Json(_))
228    }
229
230    pub fn is_vector(&self) -> bool {
231        matches!(self, ConcreteDataType::Vector(_))
232    }
233
234    pub fn numerics() -> Vec<ConcreteDataType> {
235        vec![
236            ConcreteDataType::int8_datatype(),
237            ConcreteDataType::int16_datatype(),
238            ConcreteDataType::int32_datatype(),
239            ConcreteDataType::int64_datatype(),
240            ConcreteDataType::uint8_datatype(),
241            ConcreteDataType::uint16_datatype(),
242            ConcreteDataType::uint32_datatype(),
243            ConcreteDataType::uint64_datatype(),
244            ConcreteDataType::float32_datatype(),
245            ConcreteDataType::float64_datatype(),
246        ]
247    }
248
249    pub fn unsigned_integers() -> Vec<ConcreteDataType> {
250        vec![
251            ConcreteDataType::uint8_datatype(),
252            ConcreteDataType::uint16_datatype(),
253            ConcreteDataType::uint32_datatype(),
254            ConcreteDataType::uint64_datatype(),
255        ]
256    }
257
258    pub fn timestamps() -> Vec<ConcreteDataType> {
259        vec![
260            ConcreteDataType::timestamp_second_datatype(),
261            ConcreteDataType::timestamp_millisecond_datatype(),
262            ConcreteDataType::timestamp_microsecond_datatype(),
263            ConcreteDataType::timestamp_nanosecond_datatype(),
264        ]
265    }
266
267    /// Convert arrow data type to [ConcreteDataType].
268    ///
269    /// # Panics
270    /// Panic if given arrow data type is not supported.
271    pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
272        ConcreteDataType::try_from(dt).expect("Unimplemented type")
273    }
274
275    pub fn is_null(&self) -> bool {
276        matches!(self, ConcreteDataType::Null(NullType))
277    }
278
279    pub(crate) fn is_struct(&self) -> bool {
280        matches!(self, ConcreteDataType::Struct(_))
281    }
282
283    /// Try to cast the type as a [`ListType`].
284    pub fn as_list(&self) -> Option<&ListType> {
285        match self {
286            ConcreteDataType::List(t) => Some(t),
287            _ => None,
288        }
289    }
290
291    pub fn as_struct(&self) -> Option<&StructType> {
292        match self {
293            ConcreteDataType::Struct(s) => Some(s),
294            _ => None,
295        }
296    }
297
298    /// Try to cast data type as a [`TimestampType`].
299    pub fn as_timestamp(&self) -> Option<TimestampType> {
300        match self {
301            ConcreteDataType::Timestamp(t) => Some(*t),
302            _ => None,
303        }
304    }
305
306    /// Try to get numeric precision, returns `None` if it's not numeric type
307    pub fn numeric_precision(&self) -> Option<u8> {
308        match self {
309            ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
310            ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
311            ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
312            ConcreteDataType::Int64(_) => Some(19),
313            ConcreteDataType::UInt64(_) => Some(20),
314            ConcreteDataType::Float32(_) => Some(12),
315            ConcreteDataType::Float64(_) => Some(22),
316            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
317            _ => None,
318        }
319    }
320
321    /// Try to get numeric scale, returns `None` if it's float or not numeric type
322    pub fn numeric_scale(&self) -> Option<i8> {
323        match self {
324            ConcreteDataType::Int8(_)
325            | ConcreteDataType::UInt8(_)
326            | ConcreteDataType::Int16(_)
327            | ConcreteDataType::UInt16(_)
328            | ConcreteDataType::Int32(_)
329            | ConcreteDataType::UInt32(_)
330            | ConcreteDataType::Int64(_)
331            | ConcreteDataType::UInt64(_) => Some(0),
332            ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
333            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
334            _ => None,
335        }
336    }
337
338    /// Try to cast data type as a [`TimeType`].
339    pub fn as_time(&self) -> Option<TimeType> {
340        match self {
341            ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
342            ConcreteDataType::Time(t) => Some(*t),
343            _ => None,
344        }
345    }
346
347    pub fn as_decimal128(&self) -> Option<Decimal128Type> {
348        match self {
349            ConcreteDataType::Decimal128(d) => Some(*d),
350            _ => None,
351        }
352    }
353
354    pub fn as_json(&self) -> Option<&JsonType> {
355        match self {
356            ConcreteDataType::Json(j) => Some(j),
357            _ => None,
358        }
359    }
360
361    pub fn as_vector(&self) -> Option<VectorType> {
362        match self {
363            ConcreteDataType::Vector(v) => Some(*v),
364            _ => None,
365        }
366    }
367
368    /// Checks if the data type can cast to another data type.
369    pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
370        match (self, to_type) {
371            (ConcreteDataType::Json(this), ConcreteDataType::Json(that)) => that.is_include(this),
372            _ => arrow::compute::can_cast_types(&self.as_arrow_type(), &to_type.as_arrow_type()),
373        }
374    }
375
376    /// Try to cast data type as a [`DurationType`].
377    pub fn as_duration(&self) -> Option<DurationType> {
378        match self {
379            ConcreteDataType::Duration(d) => Some(*d),
380            _ => None,
381        }
382    }
383
384    /// Return the datatype name in postgres type system
385    pub fn postgres_datatype_name(&self) -> &'static str {
386        match self {
387            &ConcreteDataType::Null(_) => "UNKNOWN",
388            &ConcreteDataType::Boolean(_) => "BOOL",
389            &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
390            &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
391            &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
392            &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
393            &ConcreteDataType::Float32(_) => "FLOAT4",
394            &ConcreteDataType::Float64(_) => "FLOAT8",
395            &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
396            &ConcreteDataType::String(_) => "VARCHAR",
397            &ConcreteDataType::Date(_) => "DATE",
398            &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
399            &ConcreteDataType::Time(_) => "TIME",
400            &ConcreteDataType::Interval(_) => "INTERVAL",
401            &ConcreteDataType::Decimal128(_) => "NUMERIC",
402            &ConcreteDataType::Json(_) => "JSON",
403            ConcreteDataType::List(list) => match list.item_type() {
404                &ConcreteDataType::Null(_) => "UNKNOWN",
405                &ConcreteDataType::Boolean(_) => "_BOOL",
406                &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
407                &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
408                &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
409                &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
410                &ConcreteDataType::Float32(_) => "_FLOAT4",
411                &ConcreteDataType::Float64(_) => "_FLOAT8",
412                &ConcreteDataType::Binary(_) => "_BYTEA",
413                &ConcreteDataType::String(_) => "_VARCHAR",
414                &ConcreteDataType::Date(_) => "_DATE",
415                &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
416                &ConcreteDataType::Time(_) => "_TIME",
417                &ConcreteDataType::Interval(_) => "_INTERVAL",
418                &ConcreteDataType::Decimal128(_) => "_NUMERIC",
419                &ConcreteDataType::Json(_) => "_JSON",
420                &ConcreteDataType::Duration(_)
421                | &ConcreteDataType::Dictionary(_)
422                | &ConcreteDataType::Vector(_)
423                | &ConcreteDataType::List(_)
424                | &ConcreteDataType::Struct(_) => "UNKNOWN",
425            },
426            &ConcreteDataType::Duration(_)
427            | &ConcreteDataType::Dictionary(_)
428            | &ConcreteDataType::Struct(_) => "UNKNOWN",
429        }
430    }
431}
432
433impl From<&ConcreteDataType> for ConcreteDataType {
434    fn from(t: &ConcreteDataType) -> Self {
435        t.clone()
436    }
437}
438
439impl TryFrom<&ArrowDataType> for ConcreteDataType {
440    type Error = Error;
441
442    fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
443        let concrete_type = match dt {
444            ArrowDataType::Null => Self::null_datatype(),
445            ArrowDataType::Boolean => Self::boolean_datatype(),
446            ArrowDataType::UInt8 => Self::uint8_datatype(),
447            ArrowDataType::UInt16 => Self::uint16_datatype(),
448            ArrowDataType::UInt32 => Self::uint32_datatype(),
449            ArrowDataType::UInt64 => Self::uint64_datatype(),
450            ArrowDataType::Int8 => Self::int8_datatype(),
451            ArrowDataType::Int16 => Self::int16_datatype(),
452            ArrowDataType::Int32 => Self::int32_datatype(),
453            ArrowDataType::Int64 => Self::int64_datatype(),
454            ArrowDataType::Float32 => Self::float32_datatype(),
455            ArrowDataType::Float64 => Self::float64_datatype(),
456            ArrowDataType::Date32 => Self::date_datatype(),
457            ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
458            ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
459            ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::BinaryView => {
460                Self::binary_datatype()
461            }
462            ArrowDataType::Utf8 | ArrowDataType::Utf8View => Self::string_datatype(),
463            ArrowDataType::LargeUtf8 => Self::large_string_datatype(),
464            ArrowDataType::List(field) => Self::List(ListType::new(Arc::new(
465                ConcreteDataType::from_arrow_type(field.data_type()),
466            ))),
467            ArrowDataType::Dictionary(key_type, value_type) => {
468                let key_type = ConcreteDataType::from_arrow_type(key_type);
469                let value_type = ConcreteDataType::from_arrow_type(value_type);
470                Self::Dictionary(DictionaryType::new(key_type, value_type))
471            }
472            ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
473            ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
474            ArrowDataType::Duration(u) => {
475                ConcreteDataType::Duration(DurationType::from_unit(u.into()))
476            }
477            ArrowDataType::Decimal128(precision, scale) => {
478                ConcreteDataType::decimal128_datatype(*precision, *scale)
479            }
480            ArrowDataType::Struct(fields) => ConcreteDataType::Struct(fields.try_into()?),
481            ArrowDataType::Float16
482            | ArrowDataType::Date64
483            | ArrowDataType::FixedSizeBinary(_)
484            | ArrowDataType::ListView(_)
485            | ArrowDataType::FixedSizeList(_, _)
486            | ArrowDataType::LargeList(_)
487            | ArrowDataType::LargeListView(_)
488            | ArrowDataType::Union(_, _)
489            | ArrowDataType::Decimal256(_, _)
490            | ArrowDataType::Map(_, _)
491            | ArrowDataType::RunEndEncoded(_, _)
492            | ArrowDataType::Decimal32(_, _)
493            | ArrowDataType::Decimal64(_, _) => {
494                return error::UnsupportedArrowTypeSnafu {
495                    arrow_type: dt.clone(),
496                }
497                .fail();
498            }
499        };
500
501        Ok(concrete_type)
502    }
503}
504
/// Generates one parameterless constructor on [`ConcreteDataType`] per listed variant.
///
/// For a variant `Foo` the generated function is `foo_datatype()` (the variant name
/// lowercased, followed by `_datatype`). It wraps the `Default` value of `FooType` in
/// `ConcreteDataType::Foo`.
macro_rules! impl_new_concrete_type_functions {
    ($($Variant:ident),+) => {
        paste! {
            impl ConcreteDataType {
                $(
                    pub fn [<$Variant:lower _datatype>]() -> Self {
                        Self::$Variant([<$Variant Type>]::default())
                    }
                )+
            }
        }
    };
}
518
// Generates the parameterless constructors for the variants listed below, e.g.
// `null_datatype()`, `boolean_datatype()`, `uint8_datatype()`, `string_datatype()`.
// Variants not listed here get hand-written constructors instead.
impl_new_concrete_type_functions!(
    Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
    Binary, Date, String, Json
);
523
524impl ConcreteDataType {
525    pub fn large_string_datatype() -> Self {
526        ConcreteDataType::String(StringType::large_utf8())
527    }
528
529    pub fn timestamp_second_datatype() -> Self {
530        ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
531    }
532
533    pub fn timestamp_millisecond_datatype() -> Self {
534        ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
535    }
536
537    pub fn timestamp_microsecond_datatype() -> Self {
538        ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
539    }
540
541    pub fn timestamp_nanosecond_datatype() -> Self {
542        ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
543    }
544
545    /// Returns the time data type with `TimeUnit`.
546    pub fn time_datatype(unit: TimeUnit) -> Self {
547        ConcreteDataType::Time(TimeType::from_unit(unit))
548    }
549
550    /// Creates a [Time(TimeSecondType)] datatype.
551    pub fn time_second_datatype() -> Self {
552        Self::time_datatype(TimeUnit::Second)
553    }
554
555    /// Creates a [Time(TimeMillisecondType)] datatype.
556    pub fn time_millisecond_datatype() -> Self {
557        Self::time_datatype(TimeUnit::Millisecond)
558    }
559
560    /// Creates a [Time(TimeMicrosecond)] datatype.
561    pub fn time_microsecond_datatype() -> Self {
562        Self::time_datatype(TimeUnit::Microsecond)
563    }
564
565    /// Creates a [Time(TimeNanosecond)] datatype.
566    pub fn time_nanosecond_datatype() -> Self {
567        Self::time_datatype(TimeUnit::Nanosecond)
568    }
569
570    /// Creates a [Duration(DurationSecondType)] datatype.
571    pub fn duration_second_datatype() -> Self {
572        ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
573    }
574
575    /// Creates a [Duration(DurationMillisecondType)] datatype.
576    pub fn duration_millisecond_datatype() -> Self {
577        ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
578    }
579
580    /// Creates a [Duration(DurationMicrosecondType)] datatype.
581    pub fn duration_microsecond_datatype() -> Self {
582        ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
583    }
584
585    /// Creates a [Duration(DurationNanosecondType)] datatype.
586    pub fn duration_nanosecond_datatype() -> Self {
587        ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
588    }
589
590    /// Creates a [Interval(IntervalMonthDayNanoType)] datatype.
591    pub fn interval_month_day_nano_datatype() -> Self {
592        ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
593    }
594
595    /// Creates a [Interval(IntervalYearMonthType)] datatype.
596    pub fn interval_year_month_datatype() -> Self {
597        ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
598    }
599
600    /// Creates a [Interval(IntervalDayTimeType)] datatype.
601    pub fn interval_day_time_datatype() -> Self {
602        ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
603    }
604
605    pub fn timestamp_datatype(unit: TimeUnit) -> Self {
606        match unit {
607            TimeUnit::Second => Self::timestamp_second_datatype(),
608            TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
609            TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
610            TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
611        }
612    }
613
614    /// Converts from arrow timestamp unit to
615    pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
616        match t {
617            ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
618            ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
619            ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
620            ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
621        }
622    }
623
624    pub fn duration_datatype(unit: TimeUnit) -> Self {
625        match unit {
626            TimeUnit::Second => Self::duration_second_datatype(),
627            TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
628            TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
629            TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
630        }
631    }
632
633    pub fn interval_datatype(unit: IntervalUnit) -> Self {
634        match unit {
635            IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
636            IntervalUnit::DayTime => Self::interval_day_time_datatype(),
637            IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
638        }
639    }
640
641    pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
642        match u {
643            ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
644            ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
645            ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
646        }
647    }
648
649    pub fn list_datatype(item_type: Arc<ConcreteDataType>) -> ConcreteDataType {
650        ConcreteDataType::List(ListType::new(item_type))
651    }
652
653    pub fn struct_datatype(fields: StructType) -> ConcreteDataType {
654        ConcreteDataType::Struct(fields)
655    }
656
657    pub fn dictionary_datatype(
658        key_type: ConcreteDataType,
659        value_type: ConcreteDataType,
660    ) -> ConcreteDataType {
661        ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
662    }
663
664    pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
665        ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
666    }
667
668    pub fn decimal128_default_datatype() -> ConcreteDataType {
669        Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
670    }
671
672    pub fn vector_datatype(dim: u32) -> ConcreteDataType {
673        ConcreteDataType::Vector(VectorType::new(dim))
674    }
675
676    pub fn vector_default_datatype() -> ConcreteDataType {
677        Self::vector_datatype(0)
678    }
679
680    pub fn json_native_datatype(inner_type: ConcreteDataType) -> ConcreteDataType {
681        ConcreteDataType::Json(JsonType::new_native((&inner_type).into()))
682    }
683}
684
/// Data type abstraction.
///
/// Implementors must be `Debug`, `Send` and `Sync`. The `enum_dispatch` attribute
/// registers this trait with the `enum_dispatch` crate; [`ConcreteDataType`] names
/// this trait in its own `enum_dispatch(DataType)` attribute.
#[enum_dispatch::enum_dispatch]
pub trait DataType: std::fmt::Debug + Send + Sync {
    /// Name of this data type.
    fn name(&self) -> String;

    /// Returns id of the Logical data type.
    fn logical_type_id(&self) -> LogicalTypeId;

    /// Returns the default value of this type.
    fn default_value(&self) -> Value;

    /// Convert this type as [arrow::datatypes::DataType].
    fn as_arrow_type(&self) -> ArrowDataType;

    /// Creates a mutable vector with given `capacity` of this type.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

    /// Casts the value `from` to this data type.
    ///
    /// Returns `None` if the cast failed.
    fn try_cast(&self, from: Value) -> Option<Value>;
}
707
/// Reference-counted pointer to a [`DataType`] trait object.
pub type DataTypeRef = Arc<dyn DataType>;
709
710#[cfg(test)]
711mod tests {
712    use arrow::datatypes::Field;
713
714    use super::*;
715
716    #[test]
717    fn test_concrete_type_as_datatype_trait() {
718        let concrete_type = ConcreteDataType::boolean_datatype();
719
720        assert_eq!("Boolean", concrete_type.to_string());
721        assert_eq!(Value::Boolean(false), concrete_type.default_value());
722        assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
723        assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
724    }
725
726    #[test]
727    fn test_from_arrow_type() {
728        assert!(matches!(
729            ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
730            ConcreteDataType::Null(_)
731        ));
732        assert!(matches!(
733            ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
734            ConcreteDataType::Boolean(_)
735        ));
736        assert!(matches!(
737            ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
738            ConcreteDataType::Binary(_)
739        ));
740        assert!(matches!(
741            ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
742            ConcreteDataType::Binary(_)
743        ));
744        assert!(matches!(
745            ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
746            ConcreteDataType::Int8(_)
747        ));
748        assert!(matches!(
749            ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
750            ConcreteDataType::Int16(_)
751        ));
752        assert!(matches!(
753            ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
754            ConcreteDataType::Int32(_)
755        ));
756        assert!(matches!(
757            ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
758            ConcreteDataType::Int64(_)
759        ));
760        assert!(matches!(
761            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
762            ConcreteDataType::UInt8(_)
763        ));
764        assert!(matches!(
765            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
766            ConcreteDataType::UInt16(_)
767        ));
768        assert!(matches!(
769            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
770            ConcreteDataType::UInt32(_)
771        ));
772        assert!(matches!(
773            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
774            ConcreteDataType::UInt64(_)
775        ));
776        assert!(matches!(
777            ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
778            ConcreteDataType::Float32(_)
779        ));
780        assert!(matches!(
781            ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
782            ConcreteDataType::Float64(_)
783        ));
784        assert!(matches!(
785            ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
786            ConcreteDataType::String(_)
787        ));
788        // Test LargeUtf8 mapping to large String type
789        let large_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
790        assert!(matches!(large_string_type, ConcreteDataType::String(_)));
791        if let ConcreteDataType::String(string_type) = &large_string_type {
792            assert!(string_type.is_large());
793        } else {
794            panic!("Expected a String type");
795        }
796        assert_eq!(
797            ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
798                "item",
799                ArrowDataType::Int32,
800                true,
801            )))),
802            ConcreteDataType::List(ListType::new(Arc::new(ConcreteDataType::int32_datatype())))
803        );
804        assert!(matches!(
805            ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
806            ConcreteDataType::Date(_)
807        ));
808    }
809
810    #[test]
811    fn test_large_utf8_round_trip() {
812        // Test round-trip conversion for LargeUtf8
813        let large_utf8_arrow = ArrowDataType::LargeUtf8;
814        let concrete_type = ConcreteDataType::from_arrow_type(&large_utf8_arrow);
815        let back_to_arrow = concrete_type.as_arrow_type();
816
817        assert!(matches!(concrete_type, ConcreteDataType::String(_)));
818        // Round-trip should preserve the LargeUtf8 type
819        assert_eq!(large_utf8_arrow, back_to_arrow);
820
821        // Test that Utf8 and LargeUtf8 map to different string variants
822        let utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8);
823        let large_utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
824
825        assert!(matches!(utf8_concrete, ConcreteDataType::String(_)));
826        assert!(matches!(large_utf8_concrete, ConcreteDataType::String(_)));
827
828        // They should have different size types
829        if let (ConcreteDataType::String(utf8_type), ConcreteDataType::String(large_type)) =
830            (&utf8_concrete, &large_utf8_concrete)
831        {
832            assert!(!utf8_type.is_large());
833            assert!(large_type.is_large());
834        } else {
835            panic!("Expected both to be String types");
836        }
837
838        // They should be different types
839        assert_ne!(utf8_concrete, large_utf8_concrete);
840    }
841
842    #[test]
843    fn test_from_arrow_timestamp() {
844        assert_eq!(
845            ConcreteDataType::timestamp_millisecond_datatype(),
846            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
847        );
848        assert_eq!(
849            ConcreteDataType::timestamp_microsecond_datatype(),
850            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
851        );
852        assert_eq!(
853            ConcreteDataType::timestamp_nanosecond_datatype(),
854            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
855        );
856        assert_eq!(
857            ConcreteDataType::timestamp_second_datatype(),
858            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
859        );
860    }
861
862    #[test]
863    fn test_is_null() {
864        assert!(ConcreteDataType::null_datatype().is_null());
865        assert!(!ConcreteDataType::int32_datatype().is_null());
866    }
867
868    #[test]
869    fn test_is_float() {
870        assert!(!ConcreteDataType::int32_datatype().is_float());
871        assert!(ConcreteDataType::float32_datatype().is_float());
872        assert!(ConcreteDataType::float64_datatype().is_float());
873    }
874
875    #[test]
876    fn test_is_boolean() {
877        assert!(!ConcreteDataType::int32_datatype().is_boolean());
878        assert!(!ConcreteDataType::float32_datatype().is_boolean());
879        assert!(ConcreteDataType::boolean_datatype().is_boolean());
880    }
881
882    #[test]
883    fn test_is_decimal() {
884        assert!(!ConcreteDataType::int32_datatype().is_decimal());
885        assert!(!ConcreteDataType::float32_datatype().is_decimal());
886        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_decimal());
887        assert!(ConcreteDataType::decimal128_datatype(18, 6).is_decimal());
888    }
889
890    #[test]
891    fn test_is_stringifiable() {
892        assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
893        assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
894        assert!(ConcreteDataType::string_datatype().is_stringifiable());
895        assert!(ConcreteDataType::binary_datatype().is_stringifiable());
896        assert!(ConcreteDataType::date_datatype().is_stringifiable());
897        assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
898        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
899        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
900        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
901        assert!(ConcreteDataType::time_second_datatype().is_stringifiable());
902        assert!(ConcreteDataType::time_millisecond_datatype().is_stringifiable());
903        assert!(ConcreteDataType::time_microsecond_datatype().is_stringifiable());
904        assert!(ConcreteDataType::time_nanosecond_datatype().is_stringifiable());
905
906        assert!(ConcreteDataType::interval_year_month_datatype().is_stringifiable());
907        assert!(ConcreteDataType::interval_day_time_datatype().is_stringifiable());
908        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_stringifiable());
909
910        assert!(ConcreteDataType::duration_second_datatype().is_stringifiable());
911        assert!(ConcreteDataType::duration_millisecond_datatype().is_stringifiable());
912        assert!(ConcreteDataType::duration_microsecond_datatype().is_stringifiable());
913        assert!(ConcreteDataType::duration_nanosecond_datatype().is_stringifiable());
914        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_stringifiable());
915        assert!(ConcreteDataType::vector_default_datatype().is_stringifiable());
916    }
917
918    #[test]
919    fn test_is_signed() {
920        assert!(ConcreteDataType::int8_datatype().is_signed());
921        assert!(ConcreteDataType::int16_datatype().is_signed());
922        assert!(ConcreteDataType::int32_datatype().is_signed());
923        assert!(ConcreteDataType::int64_datatype().is_signed());
924        assert!(ConcreteDataType::date_datatype().is_signed());
925        assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
926        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
927        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
928        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
929        assert!(ConcreteDataType::time_second_datatype().is_signed());
930        assert!(ConcreteDataType::time_millisecond_datatype().is_signed());
931        assert!(ConcreteDataType::time_microsecond_datatype().is_signed());
932        assert!(ConcreteDataType::time_nanosecond_datatype().is_signed());
933        assert!(ConcreteDataType::interval_year_month_datatype().is_signed());
934        assert!(ConcreteDataType::interval_day_time_datatype().is_signed());
935        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_signed());
936        assert!(ConcreteDataType::duration_second_datatype().is_signed());
937        assert!(ConcreteDataType::duration_millisecond_datatype().is_signed());
938        assert!(ConcreteDataType::duration_microsecond_datatype().is_signed());
939        assert!(ConcreteDataType::duration_nanosecond_datatype().is_signed());
940
941        assert!(!ConcreteDataType::uint8_datatype().is_signed());
942        assert!(!ConcreteDataType::uint16_datatype().is_signed());
943        assert!(!ConcreteDataType::uint32_datatype().is_signed());
944        assert!(!ConcreteDataType::uint64_datatype().is_signed());
945
946        assert!(!ConcreteDataType::float32_datatype().is_signed());
947        assert!(!ConcreteDataType::float64_datatype().is_signed());
948
949        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_signed());
950    }
951
952    #[test]
953    fn test_is_unsigned() {
954        assert!(!ConcreteDataType::int8_datatype().is_unsigned());
955        assert!(!ConcreteDataType::int16_datatype().is_unsigned());
956        assert!(!ConcreteDataType::int32_datatype().is_unsigned());
957        assert!(!ConcreteDataType::int64_datatype().is_unsigned());
958        assert!(!ConcreteDataType::date_datatype().is_unsigned());
959        assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
960        assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
961        assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
962        assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
963        assert!(!ConcreteDataType::time_second_datatype().is_unsigned());
964        assert!(!ConcreteDataType::time_millisecond_datatype().is_unsigned());
965        assert!(!ConcreteDataType::time_microsecond_datatype().is_unsigned());
966        assert!(!ConcreteDataType::time_nanosecond_datatype().is_unsigned());
967        assert!(!ConcreteDataType::interval_year_month_datatype().is_unsigned());
968        assert!(!ConcreteDataType::interval_day_time_datatype().is_unsigned());
969        assert!(!ConcreteDataType::interval_month_day_nano_datatype().is_unsigned());
970        assert!(!ConcreteDataType::duration_second_datatype().is_unsigned());
971        assert!(!ConcreteDataType::duration_millisecond_datatype().is_unsigned());
972        assert!(!ConcreteDataType::duration_microsecond_datatype().is_unsigned());
973        assert!(!ConcreteDataType::duration_nanosecond_datatype().is_unsigned());
974        assert!(!ConcreteDataType::decimal128_datatype(10, 2).is_unsigned());
975
976        assert!(ConcreteDataType::uint8_datatype().is_unsigned());
977        assert!(ConcreteDataType::uint16_datatype().is_unsigned());
978        assert!(ConcreteDataType::uint32_datatype().is_unsigned());
979        assert!(ConcreteDataType::uint64_datatype().is_unsigned());
980
981        assert!(!ConcreteDataType::float32_datatype().is_unsigned());
982        assert!(!ConcreteDataType::float64_datatype().is_unsigned());
983    }
984
985    #[test]
986    fn test_numerics() {
987        let nums = ConcreteDataType::numerics();
988        assert_eq!(10, nums.len());
989    }
990
991    #[test]
992    fn test_as_list() {
993        let list_type =
994            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()));
995        assert_eq!(
996            ListType::new(Arc::new(ConcreteDataType::int32_datatype())),
997            *list_type.as_list().unwrap()
998        );
999        assert!(ConcreteDataType::int32_datatype().as_list().is_none());
1000    }
1001
1002    #[test]
1003    fn test_display_concrete_data_type() {
1004        assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
1005        assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
1006        assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
1007        assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
1008        assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
1009        assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
1010        assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
1011        assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
1012        assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
1013        assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
1014        assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
1015        assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
1016        assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
1017        assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
1018        assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
1019        assert_eq!(
1020            ConcreteDataType::timestamp_millisecond_datatype().to_string(),
1021            "TimestampMillisecond"
1022        );
1023        assert_eq!(
1024            ConcreteDataType::time_millisecond_datatype().to_string(),
1025            "TimeMillisecond"
1026        );
1027        assert_eq!(
1028            ConcreteDataType::interval_month_day_nano_datatype().to_string(),
1029            "IntervalMonthDayNano"
1030        );
1031        assert_eq!(
1032            ConcreteDataType::duration_second_datatype().to_string(),
1033            "DurationSecond"
1034        );
1035        assert_eq!(
1036            ConcreteDataType::decimal128_datatype(10, 2).to_string(),
1037            "Decimal(10, 2)"
1038        );
1039        // Nested types
1040        assert_eq!(
1041            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1042                .to_string(),
1043            "List<Int32>"
1044        );
1045        assert_eq!(
1046            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::Dictionary(
1047                DictionaryType::new(
1048                    ConcreteDataType::int32_datatype(),
1049                    ConcreteDataType::string_datatype()
1050                )
1051            )))
1052            .to_string(),
1053            "List<Dictionary<Int32, String>>"
1054        );
1055        assert_eq!(
1056            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(Arc::new(
1057                ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1058            ))))
1059            .to_string(),
1060            "List<List<List<Int32>>>"
1061        );
1062        assert_eq!(
1063            ConcreteDataType::dictionary_datatype(
1064                ConcreteDataType::int32_datatype(),
1065                ConcreteDataType::string_datatype()
1066            )
1067            .to_string(),
1068            "Dictionary<Int32, String>"
1069        );
1070        assert_eq!(
1071            ConcreteDataType::vector_datatype(3).to_string(),
1072            "Vector(3)"
1073        );
1074    }
1075}