datatypes/
data_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt;
16use std::sync::Arc;
17
18use arrow::compute::cast as arrow_array_cast;
19use arrow::datatypes::{
20    DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
21};
22use arrow_schema::DECIMAL_DEFAULT_SCALE;
23use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
24use common_time::interval::IntervalUnit;
25use common_time::timestamp::TimeUnit;
26use enum_dispatch::enum_dispatch;
27use paste::paste;
28use serde::{Deserialize, Serialize};
29
30use crate::error::{self, Error, Result};
31use crate::type_id::LogicalTypeId;
32use crate::types::{
33    BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
34    DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
35    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
36    IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
37    StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType, TimestampMillisecondType,
38    TimestampNanosecondType, TimestampSecondType, TimestampType, UInt16Type, UInt32Type,
39    UInt64Type, UInt8Type, VectorType,
40};
41use crate::value::Value;
42use crate::vectors::MutableVector;
43
/// The set of concrete data types known to this crate.
///
/// Every variant wraps the descriptor struct of one type. Calls to [`DataType`]
/// methods on this enum are forwarded to the wrapped descriptor by `enum_dispatch`.
///
/// Note: `PartialOrd`/`Ord` are derived, so the relative order of two different
/// variants follows the declaration order below.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    Null(NullType),
    Boolean(BooleanType),

    // Numeric types:
    Int8(Int8Type),
    Int16(Int16Type),
    Int32(Int32Type),
    Int64(Int64Type),
    UInt8(UInt8Type),
    UInt16(UInt16Type),
    UInt32(UInt32Type),
    UInt64(UInt64Type),
    Float32(Float32Type),
    Float64(Float64Type),

    // Decimal128 type:
    Decimal128(Decimal128Type),

    // String types:
    Binary(BinaryType),
    String(StringType),

    // Date and time types:
    Date(DateType),
    Timestamp(TimestampType),
    Time(TimeType),

    // Duration type:
    Duration(DurationType),

    // Interval type:
    Interval(IntervalType),

    // Compound types:
    List(ListType),
    Dictionary(DictionaryType),

    // JSON type:
    Json(JsonType),

    // Vector type:
    Vector(VectorType),
}
90
91impl fmt::Display for ConcreteDataType {
92    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93        match self {
94            ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
95            ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
96            ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
97            ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
98            ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
99            ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
100            ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
101            ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
102            ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
103            ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
104            ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
105            ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
106            ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
107            ConcreteDataType::String(v) => write!(f, "{}", v.name()),
108            ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
109            ConcreteDataType::Timestamp(t) => match t {
110                TimestampType::Second(v) => write!(f, "{}", v.name()),
111                TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
112                TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
113                TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
114            },
115            ConcreteDataType::Time(t) => match t {
116                TimeType::Second(v) => write!(f, "{}", v.name()),
117                TimeType::Millisecond(v) => write!(f, "{}", v.name()),
118                TimeType::Microsecond(v) => write!(f, "{}", v.name()),
119                TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
120            },
121            ConcreteDataType::Interval(i) => match i {
122                IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
123                IntervalType::DayTime(v) => write!(f, "{}", v.name()),
124                IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
125            },
126            ConcreteDataType::Duration(d) => match d {
127                DurationType::Second(v) => write!(f, "{}", v.name()),
128                DurationType::Millisecond(v) => write!(f, "{}", v.name()),
129                DurationType::Microsecond(v) => write!(f, "{}", v.name()),
130                DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
131            },
132            ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
133            ConcreteDataType::List(v) => write!(f, "{}", v.name()),
134            ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
135            ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
136            ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
137        }
138    }
139}
140
141// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
142// returning all these properties to the `DataType` trait
143impl ConcreteDataType {
144    pub fn is_float(&self) -> bool {
145        matches!(
146            self,
147            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
148        )
149    }
150
151    pub fn is_boolean(&self) -> bool {
152        matches!(self, ConcreteDataType::Boolean(_))
153    }
154
155    pub fn is_string(&self) -> bool {
156        matches!(self, ConcreteDataType::String(_))
157    }
158
159    pub fn is_stringifiable(&self) -> bool {
160        matches!(
161            self,
162            ConcreteDataType::String(_)
163                | ConcreteDataType::Date(_)
164                | ConcreteDataType::Timestamp(_)
165                | ConcreteDataType::Time(_)
166                | ConcreteDataType::Interval(_)
167                | ConcreteDataType::Duration(_)
168                | ConcreteDataType::Decimal128(_)
169                | ConcreteDataType::Binary(_)
170                | ConcreteDataType::Json(_)
171                | ConcreteDataType::Vector(_)
172        )
173    }
174
175    pub fn is_signed(&self) -> bool {
176        matches!(
177            self,
178            ConcreteDataType::Int8(_)
179                | ConcreteDataType::Int16(_)
180                | ConcreteDataType::Int32(_)
181                | ConcreteDataType::Int64(_)
182                | ConcreteDataType::Date(_)
183                | ConcreteDataType::Timestamp(_)
184                | ConcreteDataType::Time(_)
185                | ConcreteDataType::Interval(_)
186                | ConcreteDataType::Duration(_)
187                | ConcreteDataType::Decimal128(_)
188        )
189    }
190
191    pub fn is_unsigned(&self) -> bool {
192        matches!(
193            self,
194            ConcreteDataType::UInt8(_)
195                | ConcreteDataType::UInt16(_)
196                | ConcreteDataType::UInt32(_)
197                | ConcreteDataType::UInt64(_)
198        )
199    }
200
201    pub fn is_numeric(&self) -> bool {
202        matches!(
203            self,
204            ConcreteDataType::Int8(_)
205                | ConcreteDataType::Int16(_)
206                | ConcreteDataType::Int32(_)
207                | ConcreteDataType::Int64(_)
208                | ConcreteDataType::UInt8(_)
209                | ConcreteDataType::UInt16(_)
210                | ConcreteDataType::UInt32(_)
211                | ConcreteDataType::UInt64(_)
212                | ConcreteDataType::Float32(_)
213                | ConcreteDataType::Float64(_)
214        )
215    }
216
217    pub fn is_timestamp(&self) -> bool {
218        matches!(self, ConcreteDataType::Timestamp(_))
219    }
220
221    pub fn is_decimal(&self) -> bool {
222        matches!(self, ConcreteDataType::Decimal128(_))
223    }
224
225    pub fn is_json(&self) -> bool {
226        matches!(self, ConcreteDataType::Json(_))
227    }
228
229    pub fn is_vector(&self) -> bool {
230        matches!(self, ConcreteDataType::Vector(_))
231    }
232
233    pub fn numerics() -> Vec<ConcreteDataType> {
234        vec![
235            ConcreteDataType::int8_datatype(),
236            ConcreteDataType::int16_datatype(),
237            ConcreteDataType::int32_datatype(),
238            ConcreteDataType::int64_datatype(),
239            ConcreteDataType::uint8_datatype(),
240            ConcreteDataType::uint16_datatype(),
241            ConcreteDataType::uint32_datatype(),
242            ConcreteDataType::uint64_datatype(),
243            ConcreteDataType::float32_datatype(),
244            ConcreteDataType::float64_datatype(),
245        ]
246    }
247
248    pub fn unsigned_integers() -> Vec<ConcreteDataType> {
249        vec![
250            ConcreteDataType::uint8_datatype(),
251            ConcreteDataType::uint16_datatype(),
252            ConcreteDataType::uint32_datatype(),
253            ConcreteDataType::uint64_datatype(),
254        ]
255    }
256
257    pub fn timestamps() -> Vec<ConcreteDataType> {
258        vec![
259            ConcreteDataType::timestamp_second_datatype(),
260            ConcreteDataType::timestamp_millisecond_datatype(),
261            ConcreteDataType::timestamp_microsecond_datatype(),
262            ConcreteDataType::timestamp_nanosecond_datatype(),
263        ]
264    }
265
266    /// Convert arrow data type to [ConcreteDataType].
267    ///
268    /// # Panics
269    /// Panic if given arrow data type is not supported.
270    pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
271        ConcreteDataType::try_from(dt).expect("Unimplemented type")
272    }
273
274    pub fn is_null(&self) -> bool {
275        matches!(self, ConcreteDataType::Null(NullType))
276    }
277
278    /// Try to cast the type as a [`ListType`].
279    pub fn as_list(&self) -> Option<&ListType> {
280        match self {
281            ConcreteDataType::List(t) => Some(t),
282            _ => None,
283        }
284    }
285
286    /// Try to cast data type as a [`TimestampType`].
287    pub fn as_timestamp(&self) -> Option<TimestampType> {
288        match self {
289            ConcreteDataType::Timestamp(t) => Some(*t),
290            _ => None,
291        }
292    }
293
294    /// Try to get numeric precision, returns `None` if it's not numeric type
295    pub fn numeric_precision(&self) -> Option<u8> {
296        match self {
297            ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
298            ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
299            ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
300            ConcreteDataType::Int64(_) => Some(19),
301            ConcreteDataType::UInt64(_) => Some(20),
302            ConcreteDataType::Float32(_) => Some(12),
303            ConcreteDataType::Float64(_) => Some(22),
304            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
305            _ => None,
306        }
307    }
308
309    /// Try to get numeric scale, returns `None` if it's float or not numeric type
310    pub fn numeric_scale(&self) -> Option<i8> {
311        match self {
312            ConcreteDataType::Int8(_)
313            | ConcreteDataType::UInt8(_)
314            | ConcreteDataType::Int16(_)
315            | ConcreteDataType::UInt16(_)
316            | ConcreteDataType::Int32(_)
317            | ConcreteDataType::UInt32(_)
318            | ConcreteDataType::Int64(_)
319            | ConcreteDataType::UInt64(_) => Some(0),
320            ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
321            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
322            _ => None,
323        }
324    }
325
326    /// Try to cast data type as a [`TimeType`].
327    pub fn as_time(&self) -> Option<TimeType> {
328        match self {
329            ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
330            ConcreteDataType::Time(t) => Some(*t),
331            _ => None,
332        }
333    }
334
335    pub fn as_decimal128(&self) -> Option<Decimal128Type> {
336        match self {
337            ConcreteDataType::Decimal128(d) => Some(*d),
338            _ => None,
339        }
340    }
341
342    pub fn as_json(&self) -> Option<JsonType> {
343        match self {
344            ConcreteDataType::Json(j) => Some(*j),
345            _ => None,
346        }
347    }
348
349    pub fn as_vector(&self) -> Option<VectorType> {
350        match self {
351            ConcreteDataType::Vector(v) => Some(*v),
352            _ => None,
353        }
354    }
355
356    /// Checks if the data type can cast to another data type.
357    pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
358        let array = arrow_array::new_empty_array(&self.as_arrow_type());
359        arrow_array_cast(array.as_ref(), &to_type.as_arrow_type()).is_ok()
360    }
361
362    /// Try to cast data type as a [`DurationType`].
363    pub fn as_duration(&self) -> Option<DurationType> {
364        match self {
365            ConcreteDataType::Duration(d) => Some(*d),
366            _ => None,
367        }
368    }
369
370    /// Return the datatype name in postgres type system
371    pub fn postgres_datatype_name(&self) -> &'static str {
372        match self {
373            &ConcreteDataType::Null(_) => "UNKNOWN",
374            &ConcreteDataType::Boolean(_) => "BOOL",
375            &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
376            &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
377            &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
378            &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
379            &ConcreteDataType::Float32(_) => "FLOAT4",
380            &ConcreteDataType::Float64(_) => "FLOAT8",
381            &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
382            &ConcreteDataType::String(_) => "VARCHAR",
383            &ConcreteDataType::Date(_) => "DATE",
384            &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
385            &ConcreteDataType::Time(_) => "TIME",
386            &ConcreteDataType::Interval(_) => "INTERVAL",
387            &ConcreteDataType::Decimal128(_) => "NUMERIC",
388            &ConcreteDataType::Json(_) => "JSON",
389            ConcreteDataType::List(list) => match list.item_type() {
390                &ConcreteDataType::Null(_) => "UNKNOWN",
391                &ConcreteDataType::Boolean(_) => "_BOOL",
392                &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
393                &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
394                &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
395                &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
396                &ConcreteDataType::Float32(_) => "_FLOAT4",
397                &ConcreteDataType::Float64(_) => "_FLOAT8",
398                &ConcreteDataType::Binary(_) => "_BYTEA",
399                &ConcreteDataType::String(_) => "_VARCHAR",
400                &ConcreteDataType::Date(_) => "_DATE",
401                &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
402                &ConcreteDataType::Time(_) => "_TIME",
403                &ConcreteDataType::Interval(_) => "_INTERVAL",
404                &ConcreteDataType::Decimal128(_) => "_NUMERIC",
405                &ConcreteDataType::Json(_) => "_JSON",
406                &ConcreteDataType::Duration(_)
407                | &ConcreteDataType::Dictionary(_)
408                | &ConcreteDataType::Vector(_)
409                | &ConcreteDataType::List(_) => "UNKNOWN",
410            },
411            &ConcreteDataType::Duration(_) | &ConcreteDataType::Dictionary(_) => "UNKNOWN",
412        }
413    }
414}
415
416impl From<&ConcreteDataType> for ConcreteDataType {
417    fn from(t: &ConcreteDataType) -> Self {
418        t.clone()
419    }
420}
421
422impl TryFrom<&ArrowDataType> for ConcreteDataType {
423    type Error = Error;
424
425    fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
426        let concrete_type = match dt {
427            ArrowDataType::Null => Self::null_datatype(),
428            ArrowDataType::Boolean => Self::boolean_datatype(),
429            ArrowDataType::UInt8 => Self::uint8_datatype(),
430            ArrowDataType::UInt16 => Self::uint16_datatype(),
431            ArrowDataType::UInt32 => Self::uint32_datatype(),
432            ArrowDataType::UInt64 => Self::uint64_datatype(),
433            ArrowDataType::Int8 => Self::int8_datatype(),
434            ArrowDataType::Int16 => Self::int16_datatype(),
435            ArrowDataType::Int32 => Self::int32_datatype(),
436            ArrowDataType::Int64 => Self::int64_datatype(),
437            ArrowDataType::Float32 => Self::float32_datatype(),
438            ArrowDataType::Float64 => Self::float64_datatype(),
439            ArrowDataType::Date32 => Self::date_datatype(),
440            ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
441            ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
442            ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
443            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
444            ArrowDataType::List(field) => Self::List(ListType::new(
445                ConcreteDataType::from_arrow_type(field.data_type()),
446            )),
447            ArrowDataType::Dictionary(key_type, value_type) => {
448                let key_type = ConcreteDataType::from_arrow_type(key_type);
449                let value_type = ConcreteDataType::from_arrow_type(value_type);
450                Self::Dictionary(DictionaryType::new(key_type, value_type))
451            }
452            ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
453            ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
454            ArrowDataType::Duration(u) => {
455                ConcreteDataType::Duration(DurationType::from_unit(u.into()))
456            }
457            ArrowDataType::Decimal128(precision, scale) => {
458                ConcreteDataType::decimal128_datatype(*precision, *scale)
459            }
460            _ => {
461                return error::UnsupportedArrowTypeSnafu {
462                    arrow_type: dt.clone(),
463                }
464                .fail()
465            }
466        };
467
468        Ok(concrete_type)
469    }
470}
471
/// Generates one constructor on [`ConcreteDataType`] per listed variant.
///
/// For a variant `Foo` this emits `ConcreteDataType::foo_datatype()`, which wraps
/// `FooType::default()` in `ConcreteDataType::Foo`.
macro_rules! impl_new_concrete_type_functions {
    ($($variant:ident),+) => {
        paste! {
            impl ConcreteDataType {
                $(
                    pub fn [<$variant:lower _datatype>]() -> ConcreteDataType {
                        ConcreteDataType::$variant([<$variant Type>]::default())
                    }
                )+
            }
        }
    }
}
485
// Generates the parameterless constructors `null_datatype()`, `boolean_datatype()`,
// `uint8_datatype()`, ..., `json_datatype()` for the variants listed below.
impl_new_concrete_type_functions!(
    Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
    Binary, Date, String, Json
);
490
491impl ConcreteDataType {
492    pub fn timestamp_second_datatype() -> Self {
493        ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
494    }
495
496    pub fn timestamp_millisecond_datatype() -> Self {
497        ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
498    }
499
500    pub fn timestamp_microsecond_datatype() -> Self {
501        ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
502    }
503
504    pub fn timestamp_nanosecond_datatype() -> Self {
505        ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
506    }
507
508    /// Returns the time data type with `TimeUnit`.
509    pub fn time_datatype(unit: TimeUnit) -> Self {
510        ConcreteDataType::Time(TimeType::from_unit(unit))
511    }
512
513    /// Creates a [Time(TimeSecondType)] datatype.
514    pub fn time_second_datatype() -> Self {
515        Self::time_datatype(TimeUnit::Second)
516    }
517
518    /// Creates a [Time(TimeMillisecondType)] datatype.
519    pub fn time_millisecond_datatype() -> Self {
520        Self::time_datatype(TimeUnit::Millisecond)
521    }
522
523    /// Creates a [Time(TimeMicrosecond)] datatype.
524    pub fn time_microsecond_datatype() -> Self {
525        Self::time_datatype(TimeUnit::Microsecond)
526    }
527
528    /// Creates a [Time(TimeNanosecond)] datatype.
529    pub fn time_nanosecond_datatype() -> Self {
530        Self::time_datatype(TimeUnit::Nanosecond)
531    }
532
533    /// Creates a [Duration(DurationSecondType)] datatype.
534    pub fn duration_second_datatype() -> Self {
535        ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
536    }
537
538    /// Creates a [Duration(DurationMillisecondType)] datatype.
539    pub fn duration_millisecond_datatype() -> Self {
540        ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
541    }
542
543    /// Creates a [Duration(DurationMicrosecondType)] datatype.
544    pub fn duration_microsecond_datatype() -> Self {
545        ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
546    }
547
548    /// Creates a [Duration(DurationNanosecondType)] datatype.
549    pub fn duration_nanosecond_datatype() -> Self {
550        ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
551    }
552
553    /// Creates a [Interval(IntervalMonthDayNanoType)] datatype.
554    pub fn interval_month_day_nano_datatype() -> Self {
555        ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
556    }
557
558    /// Creates a [Interval(IntervalYearMonthType)] datatype.
559    pub fn interval_year_month_datatype() -> Self {
560        ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
561    }
562
563    /// Creates a [Interval(IntervalDayTimeType)] datatype.
564    pub fn interval_day_time_datatype() -> Self {
565        ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
566    }
567
568    pub fn timestamp_datatype(unit: TimeUnit) -> Self {
569        match unit {
570            TimeUnit::Second => Self::timestamp_second_datatype(),
571            TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
572            TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
573            TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
574        }
575    }
576
577    /// Converts from arrow timestamp unit to
578    pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
579        match t {
580            ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
581            ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
582            ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
583            ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
584        }
585    }
586
587    pub fn duration_datatype(unit: TimeUnit) -> Self {
588        match unit {
589            TimeUnit::Second => Self::duration_second_datatype(),
590            TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
591            TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
592            TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
593        }
594    }
595
596    pub fn interval_datatype(unit: IntervalUnit) -> Self {
597        match unit {
598            IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
599            IntervalUnit::DayTime => Self::interval_day_time_datatype(),
600            IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
601        }
602    }
603
604    pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
605        match u {
606            ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
607            ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
608            ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
609        }
610    }
611
612    pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
613        ConcreteDataType::List(ListType::new(item_type))
614    }
615
616    pub fn dictionary_datatype(
617        key_type: ConcreteDataType,
618        value_type: ConcreteDataType,
619    ) -> ConcreteDataType {
620        ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
621    }
622
623    pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
624        ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
625    }
626
627    pub fn decimal128_default_datatype() -> ConcreteDataType {
628        Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
629    }
630
631    pub fn vector_datatype(dim: u32) -> ConcreteDataType {
632        ConcreteDataType::Vector(VectorType::new(dim))
633    }
634
635    pub fn vector_default_datatype() -> ConcreteDataType {
636        Self::vector_datatype(0)
637    }
638}
639
/// Data type abstraction.
///
/// Implemented by every type descriptor; [`ConcreteDataType`] forwards these
/// methods to the descriptor it wraps through `enum_dispatch`.
#[enum_dispatch::enum_dispatch]
pub trait DataType: std::fmt::Debug + Send + Sync {
    /// Returns the name of this data type.
    fn name(&self) -> String;

    /// Returns the id of the logical data type.
    fn logical_type_id(&self) -> LogicalTypeId;

    /// Returns the default value of this type.
    fn default_value(&self) -> Value;

    /// Converts this type to an [arrow::datatypes::DataType].
    fn as_arrow_type(&self) -> ArrowDataType;

    /// Creates a mutable vector of this type with the given `capacity`.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

    /// Casts the value `from` to this data type.
    ///
    /// Returns `None` if the cast failed.
    fn try_cast(&self, from: Value) -> Option<Value>;
}
662
/// Shared, reference-counted handle to a [`DataType`] trait object.
pub type DataTypeRef = Arc<dyn DataType>;
664
665#[cfg(test)]
666mod tests {
667    use arrow::datatypes::Field;
668
669    use super::*;
670
671    #[test]
672    fn test_concrete_type_as_datatype_trait() {
673        let concrete_type = ConcreteDataType::boolean_datatype();
674
675        assert_eq!("Boolean", concrete_type.to_string());
676        assert_eq!(Value::Boolean(false), concrete_type.default_value());
677        assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
678        assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
679    }
680
    /// Checks the arrow -> concrete type mapping for a selection of arrow types.
    #[test]
    fn test_from_arrow_type() {
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
            ConcreteDataType::Null(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
            ConcreteDataType::Boolean(_)
        ));
        // Both the regular and the large binary arrow types map to `Binary`.
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
            ConcreteDataType::Binary(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
            ConcreteDataType::Binary(_)
        ));
        // Signed integers.
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
            ConcreteDataType::Int8(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
            ConcreteDataType::Int16(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
            ConcreteDataType::Int32(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
            ConcreteDataType::Int64(_)
        ));
        // Unsigned integers.
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
            ConcreteDataType::UInt8(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
            ConcreteDataType::UInt16(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
            ConcreteDataType::UInt32(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
            ConcreteDataType::UInt64(_)
        ));
        // Floating point numbers.
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
            ConcreteDataType::Float32(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
            ConcreteDataType::Float64(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
            ConcreteDataType::String(_)
        ));
        // A list is converted together with its item type.
        assert_eq!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
                "item",
                ArrowDataType::Int32,
                true,
            )))),
            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
        );
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
            ConcreteDataType::Date(_)
        ));
    }
756
757    #[test]
758    fn test_from_arrow_timestamp() {
759        assert_eq!(
760            ConcreteDataType::timestamp_millisecond_datatype(),
761            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
762        );
763        assert_eq!(
764            ConcreteDataType::timestamp_microsecond_datatype(),
765            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
766        );
767        assert_eq!(
768            ConcreteDataType::timestamp_nanosecond_datatype(),
769            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
770        );
771        assert_eq!(
772            ConcreteDataType::timestamp_second_datatype(),
773            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
774        );
775    }
776
777    #[test]
778    fn test_is_null() {
779        assert!(ConcreteDataType::null_datatype().is_null());
780        assert!(!ConcreteDataType::int32_datatype().is_null());
781    }
782
783    #[test]
784    fn test_is_float() {
785        assert!(!ConcreteDataType::int32_datatype().is_float());
786        assert!(ConcreteDataType::float32_datatype().is_float());
787        assert!(ConcreteDataType::float64_datatype().is_float());
788    }
789
790    #[test]
791    fn test_is_boolean() {
792        assert!(!ConcreteDataType::int32_datatype().is_boolean());
793        assert!(!ConcreteDataType::float32_datatype().is_boolean());
794        assert!(ConcreteDataType::boolean_datatype().is_boolean());
795    }
796
797    #[test]
798    fn test_is_decimal() {
799        assert!(!ConcreteDataType::int32_datatype().is_decimal());
800        assert!(!ConcreteDataType::float32_datatype().is_decimal());
801        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_decimal());
802        assert!(ConcreteDataType::decimal128_datatype(18, 6).is_decimal());
803    }
804
805    #[test]
806    fn test_is_stringifiable() {
807        assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
808        assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
809        assert!(ConcreteDataType::string_datatype().is_stringifiable());
810        assert!(ConcreteDataType::binary_datatype().is_stringifiable());
811        assert!(ConcreteDataType::date_datatype().is_stringifiable());
812        assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
813        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
814        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
815        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
816        assert!(ConcreteDataType::time_second_datatype().is_stringifiable());
817        assert!(ConcreteDataType::time_millisecond_datatype().is_stringifiable());
818        assert!(ConcreteDataType::time_microsecond_datatype().is_stringifiable());
819        assert!(ConcreteDataType::time_nanosecond_datatype().is_stringifiable());
820
821        assert!(ConcreteDataType::interval_year_month_datatype().is_stringifiable());
822        assert!(ConcreteDataType::interval_day_time_datatype().is_stringifiable());
823        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_stringifiable());
824
825        assert!(ConcreteDataType::duration_second_datatype().is_stringifiable());
826        assert!(ConcreteDataType::duration_millisecond_datatype().is_stringifiable());
827        assert!(ConcreteDataType::duration_microsecond_datatype().is_stringifiable());
828        assert!(ConcreteDataType::duration_nanosecond_datatype().is_stringifiable());
829        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_stringifiable());
830        assert!(ConcreteDataType::vector_default_datatype().is_stringifiable());
831    }
832
833    #[test]
834    fn test_is_signed() {
835        assert!(ConcreteDataType::int8_datatype().is_signed());
836        assert!(ConcreteDataType::int16_datatype().is_signed());
837        assert!(ConcreteDataType::int32_datatype().is_signed());
838        assert!(ConcreteDataType::int64_datatype().is_signed());
839        assert!(ConcreteDataType::date_datatype().is_signed());
840        assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
841        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
842        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
843        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
844        assert!(ConcreteDataType::time_second_datatype().is_signed());
845        assert!(ConcreteDataType::time_millisecond_datatype().is_signed());
846        assert!(ConcreteDataType::time_microsecond_datatype().is_signed());
847        assert!(ConcreteDataType::time_nanosecond_datatype().is_signed());
848        assert!(ConcreteDataType::interval_year_month_datatype().is_signed());
849        assert!(ConcreteDataType::interval_day_time_datatype().is_signed());
850        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_signed());
851        assert!(ConcreteDataType::duration_second_datatype().is_signed());
852        assert!(ConcreteDataType::duration_millisecond_datatype().is_signed());
853        assert!(ConcreteDataType::duration_microsecond_datatype().is_signed());
854        assert!(ConcreteDataType::duration_nanosecond_datatype().is_signed());
855
856        assert!(!ConcreteDataType::uint8_datatype().is_signed());
857        assert!(!ConcreteDataType::uint16_datatype().is_signed());
858        assert!(!ConcreteDataType::uint32_datatype().is_signed());
859        assert!(!ConcreteDataType::uint64_datatype().is_signed());
860
861        assert!(!ConcreteDataType::float32_datatype().is_signed());
862        assert!(!ConcreteDataType::float64_datatype().is_signed());
863
864        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_signed());
865    }
866
867    #[test]
868    fn test_is_unsigned() {
869        assert!(!ConcreteDataType::int8_datatype().is_unsigned());
870        assert!(!ConcreteDataType::int16_datatype().is_unsigned());
871        assert!(!ConcreteDataType::int32_datatype().is_unsigned());
872        assert!(!ConcreteDataType::int64_datatype().is_unsigned());
873        assert!(!ConcreteDataType::date_datatype().is_unsigned());
874        assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
875        assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
876        assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
877        assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
878        assert!(!ConcreteDataType::time_second_datatype().is_unsigned());
879        assert!(!ConcreteDataType::time_millisecond_datatype().is_unsigned());
880        assert!(!ConcreteDataType::time_microsecond_datatype().is_unsigned());
881        assert!(!ConcreteDataType::time_nanosecond_datatype().is_unsigned());
882        assert!(!ConcreteDataType::interval_year_month_datatype().is_unsigned());
883        assert!(!ConcreteDataType::interval_day_time_datatype().is_unsigned());
884        assert!(!ConcreteDataType::interval_month_day_nano_datatype().is_unsigned());
885        assert!(!ConcreteDataType::duration_second_datatype().is_unsigned());
886        assert!(!ConcreteDataType::duration_millisecond_datatype().is_unsigned());
887        assert!(!ConcreteDataType::duration_microsecond_datatype().is_unsigned());
888        assert!(!ConcreteDataType::duration_nanosecond_datatype().is_unsigned());
889        assert!(!ConcreteDataType::decimal128_datatype(10, 2).is_unsigned());
890
891        assert!(ConcreteDataType::uint8_datatype().is_unsigned());
892        assert!(ConcreteDataType::uint16_datatype().is_unsigned());
893        assert!(ConcreteDataType::uint32_datatype().is_unsigned());
894        assert!(ConcreteDataType::uint64_datatype().is_unsigned());
895
896        assert!(!ConcreteDataType::float32_datatype().is_unsigned());
897        assert!(!ConcreteDataType::float64_datatype().is_unsigned());
898    }
899
900    #[test]
901    fn test_numerics() {
902        let nums = ConcreteDataType::numerics();
903        assert_eq!(10, nums.len());
904    }
905
906    #[test]
907    fn test_as_list() {
908        let list_type = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
909        assert_eq!(
910            ListType::new(ConcreteDataType::int32_datatype()),
911            *list_type.as_list().unwrap()
912        );
913        assert!(ConcreteDataType::int32_datatype().as_list().is_none());
914    }
915
916    #[test]
917    fn test_display_concrete_data_type() {
918        assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
919        assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
920        assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
921        assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
922        assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
923        assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
924        assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
925        assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
926        assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
927        assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
928        assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
929        assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
930        assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
931        assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
932        assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
933        assert_eq!(
934            ConcreteDataType::timestamp_millisecond_datatype().to_string(),
935            "TimestampMillisecond"
936        );
937        assert_eq!(
938            ConcreteDataType::time_millisecond_datatype().to_string(),
939            "TimeMillisecond"
940        );
941        assert_eq!(
942            ConcreteDataType::interval_month_day_nano_datatype().to_string(),
943            "IntervalMonthDayNano"
944        );
945        assert_eq!(
946            ConcreteDataType::duration_second_datatype().to_string(),
947            "DurationSecond"
948        );
949        assert_eq!(
950            ConcreteDataType::decimal128_datatype(10, 2).to_string(),
951            "Decimal(10, 2)"
952        );
953        // Nested types
954        assert_eq!(
955            ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()).to_string(),
956            "List<Int32>"
957        );
958        assert_eq!(
959            ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(DictionaryType::new(
960                ConcreteDataType::int32_datatype(),
961                ConcreteDataType::string_datatype()
962            )))
963            .to_string(),
964            "List<Dictionary<Int32, String>>"
965        );
966        assert_eq!(
967            ConcreteDataType::list_datatype(ConcreteDataType::list_datatype(
968                ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
969            ))
970            .to_string(),
971            "List<List<List<Int32>>>"
972        );
973        assert_eq!(
974            ConcreteDataType::dictionary_datatype(
975                ConcreteDataType::int32_datatype(),
976                ConcreteDataType::string_datatype()
977            )
978            .to_string(),
979            "Dictionary<Int32, String>"
980        );
981        assert_eq!(
982            ConcreteDataType::vector_datatype(3).to_string(),
983            "Vector(3)"
984        );
985    }
986}