datatypes/
data_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt;
16use std::sync::Arc;
17
18use arrow::compute::cast as arrow_array_cast;
19use arrow::datatypes::{
20    DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
21};
22use arrow_schema::DECIMAL_DEFAULT_SCALE;
23use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
24use common_time::interval::IntervalUnit;
25use common_time::timestamp::TimeUnit;
26use enum_dispatch::enum_dispatch;
27use paste::paste;
28use serde::{Deserialize, Serialize};
29
30use crate::error::{self, Error, Result};
31use crate::type_id::LogicalTypeId;
32use crate::types::{
33    BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
34    DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
35    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
36    IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
37    StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
38    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
39    UInt16Type, UInt32Type, UInt64Type, UInt8Type, VectorType,
40};
41use crate::value::Value;
42use crate::vectors::MutableVector;
43
/// Enumerates the concrete data types, one variant per type family.
///
/// Each variant wraps the value describing that type. Some payloads are plain
/// structs (e.g. [`BooleanType`]); others are themselves enums that carry the
/// unit (e.g. [`TimestampType`], [`IntervalType`]).
///
/// `#[enum_dispatch(DataType)]` ties this enum to the [`DataType`] trait so its
/// methods are forwarded to the wrapped value.
// NOTE: `PartialOrd`/`Ord` are derived, so the declaration order of the variants
// below defines how two data types compare. Reordering variants changes that.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    Null(NullType),
    Boolean(BooleanType),

    // Numeric types:
    Int8(Int8Type),
    Int16(Int16Type),
    Int32(Int32Type),
    Int64(Int64Type),
    UInt8(UInt8Type),
    UInt16(UInt16Type),
    UInt32(UInt32Type),
    UInt64(UInt64Type),
    Float32(Float32Type),
    Float64(Float64Type),

    // Decimal128 type:
    Decimal128(Decimal128Type),

    // String types:
    Binary(BinaryType),
    String(StringType),

    // Date and time types:
    Date(DateType),
    Timestamp(TimestampType),
    Time(TimeType),

    // Duration type:
    Duration(DurationType),

    // Interval type:
    Interval(IntervalType),

    // Compound types:
    List(ListType),
    Dictionary(DictionaryType),
    Struct(StructType),

    // JSON type:
    Json(JsonType),

    // Vector type:
    Vector(VectorType),
}
91
92impl fmt::Display for ConcreteDataType {
93    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94        match self {
95            ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
96            ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
97            ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
98            ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
99            ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
100            ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
101            ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
102            ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
103            ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
104            ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
105            ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
106            ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
107            ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
108            ConcreteDataType::String(v) => write!(f, "{}", v.name()),
109            ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
110            ConcreteDataType::Timestamp(t) => match t {
111                TimestampType::Second(v) => write!(f, "{}", v.name()),
112                TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
113                TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
114                TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
115            },
116            ConcreteDataType::Time(t) => match t {
117                TimeType::Second(v) => write!(f, "{}", v.name()),
118                TimeType::Millisecond(v) => write!(f, "{}", v.name()),
119                TimeType::Microsecond(v) => write!(f, "{}", v.name()),
120                TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
121            },
122            ConcreteDataType::Interval(i) => match i {
123                IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
124                IntervalType::DayTime(v) => write!(f, "{}", v.name()),
125                IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
126            },
127            ConcreteDataType::Duration(d) => match d {
128                DurationType::Second(v) => write!(f, "{}", v.name()),
129                DurationType::Millisecond(v) => write!(f, "{}", v.name()),
130                DurationType::Microsecond(v) => write!(f, "{}", v.name()),
131                DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
132            },
133            ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
134            ConcreteDataType::List(v) => write!(f, "{}", v.name()),
135            ConcreteDataType::Struct(v) => write!(f, "{}", v.name()),
136            ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
137            ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
138            ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
139        }
140    }
141}
142
143// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
144// returning all these properties to the `DataType` trait
145impl ConcreteDataType {
146    pub fn is_float(&self) -> bool {
147        matches!(
148            self,
149            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
150        )
151    }
152
153    pub fn is_boolean(&self) -> bool {
154        matches!(self, ConcreteDataType::Boolean(_))
155    }
156
157    pub fn is_string(&self) -> bool {
158        matches!(self, ConcreteDataType::String(_))
159    }
160
161    pub fn is_stringifiable(&self) -> bool {
162        matches!(
163            self,
164            ConcreteDataType::String(_)
165                | ConcreteDataType::Date(_)
166                | ConcreteDataType::Timestamp(_)
167                | ConcreteDataType::Time(_)
168                | ConcreteDataType::Interval(_)
169                | ConcreteDataType::Duration(_)
170                | ConcreteDataType::Decimal128(_)
171                | ConcreteDataType::Binary(_)
172                | ConcreteDataType::Json(_)
173                | ConcreteDataType::Vector(_)
174        )
175    }
176
177    pub fn is_signed(&self) -> bool {
178        matches!(
179            self,
180            ConcreteDataType::Int8(_)
181                | ConcreteDataType::Int16(_)
182                | ConcreteDataType::Int32(_)
183                | ConcreteDataType::Int64(_)
184                | ConcreteDataType::Date(_)
185                | ConcreteDataType::Timestamp(_)
186                | ConcreteDataType::Time(_)
187                | ConcreteDataType::Interval(_)
188                | ConcreteDataType::Duration(_)
189                | ConcreteDataType::Decimal128(_)
190        )
191    }
192
193    pub fn is_unsigned(&self) -> bool {
194        matches!(
195            self,
196            ConcreteDataType::UInt8(_)
197                | ConcreteDataType::UInt16(_)
198                | ConcreteDataType::UInt32(_)
199                | ConcreteDataType::UInt64(_)
200        )
201    }
202
203    pub fn is_numeric(&self) -> bool {
204        matches!(
205            self,
206            ConcreteDataType::Int8(_)
207                | ConcreteDataType::Int16(_)
208                | ConcreteDataType::Int32(_)
209                | ConcreteDataType::Int64(_)
210                | ConcreteDataType::UInt8(_)
211                | ConcreteDataType::UInt16(_)
212                | ConcreteDataType::UInt32(_)
213                | ConcreteDataType::UInt64(_)
214                | ConcreteDataType::Float32(_)
215                | ConcreteDataType::Float64(_)
216        )
217    }
218
219    pub fn is_timestamp(&self) -> bool {
220        matches!(self, ConcreteDataType::Timestamp(_))
221    }
222
223    pub fn is_decimal(&self) -> bool {
224        matches!(self, ConcreteDataType::Decimal128(_))
225    }
226
227    pub fn is_json(&self) -> bool {
228        matches!(self, ConcreteDataType::Json(_))
229    }
230
231    pub fn is_vector(&self) -> bool {
232        matches!(self, ConcreteDataType::Vector(_))
233    }
234
235    pub fn numerics() -> Vec<ConcreteDataType> {
236        vec![
237            ConcreteDataType::int8_datatype(),
238            ConcreteDataType::int16_datatype(),
239            ConcreteDataType::int32_datatype(),
240            ConcreteDataType::int64_datatype(),
241            ConcreteDataType::uint8_datatype(),
242            ConcreteDataType::uint16_datatype(),
243            ConcreteDataType::uint32_datatype(),
244            ConcreteDataType::uint64_datatype(),
245            ConcreteDataType::float32_datatype(),
246            ConcreteDataType::float64_datatype(),
247        ]
248    }
249
250    pub fn unsigned_integers() -> Vec<ConcreteDataType> {
251        vec![
252            ConcreteDataType::uint8_datatype(),
253            ConcreteDataType::uint16_datatype(),
254            ConcreteDataType::uint32_datatype(),
255            ConcreteDataType::uint64_datatype(),
256        ]
257    }
258
259    pub fn timestamps() -> Vec<ConcreteDataType> {
260        vec![
261            ConcreteDataType::timestamp_second_datatype(),
262            ConcreteDataType::timestamp_millisecond_datatype(),
263            ConcreteDataType::timestamp_microsecond_datatype(),
264            ConcreteDataType::timestamp_nanosecond_datatype(),
265        ]
266    }
267
268    /// Convert arrow data type to [ConcreteDataType].
269    ///
270    /// # Panics
271    /// Panic if given arrow data type is not supported.
272    pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
273        ConcreteDataType::try_from(dt).expect("Unimplemented type")
274    }
275
276    pub fn is_null(&self) -> bool {
277        matches!(self, ConcreteDataType::Null(NullType))
278    }
279
280    /// Try to cast the type as a [`ListType`].
281    pub fn as_list(&self) -> Option<&ListType> {
282        match self {
283            ConcreteDataType::List(t) => Some(t),
284            _ => None,
285        }
286    }
287
288    /// Try to cast data type as a [`TimestampType`].
289    pub fn as_timestamp(&self) -> Option<TimestampType> {
290        match self {
291            ConcreteDataType::Timestamp(t) => Some(*t),
292            _ => None,
293        }
294    }
295
296    /// Try to get numeric precision, returns `None` if it's not numeric type
297    pub fn numeric_precision(&self) -> Option<u8> {
298        match self {
299            ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
300            ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
301            ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
302            ConcreteDataType::Int64(_) => Some(19),
303            ConcreteDataType::UInt64(_) => Some(20),
304            ConcreteDataType::Float32(_) => Some(12),
305            ConcreteDataType::Float64(_) => Some(22),
306            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
307            _ => None,
308        }
309    }
310
311    /// Try to get numeric scale, returns `None` if it's float or not numeric type
312    pub fn numeric_scale(&self) -> Option<i8> {
313        match self {
314            ConcreteDataType::Int8(_)
315            | ConcreteDataType::UInt8(_)
316            | ConcreteDataType::Int16(_)
317            | ConcreteDataType::UInt16(_)
318            | ConcreteDataType::Int32(_)
319            | ConcreteDataType::UInt32(_)
320            | ConcreteDataType::Int64(_)
321            | ConcreteDataType::UInt64(_) => Some(0),
322            ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
323            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
324            _ => None,
325        }
326    }
327
328    /// Try to cast data type as a [`TimeType`].
329    pub fn as_time(&self) -> Option<TimeType> {
330        match self {
331            ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
332            ConcreteDataType::Time(t) => Some(*t),
333            _ => None,
334        }
335    }
336
337    pub fn as_decimal128(&self) -> Option<Decimal128Type> {
338        match self {
339            ConcreteDataType::Decimal128(d) => Some(*d),
340            _ => None,
341        }
342    }
343
344    pub fn as_json(&self) -> Option<JsonType> {
345        match self {
346            ConcreteDataType::Json(j) => Some(*j),
347            _ => None,
348        }
349    }
350
351    pub fn as_vector(&self) -> Option<VectorType> {
352        match self {
353            ConcreteDataType::Vector(v) => Some(*v),
354            _ => None,
355        }
356    }
357
358    /// Checks if the data type can cast to another data type.
359    pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
360        let array = arrow_array::new_empty_array(&self.as_arrow_type());
361        arrow_array_cast(array.as_ref(), &to_type.as_arrow_type()).is_ok()
362    }
363
364    /// Try to cast data type as a [`DurationType`].
365    pub fn as_duration(&self) -> Option<DurationType> {
366        match self {
367            ConcreteDataType::Duration(d) => Some(*d),
368            _ => None,
369        }
370    }
371
372    /// Return the datatype name in postgres type system
373    pub fn postgres_datatype_name(&self) -> &'static str {
374        match self {
375            &ConcreteDataType::Null(_) => "UNKNOWN",
376            &ConcreteDataType::Boolean(_) => "BOOL",
377            &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
378            &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
379            &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
380            &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
381            &ConcreteDataType::Float32(_) => "FLOAT4",
382            &ConcreteDataType::Float64(_) => "FLOAT8",
383            &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
384            &ConcreteDataType::String(_) => "VARCHAR",
385            &ConcreteDataType::Date(_) => "DATE",
386            &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
387            &ConcreteDataType::Time(_) => "TIME",
388            &ConcreteDataType::Interval(_) => "INTERVAL",
389            &ConcreteDataType::Decimal128(_) => "NUMERIC",
390            &ConcreteDataType::Json(_) => "JSON",
391            ConcreteDataType::List(list) => match list.item_type() {
392                &ConcreteDataType::Null(_) => "UNKNOWN",
393                &ConcreteDataType::Boolean(_) => "_BOOL",
394                &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
395                &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
396                &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
397                &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
398                &ConcreteDataType::Float32(_) => "_FLOAT4",
399                &ConcreteDataType::Float64(_) => "_FLOAT8",
400                &ConcreteDataType::Binary(_) => "_BYTEA",
401                &ConcreteDataType::String(_) => "_VARCHAR",
402                &ConcreteDataType::Date(_) => "_DATE",
403                &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
404                &ConcreteDataType::Time(_) => "_TIME",
405                &ConcreteDataType::Interval(_) => "_INTERVAL",
406                &ConcreteDataType::Decimal128(_) => "_NUMERIC",
407                &ConcreteDataType::Json(_) => "_JSON",
408                &ConcreteDataType::Duration(_)
409                | &ConcreteDataType::Dictionary(_)
410                | &ConcreteDataType::Vector(_)
411                | &ConcreteDataType::List(_)
412                | &ConcreteDataType::Struct(_) => "UNKNOWN",
413            },
414            &ConcreteDataType::Duration(_)
415            | &ConcreteDataType::Dictionary(_)
416            | &ConcreteDataType::Struct(_) => "UNKNOWN",
417        }
418    }
419}
420
421impl From<&ConcreteDataType> for ConcreteDataType {
422    fn from(t: &ConcreteDataType) -> Self {
423        t.clone()
424    }
425}
426
427impl TryFrom<&ArrowDataType> for ConcreteDataType {
428    type Error = Error;
429
430    fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
431        let concrete_type = match dt {
432            ArrowDataType::Null => Self::null_datatype(),
433            ArrowDataType::Boolean => Self::boolean_datatype(),
434            ArrowDataType::UInt8 => Self::uint8_datatype(),
435            ArrowDataType::UInt16 => Self::uint16_datatype(),
436            ArrowDataType::UInt32 => Self::uint32_datatype(),
437            ArrowDataType::UInt64 => Self::uint64_datatype(),
438            ArrowDataType::Int8 => Self::int8_datatype(),
439            ArrowDataType::Int16 => Self::int16_datatype(),
440            ArrowDataType::Int32 => Self::int32_datatype(),
441            ArrowDataType::Int64 => Self::int64_datatype(),
442            ArrowDataType::Float32 => Self::float32_datatype(),
443            ArrowDataType::Float64 => Self::float64_datatype(),
444            ArrowDataType::Date32 => Self::date_datatype(),
445            ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
446            ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
447            ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::BinaryView => {
448                Self::binary_datatype()
449            }
450            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
451                Self::string_datatype()
452            }
453            ArrowDataType::List(field) => Self::List(ListType::new(
454                ConcreteDataType::from_arrow_type(field.data_type()),
455            )),
456            ArrowDataType::Dictionary(key_type, value_type) => {
457                let key_type = ConcreteDataType::from_arrow_type(key_type);
458                let value_type = ConcreteDataType::from_arrow_type(value_type);
459                Self::Dictionary(DictionaryType::new(key_type, value_type))
460            }
461            ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
462            ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
463            ArrowDataType::Duration(u) => {
464                ConcreteDataType::Duration(DurationType::from_unit(u.into()))
465            }
466            ArrowDataType::Decimal128(precision, scale) => {
467                ConcreteDataType::decimal128_datatype(*precision, *scale)
468            }
469            ArrowDataType::Struct(fields) => ConcreteDataType::Struct(fields.try_into()?),
470            ArrowDataType::Float16
471            | ArrowDataType::Date64
472            | ArrowDataType::FixedSizeBinary(_)
473            | ArrowDataType::ListView(_)
474            | ArrowDataType::FixedSizeList(_, _)
475            | ArrowDataType::LargeList(_)
476            | ArrowDataType::LargeListView(_)
477            | ArrowDataType::Union(_, _)
478            | ArrowDataType::Decimal256(_, _)
479            | ArrowDataType::Map(_, _)
480            | ArrowDataType::RunEndEncoded(_, _)
481            | ArrowDataType::Decimal32(_, _)
482            | ArrowDataType::Decimal64(_, _) => {
483                return error::UnsupportedArrowTypeSnafu {
484                    arrow_type: dt.clone(),
485                }
486                .fail()
487            }
488        };
489
490        Ok(concrete_type)
491    }
492}
493
494macro_rules! impl_new_concrete_type_functions {
495    ($($Type: ident), +) => {
496        paste! {
497            impl ConcreteDataType {
498                $(
499                    pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
500                        ConcreteDataType::$Type([<$Type Type>]::default())
501                    }
502                )+
503            }
504        }
505    }
506}
507
508impl_new_concrete_type_functions!(
509    Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
510    Binary, Date, String, Json
511);
512
513impl ConcreteDataType {
514    pub fn timestamp_second_datatype() -> Self {
515        ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
516    }
517
518    pub fn timestamp_millisecond_datatype() -> Self {
519        ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
520    }
521
522    pub fn timestamp_microsecond_datatype() -> Self {
523        ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
524    }
525
526    pub fn timestamp_nanosecond_datatype() -> Self {
527        ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
528    }
529
530    /// Returns the time data type with `TimeUnit`.
531    pub fn time_datatype(unit: TimeUnit) -> Self {
532        ConcreteDataType::Time(TimeType::from_unit(unit))
533    }
534
535    /// Creates a [Time(TimeSecondType)] datatype.
536    pub fn time_second_datatype() -> Self {
537        Self::time_datatype(TimeUnit::Second)
538    }
539
540    /// Creates a [Time(TimeMillisecondType)] datatype.
541    pub fn time_millisecond_datatype() -> Self {
542        Self::time_datatype(TimeUnit::Millisecond)
543    }
544
545    /// Creates a [Time(TimeMicrosecond)] datatype.
546    pub fn time_microsecond_datatype() -> Self {
547        Self::time_datatype(TimeUnit::Microsecond)
548    }
549
550    /// Creates a [Time(TimeNanosecond)] datatype.
551    pub fn time_nanosecond_datatype() -> Self {
552        Self::time_datatype(TimeUnit::Nanosecond)
553    }
554
555    /// Creates a [Duration(DurationSecondType)] datatype.
556    pub fn duration_second_datatype() -> Self {
557        ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
558    }
559
560    /// Creates a [Duration(DurationMillisecondType)] datatype.
561    pub fn duration_millisecond_datatype() -> Self {
562        ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
563    }
564
565    /// Creates a [Duration(DurationMicrosecondType)] datatype.
566    pub fn duration_microsecond_datatype() -> Self {
567        ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
568    }
569
570    /// Creates a [Duration(DurationNanosecondType)] datatype.
571    pub fn duration_nanosecond_datatype() -> Self {
572        ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
573    }
574
575    /// Creates a [Interval(IntervalMonthDayNanoType)] datatype.
576    pub fn interval_month_day_nano_datatype() -> Self {
577        ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
578    }
579
580    /// Creates a [Interval(IntervalYearMonthType)] datatype.
581    pub fn interval_year_month_datatype() -> Self {
582        ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
583    }
584
585    /// Creates a [Interval(IntervalDayTimeType)] datatype.
586    pub fn interval_day_time_datatype() -> Self {
587        ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
588    }
589
590    pub fn timestamp_datatype(unit: TimeUnit) -> Self {
591        match unit {
592            TimeUnit::Second => Self::timestamp_second_datatype(),
593            TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
594            TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
595            TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
596        }
597    }
598
599    /// Converts from arrow timestamp unit to
600    pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
601        match t {
602            ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
603            ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
604            ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
605            ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
606        }
607    }
608
609    pub fn duration_datatype(unit: TimeUnit) -> Self {
610        match unit {
611            TimeUnit::Second => Self::duration_second_datatype(),
612            TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
613            TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
614            TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
615        }
616    }
617
618    pub fn interval_datatype(unit: IntervalUnit) -> Self {
619        match unit {
620            IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
621            IntervalUnit::DayTime => Self::interval_day_time_datatype(),
622            IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
623        }
624    }
625
626    pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
627        match u {
628            ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
629            ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
630            ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
631        }
632    }
633
634    pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
635        ConcreteDataType::List(ListType::new(item_type))
636    }
637
638    pub fn struct_datatype(fields: StructType) -> ConcreteDataType {
639        ConcreteDataType::Struct(fields)
640    }
641
642    pub fn dictionary_datatype(
643        key_type: ConcreteDataType,
644        value_type: ConcreteDataType,
645    ) -> ConcreteDataType {
646        ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
647    }
648
649    pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
650        ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
651    }
652
653    pub fn decimal128_default_datatype() -> ConcreteDataType {
654        Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
655    }
656
657    pub fn vector_datatype(dim: u32) -> ConcreteDataType {
658        ConcreteDataType::Vector(VectorType::new(dim))
659    }
660
661    pub fn vector_default_datatype() -> ConcreteDataType {
662        Self::vector_datatype(0)
663    }
664}
665
/// Data type abstraction.
///
/// Implementors describe a single data type: its name, logical id, default
/// value, arrow counterpart, how to allocate a vector for it and how to cast a
/// value into it. [`ConcreteDataType`] is tied to this trait through
/// `enum_dispatch`.
#[enum_dispatch::enum_dispatch]
pub trait DataType: std::fmt::Debug + Send + Sync {
    /// Returns the name of this data type.
    fn name(&self) -> String;

    /// Returns the id of the logical data type.
    fn logical_type_id(&self) -> LogicalTypeId;

    /// Returns the default value of this type.
    fn default_value(&self) -> Value;

    /// Converts this type to the corresponding [arrow::datatypes::DataType].
    fn as_arrow_type(&self) -> ArrowDataType;

    /// Creates a mutable vector of this type with the given `capacity`.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

    /// Casts the value `from` to this data type.
    ///
    /// Returns `None` if the cast fails.
    fn try_cast(&self, from: Value) -> Option<Value>;
}
688
/// Reference-counted, shareable handle to a [`DataType`] trait object.
pub type DataTypeRef = Arc<dyn DataType>;
690
691#[cfg(test)]
692mod tests {
693    use arrow::datatypes::Field;
694
695    use super::*;
696
697    #[test]
698    fn test_concrete_type_as_datatype_trait() {
699        let concrete_type = ConcreteDataType::boolean_datatype();
700
701        assert_eq!("Boolean", concrete_type.to_string());
702        assert_eq!(Value::Boolean(false), concrete_type.default_value());
703        assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
704        assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
705    }
706
707    #[test]
708    fn test_from_arrow_type() {
709        assert!(matches!(
710            ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
711            ConcreteDataType::Null(_)
712        ));
713        assert!(matches!(
714            ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
715            ConcreteDataType::Boolean(_)
716        ));
717        assert!(matches!(
718            ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
719            ConcreteDataType::Binary(_)
720        ));
721        assert!(matches!(
722            ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
723            ConcreteDataType::Binary(_)
724        ));
725        assert!(matches!(
726            ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
727            ConcreteDataType::Int8(_)
728        ));
729        assert!(matches!(
730            ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
731            ConcreteDataType::Int16(_)
732        ));
733        assert!(matches!(
734            ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
735            ConcreteDataType::Int32(_)
736        ));
737        assert!(matches!(
738            ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
739            ConcreteDataType::Int64(_)
740        ));
741        assert!(matches!(
742            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
743            ConcreteDataType::UInt8(_)
744        ));
745        assert!(matches!(
746            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
747            ConcreteDataType::UInt16(_)
748        ));
749        assert!(matches!(
750            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
751            ConcreteDataType::UInt32(_)
752        ));
753        assert!(matches!(
754            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
755            ConcreteDataType::UInt64(_)
756        ));
757        assert!(matches!(
758            ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
759            ConcreteDataType::Float32(_)
760        ));
761        assert!(matches!(
762            ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
763            ConcreteDataType::Float64(_)
764        ));
765        assert!(matches!(
766            ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
767            ConcreteDataType::String(_)
768        ));
769        assert_eq!(
770            ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
771                "item",
772                ArrowDataType::Int32,
773                true,
774            )))),
775            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
776        );
777        assert!(matches!(
778            ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
779            ConcreteDataType::Date(_)
780        ));
781    }
782
783    #[test]
784    fn test_from_arrow_timestamp() {
785        assert_eq!(
786            ConcreteDataType::timestamp_millisecond_datatype(),
787            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
788        );
789        assert_eq!(
790            ConcreteDataType::timestamp_microsecond_datatype(),
791            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
792        );
793        assert_eq!(
794            ConcreteDataType::timestamp_nanosecond_datatype(),
795            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
796        );
797        assert_eq!(
798            ConcreteDataType::timestamp_second_datatype(),
799            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
800        );
801    }
802
803    #[test]
804    fn test_is_null() {
805        assert!(ConcreteDataType::null_datatype().is_null());
806        assert!(!ConcreteDataType::int32_datatype().is_null());
807    }
808
809    #[test]
810    fn test_is_float() {
811        assert!(!ConcreteDataType::int32_datatype().is_float());
812        assert!(ConcreteDataType::float32_datatype().is_float());
813        assert!(ConcreteDataType::float64_datatype().is_float());
814    }
815
816    #[test]
817    fn test_is_boolean() {
818        assert!(!ConcreteDataType::int32_datatype().is_boolean());
819        assert!(!ConcreteDataType::float32_datatype().is_boolean());
820        assert!(ConcreteDataType::boolean_datatype().is_boolean());
821    }
822
823    #[test]
824    fn test_is_decimal() {
825        assert!(!ConcreteDataType::int32_datatype().is_decimal());
826        assert!(!ConcreteDataType::float32_datatype().is_decimal());
827        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_decimal());
828        assert!(ConcreteDataType::decimal128_datatype(18, 6).is_decimal());
829    }
830
831    #[test]
832    fn test_is_stringifiable() {
833        assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
834        assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
835        assert!(ConcreteDataType::string_datatype().is_stringifiable());
836        assert!(ConcreteDataType::binary_datatype().is_stringifiable());
837        assert!(ConcreteDataType::date_datatype().is_stringifiable());
838        assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
839        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
840        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
841        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
842        assert!(ConcreteDataType::time_second_datatype().is_stringifiable());
843        assert!(ConcreteDataType::time_millisecond_datatype().is_stringifiable());
844        assert!(ConcreteDataType::time_microsecond_datatype().is_stringifiable());
845        assert!(ConcreteDataType::time_nanosecond_datatype().is_stringifiable());
846
847        assert!(ConcreteDataType::interval_year_month_datatype().is_stringifiable());
848        assert!(ConcreteDataType::interval_day_time_datatype().is_stringifiable());
849        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_stringifiable());
850
851        assert!(ConcreteDataType::duration_second_datatype().is_stringifiable());
852        assert!(ConcreteDataType::duration_millisecond_datatype().is_stringifiable());
853        assert!(ConcreteDataType::duration_microsecond_datatype().is_stringifiable());
854        assert!(ConcreteDataType::duration_nanosecond_datatype().is_stringifiable());
855        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_stringifiable());
856        assert!(ConcreteDataType::vector_default_datatype().is_stringifiable());
857    }
858
859    #[test]
860    fn test_is_signed() {
861        assert!(ConcreteDataType::int8_datatype().is_signed());
862        assert!(ConcreteDataType::int16_datatype().is_signed());
863        assert!(ConcreteDataType::int32_datatype().is_signed());
864        assert!(ConcreteDataType::int64_datatype().is_signed());
865        assert!(ConcreteDataType::date_datatype().is_signed());
866        assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
867        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
868        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
869        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
870        assert!(ConcreteDataType::time_second_datatype().is_signed());
871        assert!(ConcreteDataType::time_millisecond_datatype().is_signed());
872        assert!(ConcreteDataType::time_microsecond_datatype().is_signed());
873        assert!(ConcreteDataType::time_nanosecond_datatype().is_signed());
874        assert!(ConcreteDataType::interval_year_month_datatype().is_signed());
875        assert!(ConcreteDataType::interval_day_time_datatype().is_signed());
876        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_signed());
877        assert!(ConcreteDataType::duration_second_datatype().is_signed());
878        assert!(ConcreteDataType::duration_millisecond_datatype().is_signed());
879        assert!(ConcreteDataType::duration_microsecond_datatype().is_signed());
880        assert!(ConcreteDataType::duration_nanosecond_datatype().is_signed());
881
882        assert!(!ConcreteDataType::uint8_datatype().is_signed());
883        assert!(!ConcreteDataType::uint16_datatype().is_signed());
884        assert!(!ConcreteDataType::uint32_datatype().is_signed());
885        assert!(!ConcreteDataType::uint64_datatype().is_signed());
886
887        assert!(!ConcreteDataType::float32_datatype().is_signed());
888        assert!(!ConcreteDataType::float64_datatype().is_signed());
889
890        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_signed());
891    }
892
893    #[test]
894    fn test_is_unsigned() {
895        assert!(!ConcreteDataType::int8_datatype().is_unsigned());
896        assert!(!ConcreteDataType::int16_datatype().is_unsigned());
897        assert!(!ConcreteDataType::int32_datatype().is_unsigned());
898        assert!(!ConcreteDataType::int64_datatype().is_unsigned());
899        assert!(!ConcreteDataType::date_datatype().is_unsigned());
900        assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
901        assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
902        assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
903        assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
904        assert!(!ConcreteDataType::time_second_datatype().is_unsigned());
905        assert!(!ConcreteDataType::time_millisecond_datatype().is_unsigned());
906        assert!(!ConcreteDataType::time_microsecond_datatype().is_unsigned());
907        assert!(!ConcreteDataType::time_nanosecond_datatype().is_unsigned());
908        assert!(!ConcreteDataType::interval_year_month_datatype().is_unsigned());
909        assert!(!ConcreteDataType::interval_day_time_datatype().is_unsigned());
910        assert!(!ConcreteDataType::interval_month_day_nano_datatype().is_unsigned());
911        assert!(!ConcreteDataType::duration_second_datatype().is_unsigned());
912        assert!(!ConcreteDataType::duration_millisecond_datatype().is_unsigned());
913        assert!(!ConcreteDataType::duration_microsecond_datatype().is_unsigned());
914        assert!(!ConcreteDataType::duration_nanosecond_datatype().is_unsigned());
915        assert!(!ConcreteDataType::decimal128_datatype(10, 2).is_unsigned());
916
917        assert!(ConcreteDataType::uint8_datatype().is_unsigned());
918        assert!(ConcreteDataType::uint16_datatype().is_unsigned());
919        assert!(ConcreteDataType::uint32_datatype().is_unsigned());
920        assert!(ConcreteDataType::uint64_datatype().is_unsigned());
921
922        assert!(!ConcreteDataType::float32_datatype().is_unsigned());
923        assert!(!ConcreteDataType::float64_datatype().is_unsigned());
924    }
925
926    #[test]
927    fn test_numerics() {
928        let nums = ConcreteDataType::numerics();
929        assert_eq!(10, nums.len());
930    }
931
932    #[test]
933    fn test_as_list() {
934        let list_type = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
935        assert_eq!(
936            ListType::new(ConcreteDataType::int32_datatype()),
937            *list_type.as_list().unwrap()
938        );
939        assert!(ConcreteDataType::int32_datatype().as_list().is_none());
940    }
941
942    #[test]
943    fn test_display_concrete_data_type() {
944        assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
945        assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
946        assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
947        assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
948        assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
949        assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
950        assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
951        assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
952        assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
953        assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
954        assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
955        assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
956        assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
957        assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
958        assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
959        assert_eq!(
960            ConcreteDataType::timestamp_millisecond_datatype().to_string(),
961            "TimestampMillisecond"
962        );
963        assert_eq!(
964            ConcreteDataType::time_millisecond_datatype().to_string(),
965            "TimeMillisecond"
966        );
967        assert_eq!(
968            ConcreteDataType::interval_month_day_nano_datatype().to_string(),
969            "IntervalMonthDayNano"
970        );
971        assert_eq!(
972            ConcreteDataType::duration_second_datatype().to_string(),
973            "DurationSecond"
974        );
975        assert_eq!(
976            ConcreteDataType::decimal128_datatype(10, 2).to_string(),
977            "Decimal(10, 2)"
978        );
979        // Nested types
980        assert_eq!(
981            ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()).to_string(),
982            "List<Int32>"
983        );
984        assert_eq!(
985            ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(DictionaryType::new(
986                ConcreteDataType::int32_datatype(),
987                ConcreteDataType::string_datatype()
988            )))
989            .to_string(),
990            "List<Dictionary<Int32, String>>"
991        );
992        assert_eq!(
993            ConcreteDataType::list_datatype(ConcreteDataType::list_datatype(
994                ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
995            ))
996            .to_string(),
997            "List<List<List<Int32>>>"
998        );
999        assert_eq!(
1000            ConcreteDataType::dictionary_datatype(
1001                ConcreteDataType::int32_datatype(),
1002                ConcreteDataType::string_datatype()
1003            )
1004            .to_string(),
1005            "Dictionary<Int32, String>"
1006        );
1007        assert_eq!(
1008            ConcreteDataType::vector_datatype(3).to_string(),
1009            "Vector(3)"
1010        );
1011    }
1012}