datatypes/
data_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt;
16use std::sync::Arc;
17
18use arrow::compute::cast as arrow_array_cast;
19use arrow::datatypes::{
20    DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
21};
22use arrow_schema::DECIMAL_DEFAULT_SCALE;
23use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
24use common_time::interval::IntervalUnit;
25use common_time::timestamp::TimeUnit;
26use enum_dispatch::enum_dispatch;
27use paste::paste;
28use serde::{Deserialize, Serialize};
29
30use crate::error::{self, Error, Result};
31use crate::type_id::LogicalTypeId;
32use crate::types::{
33    BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
34    DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
35    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
36    IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
37    StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
38    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
39    UInt16Type, UInt32Type, UInt64Type, UInt8Type, VectorType,
40};
41use crate::value::Value;
42use crate::vectors::MutableVector;
43
/// Enumeration of every concrete data type known to this module.
///
/// Each variant wraps the type struct (or nested type enum) that describes it.
/// The `enum_dispatch` attribute forwards the [`DataType`] trait methods to the
/// wrapped value.
///
/// Note that `PartialOrd`/`Ord` are derived, so the declaration order of the
/// variants determines how two values of different variants compare.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    Null(NullType),
    Boolean(BooleanType),

    // Numeric types:
    Int8(Int8Type),
    Int16(Int16Type),
    Int32(Int32Type),
    Int64(Int64Type),
    UInt8(UInt8Type),
    UInt16(UInt16Type),
    UInt32(UInt32Type),
    UInt64(UInt64Type),
    Float32(Float32Type),
    Float64(Float64Type),

    // Decimal128 type:
    Decimal128(Decimal128Type),

    // String types:
    Binary(BinaryType),
    String(StringType),

    // Date and time types:
    Date(DateType),
    Timestamp(TimestampType),
    Time(TimeType),

    // Duration type:
    Duration(DurationType),

    // Interval type:
    Interval(IntervalType),

    // Compound types:
    List(ListType),
    Dictionary(DictionaryType),
    Struct(StructType),

    // JSON type:
    Json(JsonType),

    // Vector type:
    Vector(VectorType),
}
91
92impl fmt::Display for ConcreteDataType {
93    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94        match self {
95            ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
96            ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
97            ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
98            ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
99            ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
100            ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
101            ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
102            ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
103            ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
104            ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
105            ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
106            ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
107            ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
108            ConcreteDataType::String(v) => write!(f, "{}", v.name()),
109            ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
110            ConcreteDataType::Timestamp(t) => match t {
111                TimestampType::Second(v) => write!(f, "{}", v.name()),
112                TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
113                TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
114                TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
115            },
116            ConcreteDataType::Time(t) => match t {
117                TimeType::Second(v) => write!(f, "{}", v.name()),
118                TimeType::Millisecond(v) => write!(f, "{}", v.name()),
119                TimeType::Microsecond(v) => write!(f, "{}", v.name()),
120                TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
121            },
122            ConcreteDataType::Interval(i) => match i {
123                IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
124                IntervalType::DayTime(v) => write!(f, "{}", v.name()),
125                IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
126            },
127            ConcreteDataType::Duration(d) => match d {
128                DurationType::Second(v) => write!(f, "{}", v.name()),
129                DurationType::Millisecond(v) => write!(f, "{}", v.name()),
130                DurationType::Microsecond(v) => write!(f, "{}", v.name()),
131                DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
132            },
133            ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
134            ConcreteDataType::List(v) => write!(f, "{}", v.name()),
135            ConcreteDataType::Struct(v) => write!(f, "{}", v.name()),
136            ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
137            ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
138            ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
139        }
140    }
141}
142
143// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
144// returning all these properties to the `DataType` trait
145impl ConcreteDataType {
146    pub fn is_float(&self) -> bool {
147        matches!(
148            self,
149            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
150        )
151    }
152
153    pub fn is_boolean(&self) -> bool {
154        matches!(self, ConcreteDataType::Boolean(_))
155    }
156
157    pub fn is_string(&self) -> bool {
158        matches!(self, ConcreteDataType::String(_))
159    }
160
161    pub fn is_stringifiable(&self) -> bool {
162        matches!(
163            self,
164            ConcreteDataType::String(_)
165                | ConcreteDataType::Date(_)
166                | ConcreteDataType::Timestamp(_)
167                | ConcreteDataType::Time(_)
168                | ConcreteDataType::Interval(_)
169                | ConcreteDataType::Duration(_)
170                | ConcreteDataType::Decimal128(_)
171                | ConcreteDataType::Binary(_)
172                | ConcreteDataType::Json(_)
173                | ConcreteDataType::Vector(_)
174        )
175    }
176
177    pub fn is_signed(&self) -> bool {
178        matches!(
179            self,
180            ConcreteDataType::Int8(_)
181                | ConcreteDataType::Int16(_)
182                | ConcreteDataType::Int32(_)
183                | ConcreteDataType::Int64(_)
184                | ConcreteDataType::Date(_)
185                | ConcreteDataType::Timestamp(_)
186                | ConcreteDataType::Time(_)
187                | ConcreteDataType::Interval(_)
188                | ConcreteDataType::Duration(_)
189                | ConcreteDataType::Decimal128(_)
190        )
191    }
192
193    pub fn is_unsigned(&self) -> bool {
194        matches!(
195            self,
196            ConcreteDataType::UInt8(_)
197                | ConcreteDataType::UInt16(_)
198                | ConcreteDataType::UInt32(_)
199                | ConcreteDataType::UInt64(_)
200        )
201    }
202
203    pub fn is_numeric(&self) -> bool {
204        matches!(
205            self,
206            ConcreteDataType::Int8(_)
207                | ConcreteDataType::Int16(_)
208                | ConcreteDataType::Int32(_)
209                | ConcreteDataType::Int64(_)
210                | ConcreteDataType::UInt8(_)
211                | ConcreteDataType::UInt16(_)
212                | ConcreteDataType::UInt32(_)
213                | ConcreteDataType::UInt64(_)
214                | ConcreteDataType::Float32(_)
215                | ConcreteDataType::Float64(_)
216        )
217    }
218
219    pub fn is_timestamp(&self) -> bool {
220        matches!(self, ConcreteDataType::Timestamp(_))
221    }
222
223    pub fn is_decimal(&self) -> bool {
224        matches!(self, ConcreteDataType::Decimal128(_))
225    }
226
227    pub fn is_json(&self) -> bool {
228        matches!(self, ConcreteDataType::Json(_))
229    }
230
231    pub fn is_vector(&self) -> bool {
232        matches!(self, ConcreteDataType::Vector(_))
233    }
234
235    pub fn numerics() -> Vec<ConcreteDataType> {
236        vec![
237            ConcreteDataType::int8_datatype(),
238            ConcreteDataType::int16_datatype(),
239            ConcreteDataType::int32_datatype(),
240            ConcreteDataType::int64_datatype(),
241            ConcreteDataType::uint8_datatype(),
242            ConcreteDataType::uint16_datatype(),
243            ConcreteDataType::uint32_datatype(),
244            ConcreteDataType::uint64_datatype(),
245            ConcreteDataType::float32_datatype(),
246            ConcreteDataType::float64_datatype(),
247        ]
248    }
249
250    pub fn unsigned_integers() -> Vec<ConcreteDataType> {
251        vec![
252            ConcreteDataType::uint8_datatype(),
253            ConcreteDataType::uint16_datatype(),
254            ConcreteDataType::uint32_datatype(),
255            ConcreteDataType::uint64_datatype(),
256        ]
257    }
258
259    pub fn timestamps() -> Vec<ConcreteDataType> {
260        vec![
261            ConcreteDataType::timestamp_second_datatype(),
262            ConcreteDataType::timestamp_millisecond_datatype(),
263            ConcreteDataType::timestamp_microsecond_datatype(),
264            ConcreteDataType::timestamp_nanosecond_datatype(),
265        ]
266    }
267
268    /// Convert arrow data type to [ConcreteDataType].
269    ///
270    /// # Panics
271    /// Panic if given arrow data type is not supported.
272    pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
273        ConcreteDataType::try_from(dt).expect("Unimplemented type")
274    }
275
276    pub fn is_null(&self) -> bool {
277        matches!(self, ConcreteDataType::Null(NullType))
278    }
279
280    /// Try to cast the type as a [`ListType`].
281    pub fn as_list(&self) -> Option<&ListType> {
282        match self {
283            ConcreteDataType::List(t) => Some(t),
284            _ => None,
285        }
286    }
287
288    /// Try to cast data type as a [`TimestampType`].
289    pub fn as_timestamp(&self) -> Option<TimestampType> {
290        match self {
291            ConcreteDataType::Timestamp(t) => Some(*t),
292            _ => None,
293        }
294    }
295
296    /// Try to get numeric precision, returns `None` if it's not numeric type
297    pub fn numeric_precision(&self) -> Option<u8> {
298        match self {
299            ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
300            ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
301            ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
302            ConcreteDataType::Int64(_) => Some(19),
303            ConcreteDataType::UInt64(_) => Some(20),
304            ConcreteDataType::Float32(_) => Some(12),
305            ConcreteDataType::Float64(_) => Some(22),
306            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
307            _ => None,
308        }
309    }
310
311    /// Try to get numeric scale, returns `None` if it's float or not numeric type
312    pub fn numeric_scale(&self) -> Option<i8> {
313        match self {
314            ConcreteDataType::Int8(_)
315            | ConcreteDataType::UInt8(_)
316            | ConcreteDataType::Int16(_)
317            | ConcreteDataType::UInt16(_)
318            | ConcreteDataType::Int32(_)
319            | ConcreteDataType::UInt32(_)
320            | ConcreteDataType::Int64(_)
321            | ConcreteDataType::UInt64(_) => Some(0),
322            ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
323            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
324            _ => None,
325        }
326    }
327
328    /// Try to cast data type as a [`TimeType`].
329    pub fn as_time(&self) -> Option<TimeType> {
330        match self {
331            ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
332            ConcreteDataType::Time(t) => Some(*t),
333            _ => None,
334        }
335    }
336
337    pub fn as_decimal128(&self) -> Option<Decimal128Type> {
338        match self {
339            ConcreteDataType::Decimal128(d) => Some(*d),
340            _ => None,
341        }
342    }
343
344    pub fn as_json(&self) -> Option<JsonType> {
345        match self {
346            ConcreteDataType::Json(j) => Some(*j),
347            _ => None,
348        }
349    }
350
351    pub fn as_vector(&self) -> Option<VectorType> {
352        match self {
353            ConcreteDataType::Vector(v) => Some(*v),
354            _ => None,
355        }
356    }
357
358    /// Checks if the data type can cast to another data type.
359    pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
360        let array = arrow_array::new_empty_array(&self.as_arrow_type());
361        arrow_array_cast(array.as_ref(), &to_type.as_arrow_type()).is_ok()
362    }
363
364    /// Try to cast data type as a [`DurationType`].
365    pub fn as_duration(&self) -> Option<DurationType> {
366        match self {
367            ConcreteDataType::Duration(d) => Some(*d),
368            _ => None,
369        }
370    }
371
372    /// Return the datatype name in postgres type system
373    pub fn postgres_datatype_name(&self) -> &'static str {
374        match self {
375            &ConcreteDataType::Null(_) => "UNKNOWN",
376            &ConcreteDataType::Boolean(_) => "BOOL",
377            &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
378            &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
379            &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
380            &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
381            &ConcreteDataType::Float32(_) => "FLOAT4",
382            &ConcreteDataType::Float64(_) => "FLOAT8",
383            &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
384            &ConcreteDataType::String(_) => "VARCHAR",
385            &ConcreteDataType::Date(_) => "DATE",
386            &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
387            &ConcreteDataType::Time(_) => "TIME",
388            &ConcreteDataType::Interval(_) => "INTERVAL",
389            &ConcreteDataType::Decimal128(_) => "NUMERIC",
390            &ConcreteDataType::Json(_) => "JSON",
391            ConcreteDataType::List(list) => match list.item_type() {
392                &ConcreteDataType::Null(_) => "UNKNOWN",
393                &ConcreteDataType::Boolean(_) => "_BOOL",
394                &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
395                &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
396                &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
397                &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
398                &ConcreteDataType::Float32(_) => "_FLOAT4",
399                &ConcreteDataType::Float64(_) => "_FLOAT8",
400                &ConcreteDataType::Binary(_) => "_BYTEA",
401                &ConcreteDataType::String(_) => "_VARCHAR",
402                &ConcreteDataType::Date(_) => "_DATE",
403                &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
404                &ConcreteDataType::Time(_) => "_TIME",
405                &ConcreteDataType::Interval(_) => "_INTERVAL",
406                &ConcreteDataType::Decimal128(_) => "_NUMERIC",
407                &ConcreteDataType::Json(_) => "_JSON",
408                &ConcreteDataType::Duration(_)
409                | &ConcreteDataType::Dictionary(_)
410                | &ConcreteDataType::Vector(_)
411                | &ConcreteDataType::List(_)
412                | &ConcreteDataType::Struct(_) => "UNKNOWN",
413            },
414            &ConcreteDataType::Duration(_)
415            | &ConcreteDataType::Dictionary(_)
416            | &ConcreteDataType::Struct(_) => "UNKNOWN",
417        }
418    }
419}
420
421impl From<&ConcreteDataType> for ConcreteDataType {
422    fn from(t: &ConcreteDataType) -> Self {
423        t.clone()
424    }
425}
426
427impl TryFrom<&ArrowDataType> for ConcreteDataType {
428    type Error = Error;
429
430    fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
431        let concrete_type = match dt {
432            ArrowDataType::Null => Self::null_datatype(),
433            ArrowDataType::Boolean => Self::boolean_datatype(),
434            ArrowDataType::UInt8 => Self::uint8_datatype(),
435            ArrowDataType::UInt16 => Self::uint16_datatype(),
436            ArrowDataType::UInt32 => Self::uint32_datatype(),
437            ArrowDataType::UInt64 => Self::uint64_datatype(),
438            ArrowDataType::Int8 => Self::int8_datatype(),
439            ArrowDataType::Int16 => Self::int16_datatype(),
440            ArrowDataType::Int32 => Self::int32_datatype(),
441            ArrowDataType::Int64 => Self::int64_datatype(),
442            ArrowDataType::Float32 => Self::float32_datatype(),
443            ArrowDataType::Float64 => Self::float64_datatype(),
444            ArrowDataType::Date32 => Self::date_datatype(),
445            ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
446            ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
447            ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
448            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
449            ArrowDataType::List(field) => Self::List(ListType::new(
450                ConcreteDataType::from_arrow_type(field.data_type()),
451            )),
452            ArrowDataType::Dictionary(key_type, value_type) => {
453                let key_type = ConcreteDataType::from_arrow_type(key_type);
454                let value_type = ConcreteDataType::from_arrow_type(value_type);
455                Self::Dictionary(DictionaryType::new(key_type, value_type))
456            }
457            ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
458            ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
459            ArrowDataType::Duration(u) => {
460                ConcreteDataType::Duration(DurationType::from_unit(u.into()))
461            }
462            ArrowDataType::Decimal128(precision, scale) => {
463                ConcreteDataType::decimal128_datatype(*precision, *scale)
464            }
465            ArrowDataType::Struct(fields) => ConcreteDataType::Struct(fields.try_into()?),
466            ArrowDataType::Float16
467            | ArrowDataType::Date64
468            | ArrowDataType::FixedSizeBinary(_)
469            | ArrowDataType::BinaryView
470            | ArrowDataType::Utf8View
471            | ArrowDataType::ListView(_)
472            | ArrowDataType::FixedSizeList(_, _)
473            | ArrowDataType::LargeList(_)
474            | ArrowDataType::LargeListView(_)
475            | ArrowDataType::Union(_, _)
476            | ArrowDataType::Decimal256(_, _)
477            | ArrowDataType::Map(_, _)
478            | ArrowDataType::RunEndEncoded(_, _) => {
479                return error::UnsupportedArrowTypeSnafu {
480                    arrow_type: dt.clone(),
481                }
482                .fail()
483            }
484        };
485
486        Ok(concrete_type)
487    }
488}
489
490macro_rules! impl_new_concrete_type_functions {
491    ($($Type: ident), +) => {
492        paste! {
493            impl ConcreteDataType {
494                $(
495                    pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
496                        ConcreteDataType::$Type([<$Type Type>]::default())
497                    }
498                )+
499            }
500        }
501    }
502}
503
504impl_new_concrete_type_functions!(
505    Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
506    Binary, Date, String, Json
507);
508
509impl ConcreteDataType {
510    pub fn timestamp_second_datatype() -> Self {
511        ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
512    }
513
514    pub fn timestamp_millisecond_datatype() -> Self {
515        ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
516    }
517
518    pub fn timestamp_microsecond_datatype() -> Self {
519        ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
520    }
521
522    pub fn timestamp_nanosecond_datatype() -> Self {
523        ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
524    }
525
526    /// Returns the time data type with `TimeUnit`.
527    pub fn time_datatype(unit: TimeUnit) -> Self {
528        ConcreteDataType::Time(TimeType::from_unit(unit))
529    }
530
531    /// Creates a [Time(TimeSecondType)] datatype.
532    pub fn time_second_datatype() -> Self {
533        Self::time_datatype(TimeUnit::Second)
534    }
535
536    /// Creates a [Time(TimeMillisecondType)] datatype.
537    pub fn time_millisecond_datatype() -> Self {
538        Self::time_datatype(TimeUnit::Millisecond)
539    }
540
541    /// Creates a [Time(TimeMicrosecond)] datatype.
542    pub fn time_microsecond_datatype() -> Self {
543        Self::time_datatype(TimeUnit::Microsecond)
544    }
545
546    /// Creates a [Time(TimeNanosecond)] datatype.
547    pub fn time_nanosecond_datatype() -> Self {
548        Self::time_datatype(TimeUnit::Nanosecond)
549    }
550
551    /// Creates a [Duration(DurationSecondType)] datatype.
552    pub fn duration_second_datatype() -> Self {
553        ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
554    }
555
556    /// Creates a [Duration(DurationMillisecondType)] datatype.
557    pub fn duration_millisecond_datatype() -> Self {
558        ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
559    }
560
561    /// Creates a [Duration(DurationMicrosecondType)] datatype.
562    pub fn duration_microsecond_datatype() -> Self {
563        ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
564    }
565
566    /// Creates a [Duration(DurationNanosecondType)] datatype.
567    pub fn duration_nanosecond_datatype() -> Self {
568        ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
569    }
570
571    /// Creates a [Interval(IntervalMonthDayNanoType)] datatype.
572    pub fn interval_month_day_nano_datatype() -> Self {
573        ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
574    }
575
576    /// Creates a [Interval(IntervalYearMonthType)] datatype.
577    pub fn interval_year_month_datatype() -> Self {
578        ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
579    }
580
581    /// Creates a [Interval(IntervalDayTimeType)] datatype.
582    pub fn interval_day_time_datatype() -> Self {
583        ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
584    }
585
586    pub fn timestamp_datatype(unit: TimeUnit) -> Self {
587        match unit {
588            TimeUnit::Second => Self::timestamp_second_datatype(),
589            TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
590            TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
591            TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
592        }
593    }
594
595    /// Converts from arrow timestamp unit to
596    pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
597        match t {
598            ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
599            ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
600            ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
601            ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
602        }
603    }
604
605    pub fn duration_datatype(unit: TimeUnit) -> Self {
606        match unit {
607            TimeUnit::Second => Self::duration_second_datatype(),
608            TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
609            TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
610            TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
611        }
612    }
613
614    pub fn interval_datatype(unit: IntervalUnit) -> Self {
615        match unit {
616            IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
617            IntervalUnit::DayTime => Self::interval_day_time_datatype(),
618            IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
619        }
620    }
621
622    pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
623        match u {
624            ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
625            ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
626            ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
627        }
628    }
629
630    pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
631        ConcreteDataType::List(ListType::new(item_type))
632    }
633
634    pub fn struct_datatype(fields: StructType) -> ConcreteDataType {
635        ConcreteDataType::Struct(fields)
636    }
637
638    pub fn dictionary_datatype(
639        key_type: ConcreteDataType,
640        value_type: ConcreteDataType,
641    ) -> ConcreteDataType {
642        ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
643    }
644
645    pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
646        ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
647    }
648
649    pub fn decimal128_default_datatype() -> ConcreteDataType {
650        Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
651    }
652
653    pub fn vector_datatype(dim: u32) -> ConcreteDataType {
654        ConcreteDataType::Vector(VectorType::new(dim))
655    }
656
657    pub fn vector_default_datatype() -> ConcreteDataType {
658        Self::vector_datatype(0)
659    }
660}
661
/// Data type abstraction.
///
/// [`ConcreteDataType`] implements this trait through `enum_dispatch`, which
/// forwards each call to the type wrapped by the active variant.
#[enum_dispatch::enum_dispatch]
pub trait DataType: std::fmt::Debug + Send + Sync {
    /// Name of this data type.
    ///
    /// This is also the text written by the `Display` implementation of
    /// [`ConcreteDataType`].
    fn name(&self) -> String;

    /// Returns id of the Logical data type.
    fn logical_type_id(&self) -> LogicalTypeId;

    /// Returns the default value of this type.
    fn default_value(&self) -> Value;

    /// Convert this type as [arrow::datatypes::DataType].
    fn as_arrow_type(&self) -> ArrowDataType;

    /// Creates a mutable vector with given `capacity` of this type.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

    /// Casts the value `from` to a value of this data type.
    ///
    /// Returns `None` if the cast failed.
    fn try_cast(&self, from: Value) -> Option<Value>;
}
684
/// Reference-counted handle to a type-erased [`DataType`].
pub type DataTypeRef = Arc<dyn DataType>;
686
687#[cfg(test)]
688mod tests {
689    use arrow::datatypes::Field;
690
691    use super::*;
692
693    #[test]
694    fn test_concrete_type_as_datatype_trait() {
695        let concrete_type = ConcreteDataType::boolean_datatype();
696
697        assert_eq!("Boolean", concrete_type.to_string());
698        assert_eq!(Value::Boolean(false), concrete_type.default_value());
699        assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
700        assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
701    }
702
703    #[test]
704    fn test_from_arrow_type() {
705        assert!(matches!(
706            ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
707            ConcreteDataType::Null(_)
708        ));
709        assert!(matches!(
710            ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
711            ConcreteDataType::Boolean(_)
712        ));
713        assert!(matches!(
714            ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
715            ConcreteDataType::Binary(_)
716        ));
717        assert!(matches!(
718            ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
719            ConcreteDataType::Binary(_)
720        ));
721        assert!(matches!(
722            ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
723            ConcreteDataType::Int8(_)
724        ));
725        assert!(matches!(
726            ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
727            ConcreteDataType::Int16(_)
728        ));
729        assert!(matches!(
730            ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
731            ConcreteDataType::Int32(_)
732        ));
733        assert!(matches!(
734            ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
735            ConcreteDataType::Int64(_)
736        ));
737        assert!(matches!(
738            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
739            ConcreteDataType::UInt8(_)
740        ));
741        assert!(matches!(
742            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
743            ConcreteDataType::UInt16(_)
744        ));
745        assert!(matches!(
746            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
747            ConcreteDataType::UInt32(_)
748        ));
749        assert!(matches!(
750            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
751            ConcreteDataType::UInt64(_)
752        ));
753        assert!(matches!(
754            ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
755            ConcreteDataType::Float32(_)
756        ));
757        assert!(matches!(
758            ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
759            ConcreteDataType::Float64(_)
760        ));
761        assert!(matches!(
762            ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
763            ConcreteDataType::String(_)
764        ));
765        assert_eq!(
766            ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
767                "item",
768                ArrowDataType::Int32,
769                true,
770            )))),
771            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
772        );
773        assert!(matches!(
774            ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
775            ConcreteDataType::Date(_)
776        ));
777    }
778
779    #[test]
780    fn test_from_arrow_timestamp() {
781        assert_eq!(
782            ConcreteDataType::timestamp_millisecond_datatype(),
783            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
784        );
785        assert_eq!(
786            ConcreteDataType::timestamp_microsecond_datatype(),
787            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
788        );
789        assert_eq!(
790            ConcreteDataType::timestamp_nanosecond_datatype(),
791            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
792        );
793        assert_eq!(
794            ConcreteDataType::timestamp_second_datatype(),
795            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
796        );
797    }
798
799    #[test]
800    fn test_is_null() {
801        assert!(ConcreteDataType::null_datatype().is_null());
802        assert!(!ConcreteDataType::int32_datatype().is_null());
803    }
804
805    #[test]
806    fn test_is_float() {
807        assert!(!ConcreteDataType::int32_datatype().is_float());
808        assert!(ConcreteDataType::float32_datatype().is_float());
809        assert!(ConcreteDataType::float64_datatype().is_float());
810    }
811
812    #[test]
813    fn test_is_boolean() {
814        assert!(!ConcreteDataType::int32_datatype().is_boolean());
815        assert!(!ConcreteDataType::float32_datatype().is_boolean());
816        assert!(ConcreteDataType::boolean_datatype().is_boolean());
817    }
818
819    #[test]
820    fn test_is_decimal() {
821        assert!(!ConcreteDataType::int32_datatype().is_decimal());
822        assert!(!ConcreteDataType::float32_datatype().is_decimal());
823        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_decimal());
824        assert!(ConcreteDataType::decimal128_datatype(18, 6).is_decimal());
825    }
826
827    #[test]
828    fn test_is_stringifiable() {
829        assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
830        assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
831        assert!(ConcreteDataType::string_datatype().is_stringifiable());
832        assert!(ConcreteDataType::binary_datatype().is_stringifiable());
833        assert!(ConcreteDataType::date_datatype().is_stringifiable());
834        assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
835        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
836        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
837        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
838        assert!(ConcreteDataType::time_second_datatype().is_stringifiable());
839        assert!(ConcreteDataType::time_millisecond_datatype().is_stringifiable());
840        assert!(ConcreteDataType::time_microsecond_datatype().is_stringifiable());
841        assert!(ConcreteDataType::time_nanosecond_datatype().is_stringifiable());
842
843        assert!(ConcreteDataType::interval_year_month_datatype().is_stringifiable());
844        assert!(ConcreteDataType::interval_day_time_datatype().is_stringifiable());
845        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_stringifiable());
846
847        assert!(ConcreteDataType::duration_second_datatype().is_stringifiable());
848        assert!(ConcreteDataType::duration_millisecond_datatype().is_stringifiable());
849        assert!(ConcreteDataType::duration_microsecond_datatype().is_stringifiable());
850        assert!(ConcreteDataType::duration_nanosecond_datatype().is_stringifiable());
851        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_stringifiable());
852        assert!(ConcreteDataType::vector_default_datatype().is_stringifiable());
853    }
854
855    #[test]
856    fn test_is_signed() {
857        assert!(ConcreteDataType::int8_datatype().is_signed());
858        assert!(ConcreteDataType::int16_datatype().is_signed());
859        assert!(ConcreteDataType::int32_datatype().is_signed());
860        assert!(ConcreteDataType::int64_datatype().is_signed());
861        assert!(ConcreteDataType::date_datatype().is_signed());
862        assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
863        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
864        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
865        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
866        assert!(ConcreteDataType::time_second_datatype().is_signed());
867        assert!(ConcreteDataType::time_millisecond_datatype().is_signed());
868        assert!(ConcreteDataType::time_microsecond_datatype().is_signed());
869        assert!(ConcreteDataType::time_nanosecond_datatype().is_signed());
870        assert!(ConcreteDataType::interval_year_month_datatype().is_signed());
871        assert!(ConcreteDataType::interval_day_time_datatype().is_signed());
872        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_signed());
873        assert!(ConcreteDataType::duration_second_datatype().is_signed());
874        assert!(ConcreteDataType::duration_millisecond_datatype().is_signed());
875        assert!(ConcreteDataType::duration_microsecond_datatype().is_signed());
876        assert!(ConcreteDataType::duration_nanosecond_datatype().is_signed());
877
878        assert!(!ConcreteDataType::uint8_datatype().is_signed());
879        assert!(!ConcreteDataType::uint16_datatype().is_signed());
880        assert!(!ConcreteDataType::uint32_datatype().is_signed());
881        assert!(!ConcreteDataType::uint64_datatype().is_signed());
882
883        assert!(!ConcreteDataType::float32_datatype().is_signed());
884        assert!(!ConcreteDataType::float64_datatype().is_signed());
885
886        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_signed());
887    }
888
889    #[test]
890    fn test_is_unsigned() {
891        assert!(!ConcreteDataType::int8_datatype().is_unsigned());
892        assert!(!ConcreteDataType::int16_datatype().is_unsigned());
893        assert!(!ConcreteDataType::int32_datatype().is_unsigned());
894        assert!(!ConcreteDataType::int64_datatype().is_unsigned());
895        assert!(!ConcreteDataType::date_datatype().is_unsigned());
896        assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
897        assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
898        assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
899        assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
900        assert!(!ConcreteDataType::time_second_datatype().is_unsigned());
901        assert!(!ConcreteDataType::time_millisecond_datatype().is_unsigned());
902        assert!(!ConcreteDataType::time_microsecond_datatype().is_unsigned());
903        assert!(!ConcreteDataType::time_nanosecond_datatype().is_unsigned());
904        assert!(!ConcreteDataType::interval_year_month_datatype().is_unsigned());
905        assert!(!ConcreteDataType::interval_day_time_datatype().is_unsigned());
906        assert!(!ConcreteDataType::interval_month_day_nano_datatype().is_unsigned());
907        assert!(!ConcreteDataType::duration_second_datatype().is_unsigned());
908        assert!(!ConcreteDataType::duration_millisecond_datatype().is_unsigned());
909        assert!(!ConcreteDataType::duration_microsecond_datatype().is_unsigned());
910        assert!(!ConcreteDataType::duration_nanosecond_datatype().is_unsigned());
911        assert!(!ConcreteDataType::decimal128_datatype(10, 2).is_unsigned());
912
913        assert!(ConcreteDataType::uint8_datatype().is_unsigned());
914        assert!(ConcreteDataType::uint16_datatype().is_unsigned());
915        assert!(ConcreteDataType::uint32_datatype().is_unsigned());
916        assert!(ConcreteDataType::uint64_datatype().is_unsigned());
917
918        assert!(!ConcreteDataType::float32_datatype().is_unsigned());
919        assert!(!ConcreteDataType::float64_datatype().is_unsigned());
920    }
921
922    #[test]
923    fn test_numerics() {
924        let nums = ConcreteDataType::numerics();
925        assert_eq!(10, nums.len());
926    }
927
928    #[test]
929    fn test_as_list() {
930        let list_type = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
931        assert_eq!(
932            ListType::new(ConcreteDataType::int32_datatype()),
933            *list_type.as_list().unwrap()
934        );
935        assert!(ConcreteDataType::int32_datatype().as_list().is_none());
936    }
937
938    #[test]
939    fn test_display_concrete_data_type() {
940        assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
941        assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
942        assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
943        assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
944        assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
945        assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
946        assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
947        assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
948        assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
949        assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
950        assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
951        assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
952        assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
953        assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
954        assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
955        assert_eq!(
956            ConcreteDataType::timestamp_millisecond_datatype().to_string(),
957            "TimestampMillisecond"
958        );
959        assert_eq!(
960            ConcreteDataType::time_millisecond_datatype().to_string(),
961            "TimeMillisecond"
962        );
963        assert_eq!(
964            ConcreteDataType::interval_month_day_nano_datatype().to_string(),
965            "IntervalMonthDayNano"
966        );
967        assert_eq!(
968            ConcreteDataType::duration_second_datatype().to_string(),
969            "DurationSecond"
970        );
971        assert_eq!(
972            ConcreteDataType::decimal128_datatype(10, 2).to_string(),
973            "Decimal(10, 2)"
974        );
975        // Nested types
976        assert_eq!(
977            ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()).to_string(),
978            "List<Int32>"
979        );
980        assert_eq!(
981            ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(DictionaryType::new(
982                ConcreteDataType::int32_datatype(),
983                ConcreteDataType::string_datatype()
984            )))
985            .to_string(),
986            "List<Dictionary<Int32, String>>"
987        );
988        assert_eq!(
989            ConcreteDataType::list_datatype(ConcreteDataType::list_datatype(
990                ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
991            ))
992            .to_string(),
993            "List<List<List<Int32>>>"
994        );
995        assert_eq!(
996            ConcreteDataType::dictionary_datatype(
997                ConcreteDataType::int32_datatype(),
998                ConcreteDataType::string_datatype()
999            )
1000            .to_string(),
1001            "Dictionary<Int32, String>"
1002        );
1003        assert_eq!(
1004            ConcreteDataType::vector_datatype(3).to_string(),
1005            "Vector(3)"
1006        );
1007    }
1008}