datatypes/
data_type.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt;
16use std::sync::Arc;
17
18use arrow::compute::cast as arrow_array_cast;
19use arrow::datatypes::{
20    DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
21};
22use arrow_schema::DECIMAL_DEFAULT_SCALE;
23use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
24use common_time::interval::IntervalUnit;
25use common_time::timestamp::TimeUnit;
26use enum_dispatch::enum_dispatch;
27use paste::paste;
28use serde::{Deserialize, Serialize};
29
30use crate::error::{self, Error, Result};
31use crate::type_id::LogicalTypeId;
32use crate::types::{
33    BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
34    DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
35    Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType,
36    IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonFormat, JsonType, ListType,
37    NullType, StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
38    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
39    UInt8Type, UInt16Type, UInt32Type, UInt64Type, VectorType,
40};
41use crate::value::Value;
42use crate::vectors::MutableVector;
43
/// The closed set of data types known to this crate.
///
/// Every variant wraps the type struct (or nested enum) that describes it.
/// `#[enum_dispatch(DataType)]` generates the [`DataType`] implementation for this enum by
/// forwarding each trait method to the wrapped value.
///
/// The derived `PartialOrd`/`Ord` implementations compare variants by declaration order, so
/// reordering the variants below changes how values of this enum sort.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    Null(NullType),
    Boolean(BooleanType),

    // Numeric types:
    Int8(Int8Type),
    Int16(Int16Type),
    Int32(Int32Type),
    Int64(Int64Type),
    UInt8(UInt8Type),
    UInt16(UInt16Type),
    UInt32(UInt32Type),
    UInt64(UInt64Type),
    Float32(Float32Type),
    Float64(Float64Type),

    // Decimal128 type:
    Decimal128(Decimal128Type),

    // String types:
    Binary(BinaryType),
    String(StringType),

    // Date and time types:
    Date(DateType),
    Timestamp(TimestampType),
    Time(TimeType),

    // Duration type:
    Duration(DurationType),

    // Interval type:
    Interval(IntervalType),

    // Compound types:
    List(ListType),
    Dictionary(DictionaryType),
    Struct(StructType),

    // JSON type:
    Json(JsonType),

    // Vector type:
    Vector(VectorType),
}
91
92impl fmt::Display for ConcreteDataType {
93    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94        match self {
95            ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
96            ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
97            ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
98            ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
99            ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
100            ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
101            ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
102            ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
103            ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
104            ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
105            ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
106            ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
107            ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
108            ConcreteDataType::String(v) => write!(f, "{}", v.name()),
109            ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
110            ConcreteDataType::Timestamp(t) => match t {
111                TimestampType::Second(v) => write!(f, "{}", v.name()),
112                TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
113                TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
114                TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
115            },
116            ConcreteDataType::Time(t) => match t {
117                TimeType::Second(v) => write!(f, "{}", v.name()),
118                TimeType::Millisecond(v) => write!(f, "{}", v.name()),
119                TimeType::Microsecond(v) => write!(f, "{}", v.name()),
120                TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
121            },
122            ConcreteDataType::Interval(i) => match i {
123                IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
124                IntervalType::DayTime(v) => write!(f, "{}", v.name()),
125                IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
126            },
127            ConcreteDataType::Duration(d) => match d {
128                DurationType::Second(v) => write!(f, "{}", v.name()),
129                DurationType::Millisecond(v) => write!(f, "{}", v.name()),
130                DurationType::Microsecond(v) => write!(f, "{}", v.name()),
131                DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
132            },
133            ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
134            ConcreteDataType::List(v) => write!(f, "{}", v.name()),
135            ConcreteDataType::Struct(v) => write!(f, "{}", v.name()),
136            ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
137            ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
138            ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
139        }
140    }
141}
142
143// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
144// returning all these properties to the `DataType` trait
145impl ConcreteDataType {
146    pub fn is_float(&self) -> bool {
147        matches!(
148            self,
149            ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
150        )
151    }
152
153    pub fn is_boolean(&self) -> bool {
154        matches!(self, ConcreteDataType::Boolean(_))
155    }
156
157    pub fn is_string(&self) -> bool {
158        matches!(self, ConcreteDataType::String(_))
159    }
160
161    pub fn is_stringifiable(&self) -> bool {
162        matches!(
163            self,
164            ConcreteDataType::String(_)
165                | ConcreteDataType::Date(_)
166                | ConcreteDataType::Timestamp(_)
167                | ConcreteDataType::Time(_)
168                | ConcreteDataType::Interval(_)
169                | ConcreteDataType::Duration(_)
170                | ConcreteDataType::Decimal128(_)
171                | ConcreteDataType::Binary(_)
172                | ConcreteDataType::Json(_)
173                | ConcreteDataType::Vector(_)
174        )
175    }
176
177    pub fn is_signed(&self) -> bool {
178        matches!(
179            self,
180            ConcreteDataType::Int8(_)
181                | ConcreteDataType::Int16(_)
182                | ConcreteDataType::Int32(_)
183                | ConcreteDataType::Int64(_)
184                | ConcreteDataType::Date(_)
185                | ConcreteDataType::Timestamp(_)
186                | ConcreteDataType::Time(_)
187                | ConcreteDataType::Interval(_)
188                | ConcreteDataType::Duration(_)
189                | ConcreteDataType::Decimal128(_)
190        )
191    }
192
193    pub fn is_unsigned(&self) -> bool {
194        matches!(
195            self,
196            ConcreteDataType::UInt8(_)
197                | ConcreteDataType::UInt16(_)
198                | ConcreteDataType::UInt32(_)
199                | ConcreteDataType::UInt64(_)
200        )
201    }
202
203    pub fn is_numeric(&self) -> bool {
204        matches!(
205            self,
206            ConcreteDataType::Int8(_)
207                | ConcreteDataType::Int16(_)
208                | ConcreteDataType::Int32(_)
209                | ConcreteDataType::Int64(_)
210                | ConcreteDataType::UInt8(_)
211                | ConcreteDataType::UInt16(_)
212                | ConcreteDataType::UInt32(_)
213                | ConcreteDataType::UInt64(_)
214                | ConcreteDataType::Float32(_)
215                | ConcreteDataType::Float64(_)
216        )
217    }
218
219    pub fn is_timestamp(&self) -> bool {
220        matches!(self, ConcreteDataType::Timestamp(_))
221    }
222
223    pub fn is_decimal(&self) -> bool {
224        matches!(self, ConcreteDataType::Decimal128(_))
225    }
226
227    pub fn is_json(&self) -> bool {
228        matches!(self, ConcreteDataType::Json(_))
229    }
230
231    pub fn is_vector(&self) -> bool {
232        matches!(self, ConcreteDataType::Vector(_))
233    }
234
235    pub fn numerics() -> Vec<ConcreteDataType> {
236        vec![
237            ConcreteDataType::int8_datatype(),
238            ConcreteDataType::int16_datatype(),
239            ConcreteDataType::int32_datatype(),
240            ConcreteDataType::int64_datatype(),
241            ConcreteDataType::uint8_datatype(),
242            ConcreteDataType::uint16_datatype(),
243            ConcreteDataType::uint32_datatype(),
244            ConcreteDataType::uint64_datatype(),
245            ConcreteDataType::float32_datatype(),
246            ConcreteDataType::float64_datatype(),
247        ]
248    }
249
250    pub fn unsigned_integers() -> Vec<ConcreteDataType> {
251        vec![
252            ConcreteDataType::uint8_datatype(),
253            ConcreteDataType::uint16_datatype(),
254            ConcreteDataType::uint32_datatype(),
255            ConcreteDataType::uint64_datatype(),
256        ]
257    }
258
259    pub fn timestamps() -> Vec<ConcreteDataType> {
260        vec![
261            ConcreteDataType::timestamp_second_datatype(),
262            ConcreteDataType::timestamp_millisecond_datatype(),
263            ConcreteDataType::timestamp_microsecond_datatype(),
264            ConcreteDataType::timestamp_nanosecond_datatype(),
265        ]
266    }
267
268    /// Convert arrow data type to [ConcreteDataType].
269    ///
270    /// # Panics
271    /// Panic if given arrow data type is not supported.
272    pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
273        ConcreteDataType::try_from(dt).expect("Unimplemented type")
274    }
275
276    pub fn is_null(&self) -> bool {
277        matches!(self, ConcreteDataType::Null(NullType))
278    }
279
280    pub(crate) fn is_struct(&self) -> bool {
281        matches!(self, ConcreteDataType::Struct(_))
282    }
283
284    /// Try to cast the type as a [`ListType`].
285    pub fn as_list(&self) -> Option<&ListType> {
286        match self {
287            ConcreteDataType::List(t) => Some(t),
288            _ => None,
289        }
290    }
291
292    pub fn as_struct(&self) -> Option<&StructType> {
293        match self {
294            ConcreteDataType::Struct(s) => Some(s),
295            _ => None,
296        }
297    }
298
299    /// Try to cast data type as a [`TimestampType`].
300    pub fn as_timestamp(&self) -> Option<TimestampType> {
301        match self {
302            ConcreteDataType::Timestamp(t) => Some(*t),
303            _ => None,
304        }
305    }
306
307    /// Try to get numeric precision, returns `None` if it's not numeric type
308    pub fn numeric_precision(&self) -> Option<u8> {
309        match self {
310            ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
311            ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
312            ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
313            ConcreteDataType::Int64(_) => Some(19),
314            ConcreteDataType::UInt64(_) => Some(20),
315            ConcreteDataType::Float32(_) => Some(12),
316            ConcreteDataType::Float64(_) => Some(22),
317            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
318            _ => None,
319        }
320    }
321
322    /// Try to get numeric scale, returns `None` if it's float or not numeric type
323    pub fn numeric_scale(&self) -> Option<i8> {
324        match self {
325            ConcreteDataType::Int8(_)
326            | ConcreteDataType::UInt8(_)
327            | ConcreteDataType::Int16(_)
328            | ConcreteDataType::UInt16(_)
329            | ConcreteDataType::Int32(_)
330            | ConcreteDataType::UInt32(_)
331            | ConcreteDataType::Int64(_)
332            | ConcreteDataType::UInt64(_) => Some(0),
333            ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
334            ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
335            _ => None,
336        }
337    }
338
339    /// Try to cast data type as a [`TimeType`].
340    pub fn as_time(&self) -> Option<TimeType> {
341        match self {
342            ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
343            ConcreteDataType::Time(t) => Some(*t),
344            _ => None,
345        }
346    }
347
348    pub fn as_decimal128(&self) -> Option<Decimal128Type> {
349        match self {
350            ConcreteDataType::Decimal128(d) => Some(*d),
351            _ => None,
352        }
353    }
354
355    pub fn as_json(&self) -> Option<&JsonType> {
356        match self {
357            ConcreteDataType::Json(j) => Some(j),
358            _ => None,
359        }
360    }
361
362    pub fn as_vector(&self) -> Option<VectorType> {
363        match self {
364            ConcreteDataType::Vector(v) => Some(*v),
365            _ => None,
366        }
367    }
368
369    /// Checks if the data type can cast to another data type.
370    pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
371        let array = arrow_array::new_empty_array(&self.as_arrow_type());
372        arrow_array_cast(array.as_ref(), &to_type.as_arrow_type()).is_ok()
373    }
374
375    /// Try to cast data type as a [`DurationType`].
376    pub fn as_duration(&self) -> Option<DurationType> {
377        match self {
378            ConcreteDataType::Duration(d) => Some(*d),
379            _ => None,
380        }
381    }
382
383    /// Return the datatype name in postgres type system
384    pub fn postgres_datatype_name(&self) -> &'static str {
385        match self {
386            &ConcreteDataType::Null(_) => "UNKNOWN",
387            &ConcreteDataType::Boolean(_) => "BOOL",
388            &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
389            &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
390            &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
391            &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
392            &ConcreteDataType::Float32(_) => "FLOAT4",
393            &ConcreteDataType::Float64(_) => "FLOAT8",
394            &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
395            &ConcreteDataType::String(_) => "VARCHAR",
396            &ConcreteDataType::Date(_) => "DATE",
397            &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
398            &ConcreteDataType::Time(_) => "TIME",
399            &ConcreteDataType::Interval(_) => "INTERVAL",
400            &ConcreteDataType::Decimal128(_) => "NUMERIC",
401            &ConcreteDataType::Json(_) => "JSON",
402            ConcreteDataType::List(list) => match list.item_type() {
403                &ConcreteDataType::Null(_) => "UNKNOWN",
404                &ConcreteDataType::Boolean(_) => "_BOOL",
405                &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
406                &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
407                &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
408                &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
409                &ConcreteDataType::Float32(_) => "_FLOAT4",
410                &ConcreteDataType::Float64(_) => "_FLOAT8",
411                &ConcreteDataType::Binary(_) => "_BYTEA",
412                &ConcreteDataType::String(_) => "_VARCHAR",
413                &ConcreteDataType::Date(_) => "_DATE",
414                &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
415                &ConcreteDataType::Time(_) => "_TIME",
416                &ConcreteDataType::Interval(_) => "_INTERVAL",
417                &ConcreteDataType::Decimal128(_) => "_NUMERIC",
418                &ConcreteDataType::Json(_) => "_JSON",
419                &ConcreteDataType::Duration(_)
420                | &ConcreteDataType::Dictionary(_)
421                | &ConcreteDataType::Vector(_)
422                | &ConcreteDataType::List(_)
423                | &ConcreteDataType::Struct(_) => "UNKNOWN",
424            },
425            &ConcreteDataType::Duration(_)
426            | &ConcreteDataType::Dictionary(_)
427            | &ConcreteDataType::Struct(_) => "UNKNOWN",
428        }
429    }
430}
431
432impl From<&ConcreteDataType> for ConcreteDataType {
433    fn from(t: &ConcreteDataType) -> Self {
434        t.clone()
435    }
436}
437
438impl TryFrom<&ArrowDataType> for ConcreteDataType {
439    type Error = Error;
440
441    fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
442        let concrete_type = match dt {
443            ArrowDataType::Null => Self::null_datatype(),
444            ArrowDataType::Boolean => Self::boolean_datatype(),
445            ArrowDataType::UInt8 => Self::uint8_datatype(),
446            ArrowDataType::UInt16 => Self::uint16_datatype(),
447            ArrowDataType::UInt32 => Self::uint32_datatype(),
448            ArrowDataType::UInt64 => Self::uint64_datatype(),
449            ArrowDataType::Int8 => Self::int8_datatype(),
450            ArrowDataType::Int16 => Self::int16_datatype(),
451            ArrowDataType::Int32 => Self::int32_datatype(),
452            ArrowDataType::Int64 => Self::int64_datatype(),
453            ArrowDataType::Float32 => Self::float32_datatype(),
454            ArrowDataType::Float64 => Self::float64_datatype(),
455            ArrowDataType::Date32 => Self::date_datatype(),
456            ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
457            ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
458            ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::BinaryView => {
459                Self::binary_datatype()
460            }
461            ArrowDataType::Utf8 | ArrowDataType::Utf8View => Self::string_datatype(),
462            ArrowDataType::LargeUtf8 => Self::large_string_datatype(),
463            ArrowDataType::List(field) => Self::List(ListType::new(Arc::new(
464                ConcreteDataType::from_arrow_type(field.data_type()),
465            ))),
466            ArrowDataType::Dictionary(key_type, value_type) => {
467                let key_type = ConcreteDataType::from_arrow_type(key_type);
468                let value_type = ConcreteDataType::from_arrow_type(value_type);
469                Self::Dictionary(DictionaryType::new(key_type, value_type))
470            }
471            ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
472            ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
473            ArrowDataType::Duration(u) => {
474                ConcreteDataType::Duration(DurationType::from_unit(u.into()))
475            }
476            ArrowDataType::Decimal128(precision, scale) => {
477                ConcreteDataType::decimal128_datatype(*precision, *scale)
478            }
479            ArrowDataType::Struct(fields) => ConcreteDataType::Struct(fields.try_into()?),
480            ArrowDataType::Float16
481            | ArrowDataType::Date64
482            | ArrowDataType::FixedSizeBinary(_)
483            | ArrowDataType::ListView(_)
484            | ArrowDataType::FixedSizeList(_, _)
485            | ArrowDataType::LargeList(_)
486            | ArrowDataType::LargeListView(_)
487            | ArrowDataType::Union(_, _)
488            | ArrowDataType::Decimal256(_, _)
489            | ArrowDataType::Map(_, _)
490            | ArrowDataType::RunEndEncoded(_, _)
491            | ArrowDataType::Decimal32(_, _)
492            | ArrowDataType::Decimal64(_, _) => {
493                return error::UnsupportedArrowTypeSnafu {
494                    arrow_type: dt.clone(),
495                }
496                .fail();
497            }
498        };
499
500        Ok(concrete_type)
501    }
502}
503
504macro_rules! impl_new_concrete_type_functions {
505    ($($Type: ident), +) => {
506        paste! {
507            impl ConcreteDataType {
508                $(
509                    pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
510                        ConcreteDataType::$Type([<$Type Type>]::default())
511                    }
512                )+
513            }
514        }
515    }
516}
517
518impl_new_concrete_type_functions!(
519    Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
520    Binary, Date, String, Json
521);
522
523impl ConcreteDataType {
524    pub fn large_string_datatype() -> Self {
525        ConcreteDataType::String(StringType::large_utf8())
526    }
527
528    pub fn timestamp_second_datatype() -> Self {
529        ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
530    }
531
532    pub fn timestamp_millisecond_datatype() -> Self {
533        ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
534    }
535
536    pub fn timestamp_microsecond_datatype() -> Self {
537        ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
538    }
539
540    pub fn timestamp_nanosecond_datatype() -> Self {
541        ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
542    }
543
544    /// Returns the time data type with `TimeUnit`.
545    pub fn time_datatype(unit: TimeUnit) -> Self {
546        ConcreteDataType::Time(TimeType::from_unit(unit))
547    }
548
549    /// Creates a [Time(TimeSecondType)] datatype.
550    pub fn time_second_datatype() -> Self {
551        Self::time_datatype(TimeUnit::Second)
552    }
553
554    /// Creates a [Time(TimeMillisecondType)] datatype.
555    pub fn time_millisecond_datatype() -> Self {
556        Self::time_datatype(TimeUnit::Millisecond)
557    }
558
559    /// Creates a [Time(TimeMicrosecond)] datatype.
560    pub fn time_microsecond_datatype() -> Self {
561        Self::time_datatype(TimeUnit::Microsecond)
562    }
563
564    /// Creates a [Time(TimeNanosecond)] datatype.
565    pub fn time_nanosecond_datatype() -> Self {
566        Self::time_datatype(TimeUnit::Nanosecond)
567    }
568
569    /// Creates a [Duration(DurationSecondType)] datatype.
570    pub fn duration_second_datatype() -> Self {
571        ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
572    }
573
574    /// Creates a [Duration(DurationMillisecondType)] datatype.
575    pub fn duration_millisecond_datatype() -> Self {
576        ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
577    }
578
579    /// Creates a [Duration(DurationMicrosecondType)] datatype.
580    pub fn duration_microsecond_datatype() -> Self {
581        ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
582    }
583
584    /// Creates a [Duration(DurationNanosecondType)] datatype.
585    pub fn duration_nanosecond_datatype() -> Self {
586        ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
587    }
588
589    /// Creates a [Interval(IntervalMonthDayNanoType)] datatype.
590    pub fn interval_month_day_nano_datatype() -> Self {
591        ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
592    }
593
594    /// Creates a [Interval(IntervalYearMonthType)] datatype.
595    pub fn interval_year_month_datatype() -> Self {
596        ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
597    }
598
599    /// Creates a [Interval(IntervalDayTimeType)] datatype.
600    pub fn interval_day_time_datatype() -> Self {
601        ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
602    }
603
604    pub fn timestamp_datatype(unit: TimeUnit) -> Self {
605        match unit {
606            TimeUnit::Second => Self::timestamp_second_datatype(),
607            TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
608            TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
609            TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
610        }
611    }
612
613    /// Converts from arrow timestamp unit to
614    pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
615        match t {
616            ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
617            ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
618            ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
619            ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
620        }
621    }
622
623    pub fn duration_datatype(unit: TimeUnit) -> Self {
624        match unit {
625            TimeUnit::Second => Self::duration_second_datatype(),
626            TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
627            TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
628            TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
629        }
630    }
631
632    pub fn interval_datatype(unit: IntervalUnit) -> Self {
633        match unit {
634            IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
635            IntervalUnit::DayTime => Self::interval_day_time_datatype(),
636            IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
637        }
638    }
639
640    pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
641        match u {
642            ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
643            ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
644            ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
645        }
646    }
647
648    pub fn list_datatype(item_type: Arc<ConcreteDataType>) -> ConcreteDataType {
649        ConcreteDataType::List(ListType::new(item_type))
650    }
651
652    pub fn struct_datatype(fields: StructType) -> ConcreteDataType {
653        ConcreteDataType::Struct(fields)
654    }
655
656    pub fn dictionary_datatype(
657        key_type: ConcreteDataType,
658        value_type: ConcreteDataType,
659    ) -> ConcreteDataType {
660        ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
661    }
662
663    pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
664        ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
665    }
666
667    pub fn decimal128_default_datatype() -> ConcreteDataType {
668        Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
669    }
670
671    pub fn vector_datatype(dim: u32) -> ConcreteDataType {
672        ConcreteDataType::Vector(VectorType::new(dim))
673    }
674
675    pub fn vector_default_datatype() -> ConcreteDataType {
676        Self::vector_datatype(0)
677    }
678
679    pub fn json_native_datatype(inner_type: ConcreteDataType) -> ConcreteDataType {
680        ConcreteDataType::Json(JsonType::new(JsonFormat::Native(Box::new(inner_type))))
681    }
682}
683
684/// Data type abstraction.
685#[enum_dispatch::enum_dispatch]
686pub trait DataType: std::fmt::Debug + Send + Sync {
687    /// Name of this data type.
688    fn name(&self) -> String;
689
690    /// Returns id of the Logical data type.
691    fn logical_type_id(&self) -> LogicalTypeId;
692
693    /// Returns the default value of this type.
694    fn default_value(&self) -> Value;
695
696    /// Convert this type as [arrow::datatypes::DataType].
697    fn as_arrow_type(&self) -> ArrowDataType;
698
699    /// Creates a mutable vector with given `capacity` of this type.
700    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
701
702    /// Casts the value to specific DataType.
703    /// Return None if cast failed.
704    fn try_cast(&self, from: Value) -> Option<Value>;
705}
706
707pub type DataTypeRef = Arc<dyn DataType>;
708
709#[cfg(test)]
710mod tests {
711    use arrow::datatypes::Field;
712
713    use super::*;
714
715    #[test]
716    fn test_concrete_type_as_datatype_trait() {
717        let concrete_type = ConcreteDataType::boolean_datatype();
718
719        assert_eq!("Boolean", concrete_type.to_string());
720        assert_eq!(Value::Boolean(false), concrete_type.default_value());
721        assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
722        assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
723    }
724
725    #[test]
726    fn test_from_arrow_type() {
727        assert!(matches!(
728            ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
729            ConcreteDataType::Null(_)
730        ));
731        assert!(matches!(
732            ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
733            ConcreteDataType::Boolean(_)
734        ));
735        assert!(matches!(
736            ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
737            ConcreteDataType::Binary(_)
738        ));
739        assert!(matches!(
740            ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
741            ConcreteDataType::Binary(_)
742        ));
743        assert!(matches!(
744            ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
745            ConcreteDataType::Int8(_)
746        ));
747        assert!(matches!(
748            ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
749            ConcreteDataType::Int16(_)
750        ));
751        assert!(matches!(
752            ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
753            ConcreteDataType::Int32(_)
754        ));
755        assert!(matches!(
756            ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
757            ConcreteDataType::Int64(_)
758        ));
759        assert!(matches!(
760            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
761            ConcreteDataType::UInt8(_)
762        ));
763        assert!(matches!(
764            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
765            ConcreteDataType::UInt16(_)
766        ));
767        assert!(matches!(
768            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
769            ConcreteDataType::UInt32(_)
770        ));
771        assert!(matches!(
772            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
773            ConcreteDataType::UInt64(_)
774        ));
775        assert!(matches!(
776            ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
777            ConcreteDataType::Float32(_)
778        ));
779        assert!(matches!(
780            ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
781            ConcreteDataType::Float64(_)
782        ));
783        assert!(matches!(
784            ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
785            ConcreteDataType::String(_)
786        ));
787        // Test LargeUtf8 mapping to large String type
788        let large_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
789        assert!(matches!(large_string_type, ConcreteDataType::String(_)));
790        if let ConcreteDataType::String(string_type) = &large_string_type {
791            assert!(string_type.is_large());
792        } else {
793            panic!("Expected a String type");
794        }
795        assert_eq!(
796            ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
797                "item",
798                ArrowDataType::Int32,
799                true,
800            )))),
801            ConcreteDataType::List(ListType::new(Arc::new(ConcreteDataType::int32_datatype())))
802        );
803        assert!(matches!(
804            ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
805            ConcreteDataType::Date(_)
806        ));
807    }
808
809    #[test]
810    fn test_large_utf8_round_trip() {
811        // Test round-trip conversion for LargeUtf8
812        let large_utf8_arrow = ArrowDataType::LargeUtf8;
813        let concrete_type = ConcreteDataType::from_arrow_type(&large_utf8_arrow);
814        let back_to_arrow = concrete_type.as_arrow_type();
815
816        assert!(matches!(concrete_type, ConcreteDataType::String(_)));
817        // Round-trip should preserve the LargeUtf8 type
818        assert_eq!(large_utf8_arrow, back_to_arrow);
819
820        // Test that Utf8 and LargeUtf8 map to different string variants
821        let utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8);
822        let large_utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
823
824        assert!(matches!(utf8_concrete, ConcreteDataType::String(_)));
825        assert!(matches!(large_utf8_concrete, ConcreteDataType::String(_)));
826
827        // They should have different size types
828        if let (ConcreteDataType::String(utf8_type), ConcreteDataType::String(large_type)) =
829            (&utf8_concrete, &large_utf8_concrete)
830        {
831            assert!(!utf8_type.is_large());
832            assert!(large_type.is_large());
833        } else {
834            panic!("Expected both to be String types");
835        }
836
837        // They should be different types
838        assert_ne!(utf8_concrete, large_utf8_concrete);
839    }
840
841    #[test]
842    fn test_from_arrow_timestamp() {
843        assert_eq!(
844            ConcreteDataType::timestamp_millisecond_datatype(),
845            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
846        );
847        assert_eq!(
848            ConcreteDataType::timestamp_microsecond_datatype(),
849            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
850        );
851        assert_eq!(
852            ConcreteDataType::timestamp_nanosecond_datatype(),
853            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
854        );
855        assert_eq!(
856            ConcreteDataType::timestamp_second_datatype(),
857            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
858        );
859    }
860
861    #[test]
862    fn test_is_null() {
863        assert!(ConcreteDataType::null_datatype().is_null());
864        assert!(!ConcreteDataType::int32_datatype().is_null());
865    }
866
867    #[test]
868    fn test_is_float() {
869        assert!(!ConcreteDataType::int32_datatype().is_float());
870        assert!(ConcreteDataType::float32_datatype().is_float());
871        assert!(ConcreteDataType::float64_datatype().is_float());
872    }
873
874    #[test]
875    fn test_is_boolean() {
876        assert!(!ConcreteDataType::int32_datatype().is_boolean());
877        assert!(!ConcreteDataType::float32_datatype().is_boolean());
878        assert!(ConcreteDataType::boolean_datatype().is_boolean());
879    }
880
881    #[test]
882    fn test_is_decimal() {
883        assert!(!ConcreteDataType::int32_datatype().is_decimal());
884        assert!(!ConcreteDataType::float32_datatype().is_decimal());
885        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_decimal());
886        assert!(ConcreteDataType::decimal128_datatype(18, 6).is_decimal());
887    }
888
889    #[test]
890    fn test_is_stringifiable() {
891        assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
892        assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
893        assert!(ConcreteDataType::string_datatype().is_stringifiable());
894        assert!(ConcreteDataType::binary_datatype().is_stringifiable());
895        assert!(ConcreteDataType::date_datatype().is_stringifiable());
896        assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
897        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
898        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
899        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
900        assert!(ConcreteDataType::time_second_datatype().is_stringifiable());
901        assert!(ConcreteDataType::time_millisecond_datatype().is_stringifiable());
902        assert!(ConcreteDataType::time_microsecond_datatype().is_stringifiable());
903        assert!(ConcreteDataType::time_nanosecond_datatype().is_stringifiable());
904
905        assert!(ConcreteDataType::interval_year_month_datatype().is_stringifiable());
906        assert!(ConcreteDataType::interval_day_time_datatype().is_stringifiable());
907        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_stringifiable());
908
909        assert!(ConcreteDataType::duration_second_datatype().is_stringifiable());
910        assert!(ConcreteDataType::duration_millisecond_datatype().is_stringifiable());
911        assert!(ConcreteDataType::duration_microsecond_datatype().is_stringifiable());
912        assert!(ConcreteDataType::duration_nanosecond_datatype().is_stringifiable());
913        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_stringifiable());
914        assert!(ConcreteDataType::vector_default_datatype().is_stringifiable());
915    }
916
917    #[test]
918    fn test_is_signed() {
919        assert!(ConcreteDataType::int8_datatype().is_signed());
920        assert!(ConcreteDataType::int16_datatype().is_signed());
921        assert!(ConcreteDataType::int32_datatype().is_signed());
922        assert!(ConcreteDataType::int64_datatype().is_signed());
923        assert!(ConcreteDataType::date_datatype().is_signed());
924        assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
925        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
926        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
927        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
928        assert!(ConcreteDataType::time_second_datatype().is_signed());
929        assert!(ConcreteDataType::time_millisecond_datatype().is_signed());
930        assert!(ConcreteDataType::time_microsecond_datatype().is_signed());
931        assert!(ConcreteDataType::time_nanosecond_datatype().is_signed());
932        assert!(ConcreteDataType::interval_year_month_datatype().is_signed());
933        assert!(ConcreteDataType::interval_day_time_datatype().is_signed());
934        assert!(ConcreteDataType::interval_month_day_nano_datatype().is_signed());
935        assert!(ConcreteDataType::duration_second_datatype().is_signed());
936        assert!(ConcreteDataType::duration_millisecond_datatype().is_signed());
937        assert!(ConcreteDataType::duration_microsecond_datatype().is_signed());
938        assert!(ConcreteDataType::duration_nanosecond_datatype().is_signed());
939
940        assert!(!ConcreteDataType::uint8_datatype().is_signed());
941        assert!(!ConcreteDataType::uint16_datatype().is_signed());
942        assert!(!ConcreteDataType::uint32_datatype().is_signed());
943        assert!(!ConcreteDataType::uint64_datatype().is_signed());
944
945        assert!(!ConcreteDataType::float32_datatype().is_signed());
946        assert!(!ConcreteDataType::float64_datatype().is_signed());
947
948        assert!(ConcreteDataType::decimal128_datatype(10, 2).is_signed());
949    }
950
951    #[test]
952    fn test_is_unsigned() {
953        assert!(!ConcreteDataType::int8_datatype().is_unsigned());
954        assert!(!ConcreteDataType::int16_datatype().is_unsigned());
955        assert!(!ConcreteDataType::int32_datatype().is_unsigned());
956        assert!(!ConcreteDataType::int64_datatype().is_unsigned());
957        assert!(!ConcreteDataType::date_datatype().is_unsigned());
958        assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
959        assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
960        assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
961        assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
962        assert!(!ConcreteDataType::time_second_datatype().is_unsigned());
963        assert!(!ConcreteDataType::time_millisecond_datatype().is_unsigned());
964        assert!(!ConcreteDataType::time_microsecond_datatype().is_unsigned());
965        assert!(!ConcreteDataType::time_nanosecond_datatype().is_unsigned());
966        assert!(!ConcreteDataType::interval_year_month_datatype().is_unsigned());
967        assert!(!ConcreteDataType::interval_day_time_datatype().is_unsigned());
968        assert!(!ConcreteDataType::interval_month_day_nano_datatype().is_unsigned());
969        assert!(!ConcreteDataType::duration_second_datatype().is_unsigned());
970        assert!(!ConcreteDataType::duration_millisecond_datatype().is_unsigned());
971        assert!(!ConcreteDataType::duration_microsecond_datatype().is_unsigned());
972        assert!(!ConcreteDataType::duration_nanosecond_datatype().is_unsigned());
973        assert!(!ConcreteDataType::decimal128_datatype(10, 2).is_unsigned());
974
975        assert!(ConcreteDataType::uint8_datatype().is_unsigned());
976        assert!(ConcreteDataType::uint16_datatype().is_unsigned());
977        assert!(ConcreteDataType::uint32_datatype().is_unsigned());
978        assert!(ConcreteDataType::uint64_datatype().is_unsigned());
979
980        assert!(!ConcreteDataType::float32_datatype().is_unsigned());
981        assert!(!ConcreteDataType::float64_datatype().is_unsigned());
982    }
983
984    #[test]
985    fn test_numerics() {
986        let nums = ConcreteDataType::numerics();
987        assert_eq!(10, nums.len());
988    }
989
990    #[test]
991    fn test_as_list() {
992        let list_type =
993            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()));
994        assert_eq!(
995            ListType::new(Arc::new(ConcreteDataType::int32_datatype())),
996            *list_type.as_list().unwrap()
997        );
998        assert!(ConcreteDataType::int32_datatype().as_list().is_none());
999    }
1000
1001    #[test]
1002    fn test_display_concrete_data_type() {
1003        assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
1004        assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
1005        assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
1006        assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
1007        assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
1008        assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
1009        assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
1010        assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
1011        assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
1012        assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
1013        assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
1014        assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
1015        assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
1016        assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
1017        assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
1018        assert_eq!(
1019            ConcreteDataType::timestamp_millisecond_datatype().to_string(),
1020            "TimestampMillisecond"
1021        );
1022        assert_eq!(
1023            ConcreteDataType::time_millisecond_datatype().to_string(),
1024            "TimeMillisecond"
1025        );
1026        assert_eq!(
1027            ConcreteDataType::interval_month_day_nano_datatype().to_string(),
1028            "IntervalMonthDayNano"
1029        );
1030        assert_eq!(
1031            ConcreteDataType::duration_second_datatype().to_string(),
1032            "DurationSecond"
1033        );
1034        assert_eq!(
1035            ConcreteDataType::decimal128_datatype(10, 2).to_string(),
1036            "Decimal(10, 2)"
1037        );
1038        // Nested types
1039        assert_eq!(
1040            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1041                .to_string(),
1042            "List<Int32>"
1043        );
1044        assert_eq!(
1045            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::Dictionary(
1046                DictionaryType::new(
1047                    ConcreteDataType::int32_datatype(),
1048                    ConcreteDataType::string_datatype()
1049                )
1050            )))
1051            .to_string(),
1052            "List<Dictionary<Int32, String>>"
1053        );
1054        assert_eq!(
1055            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(Arc::new(
1056                ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1057            ))))
1058            .to_string(),
1059            "List<List<List<Int32>>>"
1060        );
1061        assert_eq!(
1062            ConcreteDataType::dictionary_datatype(
1063                ConcreteDataType::int32_datatype(),
1064                ConcreteDataType::string_datatype()
1065            )
1066            .to_string(),
1067            "Dictionary<Int32, String>"
1068        );
1069        assert_eq!(
1070            ConcreteDataType::vector_datatype(3).to_string(),
1071            "Vector(3)"
1072        );
1073    }
1074}