1use std::fmt;
16use std::sync::Arc;
17
18use arrow::compute::cast as arrow_array_cast;
19use arrow::datatypes::{
20 DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
21};
22use arrow_schema::DECIMAL_DEFAULT_SCALE;
23use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
24use common_time::interval::IntervalUnit;
25use common_time::timestamp::TimeUnit;
26use enum_dispatch::enum_dispatch;
27use paste::paste;
28use serde::{Deserialize, Serialize};
29
30use crate::error::{self, Error, Result};
31use crate::type_id::LogicalTypeId;
32use crate::types::{
33 BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
34 DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
35 Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
36 IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
37 StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
38 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
39 UInt16Type, UInt32Type, UInt64Type, UInt8Type, VectorType,
40};
41use crate::value::Value;
42use crate::vectors::MutableVector;
43
/// Enumeration of every concrete data type known to this module.
///
/// Each variant wraps the type descriptor of one kind of data. The
/// `#[enum_dispatch(DataType)]` attribute forwards the [`DataType`] trait
/// methods to the wrapped descriptor.
///
/// NOTE: `PartialOrd`/`Ord` are derived, so the declaration order of the
/// variants determines how values of this enum compare. Do not reorder.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    /// The null type.
    Null(NullType),
    /// The boolean type.
    Boolean(BooleanType),

    // Integer and floating point types:
    /// 8-bit signed integer.
    Int8(Int8Type),
    /// 16-bit signed integer.
    Int16(Int16Type),
    /// 32-bit signed integer.
    Int32(Int32Type),
    /// 64-bit signed integer.
    Int64(Int64Type),
    /// 8-bit unsigned integer.
    UInt8(UInt8Type),
    /// 16-bit unsigned integer.
    UInt16(UInt16Type),
    /// 32-bit unsigned integer.
    UInt32(UInt32Type),
    /// 64-bit unsigned integer.
    UInt64(UInt64Type),
    /// 32-bit floating point number.
    Float32(Float32Type),
    /// 64-bit floating point number.
    Float64(Float64Type),

    /// 128-bit decimal carrying a precision and a scale.
    Decimal128(Decimal128Type),

    // Byte and character sequence types:
    /// Binary data.
    Binary(BinaryType),
    /// String data.
    String(StringType),

    // Date and time types:
    /// Date.
    Date(DateType),
    /// Timestamp in one of the supported time units.
    Timestamp(TimestampType),
    /// Time in one of the supported time units.
    Time(TimeType),

    /// Duration in one of the supported time units.
    Duration(DurationType),

    /// Interval in one of the supported interval units.
    Interval(IntervalType),

    // Compound types:
    /// List with a single item type.
    List(ListType),
    /// Dictionary with a key type and a value type.
    Dictionary(DictionaryType),
    /// Struct made of fields.
    Struct(StructType),

    /// JSON data.
    Json(JsonType),

    /// Vector with a dimension.
    Vector(VectorType),
}
91
92impl fmt::Display for ConcreteDataType {
93 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94 match self {
95 ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
96 ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
97 ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
98 ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
99 ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
100 ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
101 ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
102 ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
103 ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
104 ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
105 ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
106 ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
107 ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
108 ConcreteDataType::String(v) => write!(f, "{}", v.name()),
109 ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
110 ConcreteDataType::Timestamp(t) => match t {
111 TimestampType::Second(v) => write!(f, "{}", v.name()),
112 TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
113 TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
114 TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
115 },
116 ConcreteDataType::Time(t) => match t {
117 TimeType::Second(v) => write!(f, "{}", v.name()),
118 TimeType::Millisecond(v) => write!(f, "{}", v.name()),
119 TimeType::Microsecond(v) => write!(f, "{}", v.name()),
120 TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
121 },
122 ConcreteDataType::Interval(i) => match i {
123 IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
124 IntervalType::DayTime(v) => write!(f, "{}", v.name()),
125 IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
126 },
127 ConcreteDataType::Duration(d) => match d {
128 DurationType::Second(v) => write!(f, "{}", v.name()),
129 DurationType::Millisecond(v) => write!(f, "{}", v.name()),
130 DurationType::Microsecond(v) => write!(f, "{}", v.name()),
131 DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
132 },
133 ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
134 ConcreteDataType::List(v) => write!(f, "{}", v.name()),
135 ConcreteDataType::Struct(v) => write!(f, "{}", v.name()),
136 ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
137 ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
138 ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
139 }
140 }
141}
142
143impl ConcreteDataType {
146 pub fn is_float(&self) -> bool {
147 matches!(
148 self,
149 ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
150 )
151 }
152
153 pub fn is_boolean(&self) -> bool {
154 matches!(self, ConcreteDataType::Boolean(_))
155 }
156
157 pub fn is_string(&self) -> bool {
158 matches!(self, ConcreteDataType::String(_))
159 }
160
161 pub fn is_stringifiable(&self) -> bool {
162 matches!(
163 self,
164 ConcreteDataType::String(_)
165 | ConcreteDataType::Date(_)
166 | ConcreteDataType::Timestamp(_)
167 | ConcreteDataType::Time(_)
168 | ConcreteDataType::Interval(_)
169 | ConcreteDataType::Duration(_)
170 | ConcreteDataType::Decimal128(_)
171 | ConcreteDataType::Binary(_)
172 | ConcreteDataType::Json(_)
173 | ConcreteDataType::Vector(_)
174 )
175 }
176
177 pub fn is_signed(&self) -> bool {
178 matches!(
179 self,
180 ConcreteDataType::Int8(_)
181 | ConcreteDataType::Int16(_)
182 | ConcreteDataType::Int32(_)
183 | ConcreteDataType::Int64(_)
184 | ConcreteDataType::Date(_)
185 | ConcreteDataType::Timestamp(_)
186 | ConcreteDataType::Time(_)
187 | ConcreteDataType::Interval(_)
188 | ConcreteDataType::Duration(_)
189 | ConcreteDataType::Decimal128(_)
190 )
191 }
192
193 pub fn is_unsigned(&self) -> bool {
194 matches!(
195 self,
196 ConcreteDataType::UInt8(_)
197 | ConcreteDataType::UInt16(_)
198 | ConcreteDataType::UInt32(_)
199 | ConcreteDataType::UInt64(_)
200 )
201 }
202
203 pub fn is_numeric(&self) -> bool {
204 matches!(
205 self,
206 ConcreteDataType::Int8(_)
207 | ConcreteDataType::Int16(_)
208 | ConcreteDataType::Int32(_)
209 | ConcreteDataType::Int64(_)
210 | ConcreteDataType::UInt8(_)
211 | ConcreteDataType::UInt16(_)
212 | ConcreteDataType::UInt32(_)
213 | ConcreteDataType::UInt64(_)
214 | ConcreteDataType::Float32(_)
215 | ConcreteDataType::Float64(_)
216 )
217 }
218
219 pub fn is_timestamp(&self) -> bool {
220 matches!(self, ConcreteDataType::Timestamp(_))
221 }
222
223 pub fn is_decimal(&self) -> bool {
224 matches!(self, ConcreteDataType::Decimal128(_))
225 }
226
227 pub fn is_json(&self) -> bool {
228 matches!(self, ConcreteDataType::Json(_))
229 }
230
231 pub fn is_vector(&self) -> bool {
232 matches!(self, ConcreteDataType::Vector(_))
233 }
234
235 pub fn numerics() -> Vec<ConcreteDataType> {
236 vec![
237 ConcreteDataType::int8_datatype(),
238 ConcreteDataType::int16_datatype(),
239 ConcreteDataType::int32_datatype(),
240 ConcreteDataType::int64_datatype(),
241 ConcreteDataType::uint8_datatype(),
242 ConcreteDataType::uint16_datatype(),
243 ConcreteDataType::uint32_datatype(),
244 ConcreteDataType::uint64_datatype(),
245 ConcreteDataType::float32_datatype(),
246 ConcreteDataType::float64_datatype(),
247 ]
248 }
249
250 pub fn unsigned_integers() -> Vec<ConcreteDataType> {
251 vec![
252 ConcreteDataType::uint8_datatype(),
253 ConcreteDataType::uint16_datatype(),
254 ConcreteDataType::uint32_datatype(),
255 ConcreteDataType::uint64_datatype(),
256 ]
257 }
258
259 pub fn timestamps() -> Vec<ConcreteDataType> {
260 vec![
261 ConcreteDataType::timestamp_second_datatype(),
262 ConcreteDataType::timestamp_millisecond_datatype(),
263 ConcreteDataType::timestamp_microsecond_datatype(),
264 ConcreteDataType::timestamp_nanosecond_datatype(),
265 ]
266 }
267
268 pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
273 ConcreteDataType::try_from(dt).expect("Unimplemented type")
274 }
275
276 pub fn is_null(&self) -> bool {
277 matches!(self, ConcreteDataType::Null(NullType))
278 }
279
280 pub fn as_list(&self) -> Option<&ListType> {
282 match self {
283 ConcreteDataType::List(t) => Some(t),
284 _ => None,
285 }
286 }
287
288 pub fn as_timestamp(&self) -> Option<TimestampType> {
290 match self {
291 ConcreteDataType::Timestamp(t) => Some(*t),
292 _ => None,
293 }
294 }
295
296 pub fn numeric_precision(&self) -> Option<u8> {
298 match self {
299 ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
300 ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
301 ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
302 ConcreteDataType::Int64(_) => Some(19),
303 ConcreteDataType::UInt64(_) => Some(20),
304 ConcreteDataType::Float32(_) => Some(12),
305 ConcreteDataType::Float64(_) => Some(22),
306 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
307 _ => None,
308 }
309 }
310
311 pub fn numeric_scale(&self) -> Option<i8> {
313 match self {
314 ConcreteDataType::Int8(_)
315 | ConcreteDataType::UInt8(_)
316 | ConcreteDataType::Int16(_)
317 | ConcreteDataType::UInt16(_)
318 | ConcreteDataType::Int32(_)
319 | ConcreteDataType::UInt32(_)
320 | ConcreteDataType::Int64(_)
321 | ConcreteDataType::UInt64(_) => Some(0),
322 ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
323 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
324 _ => None,
325 }
326 }
327
328 pub fn as_time(&self) -> Option<TimeType> {
330 match self {
331 ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
332 ConcreteDataType::Time(t) => Some(*t),
333 _ => None,
334 }
335 }
336
337 pub fn as_decimal128(&self) -> Option<Decimal128Type> {
338 match self {
339 ConcreteDataType::Decimal128(d) => Some(*d),
340 _ => None,
341 }
342 }
343
344 pub fn as_json(&self) -> Option<JsonType> {
345 match self {
346 ConcreteDataType::Json(j) => Some(*j),
347 _ => None,
348 }
349 }
350
351 pub fn as_vector(&self) -> Option<VectorType> {
352 match self {
353 ConcreteDataType::Vector(v) => Some(*v),
354 _ => None,
355 }
356 }
357
358 pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
360 let array = arrow_array::new_empty_array(&self.as_arrow_type());
361 arrow_array_cast(array.as_ref(), &to_type.as_arrow_type()).is_ok()
362 }
363
364 pub fn as_duration(&self) -> Option<DurationType> {
366 match self {
367 ConcreteDataType::Duration(d) => Some(*d),
368 _ => None,
369 }
370 }
371
372 pub fn postgres_datatype_name(&self) -> &'static str {
374 match self {
375 &ConcreteDataType::Null(_) => "UNKNOWN",
376 &ConcreteDataType::Boolean(_) => "BOOL",
377 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
378 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
379 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
380 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
381 &ConcreteDataType::Float32(_) => "FLOAT4",
382 &ConcreteDataType::Float64(_) => "FLOAT8",
383 &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
384 &ConcreteDataType::String(_) => "VARCHAR",
385 &ConcreteDataType::Date(_) => "DATE",
386 &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
387 &ConcreteDataType::Time(_) => "TIME",
388 &ConcreteDataType::Interval(_) => "INTERVAL",
389 &ConcreteDataType::Decimal128(_) => "NUMERIC",
390 &ConcreteDataType::Json(_) => "JSON",
391 ConcreteDataType::List(list) => match list.item_type() {
392 &ConcreteDataType::Null(_) => "UNKNOWN",
393 &ConcreteDataType::Boolean(_) => "_BOOL",
394 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
395 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
396 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
397 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
398 &ConcreteDataType::Float32(_) => "_FLOAT4",
399 &ConcreteDataType::Float64(_) => "_FLOAT8",
400 &ConcreteDataType::Binary(_) => "_BYTEA",
401 &ConcreteDataType::String(_) => "_VARCHAR",
402 &ConcreteDataType::Date(_) => "_DATE",
403 &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
404 &ConcreteDataType::Time(_) => "_TIME",
405 &ConcreteDataType::Interval(_) => "_INTERVAL",
406 &ConcreteDataType::Decimal128(_) => "_NUMERIC",
407 &ConcreteDataType::Json(_) => "_JSON",
408 &ConcreteDataType::Duration(_)
409 | &ConcreteDataType::Dictionary(_)
410 | &ConcreteDataType::Vector(_)
411 | &ConcreteDataType::List(_)
412 | &ConcreteDataType::Struct(_) => "UNKNOWN",
413 },
414 &ConcreteDataType::Duration(_)
415 | &ConcreteDataType::Dictionary(_)
416 | &ConcreteDataType::Struct(_) => "UNKNOWN",
417 }
418 }
419}
420
421impl From<&ConcreteDataType> for ConcreteDataType {
422 fn from(t: &ConcreteDataType) -> Self {
423 t.clone()
424 }
425}
426
427impl TryFrom<&ArrowDataType> for ConcreteDataType {
428 type Error = Error;
429
430 fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
431 let concrete_type = match dt {
432 ArrowDataType::Null => Self::null_datatype(),
433 ArrowDataType::Boolean => Self::boolean_datatype(),
434 ArrowDataType::UInt8 => Self::uint8_datatype(),
435 ArrowDataType::UInt16 => Self::uint16_datatype(),
436 ArrowDataType::UInt32 => Self::uint32_datatype(),
437 ArrowDataType::UInt64 => Self::uint64_datatype(),
438 ArrowDataType::Int8 => Self::int8_datatype(),
439 ArrowDataType::Int16 => Self::int16_datatype(),
440 ArrowDataType::Int32 => Self::int32_datatype(),
441 ArrowDataType::Int64 => Self::int64_datatype(),
442 ArrowDataType::Float32 => Self::float32_datatype(),
443 ArrowDataType::Float64 => Self::float64_datatype(),
444 ArrowDataType::Date32 => Self::date_datatype(),
445 ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
446 ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
447 ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
448 ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
449 ArrowDataType::List(field) => Self::List(ListType::new(
450 ConcreteDataType::from_arrow_type(field.data_type()),
451 )),
452 ArrowDataType::Dictionary(key_type, value_type) => {
453 let key_type = ConcreteDataType::from_arrow_type(key_type);
454 let value_type = ConcreteDataType::from_arrow_type(value_type);
455 Self::Dictionary(DictionaryType::new(key_type, value_type))
456 }
457 ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
458 ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
459 ArrowDataType::Duration(u) => {
460 ConcreteDataType::Duration(DurationType::from_unit(u.into()))
461 }
462 ArrowDataType::Decimal128(precision, scale) => {
463 ConcreteDataType::decimal128_datatype(*precision, *scale)
464 }
465 ArrowDataType::Struct(fields) => ConcreteDataType::Struct(fields.try_into()?),
466 ArrowDataType::Float16
467 | ArrowDataType::Date64
468 | ArrowDataType::FixedSizeBinary(_)
469 | ArrowDataType::BinaryView
470 | ArrowDataType::Utf8View
471 | ArrowDataType::ListView(_)
472 | ArrowDataType::FixedSizeList(_, _)
473 | ArrowDataType::LargeList(_)
474 | ArrowDataType::LargeListView(_)
475 | ArrowDataType::Union(_, _)
476 | ArrowDataType::Decimal256(_, _)
477 | ArrowDataType::Map(_, _)
478 | ArrowDataType::RunEndEncoded(_, _) => {
479 return error::UnsupportedArrowTypeSnafu {
480 arrow_type: dt.clone(),
481 }
482 .fail()
483 }
484 };
485
486 Ok(concrete_type)
487 }
488}
489
/// Generates a `<name>_datatype()` constructor on [`ConcreteDataType`] for
/// every listed variant.
///
/// For a variant `Foo`, the generated function is named `foo_datatype` (the
/// variant name lower-cased by `paste`) and returns
/// `ConcreteDataType::Foo(FooType::default())`.
macro_rules! impl_new_concrete_type_functions {
    ($($Type: ident), +) => {
        paste! {
            impl ConcreteDataType {
                $(
                    pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
                        ConcreteDataType::$Type([<$Type Type>]::default())
                    }
                )+
            }
        }
    }
}

// Constructors for the variants whose type descriptor is built via `default()`.
impl_new_concrete_type_functions!(
    Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
    Binary, Date, String, Json
);
508
509impl ConcreteDataType {
510 pub fn timestamp_second_datatype() -> Self {
511 ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
512 }
513
514 pub fn timestamp_millisecond_datatype() -> Self {
515 ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
516 }
517
518 pub fn timestamp_microsecond_datatype() -> Self {
519 ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
520 }
521
522 pub fn timestamp_nanosecond_datatype() -> Self {
523 ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
524 }
525
526 pub fn time_datatype(unit: TimeUnit) -> Self {
528 ConcreteDataType::Time(TimeType::from_unit(unit))
529 }
530
531 pub fn time_second_datatype() -> Self {
533 Self::time_datatype(TimeUnit::Second)
534 }
535
536 pub fn time_millisecond_datatype() -> Self {
538 Self::time_datatype(TimeUnit::Millisecond)
539 }
540
541 pub fn time_microsecond_datatype() -> Self {
543 Self::time_datatype(TimeUnit::Microsecond)
544 }
545
546 pub fn time_nanosecond_datatype() -> Self {
548 Self::time_datatype(TimeUnit::Nanosecond)
549 }
550
551 pub fn duration_second_datatype() -> Self {
553 ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
554 }
555
556 pub fn duration_millisecond_datatype() -> Self {
558 ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
559 }
560
561 pub fn duration_microsecond_datatype() -> Self {
563 ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
564 }
565
566 pub fn duration_nanosecond_datatype() -> Self {
568 ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
569 }
570
571 pub fn interval_month_day_nano_datatype() -> Self {
573 ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
574 }
575
576 pub fn interval_year_month_datatype() -> Self {
578 ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
579 }
580
581 pub fn interval_day_time_datatype() -> Self {
583 ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
584 }
585
586 pub fn timestamp_datatype(unit: TimeUnit) -> Self {
587 match unit {
588 TimeUnit::Second => Self::timestamp_second_datatype(),
589 TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
590 TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
591 TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
592 }
593 }
594
595 pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
597 match t {
598 ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
599 ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
600 ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
601 ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
602 }
603 }
604
605 pub fn duration_datatype(unit: TimeUnit) -> Self {
606 match unit {
607 TimeUnit::Second => Self::duration_second_datatype(),
608 TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
609 TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
610 TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
611 }
612 }
613
614 pub fn interval_datatype(unit: IntervalUnit) -> Self {
615 match unit {
616 IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
617 IntervalUnit::DayTime => Self::interval_day_time_datatype(),
618 IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
619 }
620 }
621
622 pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
623 match u {
624 ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
625 ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
626 ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
627 }
628 }
629
630 pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
631 ConcreteDataType::List(ListType::new(item_type))
632 }
633
634 pub fn struct_datatype(fields: StructType) -> ConcreteDataType {
635 ConcreteDataType::Struct(fields)
636 }
637
638 pub fn dictionary_datatype(
639 key_type: ConcreteDataType,
640 value_type: ConcreteDataType,
641 ) -> ConcreteDataType {
642 ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
643 }
644
645 pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
646 ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
647 }
648
649 pub fn decimal128_default_datatype() -> ConcreteDataType {
650 Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
651 }
652
653 pub fn vector_datatype(dim: u32) -> ConcreteDataType {
654 ConcreteDataType::Vector(VectorType::new(dim))
655 }
656
657 pub fn vector_default_datatype() -> ConcreteDataType {
658 Self::vector_datatype(0)
659 }
660}
661
/// Common interface of every data type descriptor.
///
/// The `enum_dispatch` attribute registers this trait so that an enum
/// annotated with `#[enum_dispatch(DataType)]` (see `ConcreteDataType`)
/// forwards these methods to the type wrapped by its active variant.
#[enum_dispatch::enum_dispatch]
pub trait DataType: std::fmt::Debug + Send + Sync {
    /// Returns the name of this data type.
    fn name(&self) -> String;

    /// Returns the logical type id of this data type.
    fn logical_type_id(&self) -> LogicalTypeId;

    /// Returns the default value of this data type.
    fn default_value(&self) -> Value;

    /// Returns the Arrow data type this data type maps to.
    fn as_arrow_type(&self) -> ArrowDataType;

    /// Creates a mutable vector for this data type.
    ///
    /// `capacity` is presumably the number of elements to reserve space for;
    /// confirm against the implementations.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

    /// Tries to cast the value `from` to this data type.
    ///
    /// Returns `None` when no casted value can be produced (presumably when
    /// the cast is unsupported or fails; confirm against the implementations).
    fn try_cast(&self, from: Value) -> Option<Value>;
}
684
/// Reference-counted pointer to a dynamically typed [`DataType`].
pub type DataTypeRef = Arc<dyn DataType>;
686
#[cfg(test)]
mod tests {
    use arrow::datatypes::Field;

    use super::*;

    /// Asserts that converting the Arrow type yields a value matching the
    /// given pattern.
    macro_rules! assert_from_arrow {
        ($arrow_type:expr, $expected:pat) => {
            assert!(matches!(
                ConcreteDataType::from_arrow_type(&$arrow_type),
                $expected
            ));
        };
    }

    /// The four timestamp types, one per time unit.
    fn timestamp_types() -> Vec<ConcreteDataType> {
        vec![
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::timestamp_nanosecond_datatype(),
        ]
    }

    /// The four time types, one per time unit.
    fn time_types() -> Vec<ConcreteDataType> {
        vec![
            ConcreteDataType::time_second_datatype(),
            ConcreteDataType::time_millisecond_datatype(),
            ConcreteDataType::time_microsecond_datatype(),
            ConcreteDataType::time_nanosecond_datatype(),
        ]
    }

    /// The three interval types, one per interval unit.
    fn interval_types() -> Vec<ConcreteDataType> {
        vec![
            ConcreteDataType::interval_year_month_datatype(),
            ConcreteDataType::interval_day_time_datatype(),
            ConcreteDataType::interval_month_day_nano_datatype(),
        ]
    }

    /// The four duration types, one per time unit.
    fn duration_types() -> Vec<ConcreteDataType> {
        vec![
            ConcreteDataType::duration_second_datatype(),
            ConcreteDataType::duration_millisecond_datatype(),
            ConcreteDataType::duration_microsecond_datatype(),
            ConcreteDataType::duration_nanosecond_datatype(),
        ]
    }

    /// Date plus every timestamp, time, interval and duration type.
    fn temporal_types() -> Vec<ConcreteDataType> {
        let mut types = vec![ConcreteDataType::date_datatype()];
        types.extend(timestamp_types());
        types.extend(time_types());
        types.extend(interval_types());
        types.extend(duration_types());
        types
    }

    /// The four signed integer types.
    fn signed_integer_types() -> Vec<ConcreteDataType> {
        vec![
            ConcreteDataType::int8_datatype(),
            ConcreteDataType::int16_datatype(),
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::int64_datatype(),
        ]
    }

    /// The two floating point types.
    fn float_types() -> Vec<ConcreteDataType> {
        vec![
            ConcreteDataType::float32_datatype(),
            ConcreteDataType::float64_datatype(),
        ]
    }

    #[test]
    fn test_concrete_type_as_datatype_trait() {
        let boolean = ConcreteDataType::boolean_datatype();

        assert_eq!("Boolean", boolean.to_string());
        assert_eq!(Value::Boolean(false), boolean.default_value());
        assert_eq!(LogicalTypeId::Boolean, boolean.logical_type_id());
        assert_eq!(ArrowDataType::Boolean, boolean.as_arrow_type());
    }

    #[test]
    fn test_from_arrow_type() {
        assert_from_arrow!(ArrowDataType::Null, ConcreteDataType::Null(_));
        assert_from_arrow!(ArrowDataType::Boolean, ConcreteDataType::Boolean(_));
        assert_from_arrow!(ArrowDataType::Binary, ConcreteDataType::Binary(_));
        assert_from_arrow!(ArrowDataType::LargeBinary, ConcreteDataType::Binary(_));
        assert_from_arrow!(ArrowDataType::Int8, ConcreteDataType::Int8(_));
        assert_from_arrow!(ArrowDataType::Int16, ConcreteDataType::Int16(_));
        assert_from_arrow!(ArrowDataType::Int32, ConcreteDataType::Int32(_));
        assert_from_arrow!(ArrowDataType::Int64, ConcreteDataType::Int64(_));
        assert_from_arrow!(ArrowDataType::UInt8, ConcreteDataType::UInt8(_));
        assert_from_arrow!(ArrowDataType::UInt16, ConcreteDataType::UInt16(_));
        assert_from_arrow!(ArrowDataType::UInt32, ConcreteDataType::UInt32(_));
        assert_from_arrow!(ArrowDataType::UInt64, ConcreteDataType::UInt64(_));
        assert_from_arrow!(ArrowDataType::Float32, ConcreteDataType::Float32(_));
        assert_from_arrow!(ArrowDataType::Float64, ConcreteDataType::Float64(_));
        assert_from_arrow!(ArrowDataType::Utf8, ConcreteDataType::String(_));

        let arrow_list = ArrowDataType::List(Arc::new(Field::new(
            "item",
            ArrowDataType::Int32,
            true,
        )));
        assert_eq!(
            ConcreteDataType::from_arrow_type(&arrow_list),
            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
        );

        assert_from_arrow!(ArrowDataType::Date32, ConcreteDataType::Date(_));
    }

    #[test]
    fn test_from_arrow_timestamp() {
        let cases = [
            (
                ConcreteDataType::timestamp_millisecond_datatype(),
                ArrowTimeUnit::Millisecond,
            ),
            (
                ConcreteDataType::timestamp_microsecond_datatype(),
                ArrowTimeUnit::Microsecond,
            ),
            (
                ConcreteDataType::timestamp_nanosecond_datatype(),
                ArrowTimeUnit::Nanosecond,
            ),
            (
                ConcreteDataType::timestamp_second_datatype(),
                ArrowTimeUnit::Second,
            ),
        ];

        for (expected, arrow_unit) in cases {
            assert_eq!(expected, ConcreteDataType::from_arrow_time_unit(&arrow_unit));
        }
    }

    #[test]
    fn test_is_null() {
        let null_type = ConcreteDataType::null_datatype();
        let int_type = ConcreteDataType::int32_datatype();

        assert!(null_type.is_null());
        assert!(!int_type.is_null());
    }

    #[test]
    fn test_is_float() {
        assert!(!ConcreteDataType::int32_datatype().is_float());
        for ty in float_types() {
            assert!(ty.is_float(), "{} should be a float", ty);
        }
    }

    #[test]
    fn test_is_boolean() {
        let non_booleans = [
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::float32_datatype(),
        ];
        for ty in non_booleans {
            assert!(!ty.is_boolean(), "{} should not be a boolean", ty);
        }
        assert!(ConcreteDataType::boolean_datatype().is_boolean());
    }

    #[test]
    fn test_is_decimal() {
        let non_decimals = [
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::float32_datatype(),
        ];
        for ty in non_decimals {
            assert!(!ty.is_decimal(), "{} should not be a decimal", ty);
        }

        for (precision, scale) in [(10, 2), (18, 6)] {
            assert!(ConcreteDataType::decimal128_datatype(precision, scale).is_decimal());
        }
    }

    #[test]
    fn test_is_stringifiable() {
        let not_stringifiable = [
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::float32_datatype(),
        ];
        for ty in not_stringifiable {
            assert!(!ty.is_stringifiable(), "{} should not be stringifiable", ty);
        }

        let mut stringifiable = vec![
            ConcreteDataType::string_datatype(),
            ConcreteDataType::binary_datatype(),
        ];
        stringifiable.extend(temporal_types());
        stringifiable.push(ConcreteDataType::decimal128_datatype(10, 2));
        stringifiable.push(ConcreteDataType::vector_default_datatype());
        for ty in stringifiable {
            assert!(ty.is_stringifiable(), "{} should be stringifiable", ty);
        }
    }

    #[test]
    fn test_is_signed() {
        let mut signed = signed_integer_types();
        signed.extend(temporal_types());
        signed.push(ConcreteDataType::decimal128_datatype(10, 2));
        for ty in signed {
            assert!(ty.is_signed(), "{} should be signed", ty);
        }

        let mut not_signed = ConcreteDataType::unsigned_integers();
        not_signed.extend(float_types());
        for ty in not_signed {
            assert!(!ty.is_signed(), "{} should not be signed", ty);
        }
    }

    #[test]
    fn test_is_unsigned() {
        let mut not_unsigned = signed_integer_types();
        not_unsigned.extend(temporal_types());
        not_unsigned.push(ConcreteDataType::decimal128_datatype(10, 2));
        not_unsigned.extend(float_types());
        for ty in not_unsigned {
            assert!(!ty.is_unsigned(), "{} should not be unsigned", ty);
        }

        let unsigned = [
            ConcreteDataType::uint8_datatype(),
            ConcreteDataType::uint16_datatype(),
            ConcreteDataType::uint32_datatype(),
            ConcreteDataType::uint64_datatype(),
        ];
        for ty in unsigned {
            assert!(ty.is_unsigned(), "{} should be unsigned", ty);
        }
    }

    #[test]
    fn test_numerics() {
        assert_eq!(10, ConcreteDataType::numerics().len());
    }

    #[test]
    fn test_as_list() {
        let int_list = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
        let expected = ListType::new(ConcreteDataType::int32_datatype());

        assert_eq!(expected, *int_list.as_list().unwrap());
        assert!(ConcreteDataType::int32_datatype().as_list().is_none());
    }

    #[test]
    fn test_display_concrete_data_type() {
        // Every entry pairs a data type with the text its `Display` impl must
        // produce.
        let cases = [
            (ConcreteDataType::null_datatype(), "Null"),
            (ConcreteDataType::boolean_datatype(), "Boolean"),
            (ConcreteDataType::binary_datatype(), "Binary"),
            (ConcreteDataType::int8_datatype(), "Int8"),
            (ConcreteDataType::int16_datatype(), "Int16"),
            (ConcreteDataType::int32_datatype(), "Int32"),
            (ConcreteDataType::int64_datatype(), "Int64"),
            (ConcreteDataType::uint8_datatype(), "UInt8"),
            (ConcreteDataType::uint16_datatype(), "UInt16"),
            (ConcreteDataType::uint32_datatype(), "UInt32"),
            (ConcreteDataType::uint64_datatype(), "UInt64"),
            (ConcreteDataType::float32_datatype(), "Float32"),
            (ConcreteDataType::float64_datatype(), "Float64"),
            (ConcreteDataType::string_datatype(), "String"),
            (ConcreteDataType::date_datatype(), "Date"),
            (
                ConcreteDataType::timestamp_millisecond_datatype(),
                "TimestampMillisecond",
            ),
            (
                ConcreteDataType::time_millisecond_datatype(),
                "TimeMillisecond",
            ),
            (
                ConcreteDataType::interval_month_day_nano_datatype(),
                "IntervalMonthDayNano",
            ),
            (
                ConcreteDataType::duration_second_datatype(),
                "DurationSecond",
            ),
            (
                ConcreteDataType::decimal128_datatype(10, 2),
                "Decimal(10, 2)",
            ),
            (
                ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
                "List<Int32>",
            ),
            (
                ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(
                    DictionaryType::new(
                        ConcreteDataType::int32_datatype(),
                        ConcreteDataType::string_datatype(),
                    ),
                )),
                "List<Dictionary<Int32, String>>",
            ),
            (
                ConcreteDataType::list_datatype(ConcreteDataType::list_datatype(
                    ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
                )),
                "List<List<List<Int32>>>",
            ),
            (
                ConcreteDataType::dictionary_datatype(
                    ConcreteDataType::int32_datatype(),
                    ConcreteDataType::string_datatype(),
                ),
                "Dictionary<Int32, String>",
            ),
            (ConcreteDataType::vector_datatype(3), "Vector(3)"),
        ];

        for (data_type, expected) in cases {
            assert_eq!(data_type.to_string(), expected);
        }
    }
}