1use std::fmt;
16use std::sync::Arc;
17
18use arrow::datatypes::{
19 DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
20};
21use arrow_schema::DECIMAL_DEFAULT_SCALE;
22use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
23use common_time::interval::IntervalUnit;
24use common_time::timestamp::TimeUnit;
25use enum_dispatch::enum_dispatch;
26use paste::paste;
27use serde::{Deserialize, Serialize};
28
29use crate::error::{self, Error, Result};
30use crate::type_id::LogicalTypeId;
31use crate::types::{
32 BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
33 DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
34 Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType,
35 IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
36 StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
37 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
38 UInt8Type, UInt16Type, UInt32Type, UInt64Type, VectorType,
39};
40use crate::value::Value;
41use crate::vectors::MutableVector;
42
/// The closed set of data types known to this crate, one variant per type family.
///
/// `#[enum_dispatch(DataType)]` generates the [`DataType`] implementation for this enum by
/// forwarding every trait call to the type wrapped by the active variant.
///
/// NOTE: `PartialOrd`/`Ord` are derived, so the declaration order of the variants below
/// decides how two different variants compare; reordering them changes that ordering.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    /// The null type.
    Null(NullType),
    /// The boolean type.
    Boolean(BooleanType),

    // Numeric types: signed integers, unsigned integers and floats.
    Int8(Int8Type),
    Int16(Int16Type),
    Int32(Int32Type),
    Int64(Int64Type),
    UInt8(UInt8Type),
    UInt16(UInt16Type),
    UInt32(UInt32Type),
    UInt64(UInt64Type),
    Float32(Float32Type),
    Float64(Float64Type),

    /// 128-bit decimal type; the wrapped type carries the precision and scale.
    Decimal128(Decimal128Type),

    // Byte and string types.
    Binary(BinaryType),
    String(StringType),

    // Date and time types. `Timestamp` and `Time` wrap an enum that selects the unit.
    Date(DateType),
    Timestamp(TimestampType),
    Time(TimeType),

    /// Duration type; the wrapped enum selects the time unit.
    Duration(DurationType),

    /// Interval type; the wrapped enum selects the interval unit.
    Interval(IntervalType),

    // Compound types.
    List(ListType),
    Dictionary(DictionaryType),
    Struct(StructType),

    /// JSON type.
    Json(JsonType),

    /// Vector type; constructed from a dimension (see `vector_datatype`).
    Vector(VectorType),
}
90
91impl fmt::Display for ConcreteDataType {
92 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93 match self {
94 ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
95 ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
96 ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
97 ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
98 ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
99 ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
100 ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
101 ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
102 ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
103 ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
104 ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
105 ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
106 ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
107 ConcreteDataType::String(v) => write!(f, "{}", v.name()),
108 ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
109 ConcreteDataType::Timestamp(t) => match t {
110 TimestampType::Second(v) => write!(f, "{}", v.name()),
111 TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
112 TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
113 TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
114 },
115 ConcreteDataType::Time(t) => match t {
116 TimeType::Second(v) => write!(f, "{}", v.name()),
117 TimeType::Millisecond(v) => write!(f, "{}", v.name()),
118 TimeType::Microsecond(v) => write!(f, "{}", v.name()),
119 TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
120 },
121 ConcreteDataType::Interval(i) => match i {
122 IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
123 IntervalType::DayTime(v) => write!(f, "{}", v.name()),
124 IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
125 },
126 ConcreteDataType::Duration(d) => match d {
127 DurationType::Second(v) => write!(f, "{}", v.name()),
128 DurationType::Millisecond(v) => write!(f, "{}", v.name()),
129 DurationType::Microsecond(v) => write!(f, "{}", v.name()),
130 DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
131 },
132 ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
133 ConcreteDataType::List(v) => write!(f, "{}", v.name()),
134 ConcreteDataType::Struct(v) => write!(f, "{}", v.name()),
135 ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
136 ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
137 ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
138 }
139 }
140}
141
142impl ConcreteDataType {
145 pub fn is_float(&self) -> bool {
146 matches!(
147 self,
148 ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
149 )
150 }
151
152 pub fn is_boolean(&self) -> bool {
153 matches!(self, ConcreteDataType::Boolean(_))
154 }
155
156 pub fn is_string(&self) -> bool {
157 matches!(self, ConcreteDataType::String(_))
158 }
159
160 pub fn is_stringifiable(&self) -> bool {
161 matches!(
162 self,
163 ConcreteDataType::String(_)
164 | ConcreteDataType::Date(_)
165 | ConcreteDataType::Timestamp(_)
166 | ConcreteDataType::Time(_)
167 | ConcreteDataType::Interval(_)
168 | ConcreteDataType::Duration(_)
169 | ConcreteDataType::Decimal128(_)
170 | ConcreteDataType::Binary(_)
171 | ConcreteDataType::Json(_)
172 | ConcreteDataType::Vector(_)
173 )
174 }
175
176 pub fn is_signed(&self) -> bool {
177 matches!(
178 self,
179 ConcreteDataType::Int8(_)
180 | ConcreteDataType::Int16(_)
181 | ConcreteDataType::Int32(_)
182 | ConcreteDataType::Int64(_)
183 | ConcreteDataType::Date(_)
184 | ConcreteDataType::Timestamp(_)
185 | ConcreteDataType::Time(_)
186 | ConcreteDataType::Interval(_)
187 | ConcreteDataType::Duration(_)
188 | ConcreteDataType::Decimal128(_)
189 )
190 }
191
192 pub fn is_unsigned(&self) -> bool {
193 matches!(
194 self,
195 ConcreteDataType::UInt8(_)
196 | ConcreteDataType::UInt16(_)
197 | ConcreteDataType::UInt32(_)
198 | ConcreteDataType::UInt64(_)
199 )
200 }
201
202 pub fn is_numeric(&self) -> bool {
203 matches!(
204 self,
205 ConcreteDataType::Int8(_)
206 | ConcreteDataType::Int16(_)
207 | ConcreteDataType::Int32(_)
208 | ConcreteDataType::Int64(_)
209 | ConcreteDataType::UInt8(_)
210 | ConcreteDataType::UInt16(_)
211 | ConcreteDataType::UInt32(_)
212 | ConcreteDataType::UInt64(_)
213 | ConcreteDataType::Float32(_)
214 | ConcreteDataType::Float64(_)
215 )
216 }
217
218 pub fn is_timestamp(&self) -> bool {
219 matches!(self, ConcreteDataType::Timestamp(_))
220 }
221
222 pub fn is_decimal(&self) -> bool {
223 matches!(self, ConcreteDataType::Decimal128(_))
224 }
225
226 pub fn is_json(&self) -> bool {
227 matches!(self, ConcreteDataType::Json(_))
228 }
229
230 pub fn is_vector(&self) -> bool {
231 matches!(self, ConcreteDataType::Vector(_))
232 }
233
234 pub fn numerics() -> Vec<ConcreteDataType> {
235 vec![
236 ConcreteDataType::int8_datatype(),
237 ConcreteDataType::int16_datatype(),
238 ConcreteDataType::int32_datatype(),
239 ConcreteDataType::int64_datatype(),
240 ConcreteDataType::uint8_datatype(),
241 ConcreteDataType::uint16_datatype(),
242 ConcreteDataType::uint32_datatype(),
243 ConcreteDataType::uint64_datatype(),
244 ConcreteDataType::float32_datatype(),
245 ConcreteDataType::float64_datatype(),
246 ]
247 }
248
249 pub fn unsigned_integers() -> Vec<ConcreteDataType> {
250 vec![
251 ConcreteDataType::uint8_datatype(),
252 ConcreteDataType::uint16_datatype(),
253 ConcreteDataType::uint32_datatype(),
254 ConcreteDataType::uint64_datatype(),
255 ]
256 }
257
258 pub fn timestamps() -> Vec<ConcreteDataType> {
259 vec![
260 ConcreteDataType::timestamp_second_datatype(),
261 ConcreteDataType::timestamp_millisecond_datatype(),
262 ConcreteDataType::timestamp_microsecond_datatype(),
263 ConcreteDataType::timestamp_nanosecond_datatype(),
264 ]
265 }
266
267 pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
272 ConcreteDataType::try_from(dt).expect("Unimplemented type")
273 }
274
275 pub fn is_null(&self) -> bool {
276 matches!(self, ConcreteDataType::Null(NullType))
277 }
278
279 pub(crate) fn is_struct(&self) -> bool {
280 matches!(self, ConcreteDataType::Struct(_))
281 }
282
283 pub fn as_list(&self) -> Option<&ListType> {
285 match self {
286 ConcreteDataType::List(t) => Some(t),
287 _ => None,
288 }
289 }
290
291 pub fn as_struct(&self) -> Option<&StructType> {
292 match self {
293 ConcreteDataType::Struct(s) => Some(s),
294 _ => None,
295 }
296 }
297
298 pub fn as_timestamp(&self) -> Option<TimestampType> {
300 match self {
301 ConcreteDataType::Timestamp(t) => Some(*t),
302 _ => None,
303 }
304 }
305
306 pub fn numeric_precision(&self) -> Option<u8> {
308 match self {
309 ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
310 ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
311 ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
312 ConcreteDataType::Int64(_) => Some(19),
313 ConcreteDataType::UInt64(_) => Some(20),
314 ConcreteDataType::Float32(_) => Some(12),
315 ConcreteDataType::Float64(_) => Some(22),
316 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
317 _ => None,
318 }
319 }
320
321 pub fn numeric_scale(&self) -> Option<i8> {
323 match self {
324 ConcreteDataType::Int8(_)
325 | ConcreteDataType::UInt8(_)
326 | ConcreteDataType::Int16(_)
327 | ConcreteDataType::UInt16(_)
328 | ConcreteDataType::Int32(_)
329 | ConcreteDataType::UInt32(_)
330 | ConcreteDataType::Int64(_)
331 | ConcreteDataType::UInt64(_) => Some(0),
332 ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
333 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
334 _ => None,
335 }
336 }
337
338 pub fn as_time(&self) -> Option<TimeType> {
340 match self {
341 ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
342 ConcreteDataType::Time(t) => Some(*t),
343 _ => None,
344 }
345 }
346
347 pub fn as_decimal128(&self) -> Option<Decimal128Type> {
348 match self {
349 ConcreteDataType::Decimal128(d) => Some(*d),
350 _ => None,
351 }
352 }
353
354 pub fn as_json(&self) -> Option<&JsonType> {
355 match self {
356 ConcreteDataType::Json(j) => Some(j),
357 _ => None,
358 }
359 }
360
361 pub fn as_vector(&self) -> Option<VectorType> {
362 match self {
363 ConcreteDataType::Vector(v) => Some(*v),
364 _ => None,
365 }
366 }
367
368 pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
370 match (self, to_type) {
371 (ConcreteDataType::Json(this), ConcreteDataType::Json(that)) => that.is_include(this),
372 _ => arrow::compute::can_cast_types(&self.as_arrow_type(), &to_type.as_arrow_type()),
373 }
374 }
375
376 pub fn as_duration(&self) -> Option<DurationType> {
378 match self {
379 ConcreteDataType::Duration(d) => Some(*d),
380 _ => None,
381 }
382 }
383
384 pub fn postgres_datatype_name(&self) -> &'static str {
386 match self {
387 &ConcreteDataType::Null(_) => "UNKNOWN",
388 &ConcreteDataType::Boolean(_) => "BOOL",
389 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
390 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
391 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
392 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
393 &ConcreteDataType::Float32(_) => "FLOAT4",
394 &ConcreteDataType::Float64(_) => "FLOAT8",
395 &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
396 &ConcreteDataType::String(_) => "VARCHAR",
397 &ConcreteDataType::Date(_) => "DATE",
398 &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
399 &ConcreteDataType::Time(_) => "TIME",
400 &ConcreteDataType::Interval(_) => "INTERVAL",
401 &ConcreteDataType::Decimal128(_) => "NUMERIC",
402 &ConcreteDataType::Json(_) => "JSON",
403 ConcreteDataType::List(list) => match list.item_type() {
404 &ConcreteDataType::Null(_) => "UNKNOWN",
405 &ConcreteDataType::Boolean(_) => "_BOOL",
406 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
407 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
408 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
409 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
410 &ConcreteDataType::Float32(_) => "_FLOAT4",
411 &ConcreteDataType::Float64(_) => "_FLOAT8",
412 &ConcreteDataType::Binary(_) => "_BYTEA",
413 &ConcreteDataType::String(_) => "_VARCHAR",
414 &ConcreteDataType::Date(_) => "_DATE",
415 &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
416 &ConcreteDataType::Time(_) => "_TIME",
417 &ConcreteDataType::Interval(_) => "_INTERVAL",
418 &ConcreteDataType::Decimal128(_) => "_NUMERIC",
419 &ConcreteDataType::Json(_) => "_JSON",
420 &ConcreteDataType::Duration(_)
421 | &ConcreteDataType::Dictionary(_)
422 | &ConcreteDataType::Vector(_)
423 | &ConcreteDataType::List(_)
424 | &ConcreteDataType::Struct(_) => "UNKNOWN",
425 },
426 &ConcreteDataType::Duration(_)
427 | &ConcreteDataType::Dictionary(_)
428 | &ConcreteDataType::Struct(_) => "UNKNOWN",
429 }
430 }
431}
432
433impl From<&ConcreteDataType> for ConcreteDataType {
434 fn from(t: &ConcreteDataType) -> Self {
435 t.clone()
436 }
437}
438
439impl TryFrom<&ArrowDataType> for ConcreteDataType {
440 type Error = Error;
441
442 fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
443 let concrete_type = match dt {
444 ArrowDataType::Null => Self::null_datatype(),
445 ArrowDataType::Boolean => Self::boolean_datatype(),
446 ArrowDataType::UInt8 => Self::uint8_datatype(),
447 ArrowDataType::UInt16 => Self::uint16_datatype(),
448 ArrowDataType::UInt32 => Self::uint32_datatype(),
449 ArrowDataType::UInt64 => Self::uint64_datatype(),
450 ArrowDataType::Int8 => Self::int8_datatype(),
451 ArrowDataType::Int16 => Self::int16_datatype(),
452 ArrowDataType::Int32 => Self::int32_datatype(),
453 ArrowDataType::Int64 => Self::int64_datatype(),
454 ArrowDataType::Float32 => Self::float32_datatype(),
455 ArrowDataType::Float64 => Self::float64_datatype(),
456 ArrowDataType::Date32 => Self::date_datatype(),
457 ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
458 ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
459 ArrowDataType::Binary | ArrowDataType::LargeBinary => {
460 Self::Binary(BinaryType::binary())
461 }
462 ArrowDataType::BinaryView => Self::Binary(BinaryType::binary_view()),
463 ArrowDataType::Utf8 => Self::String(StringType::utf8()),
464 ArrowDataType::Utf8View => Self::String(StringType::utf8_view()),
465 ArrowDataType::LargeUtf8 => Self::String(StringType::large_utf8()),
466 ArrowDataType::List(field) => Self::List(ListType::new(Arc::new(
467 ConcreteDataType::from_arrow_type(field.data_type()),
468 ))),
469 ArrowDataType::Dictionary(key_type, value_type) => {
470 let key_type = ConcreteDataType::from_arrow_type(key_type);
471 let value_type = ConcreteDataType::from_arrow_type(value_type);
472 Self::Dictionary(DictionaryType::new(key_type, value_type))
473 }
474 ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
475 ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
476 ArrowDataType::Duration(u) => {
477 ConcreteDataType::Duration(DurationType::from_unit(u.into()))
478 }
479 ArrowDataType::Decimal128(precision, scale) => {
480 ConcreteDataType::decimal128_datatype(*precision, *scale)
481 }
482 ArrowDataType::Struct(fields) => ConcreteDataType::Struct(fields.try_into()?),
483 ArrowDataType::Float16
484 | ArrowDataType::Date64
485 | ArrowDataType::FixedSizeBinary(_)
486 | ArrowDataType::ListView(_)
487 | ArrowDataType::FixedSizeList(_, _)
488 | ArrowDataType::LargeList(_)
489 | ArrowDataType::LargeListView(_)
490 | ArrowDataType::Union(_, _)
491 | ArrowDataType::Decimal256(_, _)
492 | ArrowDataType::Map(_, _)
493 | ArrowDataType::RunEndEncoded(_, _)
494 | ArrowDataType::Decimal32(_, _)
495 | ArrowDataType::Decimal64(_, _) => {
496 return error::UnsupportedArrowTypeSnafu {
497 arrow_type: dt.clone(),
498 }
499 .fail();
500 }
501 };
502
503 Ok(concrete_type)
504 }
505}
506
507macro_rules! impl_new_concrete_type_functions {
508 ($($Type: ident), +) => {
509 paste! {
510 impl ConcreteDataType {
511 $(
512 pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
513 ConcreteDataType::$Type([<$Type Type>]::default())
514 }
515 )+
516 }
517 }
518 }
519}
520
521impl_new_concrete_type_functions!(
522 Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
523 Binary, Date, String, Json
524);
525
526impl ConcreteDataType {
527 pub fn large_string_datatype() -> Self {
528 ConcreteDataType::String(StringType::large_utf8())
529 }
530
531 pub fn utf8_view_datatype() -> Self {
532 ConcreteDataType::String(StringType::utf8_view())
533 }
534
535 pub fn binary_view_datatype() -> Self {
536 ConcreteDataType::Binary(BinaryType::binary_view())
537 }
538
539 pub fn timestamp_second_datatype() -> Self {
540 ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
541 }
542
543 pub fn timestamp_millisecond_datatype() -> Self {
544 ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
545 }
546
547 pub fn timestamp_microsecond_datatype() -> Self {
548 ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
549 }
550
551 pub fn timestamp_nanosecond_datatype() -> Self {
552 ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
553 }
554
555 pub fn time_datatype(unit: TimeUnit) -> Self {
557 ConcreteDataType::Time(TimeType::from_unit(unit))
558 }
559
560 pub fn time_second_datatype() -> Self {
562 Self::time_datatype(TimeUnit::Second)
563 }
564
565 pub fn time_millisecond_datatype() -> Self {
567 Self::time_datatype(TimeUnit::Millisecond)
568 }
569
570 pub fn time_microsecond_datatype() -> Self {
572 Self::time_datatype(TimeUnit::Microsecond)
573 }
574
575 pub fn time_nanosecond_datatype() -> Self {
577 Self::time_datatype(TimeUnit::Nanosecond)
578 }
579
580 pub fn duration_second_datatype() -> Self {
582 ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
583 }
584
585 pub fn duration_millisecond_datatype() -> Self {
587 ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
588 }
589
590 pub fn duration_microsecond_datatype() -> Self {
592 ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
593 }
594
595 pub fn duration_nanosecond_datatype() -> Self {
597 ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
598 }
599
600 pub fn interval_month_day_nano_datatype() -> Self {
602 ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
603 }
604
605 pub fn interval_year_month_datatype() -> Self {
607 ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
608 }
609
610 pub fn interval_day_time_datatype() -> Self {
612 ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
613 }
614
615 pub fn timestamp_datatype(unit: TimeUnit) -> Self {
616 match unit {
617 TimeUnit::Second => Self::timestamp_second_datatype(),
618 TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
619 TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
620 TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
621 }
622 }
623
624 pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
626 match t {
627 ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
628 ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
629 ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
630 ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
631 }
632 }
633
634 pub fn duration_datatype(unit: TimeUnit) -> Self {
635 match unit {
636 TimeUnit::Second => Self::duration_second_datatype(),
637 TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
638 TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
639 TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
640 }
641 }
642
643 pub fn interval_datatype(unit: IntervalUnit) -> Self {
644 match unit {
645 IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
646 IntervalUnit::DayTime => Self::interval_day_time_datatype(),
647 IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
648 }
649 }
650
651 pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
652 match u {
653 ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
654 ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
655 ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
656 }
657 }
658
659 pub fn list_datatype(item_type: Arc<ConcreteDataType>) -> ConcreteDataType {
660 ConcreteDataType::List(ListType::new(item_type))
661 }
662
663 pub fn struct_datatype(fields: StructType) -> ConcreteDataType {
664 ConcreteDataType::Struct(fields)
665 }
666
667 pub fn dictionary_datatype(
668 key_type: ConcreteDataType,
669 value_type: ConcreteDataType,
670 ) -> ConcreteDataType {
671 ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
672 }
673
674 pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
675 ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
676 }
677
678 pub fn decimal128_default_datatype() -> ConcreteDataType {
679 Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
680 }
681
682 pub fn vector_datatype(dim: u32) -> ConcreteDataType {
683 ConcreteDataType::Vector(VectorType::new(dim))
684 }
685
686 pub fn vector_default_datatype() -> ConcreteDataType {
687 Self::vector_datatype(0)
688 }
689
690 pub fn json_native_datatype(inner_type: ConcreteDataType) -> ConcreteDataType {
691 ConcreteDataType::Json(JsonType::new_native((&inner_type).into()))
692 }
693}
694
695#[enum_dispatch::enum_dispatch]
697pub trait DataType: std::fmt::Debug + Send + Sync {
698 fn name(&self) -> String;
700
701 fn logical_type_id(&self) -> LogicalTypeId;
703
704 fn default_value(&self) -> Value;
706
707 fn as_arrow_type(&self) -> ArrowDataType;
709
710 fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
712
713 fn try_cast(&self, from: Value) -> Option<Value>;
716}
717
718pub type DataTypeRef = Arc<dyn DataType>;
719
720#[cfg(test)]
721mod tests {
722 use arrow::datatypes::Field;
723
724 use super::*;
725
726 #[test]
727 fn test_concrete_type_as_datatype_trait() {
728 let concrete_type = ConcreteDataType::boolean_datatype();
729
730 assert_eq!("Boolean", concrete_type.to_string());
731 assert_eq!(Value::Boolean(false), concrete_type.default_value());
732 assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
733 assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
734 }
735
736 #[test]
737 fn test_from_arrow_type() {
738 assert!(matches!(
739 ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
740 ConcreteDataType::Null(_)
741 ));
742 assert!(matches!(
743 ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
744 ConcreteDataType::Boolean(_)
745 ));
746 assert!(matches!(
747 ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
748 ConcreteDataType::Binary(_)
749 ));
750 assert!(matches!(
751 ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
752 ConcreteDataType::Binary(_)
753 ));
754 assert!(matches!(
755 ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
756 ConcreteDataType::Int8(_)
757 ));
758 assert!(matches!(
759 ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
760 ConcreteDataType::Int16(_)
761 ));
762 assert!(matches!(
763 ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
764 ConcreteDataType::Int32(_)
765 ));
766 assert!(matches!(
767 ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
768 ConcreteDataType::Int64(_)
769 ));
770 assert!(matches!(
771 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
772 ConcreteDataType::UInt8(_)
773 ));
774 assert!(matches!(
775 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
776 ConcreteDataType::UInt16(_)
777 ));
778 assert!(matches!(
779 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
780 ConcreteDataType::UInt32(_)
781 ));
782 assert!(matches!(
783 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
784 ConcreteDataType::UInt64(_)
785 ));
786 assert!(matches!(
787 ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
788 ConcreteDataType::Float32(_)
789 ));
790 assert!(matches!(
791 ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
792 ConcreteDataType::Float64(_)
793 ));
794 assert!(matches!(
795 ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
796 ConcreteDataType::String(_)
797 ));
798 let utf8_view_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8View);
799 assert!(matches!(utf8_view_string_type, ConcreteDataType::String(_)));
800 assert_eq!(
801 ArrowDataType::Utf8View,
802 utf8_view_string_type.as_arrow_type()
803 );
804 let large_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
806 assert!(matches!(large_string_type, ConcreteDataType::String(_)));
807 if let ConcreteDataType::String(string_type) = &large_string_type {
808 assert!(string_type.is_large());
809 } else {
810 panic!("Expected a String type");
811 }
812 assert_eq!(
813 ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
814 "item",
815 ArrowDataType::Int32,
816 true,
817 )))),
818 ConcreteDataType::List(ListType::new(Arc::new(ConcreteDataType::int32_datatype())))
819 );
820 assert!(matches!(
821 ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
822 ConcreteDataType::Date(_)
823 ));
824 }
825
826 #[test]
827 fn test_view_round_trip() {
828 let utf8_view_arrow = ArrowDataType::Utf8View;
829 let concrete_type = ConcreteDataType::from_arrow_type(&utf8_view_arrow);
830 let back_to_arrow = concrete_type.as_arrow_type();
831 assert_eq!(utf8_view_arrow, back_to_arrow);
832
833 let binary_view_arrow = ArrowDataType::BinaryView;
834 let concrete_type = ConcreteDataType::from_arrow_type(&binary_view_arrow);
835 let back_to_arrow = concrete_type.as_arrow_type();
836 assert_eq!(binary_view_arrow, back_to_arrow);
837 }
838
839 #[test]
840 fn test_large_utf8_round_trip() {
841 let large_utf8_arrow = ArrowDataType::LargeUtf8;
843 let concrete_type = ConcreteDataType::from_arrow_type(&large_utf8_arrow);
844 let back_to_arrow = concrete_type.as_arrow_type();
845
846 assert!(matches!(concrete_type, ConcreteDataType::String(_)));
847 assert_eq!(large_utf8_arrow, back_to_arrow);
849
850 let utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8);
852 let large_utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
853
854 assert!(matches!(utf8_concrete, ConcreteDataType::String(_)));
855 assert!(matches!(large_utf8_concrete, ConcreteDataType::String(_)));
856
857 if let (ConcreteDataType::String(utf8_type), ConcreteDataType::String(large_type)) =
859 (&utf8_concrete, &large_utf8_concrete)
860 {
861 assert!(!utf8_type.is_large());
862 assert!(large_type.is_large());
863 } else {
864 panic!("Expected both to be String types");
865 }
866
867 let view_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8View);
869 assert_ne!(utf8_concrete, view_concrete);
870 assert_ne!(large_utf8_concrete, view_concrete);
871
872 assert_ne!(utf8_concrete, large_utf8_concrete);
874 }
875
876 #[test]
877 fn test_from_arrow_timestamp() {
878 assert_eq!(
879 ConcreteDataType::timestamp_millisecond_datatype(),
880 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
881 );
882 assert_eq!(
883 ConcreteDataType::timestamp_microsecond_datatype(),
884 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
885 );
886 assert_eq!(
887 ConcreteDataType::timestamp_nanosecond_datatype(),
888 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
889 );
890 assert_eq!(
891 ConcreteDataType::timestamp_second_datatype(),
892 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
893 );
894 }
895
896 #[test]
897 fn test_is_null() {
898 assert!(ConcreteDataType::null_datatype().is_null());
899 assert!(!ConcreteDataType::int32_datatype().is_null());
900 }
901
902 #[test]
903 fn test_is_float() {
904 assert!(!ConcreteDataType::int32_datatype().is_float());
905 assert!(ConcreteDataType::float32_datatype().is_float());
906 assert!(ConcreteDataType::float64_datatype().is_float());
907 }
908
909 #[test]
910 fn test_is_boolean() {
911 assert!(!ConcreteDataType::int32_datatype().is_boolean());
912 assert!(!ConcreteDataType::float32_datatype().is_boolean());
913 assert!(ConcreteDataType::boolean_datatype().is_boolean());
914 }
915
916 #[test]
917 fn test_is_decimal() {
918 assert!(!ConcreteDataType::int32_datatype().is_decimal());
919 assert!(!ConcreteDataType::float32_datatype().is_decimal());
920 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_decimal());
921 assert!(ConcreteDataType::decimal128_datatype(18, 6).is_decimal());
922 }
923
924 #[test]
925 fn test_is_stringifiable() {
926 assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
927 assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
928 assert!(ConcreteDataType::string_datatype().is_stringifiable());
929 assert!(ConcreteDataType::binary_datatype().is_stringifiable());
930 assert!(ConcreteDataType::date_datatype().is_stringifiable());
931 assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
932 assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
933 assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
934 assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
935 assert!(ConcreteDataType::time_second_datatype().is_stringifiable());
936 assert!(ConcreteDataType::time_millisecond_datatype().is_stringifiable());
937 assert!(ConcreteDataType::time_microsecond_datatype().is_stringifiable());
938 assert!(ConcreteDataType::time_nanosecond_datatype().is_stringifiable());
939
940 assert!(ConcreteDataType::interval_year_month_datatype().is_stringifiable());
941 assert!(ConcreteDataType::interval_day_time_datatype().is_stringifiable());
942 assert!(ConcreteDataType::interval_month_day_nano_datatype().is_stringifiable());
943
944 assert!(ConcreteDataType::duration_second_datatype().is_stringifiable());
945 assert!(ConcreteDataType::duration_millisecond_datatype().is_stringifiable());
946 assert!(ConcreteDataType::duration_microsecond_datatype().is_stringifiable());
947 assert!(ConcreteDataType::duration_nanosecond_datatype().is_stringifiable());
948 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_stringifiable());
949 assert!(ConcreteDataType::vector_default_datatype().is_stringifiable());
950 }
951
952 #[test]
953 fn test_is_signed() {
954 assert!(ConcreteDataType::int8_datatype().is_signed());
955 assert!(ConcreteDataType::int16_datatype().is_signed());
956 assert!(ConcreteDataType::int32_datatype().is_signed());
957 assert!(ConcreteDataType::int64_datatype().is_signed());
958 assert!(ConcreteDataType::date_datatype().is_signed());
959 assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
960 assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
961 assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
962 assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
963 assert!(ConcreteDataType::time_second_datatype().is_signed());
964 assert!(ConcreteDataType::time_millisecond_datatype().is_signed());
965 assert!(ConcreteDataType::time_microsecond_datatype().is_signed());
966 assert!(ConcreteDataType::time_nanosecond_datatype().is_signed());
967 assert!(ConcreteDataType::interval_year_month_datatype().is_signed());
968 assert!(ConcreteDataType::interval_day_time_datatype().is_signed());
969 assert!(ConcreteDataType::interval_month_day_nano_datatype().is_signed());
970 assert!(ConcreteDataType::duration_second_datatype().is_signed());
971 assert!(ConcreteDataType::duration_millisecond_datatype().is_signed());
972 assert!(ConcreteDataType::duration_microsecond_datatype().is_signed());
973 assert!(ConcreteDataType::duration_nanosecond_datatype().is_signed());
974
975 assert!(!ConcreteDataType::uint8_datatype().is_signed());
976 assert!(!ConcreteDataType::uint16_datatype().is_signed());
977 assert!(!ConcreteDataType::uint32_datatype().is_signed());
978 assert!(!ConcreteDataType::uint64_datatype().is_signed());
979
980 assert!(!ConcreteDataType::float32_datatype().is_signed());
981 assert!(!ConcreteDataType::float64_datatype().is_signed());
982
983 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_signed());
984 }
985
986 #[test]
987 fn test_is_unsigned() {
988 assert!(!ConcreteDataType::int8_datatype().is_unsigned());
989 assert!(!ConcreteDataType::int16_datatype().is_unsigned());
990 assert!(!ConcreteDataType::int32_datatype().is_unsigned());
991 assert!(!ConcreteDataType::int64_datatype().is_unsigned());
992 assert!(!ConcreteDataType::date_datatype().is_unsigned());
993 assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
994 assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
995 assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
996 assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
997 assert!(!ConcreteDataType::time_second_datatype().is_unsigned());
998 assert!(!ConcreteDataType::time_millisecond_datatype().is_unsigned());
999 assert!(!ConcreteDataType::time_microsecond_datatype().is_unsigned());
1000 assert!(!ConcreteDataType::time_nanosecond_datatype().is_unsigned());
1001 assert!(!ConcreteDataType::interval_year_month_datatype().is_unsigned());
1002 assert!(!ConcreteDataType::interval_day_time_datatype().is_unsigned());
1003 assert!(!ConcreteDataType::interval_month_day_nano_datatype().is_unsigned());
1004 assert!(!ConcreteDataType::duration_second_datatype().is_unsigned());
1005 assert!(!ConcreteDataType::duration_millisecond_datatype().is_unsigned());
1006 assert!(!ConcreteDataType::duration_microsecond_datatype().is_unsigned());
1007 assert!(!ConcreteDataType::duration_nanosecond_datatype().is_unsigned());
1008 assert!(!ConcreteDataType::decimal128_datatype(10, 2).is_unsigned());
1009
1010 assert!(ConcreteDataType::uint8_datatype().is_unsigned());
1011 assert!(ConcreteDataType::uint16_datatype().is_unsigned());
1012 assert!(ConcreteDataType::uint32_datatype().is_unsigned());
1013 assert!(ConcreteDataType::uint64_datatype().is_unsigned());
1014
1015 assert!(!ConcreteDataType::float32_datatype().is_unsigned());
1016 assert!(!ConcreteDataType::float64_datatype().is_unsigned());
1017 }
1018
1019 #[test]
1020 fn test_numerics() {
1021 let nums = ConcreteDataType::numerics();
1022 assert_eq!(10, nums.len());
1023 }
1024
1025 #[test]
1026 fn test_as_list() {
1027 let list_type =
1028 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()));
1029 assert_eq!(
1030 ListType::new(Arc::new(ConcreteDataType::int32_datatype())),
1031 *list_type.as_list().unwrap()
1032 );
1033 assert!(ConcreteDataType::int32_datatype().as_list().is_none());
1034 }
1035
1036 #[test]
1037 fn test_display_concrete_data_type() {
1038 assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
1039 assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
1040 assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
1041 assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
1042 assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
1043 assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
1044 assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
1045 assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
1046 assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
1047 assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
1048 assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
1049 assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
1050 assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
1051 assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
1052 assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
1053 assert_eq!(
1054 ConcreteDataType::timestamp_millisecond_datatype().to_string(),
1055 "TimestampMillisecond"
1056 );
1057 assert_eq!(
1058 ConcreteDataType::time_millisecond_datatype().to_string(),
1059 "TimeMillisecond"
1060 );
1061 assert_eq!(
1062 ConcreteDataType::interval_month_day_nano_datatype().to_string(),
1063 "IntervalMonthDayNano"
1064 );
1065 assert_eq!(
1066 ConcreteDataType::duration_second_datatype().to_string(),
1067 "DurationSecond"
1068 );
1069 assert_eq!(
1070 ConcreteDataType::decimal128_datatype(10, 2).to_string(),
1071 "Decimal(10, 2)"
1072 );
1073 assert_eq!(
1075 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1076 .to_string(),
1077 "List<Int32>"
1078 );
1079 assert_eq!(
1080 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::Dictionary(
1081 DictionaryType::new(
1082 ConcreteDataType::int32_datatype(),
1083 ConcreteDataType::string_datatype()
1084 )
1085 )))
1086 .to_string(),
1087 "List<Dictionary<Int32, String>>"
1088 );
1089 assert_eq!(
1090 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(Arc::new(
1091 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1092 ))))
1093 .to_string(),
1094 "List<List<List<Int32>>>"
1095 );
1096 assert_eq!(
1097 ConcreteDataType::dictionary_datatype(
1098 ConcreteDataType::int32_datatype(),
1099 ConcreteDataType::string_datatype()
1100 )
1101 .to_string(),
1102 "Dictionary<Int32, String>"
1103 );
1104 assert_eq!(
1105 ConcreteDataType::vector_datatype(3).to_string(),
1106 "Vector(3)"
1107 );
1108 }
1109}