1use std::fmt;
16use std::sync::Arc;
17
18use arrow::datatypes::{
19 DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
20};
21use arrow_schema::DECIMAL_DEFAULT_SCALE;
22use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
23use common_time::interval::IntervalUnit;
24use common_time::timestamp::TimeUnit;
25use enum_dispatch::enum_dispatch;
26use paste::paste;
27use serde::{Deserialize, Serialize};
28
29use crate::error::{self, Error, Result};
30use crate::type_id::LogicalTypeId;
31use crate::types::{
32 BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
33 DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
34 Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType,
35 IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
36 StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
37 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
38 UInt8Type, UInt16Type, UInt32Type, UInt64Type, VectorType,
39};
40use crate::value::Value;
41use crate::vectors::MutableVector;
42
/// Enumerates the concrete data types, one variant per type family.
///
/// Each variant wraps the type struct (or nested enum, e.g. [`TimestampType`]) that
/// describes it. The `#[enum_dispatch(DataType)]` attribute generates the [`DataType`]
/// implementation by forwarding each trait method to the wrapped value.
// NOTE: `PartialOrd`/`Ord` are derived, so comparisons between different variants follow
// the declaration order below; reordering variants changes ordering results.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    Null(NullType),
    Boolean(BooleanType),

    // Numeric types (the set reported by `is_numeric`):
    Int8(Int8Type),
    Int16(Int16Type),
    Int32(Int32Type),
    Int64(Int64Type),
    UInt8(UInt8Type),
    UInt16(UInt16Type),
    UInt32(UInt32Type),
    UInt64(UInt64Type),
    Float32(Float32Type),
    Float64(Float64Type),

    // Decimal type, parameterised by precision and scale:
    Decimal128(Decimal128Type),

    // Binary and string types:
    Binary(BinaryType),
    String(StringType),

    // Date and time types:
    Date(DateType),
    Timestamp(TimestampType),
    Time(TimeType),

    // Duration types:
    Duration(DurationType),

    // Interval types:
    Interval(IntervalType),

    // Compound types:
    List(ListType),
    Dictionary(DictionaryType),
    Struct(StructType),

    // JSON type:
    Json(JsonType),

    // Vector type, parameterised by its dimension:
    Vector(VectorType),
}
90
91impl fmt::Display for ConcreteDataType {
92 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93 match self {
94 ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
95 ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
96 ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
97 ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
98 ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
99 ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
100 ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
101 ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
102 ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
103 ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
104 ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
105 ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
106 ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
107 ConcreteDataType::String(v) => write!(f, "{}", v.name()),
108 ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
109 ConcreteDataType::Timestamp(t) => match t {
110 TimestampType::Second(v) => write!(f, "{}", v.name()),
111 TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
112 TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
113 TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
114 },
115 ConcreteDataType::Time(t) => match t {
116 TimeType::Second(v) => write!(f, "{}", v.name()),
117 TimeType::Millisecond(v) => write!(f, "{}", v.name()),
118 TimeType::Microsecond(v) => write!(f, "{}", v.name()),
119 TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
120 },
121 ConcreteDataType::Interval(i) => match i {
122 IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
123 IntervalType::DayTime(v) => write!(f, "{}", v.name()),
124 IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
125 },
126 ConcreteDataType::Duration(d) => match d {
127 DurationType::Second(v) => write!(f, "{}", v.name()),
128 DurationType::Millisecond(v) => write!(f, "{}", v.name()),
129 DurationType::Microsecond(v) => write!(f, "{}", v.name()),
130 DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
131 },
132 ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
133 ConcreteDataType::List(v) => write!(f, "{}", v.name()),
134 ConcreteDataType::Struct(v) => write!(f, "{}", v.name()),
135 ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
136 ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
137 ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
138 }
139 }
140}
141
142impl ConcreteDataType {
145 pub fn is_float(&self) -> bool {
146 matches!(
147 self,
148 ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
149 )
150 }
151
152 pub fn is_boolean(&self) -> bool {
153 matches!(self, ConcreteDataType::Boolean(_))
154 }
155
156 pub fn is_string(&self) -> bool {
157 matches!(self, ConcreteDataType::String(_))
158 }
159
160 pub fn is_stringifiable(&self) -> bool {
161 matches!(
162 self,
163 ConcreteDataType::String(_)
164 | ConcreteDataType::Date(_)
165 | ConcreteDataType::Timestamp(_)
166 | ConcreteDataType::Time(_)
167 | ConcreteDataType::Interval(_)
168 | ConcreteDataType::Duration(_)
169 | ConcreteDataType::Decimal128(_)
170 | ConcreteDataType::Binary(_)
171 | ConcreteDataType::Json(_)
172 | ConcreteDataType::Vector(_)
173 )
174 }
175
176 pub fn is_signed(&self) -> bool {
177 matches!(
178 self,
179 ConcreteDataType::Int8(_)
180 | ConcreteDataType::Int16(_)
181 | ConcreteDataType::Int32(_)
182 | ConcreteDataType::Int64(_)
183 | ConcreteDataType::Date(_)
184 | ConcreteDataType::Timestamp(_)
185 | ConcreteDataType::Time(_)
186 | ConcreteDataType::Interval(_)
187 | ConcreteDataType::Duration(_)
188 | ConcreteDataType::Decimal128(_)
189 )
190 }
191
192 pub fn is_unsigned(&self) -> bool {
193 matches!(
194 self,
195 ConcreteDataType::UInt8(_)
196 | ConcreteDataType::UInt16(_)
197 | ConcreteDataType::UInt32(_)
198 | ConcreteDataType::UInt64(_)
199 )
200 }
201
202 pub fn is_numeric(&self) -> bool {
203 matches!(
204 self,
205 ConcreteDataType::Int8(_)
206 | ConcreteDataType::Int16(_)
207 | ConcreteDataType::Int32(_)
208 | ConcreteDataType::Int64(_)
209 | ConcreteDataType::UInt8(_)
210 | ConcreteDataType::UInt16(_)
211 | ConcreteDataType::UInt32(_)
212 | ConcreteDataType::UInt64(_)
213 | ConcreteDataType::Float32(_)
214 | ConcreteDataType::Float64(_)
215 )
216 }
217
218 pub fn is_timestamp(&self) -> bool {
219 matches!(self, ConcreteDataType::Timestamp(_))
220 }
221
222 pub fn is_decimal(&self) -> bool {
223 matches!(self, ConcreteDataType::Decimal128(_))
224 }
225
226 pub fn is_json(&self) -> bool {
227 matches!(self, ConcreteDataType::Json(_))
228 }
229
230 pub fn is_vector(&self) -> bool {
231 matches!(self, ConcreteDataType::Vector(_))
232 }
233
234 pub fn numerics() -> Vec<ConcreteDataType> {
235 vec![
236 ConcreteDataType::int8_datatype(),
237 ConcreteDataType::int16_datatype(),
238 ConcreteDataType::int32_datatype(),
239 ConcreteDataType::int64_datatype(),
240 ConcreteDataType::uint8_datatype(),
241 ConcreteDataType::uint16_datatype(),
242 ConcreteDataType::uint32_datatype(),
243 ConcreteDataType::uint64_datatype(),
244 ConcreteDataType::float32_datatype(),
245 ConcreteDataType::float64_datatype(),
246 ]
247 }
248
249 pub fn unsigned_integers() -> Vec<ConcreteDataType> {
250 vec![
251 ConcreteDataType::uint8_datatype(),
252 ConcreteDataType::uint16_datatype(),
253 ConcreteDataType::uint32_datatype(),
254 ConcreteDataType::uint64_datatype(),
255 ]
256 }
257
258 pub fn timestamps() -> Vec<ConcreteDataType> {
259 vec![
260 ConcreteDataType::timestamp_second_datatype(),
261 ConcreteDataType::timestamp_millisecond_datatype(),
262 ConcreteDataType::timestamp_microsecond_datatype(),
263 ConcreteDataType::timestamp_nanosecond_datatype(),
264 ]
265 }
266
267 pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
272 ConcreteDataType::try_from(dt).expect("Unimplemented type")
273 }
274
275 pub fn is_null(&self) -> bool {
276 matches!(self, ConcreteDataType::Null(NullType))
277 }
278
279 pub fn as_list(&self) -> Option<&ListType> {
281 match self {
282 ConcreteDataType::List(t) => Some(t),
283 _ => None,
284 }
285 }
286
287 pub fn as_struct(&self) -> Option<&StructType> {
288 match self {
289 ConcreteDataType::Struct(s) => Some(s),
290 _ => None,
291 }
292 }
293
294 pub fn as_timestamp(&self) -> Option<TimestampType> {
296 match self {
297 ConcreteDataType::Timestamp(t) => Some(*t),
298 _ => None,
299 }
300 }
301
302 pub fn numeric_precision(&self) -> Option<u8> {
304 match self {
305 ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
306 ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
307 ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
308 ConcreteDataType::Int64(_) => Some(19),
309 ConcreteDataType::UInt64(_) => Some(20),
310 ConcreteDataType::Float32(_) => Some(12),
311 ConcreteDataType::Float64(_) => Some(22),
312 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
313 _ => None,
314 }
315 }
316
317 pub fn numeric_scale(&self) -> Option<i8> {
319 match self {
320 ConcreteDataType::Int8(_)
321 | ConcreteDataType::UInt8(_)
322 | ConcreteDataType::Int16(_)
323 | ConcreteDataType::UInt16(_)
324 | ConcreteDataType::Int32(_)
325 | ConcreteDataType::UInt32(_)
326 | ConcreteDataType::Int64(_)
327 | ConcreteDataType::UInt64(_) => Some(0),
328 ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
329 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
330 _ => None,
331 }
332 }
333
334 pub fn as_time(&self) -> Option<TimeType> {
336 match self {
337 ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
338 ConcreteDataType::Time(t) => Some(*t),
339 _ => None,
340 }
341 }
342
343 pub fn as_decimal128(&self) -> Option<Decimal128Type> {
344 match self {
345 ConcreteDataType::Decimal128(d) => Some(*d),
346 _ => None,
347 }
348 }
349
350 pub fn as_json(&self) -> Option<&JsonType> {
351 match self {
352 ConcreteDataType::Json(j) => Some(j),
353 _ => None,
354 }
355 }
356
357 pub fn as_vector(&self) -> Option<VectorType> {
358 match self {
359 ConcreteDataType::Vector(v) => Some(*v),
360 _ => None,
361 }
362 }
363
364 pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
366 match (self, to_type) {
367 (ConcreteDataType::Json(this), ConcreteDataType::Json(that)) => that.is_include(this),
368 _ => arrow::compute::can_cast_types(&self.as_arrow_type(), &to_type.as_arrow_type()),
369 }
370 }
371
372 pub fn as_duration(&self) -> Option<DurationType> {
374 match self {
375 ConcreteDataType::Duration(d) => Some(*d),
376 _ => None,
377 }
378 }
379
380 pub fn postgres_datatype_name(&self) -> &'static str {
382 match self {
383 &ConcreteDataType::Null(_) => "UNKNOWN",
384 &ConcreteDataType::Boolean(_) => "BOOL",
385 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
386 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
387 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
388 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
389 &ConcreteDataType::Float32(_) => "FLOAT4",
390 &ConcreteDataType::Float64(_) => "FLOAT8",
391 &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
392 &ConcreteDataType::String(_) => "VARCHAR",
393 &ConcreteDataType::Date(_) => "DATE",
394 &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
395 &ConcreteDataType::Time(_) => "TIME",
396 &ConcreteDataType::Interval(_) => "INTERVAL",
397 &ConcreteDataType::Decimal128(_) => "NUMERIC",
398 &ConcreteDataType::Json(_) => "JSON",
399 ConcreteDataType::List(list) => match list.item_type() {
400 &ConcreteDataType::Null(_) => "UNKNOWN",
401 &ConcreteDataType::Boolean(_) => "_BOOL",
402 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
403 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
404 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
405 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
406 &ConcreteDataType::Float32(_) => "_FLOAT4",
407 &ConcreteDataType::Float64(_) => "_FLOAT8",
408 &ConcreteDataType::Binary(_) => "_BYTEA",
409 &ConcreteDataType::String(_) => "_VARCHAR",
410 &ConcreteDataType::Date(_) => "_DATE",
411 &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
412 &ConcreteDataType::Time(_) => "_TIME",
413 &ConcreteDataType::Interval(_) => "_INTERVAL",
414 &ConcreteDataType::Decimal128(_) => "_NUMERIC",
415 &ConcreteDataType::Json(_) => "_JSON",
416 &ConcreteDataType::Duration(_)
417 | &ConcreteDataType::Dictionary(_)
418 | &ConcreteDataType::Vector(_)
419 | &ConcreteDataType::List(_)
420 | &ConcreteDataType::Struct(_) => "UNKNOWN",
421 },
422 &ConcreteDataType::Duration(_)
423 | &ConcreteDataType::Dictionary(_)
424 | &ConcreteDataType::Struct(_) => "UNKNOWN",
425 }
426 }
427}
428
429impl From<&ConcreteDataType> for ConcreteDataType {
430 fn from(t: &ConcreteDataType) -> Self {
431 t.clone()
432 }
433}
434
435impl TryFrom<&ArrowDataType> for ConcreteDataType {
436 type Error = Error;
437
438 fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
439 let concrete_type = match dt {
440 ArrowDataType::Null => Self::null_datatype(),
441 ArrowDataType::Boolean => Self::boolean_datatype(),
442 ArrowDataType::UInt8 => Self::uint8_datatype(),
443 ArrowDataType::UInt16 => Self::uint16_datatype(),
444 ArrowDataType::UInt32 => Self::uint32_datatype(),
445 ArrowDataType::UInt64 => Self::uint64_datatype(),
446 ArrowDataType::Int8 => Self::int8_datatype(),
447 ArrowDataType::Int16 => Self::int16_datatype(),
448 ArrowDataType::Int32 => Self::int32_datatype(),
449 ArrowDataType::Int64 => Self::int64_datatype(),
450 ArrowDataType::Float32 => Self::float32_datatype(),
451 ArrowDataType::Float64 => Self::float64_datatype(),
452 ArrowDataType::Date32 => Self::date_datatype(),
453 ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
454 ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
455 ArrowDataType::Binary | ArrowDataType::LargeBinary => {
456 Self::Binary(BinaryType::binary())
457 }
458 ArrowDataType::BinaryView => Self::Binary(BinaryType::binary_view()),
459 ArrowDataType::Utf8 => Self::String(StringType::utf8()),
460 ArrowDataType::Utf8View => Self::String(StringType::utf8_view()),
461 ArrowDataType::LargeUtf8 => Self::String(StringType::large_utf8()),
462 ArrowDataType::List(field) => Self::List(ListType::new(Arc::new(
463 ConcreteDataType::from_arrow_type(field.data_type()),
464 ))),
465 ArrowDataType::Dictionary(key_type, value_type) => {
466 let key_type = ConcreteDataType::from_arrow_type(key_type);
467 let value_type = ConcreteDataType::from_arrow_type(value_type);
468 Self::Dictionary(DictionaryType::new(key_type, value_type))
469 }
470 ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
471 ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
472 ArrowDataType::Duration(u) => {
473 ConcreteDataType::Duration(DurationType::from_unit(u.into()))
474 }
475 ArrowDataType::Decimal128(precision, scale) => {
476 ConcreteDataType::decimal128_datatype(*precision, *scale)
477 }
478 ArrowDataType::Struct(fields) => ConcreteDataType::Struct(fields.try_into()?),
479 ArrowDataType::Float16
480 | ArrowDataType::Date64
481 | ArrowDataType::FixedSizeBinary(_)
482 | ArrowDataType::ListView(_)
483 | ArrowDataType::FixedSizeList(_, _)
484 | ArrowDataType::LargeList(_)
485 | ArrowDataType::LargeListView(_)
486 | ArrowDataType::Union(_, _)
487 | ArrowDataType::Decimal256(_, _)
488 | ArrowDataType::Map(_, _)
489 | ArrowDataType::RunEndEncoded(_, _)
490 | ArrowDataType::Decimal32(_, _)
491 | ArrowDataType::Decimal64(_, _) => {
492 return error::UnsupportedArrowTypeSnafu {
493 arrow_type: dt.clone(),
494 }
495 .fail();
496 }
497 };
498
499 Ok(concrete_type)
500 }
501}
502
503macro_rules! impl_new_concrete_type_functions {
504 ($($Type: ident), +) => {
505 paste! {
506 impl ConcreteDataType {
507 $(
508 pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
509 ConcreteDataType::$Type([<$Type Type>]::default())
510 }
511 )+
512 }
513 }
514 }
515}
516
517impl_new_concrete_type_functions!(
518 Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
519 Binary, Date, String, Json
520);
521
522impl ConcreteDataType {
523 pub fn large_string_datatype() -> Self {
524 ConcreteDataType::String(StringType::large_utf8())
525 }
526
527 pub fn utf8_view_datatype() -> Self {
528 ConcreteDataType::String(StringType::utf8_view())
529 }
530
531 pub fn binary_view_datatype() -> Self {
532 ConcreteDataType::Binary(BinaryType::binary_view())
533 }
534
535 pub fn timestamp_second_datatype() -> Self {
536 ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
537 }
538
539 pub fn timestamp_millisecond_datatype() -> Self {
540 ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
541 }
542
543 pub fn timestamp_microsecond_datatype() -> Self {
544 ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
545 }
546
547 pub fn timestamp_nanosecond_datatype() -> Self {
548 ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
549 }
550
551 pub fn time_datatype(unit: TimeUnit) -> Self {
553 ConcreteDataType::Time(TimeType::from_unit(unit))
554 }
555
556 pub fn time_second_datatype() -> Self {
558 Self::time_datatype(TimeUnit::Second)
559 }
560
561 pub fn time_millisecond_datatype() -> Self {
563 Self::time_datatype(TimeUnit::Millisecond)
564 }
565
566 pub fn time_microsecond_datatype() -> Self {
568 Self::time_datatype(TimeUnit::Microsecond)
569 }
570
571 pub fn time_nanosecond_datatype() -> Self {
573 Self::time_datatype(TimeUnit::Nanosecond)
574 }
575
576 pub fn duration_second_datatype() -> Self {
578 ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
579 }
580
581 pub fn duration_millisecond_datatype() -> Self {
583 ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
584 }
585
586 pub fn duration_microsecond_datatype() -> Self {
588 ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
589 }
590
591 pub fn duration_nanosecond_datatype() -> Self {
593 ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
594 }
595
596 pub fn interval_month_day_nano_datatype() -> Self {
598 ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
599 }
600
601 pub fn interval_year_month_datatype() -> Self {
603 ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
604 }
605
606 pub fn interval_day_time_datatype() -> Self {
608 ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
609 }
610
611 pub fn timestamp_datatype(unit: TimeUnit) -> Self {
612 match unit {
613 TimeUnit::Second => Self::timestamp_second_datatype(),
614 TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
615 TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
616 TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
617 }
618 }
619
620 pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
622 match t {
623 ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
624 ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
625 ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
626 ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
627 }
628 }
629
630 pub fn duration_datatype(unit: TimeUnit) -> Self {
631 match unit {
632 TimeUnit::Second => Self::duration_second_datatype(),
633 TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
634 TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
635 TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
636 }
637 }
638
639 pub fn interval_datatype(unit: IntervalUnit) -> Self {
640 match unit {
641 IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
642 IntervalUnit::DayTime => Self::interval_day_time_datatype(),
643 IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
644 }
645 }
646
647 pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
648 match u {
649 ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
650 ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
651 ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
652 }
653 }
654
655 pub fn list_datatype(item_type: Arc<ConcreteDataType>) -> ConcreteDataType {
656 ConcreteDataType::List(ListType::new(item_type))
657 }
658
659 pub fn struct_datatype(fields: StructType) -> ConcreteDataType {
660 ConcreteDataType::Struct(fields)
661 }
662
663 pub fn dictionary_datatype(
664 key_type: ConcreteDataType,
665 value_type: ConcreteDataType,
666 ) -> ConcreteDataType {
667 ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
668 }
669
670 pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
671 ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
672 }
673
674 pub fn decimal128_default_datatype() -> ConcreteDataType {
675 Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
676 }
677
678 pub fn vector_datatype(dim: u32) -> ConcreteDataType {
679 ConcreteDataType::Vector(VectorType::new(dim))
680 }
681
682 pub fn vector_default_datatype() -> ConcreteDataType {
683 Self::vector_datatype(0)
684 }
685
686 pub fn json_native_datatype(inner_type: ConcreteDataType) -> ConcreteDataType {
687 ConcreteDataType::Json(JsonType::new_json2((&inner_type).into()))
688 }
689}
690
/// Behaviour shared by every data type.
///
/// [`ConcreteDataType`] is annotated with `#[enum_dispatch(DataType)]`, so calling one
/// of these methods on it forwards to the type wrapped by the active variant.
#[enum_dispatch::enum_dispatch]
pub trait DataType: std::fmt::Debug + Send + Sync {
    /// Name of this data type; this is the text printed by the `Display`
    /// implementation of [`ConcreteDataType`].
    fn name(&self) -> String;

    /// Returns the [`LogicalTypeId`] of this data type.
    fn logical_type_id(&self) -> LogicalTypeId;

    /// Returns the default [`Value`] of this data type (for example
    /// `Value::Boolean(false)` for the boolean type).
    fn default_value(&self) -> Value;

    /// Returns the arrow data type this type corresponds to.
    fn as_arrow_type(&self) -> ArrowDataType;

    /// Creates a boxed [`MutableVector`] for this data type.
    ///
    /// NOTE(review): `capacity` is presumably the initial capacity of the vector —
    /// confirm against the implementors.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

    /// Attempts to cast `from` into a value of this data type.
    ///
    /// NOTE(review): `None` presumably signals that the cast is not possible —
    /// confirm against the implementors.
    fn try_cast(&self, from: Value) -> Option<Value>;
}

/// Reference-counted pointer to a [`DataType`] trait object.
pub type DataTypeRef = Arc<dyn DataType>;
715
716#[cfg(test)]
717mod tests {
718 use arrow::datatypes::Field;
719
720 use super::*;
721
722 #[test]
723 fn test_concrete_type_as_datatype_trait() {
724 let concrete_type = ConcreteDataType::boolean_datatype();
725
726 assert_eq!("Boolean", concrete_type.to_string());
727 assert_eq!(Value::Boolean(false), concrete_type.default_value());
728 assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
729 assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
730 }
731
732 #[test]
733 fn test_from_arrow_type() {
734 assert!(matches!(
735 ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
736 ConcreteDataType::Null(_)
737 ));
738 assert!(matches!(
739 ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
740 ConcreteDataType::Boolean(_)
741 ));
742 assert!(matches!(
743 ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
744 ConcreteDataType::Binary(_)
745 ));
746 assert!(matches!(
747 ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
748 ConcreteDataType::Binary(_)
749 ));
750 assert!(matches!(
751 ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
752 ConcreteDataType::Int8(_)
753 ));
754 assert!(matches!(
755 ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
756 ConcreteDataType::Int16(_)
757 ));
758 assert!(matches!(
759 ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
760 ConcreteDataType::Int32(_)
761 ));
762 assert!(matches!(
763 ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
764 ConcreteDataType::Int64(_)
765 ));
766 assert!(matches!(
767 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
768 ConcreteDataType::UInt8(_)
769 ));
770 assert!(matches!(
771 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
772 ConcreteDataType::UInt16(_)
773 ));
774 assert!(matches!(
775 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
776 ConcreteDataType::UInt32(_)
777 ));
778 assert!(matches!(
779 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
780 ConcreteDataType::UInt64(_)
781 ));
782 assert!(matches!(
783 ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
784 ConcreteDataType::Float32(_)
785 ));
786 assert!(matches!(
787 ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
788 ConcreteDataType::Float64(_)
789 ));
790 assert!(matches!(
791 ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
792 ConcreteDataType::String(_)
793 ));
794 let utf8_view_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8View);
795 assert!(matches!(utf8_view_string_type, ConcreteDataType::String(_)));
796 assert_eq!(
797 ArrowDataType::Utf8View,
798 utf8_view_string_type.as_arrow_type()
799 );
800 let large_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
802 assert!(matches!(large_string_type, ConcreteDataType::String(_)));
803 if let ConcreteDataType::String(string_type) = &large_string_type {
804 assert!(string_type.is_large());
805 } else {
806 panic!("Expected a String type");
807 }
808 assert_eq!(
809 ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
810 "item",
811 ArrowDataType::Int32,
812 true,
813 )))),
814 ConcreteDataType::List(ListType::new(Arc::new(ConcreteDataType::int32_datatype())))
815 );
816 assert!(matches!(
817 ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
818 ConcreteDataType::Date(_)
819 ));
820 }
821
822 #[test]
823 fn test_view_round_trip() {
824 let utf8_view_arrow = ArrowDataType::Utf8View;
825 let concrete_type = ConcreteDataType::from_arrow_type(&utf8_view_arrow);
826 let back_to_arrow = concrete_type.as_arrow_type();
827 assert_eq!(utf8_view_arrow, back_to_arrow);
828
829 let binary_view_arrow = ArrowDataType::BinaryView;
830 let concrete_type = ConcreteDataType::from_arrow_type(&binary_view_arrow);
831 let back_to_arrow = concrete_type.as_arrow_type();
832 assert_eq!(binary_view_arrow, back_to_arrow);
833 }
834
835 #[test]
836 fn test_large_utf8_round_trip() {
837 let large_utf8_arrow = ArrowDataType::LargeUtf8;
839 let concrete_type = ConcreteDataType::from_arrow_type(&large_utf8_arrow);
840 let back_to_arrow = concrete_type.as_arrow_type();
841
842 assert!(matches!(concrete_type, ConcreteDataType::String(_)));
843 assert_eq!(large_utf8_arrow, back_to_arrow);
845
846 let utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8);
848 let large_utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
849
850 assert!(matches!(utf8_concrete, ConcreteDataType::String(_)));
851 assert!(matches!(large_utf8_concrete, ConcreteDataType::String(_)));
852
853 if let (ConcreteDataType::String(utf8_type), ConcreteDataType::String(large_type)) =
855 (&utf8_concrete, &large_utf8_concrete)
856 {
857 assert!(!utf8_type.is_large());
858 assert!(large_type.is_large());
859 } else {
860 panic!("Expected both to be String types");
861 }
862
863 let view_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8View);
865 assert_ne!(utf8_concrete, view_concrete);
866 assert_ne!(large_utf8_concrete, view_concrete);
867
868 assert_ne!(utf8_concrete, large_utf8_concrete);
870 }
871
872 #[test]
873 fn test_from_arrow_timestamp() {
874 assert_eq!(
875 ConcreteDataType::timestamp_millisecond_datatype(),
876 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
877 );
878 assert_eq!(
879 ConcreteDataType::timestamp_microsecond_datatype(),
880 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
881 );
882 assert_eq!(
883 ConcreteDataType::timestamp_nanosecond_datatype(),
884 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
885 );
886 assert_eq!(
887 ConcreteDataType::timestamp_second_datatype(),
888 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
889 );
890 }
891
892 #[test]
893 fn test_is_null() {
894 assert!(ConcreteDataType::null_datatype().is_null());
895 assert!(!ConcreteDataType::int32_datatype().is_null());
896 }
897
898 #[test]
899 fn test_is_float() {
900 assert!(!ConcreteDataType::int32_datatype().is_float());
901 assert!(ConcreteDataType::float32_datatype().is_float());
902 assert!(ConcreteDataType::float64_datatype().is_float());
903 }
904
905 #[test]
906 fn test_is_boolean() {
907 assert!(!ConcreteDataType::int32_datatype().is_boolean());
908 assert!(!ConcreteDataType::float32_datatype().is_boolean());
909 assert!(ConcreteDataType::boolean_datatype().is_boolean());
910 }
911
912 #[test]
913 fn test_is_decimal() {
914 assert!(!ConcreteDataType::int32_datatype().is_decimal());
915 assert!(!ConcreteDataType::float32_datatype().is_decimal());
916 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_decimal());
917 assert!(ConcreteDataType::decimal128_datatype(18, 6).is_decimal());
918 }
919
920 #[test]
921 fn test_is_stringifiable() {
922 assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
923 assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
924 assert!(ConcreteDataType::string_datatype().is_stringifiable());
925 assert!(ConcreteDataType::binary_datatype().is_stringifiable());
926 assert!(ConcreteDataType::date_datatype().is_stringifiable());
927 assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
928 assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
929 assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
930 assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
931 assert!(ConcreteDataType::time_second_datatype().is_stringifiable());
932 assert!(ConcreteDataType::time_millisecond_datatype().is_stringifiable());
933 assert!(ConcreteDataType::time_microsecond_datatype().is_stringifiable());
934 assert!(ConcreteDataType::time_nanosecond_datatype().is_stringifiable());
935
936 assert!(ConcreteDataType::interval_year_month_datatype().is_stringifiable());
937 assert!(ConcreteDataType::interval_day_time_datatype().is_stringifiable());
938 assert!(ConcreteDataType::interval_month_day_nano_datatype().is_stringifiable());
939
940 assert!(ConcreteDataType::duration_second_datatype().is_stringifiable());
941 assert!(ConcreteDataType::duration_millisecond_datatype().is_stringifiable());
942 assert!(ConcreteDataType::duration_microsecond_datatype().is_stringifiable());
943 assert!(ConcreteDataType::duration_nanosecond_datatype().is_stringifiable());
944 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_stringifiable());
945 assert!(ConcreteDataType::vector_default_datatype().is_stringifiable());
946 }
947
948 #[test]
949 fn test_is_signed() {
950 assert!(ConcreteDataType::int8_datatype().is_signed());
951 assert!(ConcreteDataType::int16_datatype().is_signed());
952 assert!(ConcreteDataType::int32_datatype().is_signed());
953 assert!(ConcreteDataType::int64_datatype().is_signed());
954 assert!(ConcreteDataType::date_datatype().is_signed());
955 assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
956 assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
957 assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
958 assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
959 assert!(ConcreteDataType::time_second_datatype().is_signed());
960 assert!(ConcreteDataType::time_millisecond_datatype().is_signed());
961 assert!(ConcreteDataType::time_microsecond_datatype().is_signed());
962 assert!(ConcreteDataType::time_nanosecond_datatype().is_signed());
963 assert!(ConcreteDataType::interval_year_month_datatype().is_signed());
964 assert!(ConcreteDataType::interval_day_time_datatype().is_signed());
965 assert!(ConcreteDataType::interval_month_day_nano_datatype().is_signed());
966 assert!(ConcreteDataType::duration_second_datatype().is_signed());
967 assert!(ConcreteDataType::duration_millisecond_datatype().is_signed());
968 assert!(ConcreteDataType::duration_microsecond_datatype().is_signed());
969 assert!(ConcreteDataType::duration_nanosecond_datatype().is_signed());
970
971 assert!(!ConcreteDataType::uint8_datatype().is_signed());
972 assert!(!ConcreteDataType::uint16_datatype().is_signed());
973 assert!(!ConcreteDataType::uint32_datatype().is_signed());
974 assert!(!ConcreteDataType::uint64_datatype().is_signed());
975
976 assert!(!ConcreteDataType::float32_datatype().is_signed());
977 assert!(!ConcreteDataType::float64_datatype().is_signed());
978
979 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_signed());
980 }
981
982 #[test]
983 fn test_is_unsigned() {
984 assert!(!ConcreteDataType::int8_datatype().is_unsigned());
985 assert!(!ConcreteDataType::int16_datatype().is_unsigned());
986 assert!(!ConcreteDataType::int32_datatype().is_unsigned());
987 assert!(!ConcreteDataType::int64_datatype().is_unsigned());
988 assert!(!ConcreteDataType::date_datatype().is_unsigned());
989 assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
990 assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
991 assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
992 assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
993 assert!(!ConcreteDataType::time_second_datatype().is_unsigned());
994 assert!(!ConcreteDataType::time_millisecond_datatype().is_unsigned());
995 assert!(!ConcreteDataType::time_microsecond_datatype().is_unsigned());
996 assert!(!ConcreteDataType::time_nanosecond_datatype().is_unsigned());
997 assert!(!ConcreteDataType::interval_year_month_datatype().is_unsigned());
998 assert!(!ConcreteDataType::interval_day_time_datatype().is_unsigned());
999 assert!(!ConcreteDataType::interval_month_day_nano_datatype().is_unsigned());
1000 assert!(!ConcreteDataType::duration_second_datatype().is_unsigned());
1001 assert!(!ConcreteDataType::duration_millisecond_datatype().is_unsigned());
1002 assert!(!ConcreteDataType::duration_microsecond_datatype().is_unsigned());
1003 assert!(!ConcreteDataType::duration_nanosecond_datatype().is_unsigned());
1004 assert!(!ConcreteDataType::decimal128_datatype(10, 2).is_unsigned());
1005
1006 assert!(ConcreteDataType::uint8_datatype().is_unsigned());
1007 assert!(ConcreteDataType::uint16_datatype().is_unsigned());
1008 assert!(ConcreteDataType::uint32_datatype().is_unsigned());
1009 assert!(ConcreteDataType::uint64_datatype().is_unsigned());
1010
1011 assert!(!ConcreteDataType::float32_datatype().is_unsigned());
1012 assert!(!ConcreteDataType::float64_datatype().is_unsigned());
1013 }
1014
1015 #[test]
1016 fn test_numerics() {
1017 let nums = ConcreteDataType::numerics();
1018 assert_eq!(10, nums.len());
1019 }
1020
1021 #[test]
1022 fn test_as_list() {
1023 let list_type =
1024 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()));
1025 assert_eq!(
1026 ListType::new(Arc::new(ConcreteDataType::int32_datatype())),
1027 *list_type.as_list().unwrap()
1028 );
1029 assert!(ConcreteDataType::int32_datatype().as_list().is_none());
1030 }
1031
1032 #[test]
1033 fn test_display_concrete_data_type() {
1034 assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
1035 assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
1036 assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
1037 assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
1038 assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
1039 assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
1040 assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
1041 assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
1042 assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
1043 assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
1044 assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
1045 assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
1046 assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
1047 assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
1048 assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
1049 assert_eq!(
1050 ConcreteDataType::timestamp_millisecond_datatype().to_string(),
1051 "TimestampMillisecond"
1052 );
1053 assert_eq!(
1054 ConcreteDataType::time_millisecond_datatype().to_string(),
1055 "TimeMillisecond"
1056 );
1057 assert_eq!(
1058 ConcreteDataType::interval_month_day_nano_datatype().to_string(),
1059 "IntervalMonthDayNano"
1060 );
1061 assert_eq!(
1062 ConcreteDataType::duration_second_datatype().to_string(),
1063 "DurationSecond"
1064 );
1065 assert_eq!(
1066 ConcreteDataType::decimal128_datatype(10, 2).to_string(),
1067 "Decimal(10, 2)"
1068 );
1069 assert_eq!(
1071 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1072 .to_string(),
1073 "List<Int32>"
1074 );
1075 assert_eq!(
1076 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::Dictionary(
1077 DictionaryType::new(
1078 ConcreteDataType::int32_datatype(),
1079 ConcreteDataType::string_datatype()
1080 )
1081 )))
1082 .to_string(),
1083 "List<Dictionary<Int32, String>>"
1084 );
1085 assert_eq!(
1086 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(Arc::new(
1087 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1088 ))))
1089 .to_string(),
1090 "List<List<List<Int32>>>"
1091 );
1092 assert_eq!(
1093 ConcreteDataType::dictionary_datatype(
1094 ConcreteDataType::int32_datatype(),
1095 ConcreteDataType::string_datatype()
1096 )
1097 .to_string(),
1098 "Dictionary<Int32, String>"
1099 );
1100 assert_eq!(
1101 ConcreteDataType::vector_datatype(3).to_string(),
1102 "Vector(3)"
1103 );
1104 }
1105}