1use std::fmt;
16use std::sync::Arc;
17
18use arrow::compute::cast as arrow_array_cast;
19use arrow::datatypes::{
20 DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
21};
22use arrow_schema::DECIMAL_DEFAULT_SCALE;
23use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
24use common_time::interval::IntervalUnit;
25use common_time::timestamp::TimeUnit;
26use enum_dispatch::enum_dispatch;
27use paste::paste;
28use serde::{Deserialize, Serialize};
29
30use crate::error::{self, Error, Result};
31use crate::type_id::LogicalTypeId;
32use crate::types::{
33 BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
34 DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
35 Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
36 IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
37 StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType, TimestampMillisecondType,
38 TimestampNanosecondType, TimestampSecondType, TimestampType, UInt16Type, UInt32Type,
39 UInt64Type, UInt8Type, VectorType,
40};
41use crate::value::Value;
42use crate::vectors::MutableVector;
43
/// Closed set of data types; each variant wraps the struct (or nested enum)
/// describing one concrete type.
///
/// The `#[enum_dispatch(DataType)]` attribute makes the [`DataType`] trait
/// methods callable directly on this enum.
///
/// Variant order is significant: the derived `PartialOrd`/`Ord` compare
/// variants in declaration order, so do not reorder them casually.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum ConcreteDataType {
    Null(NullType),
    Boolean(BooleanType),

    // Numeric types: signed integers, unsigned integers, floats.
    Int8(Int8Type),
    Int16(Int16Type),
    Int32(Int32Type),
    Int64(Int64Type),
    UInt8(UInt8Type),
    UInt16(UInt16Type),
    UInt32(UInt32Type),
    UInt64(UInt64Type),
    Float32(Float32Type),
    Float64(Float64Type),

    // Decimal type (carries its own precision and scale).
    Decimal128(Decimal128Type),

    // Byte and string types.
    Binary(BinaryType),
    String(StringType),

    // Date and time types.
    Date(DateType),
    Timestamp(TimestampType),
    Time(TimeType),

    // Duration type.
    Duration(DurationType),

    // Interval type.
    Interval(IntervalType),

    // Compound types.
    List(ListType),
    Dictionary(DictionaryType),

    // JSON type.
    Json(JsonType),

    // Vector type.
    Vector(VectorType),
}
90
91impl fmt::Display for ConcreteDataType {
92 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93 match self {
94 ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
95 ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
96 ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
97 ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
98 ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
99 ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
100 ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
101 ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
102 ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
103 ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
104 ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
105 ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
106 ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
107 ConcreteDataType::String(v) => write!(f, "{}", v.name()),
108 ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
109 ConcreteDataType::Timestamp(t) => match t {
110 TimestampType::Second(v) => write!(f, "{}", v.name()),
111 TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
112 TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
113 TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
114 },
115 ConcreteDataType::Time(t) => match t {
116 TimeType::Second(v) => write!(f, "{}", v.name()),
117 TimeType::Millisecond(v) => write!(f, "{}", v.name()),
118 TimeType::Microsecond(v) => write!(f, "{}", v.name()),
119 TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
120 },
121 ConcreteDataType::Interval(i) => match i {
122 IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
123 IntervalType::DayTime(v) => write!(f, "{}", v.name()),
124 IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
125 },
126 ConcreteDataType::Duration(d) => match d {
127 DurationType::Second(v) => write!(f, "{}", v.name()),
128 DurationType::Millisecond(v) => write!(f, "{}", v.name()),
129 DurationType::Microsecond(v) => write!(f, "{}", v.name()),
130 DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
131 },
132 ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
133 ConcreteDataType::List(v) => write!(f, "{}", v.name()),
134 ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
135 ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
136 ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
137 }
138 }
139}
140
141impl ConcreteDataType {
144 pub fn is_float(&self) -> bool {
145 matches!(
146 self,
147 ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
148 )
149 }
150
151 pub fn is_boolean(&self) -> bool {
152 matches!(self, ConcreteDataType::Boolean(_))
153 }
154
155 pub fn is_string(&self) -> bool {
156 matches!(self, ConcreteDataType::String(_))
157 }
158
159 pub fn is_stringifiable(&self) -> bool {
160 matches!(
161 self,
162 ConcreteDataType::String(_)
163 | ConcreteDataType::Date(_)
164 | ConcreteDataType::Timestamp(_)
165 | ConcreteDataType::Time(_)
166 | ConcreteDataType::Interval(_)
167 | ConcreteDataType::Duration(_)
168 | ConcreteDataType::Decimal128(_)
169 | ConcreteDataType::Binary(_)
170 | ConcreteDataType::Json(_)
171 | ConcreteDataType::Vector(_)
172 )
173 }
174
175 pub fn is_signed(&self) -> bool {
176 matches!(
177 self,
178 ConcreteDataType::Int8(_)
179 | ConcreteDataType::Int16(_)
180 | ConcreteDataType::Int32(_)
181 | ConcreteDataType::Int64(_)
182 | ConcreteDataType::Date(_)
183 | ConcreteDataType::Timestamp(_)
184 | ConcreteDataType::Time(_)
185 | ConcreteDataType::Interval(_)
186 | ConcreteDataType::Duration(_)
187 | ConcreteDataType::Decimal128(_)
188 )
189 }
190
191 pub fn is_unsigned(&self) -> bool {
192 matches!(
193 self,
194 ConcreteDataType::UInt8(_)
195 | ConcreteDataType::UInt16(_)
196 | ConcreteDataType::UInt32(_)
197 | ConcreteDataType::UInt64(_)
198 )
199 }
200
201 pub fn is_numeric(&self) -> bool {
202 matches!(
203 self,
204 ConcreteDataType::Int8(_)
205 | ConcreteDataType::Int16(_)
206 | ConcreteDataType::Int32(_)
207 | ConcreteDataType::Int64(_)
208 | ConcreteDataType::UInt8(_)
209 | ConcreteDataType::UInt16(_)
210 | ConcreteDataType::UInt32(_)
211 | ConcreteDataType::UInt64(_)
212 | ConcreteDataType::Float32(_)
213 | ConcreteDataType::Float64(_)
214 )
215 }
216
217 pub fn is_timestamp(&self) -> bool {
218 matches!(self, ConcreteDataType::Timestamp(_))
219 }
220
221 pub fn is_decimal(&self) -> bool {
222 matches!(self, ConcreteDataType::Decimal128(_))
223 }
224
225 pub fn is_json(&self) -> bool {
226 matches!(self, ConcreteDataType::Json(_))
227 }
228
229 pub fn is_vector(&self) -> bool {
230 matches!(self, ConcreteDataType::Vector(_))
231 }
232
233 pub fn numerics() -> Vec<ConcreteDataType> {
234 vec![
235 ConcreteDataType::int8_datatype(),
236 ConcreteDataType::int16_datatype(),
237 ConcreteDataType::int32_datatype(),
238 ConcreteDataType::int64_datatype(),
239 ConcreteDataType::uint8_datatype(),
240 ConcreteDataType::uint16_datatype(),
241 ConcreteDataType::uint32_datatype(),
242 ConcreteDataType::uint64_datatype(),
243 ConcreteDataType::float32_datatype(),
244 ConcreteDataType::float64_datatype(),
245 ]
246 }
247
248 pub fn unsigned_integers() -> Vec<ConcreteDataType> {
249 vec![
250 ConcreteDataType::uint8_datatype(),
251 ConcreteDataType::uint16_datatype(),
252 ConcreteDataType::uint32_datatype(),
253 ConcreteDataType::uint64_datatype(),
254 ]
255 }
256
257 pub fn timestamps() -> Vec<ConcreteDataType> {
258 vec![
259 ConcreteDataType::timestamp_second_datatype(),
260 ConcreteDataType::timestamp_millisecond_datatype(),
261 ConcreteDataType::timestamp_microsecond_datatype(),
262 ConcreteDataType::timestamp_nanosecond_datatype(),
263 ]
264 }
265
266 pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
271 ConcreteDataType::try_from(dt).expect("Unimplemented type")
272 }
273
274 pub fn is_null(&self) -> bool {
275 matches!(self, ConcreteDataType::Null(NullType))
276 }
277
278 pub fn as_list(&self) -> Option<&ListType> {
280 match self {
281 ConcreteDataType::List(t) => Some(t),
282 _ => None,
283 }
284 }
285
286 pub fn as_timestamp(&self) -> Option<TimestampType> {
288 match self {
289 ConcreteDataType::Timestamp(t) => Some(*t),
290 _ => None,
291 }
292 }
293
294 pub fn numeric_precision(&self) -> Option<u8> {
296 match self {
297 ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
298 ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
299 ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
300 ConcreteDataType::Int64(_) => Some(19),
301 ConcreteDataType::UInt64(_) => Some(20),
302 ConcreteDataType::Float32(_) => Some(12),
303 ConcreteDataType::Float64(_) => Some(22),
304 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
305 _ => None,
306 }
307 }
308
309 pub fn numeric_scale(&self) -> Option<i8> {
311 match self {
312 ConcreteDataType::Int8(_)
313 | ConcreteDataType::UInt8(_)
314 | ConcreteDataType::Int16(_)
315 | ConcreteDataType::UInt16(_)
316 | ConcreteDataType::Int32(_)
317 | ConcreteDataType::UInt32(_)
318 | ConcreteDataType::Int64(_)
319 | ConcreteDataType::UInt64(_) => Some(0),
320 ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
321 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
322 _ => None,
323 }
324 }
325
326 pub fn as_time(&self) -> Option<TimeType> {
328 match self {
329 ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
330 ConcreteDataType::Time(t) => Some(*t),
331 _ => None,
332 }
333 }
334
335 pub fn as_decimal128(&self) -> Option<Decimal128Type> {
336 match self {
337 ConcreteDataType::Decimal128(d) => Some(*d),
338 _ => None,
339 }
340 }
341
342 pub fn as_json(&self) -> Option<JsonType> {
343 match self {
344 ConcreteDataType::Json(j) => Some(*j),
345 _ => None,
346 }
347 }
348
349 pub fn as_vector(&self) -> Option<VectorType> {
350 match self {
351 ConcreteDataType::Vector(v) => Some(*v),
352 _ => None,
353 }
354 }
355
356 pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
358 let array = arrow_array::new_empty_array(&self.as_arrow_type());
359 arrow_array_cast(array.as_ref(), &to_type.as_arrow_type()).is_ok()
360 }
361
362 pub fn as_duration(&self) -> Option<DurationType> {
364 match self {
365 ConcreteDataType::Duration(d) => Some(*d),
366 _ => None,
367 }
368 }
369
370 pub fn postgres_datatype_name(&self) -> &'static str {
372 match self {
373 &ConcreteDataType::Null(_) => "UNKNOWN",
374 &ConcreteDataType::Boolean(_) => "BOOL",
375 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
376 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
377 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
378 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
379 &ConcreteDataType::Float32(_) => "FLOAT4",
380 &ConcreteDataType::Float64(_) => "FLOAT8",
381 &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
382 &ConcreteDataType::String(_) => "VARCHAR",
383 &ConcreteDataType::Date(_) => "DATE",
384 &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
385 &ConcreteDataType::Time(_) => "TIME",
386 &ConcreteDataType::Interval(_) => "INTERVAL",
387 &ConcreteDataType::Decimal128(_) => "NUMERIC",
388 &ConcreteDataType::Json(_) => "JSON",
389 ConcreteDataType::List(list) => match list.item_type() {
390 &ConcreteDataType::Null(_) => "UNKNOWN",
391 &ConcreteDataType::Boolean(_) => "_BOOL",
392 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
393 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
394 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
395 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
396 &ConcreteDataType::Float32(_) => "_FLOAT4",
397 &ConcreteDataType::Float64(_) => "_FLOAT8",
398 &ConcreteDataType::Binary(_) => "_BYTEA",
399 &ConcreteDataType::String(_) => "_VARCHAR",
400 &ConcreteDataType::Date(_) => "_DATE",
401 &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
402 &ConcreteDataType::Time(_) => "_TIME",
403 &ConcreteDataType::Interval(_) => "_INTERVAL",
404 &ConcreteDataType::Decimal128(_) => "_NUMERIC",
405 &ConcreteDataType::Json(_) => "_JSON",
406 &ConcreteDataType::Duration(_)
407 | &ConcreteDataType::Dictionary(_)
408 | &ConcreteDataType::Vector(_)
409 | &ConcreteDataType::List(_) => "UNKNOWN",
410 },
411 &ConcreteDataType::Duration(_) | &ConcreteDataType::Dictionary(_) => "UNKNOWN",
412 }
413 }
414}
415
416impl From<&ConcreteDataType> for ConcreteDataType {
417 fn from(t: &ConcreteDataType) -> Self {
418 t.clone()
419 }
420}
421
422impl TryFrom<&ArrowDataType> for ConcreteDataType {
423 type Error = Error;
424
425 fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
426 let concrete_type = match dt {
427 ArrowDataType::Null => Self::null_datatype(),
428 ArrowDataType::Boolean => Self::boolean_datatype(),
429 ArrowDataType::UInt8 => Self::uint8_datatype(),
430 ArrowDataType::UInt16 => Self::uint16_datatype(),
431 ArrowDataType::UInt32 => Self::uint32_datatype(),
432 ArrowDataType::UInt64 => Self::uint64_datatype(),
433 ArrowDataType::Int8 => Self::int8_datatype(),
434 ArrowDataType::Int16 => Self::int16_datatype(),
435 ArrowDataType::Int32 => Self::int32_datatype(),
436 ArrowDataType::Int64 => Self::int64_datatype(),
437 ArrowDataType::Float32 => Self::float32_datatype(),
438 ArrowDataType::Float64 => Self::float64_datatype(),
439 ArrowDataType::Date32 => Self::date_datatype(),
440 ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
441 ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
442 ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
443 ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
444 ArrowDataType::List(field) => Self::List(ListType::new(
445 ConcreteDataType::from_arrow_type(field.data_type()),
446 )),
447 ArrowDataType::Dictionary(key_type, value_type) => {
448 let key_type = ConcreteDataType::from_arrow_type(key_type);
449 let value_type = ConcreteDataType::from_arrow_type(value_type);
450 Self::Dictionary(DictionaryType::new(key_type, value_type))
451 }
452 ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
453 ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
454 ArrowDataType::Duration(u) => {
455 ConcreteDataType::Duration(DurationType::from_unit(u.into()))
456 }
457 ArrowDataType::Decimal128(precision, scale) => {
458 ConcreteDataType::decimal128_datatype(*precision, *scale)
459 }
460 _ => {
461 return error::UnsupportedArrowTypeSnafu {
462 arrow_type: dt.clone(),
463 }
464 .fail()
465 }
466 };
467
468 Ok(concrete_type)
469 }
470}
471
472macro_rules! impl_new_concrete_type_functions {
473 ($($Type: ident), +) => {
474 paste! {
475 impl ConcreteDataType {
476 $(
477 pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
478 ConcreteDataType::$Type([<$Type Type>]::default())
479 }
480 )+
481 }
482 }
483 }
484}
485
486impl_new_concrete_type_functions!(
487 Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
488 Binary, Date, String, Json
489);
490
491impl ConcreteDataType {
492 pub fn timestamp_second_datatype() -> Self {
493 ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
494 }
495
496 pub fn timestamp_millisecond_datatype() -> Self {
497 ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
498 }
499
500 pub fn timestamp_microsecond_datatype() -> Self {
501 ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
502 }
503
504 pub fn timestamp_nanosecond_datatype() -> Self {
505 ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
506 }
507
508 pub fn time_datatype(unit: TimeUnit) -> Self {
510 ConcreteDataType::Time(TimeType::from_unit(unit))
511 }
512
513 pub fn time_second_datatype() -> Self {
515 Self::time_datatype(TimeUnit::Second)
516 }
517
518 pub fn time_millisecond_datatype() -> Self {
520 Self::time_datatype(TimeUnit::Millisecond)
521 }
522
523 pub fn time_microsecond_datatype() -> Self {
525 Self::time_datatype(TimeUnit::Microsecond)
526 }
527
528 pub fn time_nanosecond_datatype() -> Self {
530 Self::time_datatype(TimeUnit::Nanosecond)
531 }
532
533 pub fn duration_second_datatype() -> Self {
535 ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
536 }
537
538 pub fn duration_millisecond_datatype() -> Self {
540 ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
541 }
542
543 pub fn duration_microsecond_datatype() -> Self {
545 ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
546 }
547
548 pub fn duration_nanosecond_datatype() -> Self {
550 ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
551 }
552
553 pub fn interval_month_day_nano_datatype() -> Self {
555 ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
556 }
557
558 pub fn interval_year_month_datatype() -> Self {
560 ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
561 }
562
563 pub fn interval_day_time_datatype() -> Self {
565 ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
566 }
567
568 pub fn timestamp_datatype(unit: TimeUnit) -> Self {
569 match unit {
570 TimeUnit::Second => Self::timestamp_second_datatype(),
571 TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
572 TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
573 TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
574 }
575 }
576
577 pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
579 match t {
580 ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
581 ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
582 ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
583 ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
584 }
585 }
586
587 pub fn duration_datatype(unit: TimeUnit) -> Self {
588 match unit {
589 TimeUnit::Second => Self::duration_second_datatype(),
590 TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
591 TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
592 TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
593 }
594 }
595
596 pub fn interval_datatype(unit: IntervalUnit) -> Self {
597 match unit {
598 IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
599 IntervalUnit::DayTime => Self::interval_day_time_datatype(),
600 IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
601 }
602 }
603
604 pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
605 match u {
606 ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
607 ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
608 ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
609 }
610 }
611
612 pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
613 ConcreteDataType::List(ListType::new(item_type))
614 }
615
616 pub fn dictionary_datatype(
617 key_type: ConcreteDataType,
618 value_type: ConcreteDataType,
619 ) -> ConcreteDataType {
620 ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
621 }
622
623 pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
624 ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
625 }
626
627 pub fn decimal128_default_datatype() -> ConcreteDataType {
628 Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
629 }
630
631 pub fn vector_datatype(dim: u32) -> ConcreteDataType {
632 ConcreteDataType::Vector(VectorType::new(dim))
633 }
634
635 pub fn vector_default_datatype() -> ConcreteDataType {
636 Self::vector_datatype(0)
637 }
638}
639
/// Behaviour shared by every data type.
///
/// The trait is registered with `enum_dispatch`, and [`ConcreteDataType`] is
/// declared with `#[enum_dispatch(DataType)]`, so these methods can be called
/// directly on that enum.
#[enum_dispatch::enum_dispatch]
pub trait DataType: std::fmt::Debug + Send + Sync {
    /// Returns the name of this data type (this is what `Display` for
    /// [`ConcreteDataType`] prints).
    fn name(&self) -> String;

    /// Returns the [`LogicalTypeId`] of this data type.
    fn logical_type_id(&self) -> LogicalTypeId;

    /// Returns the default [`Value`] of this data type.
    fn default_value(&self) -> Value;

    /// Returns the arrow data type corresponding to this data type.
    fn as_arrow_type(&self) -> ArrowDataType;

    /// Creates a boxed [`MutableVector`] for this data type.
    ///
    /// NOTE(review): `capacity` is presumably the initial capacity to
    /// reserve; confirm against the implementors.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

    /// Tries to cast `from` into a value of this data type.
    ///
    /// NOTE(review): `None` presumably means the cast is not possible;
    /// confirm against the implementors.
    fn try_cast(&self, from: Value) -> Option<Value>;
}
662
/// Shared, reference-counted handle to a type-erased [`DataType`].
pub type DataTypeRef = Arc<dyn DataType>;
664
#[cfg(test)]
mod tests {
    use arrow::datatypes::Field;

    use super::*;

    /// The `DataType` trait methods are callable directly on the enum.
    #[test]
    fn test_concrete_type_as_datatype_trait() {
        let boolean = ConcreteDataType::boolean_datatype();

        assert_eq!(boolean.to_string(), "Boolean");
        assert_eq!(boolean.default_value(), Value::Boolean(false));
        assert_eq!(boolean.logical_type_id(), LogicalTypeId::Boolean);
        assert_eq!(boolean.as_arrow_type(), ArrowDataType::Boolean);
    }

    /// Each supported arrow type converts to the expected variant.
    #[test]
    fn test_from_arrow_type() {
        // Asserts that converting `$arrow` yields the `$variant` variant.
        macro_rules! assert_converts_to {
            ($arrow:expr, $variant:ident) => {
                assert!(matches!(
                    ConcreteDataType::from_arrow_type(&$arrow),
                    ConcreteDataType::$variant(_)
                ))
            };
        }

        assert_converts_to!(ArrowDataType::Null, Null);
        assert_converts_to!(ArrowDataType::Boolean, Boolean);
        assert_converts_to!(ArrowDataType::Binary, Binary);
        assert_converts_to!(ArrowDataType::LargeBinary, Binary);
        assert_converts_to!(ArrowDataType::Int8, Int8);
        assert_converts_to!(ArrowDataType::Int16, Int16);
        assert_converts_to!(ArrowDataType::Int32, Int32);
        assert_converts_to!(ArrowDataType::Int64, Int64);
        assert_converts_to!(ArrowDataType::UInt8, UInt8);
        assert_converts_to!(ArrowDataType::UInt16, UInt16);
        assert_converts_to!(ArrowDataType::UInt32, UInt32);
        assert_converts_to!(ArrowDataType::UInt64, UInt64);
        assert_converts_to!(ArrowDataType::Float32, Float32);
        assert_converts_to!(ArrowDataType::Float64, Float64);
        assert_converts_to!(ArrowDataType::Utf8, String);

        let arrow_list = ArrowDataType::List(Arc::new(Field::new(
            "item",
            ArrowDataType::Int32,
            true,
        )));
        assert_eq!(
            ConcreteDataType::from_arrow_type(&arrow_list),
            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
        );

        assert_converts_to!(ArrowDataType::Date32, Date);
    }

    /// Every arrow time unit maps to the timestamp type of the same unit.
    #[test]
    fn test_from_arrow_timestamp() {
        let cases = [
            (
                ArrowTimeUnit::Millisecond,
                ConcreteDataType::timestamp_millisecond_datatype(),
            ),
            (
                ArrowTimeUnit::Microsecond,
                ConcreteDataType::timestamp_microsecond_datatype(),
            ),
            (
                ArrowTimeUnit::Nanosecond,
                ConcreteDataType::timestamp_nanosecond_datatype(),
            ),
            (
                ArrowTimeUnit::Second,
                ConcreteDataType::timestamp_second_datatype(),
            ),
        ];

        for (unit, expected) in &cases {
            assert_eq!(*expected, ConcreteDataType::from_arrow_time_unit(unit));
        }
    }

    #[test]
    fn test_is_null() {
        let null_type = ConcreteDataType::null_datatype();
        let int_type = ConcreteDataType::int32_datatype();

        assert!(null_type.is_null());
        assert!(!int_type.is_null());
    }

    #[test]
    fn test_is_float() {
        for float_type in &[
            ConcreteDataType::float32_datatype(),
            ConcreteDataType::float64_datatype(),
        ] {
            assert!(float_type.is_float(), "{} should be a float", float_type);
        }
        assert!(!ConcreteDataType::int32_datatype().is_float());
    }

    #[test]
    fn test_is_boolean() {
        for other_type in &[
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::float32_datatype(),
        ] {
            assert!(
                !other_type.is_boolean(),
                "{} should not be a boolean",
                other_type
            );
        }
        assert!(ConcreteDataType::boolean_datatype().is_boolean());
    }

    #[test]
    fn test_is_decimal() {
        for other_type in &[
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::float32_datatype(),
        ] {
            assert!(
                !other_type.is_decimal(),
                "{} should not be a decimal",
                other_type
            );
        }
        for decimal_type in &[
            ConcreteDataType::decimal128_datatype(10, 2),
            ConcreteDataType::decimal128_datatype(18, 6),
        ] {
            assert!(
                decimal_type.is_decimal(),
                "{} should be a decimal",
                decimal_type
            );
        }
    }

    #[test]
    fn test_is_stringifiable() {
        let not_stringifiable = [
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::float32_datatype(),
        ];
        for data_type in &not_stringifiable {
            assert!(
                !data_type.is_stringifiable(),
                "{} should not be stringifiable",
                data_type
            );
        }

        let stringifiable = [
            ConcreteDataType::string_datatype(),
            ConcreteDataType::binary_datatype(),
            ConcreteDataType::date_datatype(),
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::timestamp_nanosecond_datatype(),
            ConcreteDataType::time_second_datatype(),
            ConcreteDataType::time_millisecond_datatype(),
            ConcreteDataType::time_microsecond_datatype(),
            ConcreteDataType::time_nanosecond_datatype(),
            ConcreteDataType::interval_year_month_datatype(),
            ConcreteDataType::interval_day_time_datatype(),
            ConcreteDataType::interval_month_day_nano_datatype(),
            ConcreteDataType::duration_second_datatype(),
            ConcreteDataType::duration_millisecond_datatype(),
            ConcreteDataType::duration_microsecond_datatype(),
            ConcreteDataType::duration_nanosecond_datatype(),
            ConcreteDataType::decimal128_datatype(10, 2),
            ConcreteDataType::vector_default_datatype(),
        ];
        for data_type in &stringifiable {
            assert!(
                data_type.is_stringifiable(),
                "{} should be stringifiable",
                data_type
            );
        }
    }

    #[test]
    fn test_is_signed() {
        let signed = [
            ConcreteDataType::int8_datatype(),
            ConcreteDataType::int16_datatype(),
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::int64_datatype(),
            ConcreteDataType::date_datatype(),
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::timestamp_nanosecond_datatype(),
            ConcreteDataType::time_second_datatype(),
            ConcreteDataType::time_millisecond_datatype(),
            ConcreteDataType::time_microsecond_datatype(),
            ConcreteDataType::time_nanosecond_datatype(),
            ConcreteDataType::interval_year_month_datatype(),
            ConcreteDataType::interval_day_time_datatype(),
            ConcreteDataType::interval_month_day_nano_datatype(),
            ConcreteDataType::duration_second_datatype(),
            ConcreteDataType::duration_millisecond_datatype(),
            ConcreteDataType::duration_microsecond_datatype(),
            ConcreteDataType::duration_nanosecond_datatype(),
            ConcreteDataType::decimal128_datatype(10, 2),
        ];
        for data_type in &signed {
            assert!(data_type.is_signed(), "{} should be signed", data_type);
        }

        // Unsigned integers and floats are both reported as not signed.
        let not_signed = [
            ConcreteDataType::uint8_datatype(),
            ConcreteDataType::uint16_datatype(),
            ConcreteDataType::uint32_datatype(),
            ConcreteDataType::uint64_datatype(),
            ConcreteDataType::float32_datatype(),
            ConcreteDataType::float64_datatype(),
        ];
        for data_type in &not_signed {
            assert!(!data_type.is_signed(), "{} should not be signed", data_type);
        }
    }

    #[test]
    fn test_is_unsigned() {
        let not_unsigned = [
            ConcreteDataType::int8_datatype(),
            ConcreteDataType::int16_datatype(),
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::int64_datatype(),
            ConcreteDataType::date_datatype(),
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::timestamp_nanosecond_datatype(),
            ConcreteDataType::time_second_datatype(),
            ConcreteDataType::time_millisecond_datatype(),
            ConcreteDataType::time_microsecond_datatype(),
            ConcreteDataType::time_nanosecond_datatype(),
            ConcreteDataType::interval_year_month_datatype(),
            ConcreteDataType::interval_day_time_datatype(),
            ConcreteDataType::interval_month_day_nano_datatype(),
            ConcreteDataType::duration_second_datatype(),
            ConcreteDataType::duration_millisecond_datatype(),
            ConcreteDataType::duration_microsecond_datatype(),
            ConcreteDataType::duration_nanosecond_datatype(),
            ConcreteDataType::decimal128_datatype(10, 2),
            ConcreteDataType::float32_datatype(),
            ConcreteDataType::float64_datatype(),
        ];
        for data_type in &not_unsigned {
            assert!(
                !data_type.is_unsigned(),
                "{} should not be unsigned",
                data_type
            );
        }

        let unsigned = [
            ConcreteDataType::uint8_datatype(),
            ConcreteDataType::uint16_datatype(),
            ConcreteDataType::uint32_datatype(),
            ConcreteDataType::uint64_datatype(),
        ];
        for data_type in &unsigned {
            assert!(data_type.is_unsigned(), "{} should be unsigned", data_type);
        }
    }

    #[test]
    fn test_numerics() {
        assert_eq!(ConcreteDataType::numerics().len(), 10);
    }

    #[test]
    fn test_as_list() {
        let int_list = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
        let expected = ListType::new(ConcreteDataType::int32_datatype());

        assert_eq!(expected, *int_list.as_list().unwrap());
        assert!(ConcreteDataType::int32_datatype().as_list().is_none());
    }

    /// `Display` output for scalar, parameterised and nested types.
    #[test]
    fn test_display_concrete_data_type() {
        let cases = [
            (ConcreteDataType::null_datatype(), "Null"),
            (ConcreteDataType::boolean_datatype(), "Boolean"),
            (ConcreteDataType::binary_datatype(), "Binary"),
            (ConcreteDataType::int8_datatype(), "Int8"),
            (ConcreteDataType::int16_datatype(), "Int16"),
            (ConcreteDataType::int32_datatype(), "Int32"),
            (ConcreteDataType::int64_datatype(), "Int64"),
            (ConcreteDataType::uint8_datatype(), "UInt8"),
            (ConcreteDataType::uint16_datatype(), "UInt16"),
            (ConcreteDataType::uint32_datatype(), "UInt32"),
            (ConcreteDataType::uint64_datatype(), "UInt64"),
            (ConcreteDataType::float32_datatype(), "Float32"),
            (ConcreteDataType::float64_datatype(), "Float64"),
            (ConcreteDataType::string_datatype(), "String"),
            (ConcreteDataType::date_datatype(), "Date"),
            (
                ConcreteDataType::timestamp_millisecond_datatype(),
                "TimestampMillisecond",
            ),
            (
                ConcreteDataType::time_millisecond_datatype(),
                "TimeMillisecond",
            ),
            (
                ConcreteDataType::interval_month_day_nano_datatype(),
                "IntervalMonthDayNano",
            ),
            (
                ConcreteDataType::duration_second_datatype(),
                "DurationSecond",
            ),
            (
                ConcreteDataType::decimal128_datatype(10, 2),
                "Decimal(10, 2)",
            ),
            (
                ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
                "List<Int32>",
            ),
            (
                ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(
                    DictionaryType::new(
                        ConcreteDataType::int32_datatype(),
                        ConcreteDataType::string_datatype(),
                    ),
                )),
                "List<Dictionary<Int32, String>>",
            ),
            (
                ConcreteDataType::list_datatype(ConcreteDataType::list_datatype(
                    ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
                )),
                "List<List<List<Int32>>>",
            ),
            (
                ConcreteDataType::dictionary_datatype(
                    ConcreteDataType::int32_datatype(),
                    ConcreteDataType::string_datatype(),
                ),
                "Dictionary<Int32, String>",
            ),
            (ConcreteDataType::vector_datatype(3), "Vector(3)"),
        ];

        for (data_type, expected) in &cases {
            assert_eq!(data_type.to_string(), *expected);
        }
    }
}