1use std::fmt;
16use std::sync::Arc;
17
18use arrow::datatypes::{
19 DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
20};
21use arrow_schema::DECIMAL_DEFAULT_SCALE;
22use common_decimal::decimal128::DECIMAL128_MAX_PRECISION;
23use common_time::interval::IntervalUnit;
24use common_time::timestamp::TimeUnit;
25use enum_dispatch::enum_dispatch;
26use paste::paste;
27use serde::{Deserialize, Serialize};
28
29use crate::error::{self, Error, Result};
30use crate::type_id::LogicalTypeId;
31use crate::types::json_type::JsonNativeType;
32use crate::types::{
33 BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
34 DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
35 Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType,
36 IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
37 StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
38 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
39 UInt8Type, UInt16Type, UInt32Type, UInt64Type, VectorType,
40};
41use crate::value::Value;
42use crate::vectors::MutableVector;
43
44#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
45#[enum_dispatch(DataType)]
46pub enum ConcreteDataType {
47 Null(NullType),
48 Boolean(BooleanType),
49
50 Int8(Int8Type),
52 Int16(Int16Type),
53 Int32(Int32Type),
54 Int64(Int64Type),
55 UInt8(UInt8Type),
56 UInt16(UInt16Type),
57 UInt32(UInt32Type),
58 UInt64(UInt64Type),
59 Float32(Float32Type),
60 Float64(Float64Type),
61
62 Decimal128(Decimal128Type),
64
65 Binary(BinaryType),
67 String(StringType),
68
69 Date(DateType),
71 Timestamp(TimestampType),
72 Time(TimeType),
73
74 Duration(DurationType),
76
77 Interval(IntervalType),
79
80 List(ListType),
82 Dictionary(DictionaryType),
83 Struct(StructType),
84
85 Json(JsonType),
87
88 Vector(VectorType),
90}
91
92impl fmt::Display for ConcreteDataType {
93 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94 match self {
95 ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
96 ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
97 ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
98 ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
99 ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
100 ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
101 ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
102 ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
103 ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
104 ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
105 ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
106 ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
107 ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
108 ConcreteDataType::String(v) => write!(f, "{}", v.name()),
109 ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
110 ConcreteDataType::Timestamp(t) => match t {
111 TimestampType::Second(v) => write!(f, "{}", v.name()),
112 TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
113 TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
114 TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
115 },
116 ConcreteDataType::Time(t) => match t {
117 TimeType::Second(v) => write!(f, "{}", v.name()),
118 TimeType::Millisecond(v) => write!(f, "{}", v.name()),
119 TimeType::Microsecond(v) => write!(f, "{}", v.name()),
120 TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
121 },
122 ConcreteDataType::Interval(i) => match i {
123 IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
124 IntervalType::DayTime(v) => write!(f, "{}", v.name()),
125 IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
126 },
127 ConcreteDataType::Duration(d) => match d {
128 DurationType::Second(v) => write!(f, "{}", v.name()),
129 DurationType::Millisecond(v) => write!(f, "{}", v.name()),
130 DurationType::Microsecond(v) => write!(f, "{}", v.name()),
131 DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
132 },
133 ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
134 ConcreteDataType::List(v) => write!(f, "{}", v.name()),
135 ConcreteDataType::Struct(v) => write!(f, "{}", v.name()),
136 ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
137 ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
138 ConcreteDataType::Vector(v) => write!(f, "{}", v.name()),
139 }
140 }
141}
142
143impl ConcreteDataType {
146 pub fn is_float(&self) -> bool {
147 matches!(
148 self,
149 ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
150 )
151 }
152
153 pub fn is_boolean(&self) -> bool {
154 matches!(self, ConcreteDataType::Boolean(_))
155 }
156
157 pub fn is_string(&self) -> bool {
158 matches!(self, ConcreteDataType::String(_))
159 }
160
161 pub fn is_stringifiable(&self) -> bool {
162 matches!(
163 self,
164 ConcreteDataType::String(_)
165 | ConcreteDataType::Date(_)
166 | ConcreteDataType::Timestamp(_)
167 | ConcreteDataType::Time(_)
168 | ConcreteDataType::Interval(_)
169 | ConcreteDataType::Duration(_)
170 | ConcreteDataType::Decimal128(_)
171 | ConcreteDataType::Binary(_)
172 | ConcreteDataType::Json(_)
173 | ConcreteDataType::Vector(_)
174 )
175 }
176
177 pub fn is_signed(&self) -> bool {
178 matches!(
179 self,
180 ConcreteDataType::Int8(_)
181 | ConcreteDataType::Int16(_)
182 | ConcreteDataType::Int32(_)
183 | ConcreteDataType::Int64(_)
184 | ConcreteDataType::Date(_)
185 | ConcreteDataType::Timestamp(_)
186 | ConcreteDataType::Time(_)
187 | ConcreteDataType::Interval(_)
188 | ConcreteDataType::Duration(_)
189 | ConcreteDataType::Decimal128(_)
190 )
191 }
192
193 pub fn is_unsigned(&self) -> bool {
194 matches!(
195 self,
196 ConcreteDataType::UInt8(_)
197 | ConcreteDataType::UInt16(_)
198 | ConcreteDataType::UInt32(_)
199 | ConcreteDataType::UInt64(_)
200 )
201 }
202
203 pub fn is_numeric(&self) -> bool {
204 matches!(
205 self,
206 ConcreteDataType::Int8(_)
207 | ConcreteDataType::Int16(_)
208 | ConcreteDataType::Int32(_)
209 | ConcreteDataType::Int64(_)
210 | ConcreteDataType::UInt8(_)
211 | ConcreteDataType::UInt16(_)
212 | ConcreteDataType::UInt32(_)
213 | ConcreteDataType::UInt64(_)
214 | ConcreteDataType::Float32(_)
215 | ConcreteDataType::Float64(_)
216 )
217 }
218
219 pub fn is_timestamp(&self) -> bool {
220 matches!(self, ConcreteDataType::Timestamp(_))
221 }
222
223 pub fn is_decimal(&self) -> bool {
224 matches!(self, ConcreteDataType::Decimal128(_))
225 }
226
227 pub fn is_json(&self) -> bool {
228 matches!(self, ConcreteDataType::Json(_))
229 }
230
231 pub fn is_vector(&self) -> bool {
232 matches!(self, ConcreteDataType::Vector(_))
233 }
234
235 pub fn numerics() -> Vec<ConcreteDataType> {
236 vec![
237 ConcreteDataType::int8_datatype(),
238 ConcreteDataType::int16_datatype(),
239 ConcreteDataType::int32_datatype(),
240 ConcreteDataType::int64_datatype(),
241 ConcreteDataType::uint8_datatype(),
242 ConcreteDataType::uint16_datatype(),
243 ConcreteDataType::uint32_datatype(),
244 ConcreteDataType::uint64_datatype(),
245 ConcreteDataType::float32_datatype(),
246 ConcreteDataType::float64_datatype(),
247 ]
248 }
249
250 pub fn unsigned_integers() -> Vec<ConcreteDataType> {
251 vec![
252 ConcreteDataType::uint8_datatype(),
253 ConcreteDataType::uint16_datatype(),
254 ConcreteDataType::uint32_datatype(),
255 ConcreteDataType::uint64_datatype(),
256 ]
257 }
258
259 pub fn timestamps() -> Vec<ConcreteDataType> {
260 vec![
261 ConcreteDataType::timestamp_second_datatype(),
262 ConcreteDataType::timestamp_millisecond_datatype(),
263 ConcreteDataType::timestamp_microsecond_datatype(),
264 ConcreteDataType::timestamp_nanosecond_datatype(),
265 ]
266 }
267
268 pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
273 ConcreteDataType::try_from(dt).expect("Unimplemented type")
274 }
275
276 pub fn is_null(&self) -> bool {
277 matches!(self, ConcreteDataType::Null(NullType))
278 }
279
280 pub fn as_list(&self) -> Option<&ListType> {
282 match self {
283 ConcreteDataType::List(t) => Some(t),
284 _ => None,
285 }
286 }
287
288 pub fn as_struct(&self) -> Option<&StructType> {
289 match self {
290 ConcreteDataType::Struct(s) => Some(s),
291 _ => None,
292 }
293 }
294
295 pub fn as_timestamp(&self) -> Option<TimestampType> {
297 match self {
298 ConcreteDataType::Timestamp(t) => Some(*t),
299 _ => None,
300 }
301 }
302
303 pub fn numeric_precision(&self) -> Option<u8> {
305 match self {
306 ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => Some(3),
307 ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => Some(5),
308 ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => Some(10),
309 ConcreteDataType::Int64(_) => Some(19),
310 ConcreteDataType::UInt64(_) => Some(20),
311 ConcreteDataType::Float32(_) => Some(12),
312 ConcreteDataType::Float64(_) => Some(22),
313 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.precision()),
314 _ => None,
315 }
316 }
317
318 pub fn numeric_scale(&self) -> Option<i8> {
320 match self {
321 ConcreteDataType::Int8(_)
322 | ConcreteDataType::UInt8(_)
323 | ConcreteDataType::Int16(_)
324 | ConcreteDataType::UInt16(_)
325 | ConcreteDataType::Int32(_)
326 | ConcreteDataType::UInt32(_)
327 | ConcreteDataType::Int64(_)
328 | ConcreteDataType::UInt64(_) => Some(0),
329 ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => None,
330 ConcreteDataType::Decimal128(decimal_type) => Some(decimal_type.scale()),
331 _ => None,
332 }
333 }
334
335 pub fn as_time(&self) -> Option<TimeType> {
337 match self {
338 ConcreteDataType::Int64(_) => Some(TimeType::Millisecond(TimeMillisecondType)),
339 ConcreteDataType::Time(t) => Some(*t),
340 _ => None,
341 }
342 }
343
344 pub fn as_decimal128(&self) -> Option<Decimal128Type> {
345 match self {
346 ConcreteDataType::Decimal128(d) => Some(*d),
347 _ => None,
348 }
349 }
350
351 pub fn as_json(&self) -> Option<&JsonType> {
352 match self {
353 ConcreteDataType::Json(j) => Some(j),
354 _ => None,
355 }
356 }
357
358 pub fn as_vector(&self) -> Option<VectorType> {
359 match self {
360 ConcreteDataType::Vector(v) => Some(*v),
361 _ => None,
362 }
363 }
364
365 pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
367 match (self, to_type) {
368 (ConcreteDataType::Json(this), ConcreteDataType::Json(that)) => that.is_include(this),
369 _ => arrow::compute::can_cast_types(&self.as_arrow_type(), &to_type.as_arrow_type()),
370 }
371 }
372
373 pub fn as_duration(&self) -> Option<DurationType> {
375 match self {
376 ConcreteDataType::Duration(d) => Some(*d),
377 _ => None,
378 }
379 }
380
381 pub fn postgres_datatype_name(&self) -> &'static str {
383 match self {
384 &ConcreteDataType::Null(_) => "UNKNOWN",
385 &ConcreteDataType::Boolean(_) => "BOOL",
386 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
387 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
388 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
389 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
390 &ConcreteDataType::Float32(_) => "FLOAT4",
391 &ConcreteDataType::Float64(_) => "FLOAT8",
392 &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
393 &ConcreteDataType::String(_) => "VARCHAR",
394 &ConcreteDataType::Date(_) => "DATE",
395 &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
396 &ConcreteDataType::Time(_) => "TIME",
397 &ConcreteDataType::Interval(_) => "INTERVAL",
398 &ConcreteDataType::Decimal128(_) => "NUMERIC",
399 &ConcreteDataType::Json(_) => "JSON",
400 ConcreteDataType::List(list) => match list.item_type() {
401 &ConcreteDataType::Null(_) => "UNKNOWN",
402 &ConcreteDataType::Boolean(_) => "_BOOL",
403 &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
404 &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
405 &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
406 &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
407 &ConcreteDataType::Float32(_) => "_FLOAT4",
408 &ConcreteDataType::Float64(_) => "_FLOAT8",
409 &ConcreteDataType::Binary(_) => "_BYTEA",
410 &ConcreteDataType::String(_) => "_VARCHAR",
411 &ConcreteDataType::Date(_) => "_DATE",
412 &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
413 &ConcreteDataType::Time(_) => "_TIME",
414 &ConcreteDataType::Interval(_) => "_INTERVAL",
415 &ConcreteDataType::Decimal128(_) => "_NUMERIC",
416 &ConcreteDataType::Json(_) => "_JSON",
417 &ConcreteDataType::Duration(_)
418 | &ConcreteDataType::Dictionary(_)
419 | &ConcreteDataType::Vector(_)
420 | &ConcreteDataType::List(_)
421 | &ConcreteDataType::Struct(_) => "UNKNOWN",
422 },
423 &ConcreteDataType::Duration(_)
424 | &ConcreteDataType::Dictionary(_)
425 | &ConcreteDataType::Struct(_) => "UNKNOWN",
426 }
427 }
428}
429
430impl From<&ConcreteDataType> for ConcreteDataType {
431 fn from(t: &ConcreteDataType) -> Self {
432 t.clone()
433 }
434}
435
436impl TryFrom<&ArrowDataType> for ConcreteDataType {
437 type Error = Error;
438
439 fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
440 let concrete_type = match dt {
441 ArrowDataType::Null => Self::null_datatype(),
442 ArrowDataType::Boolean => Self::boolean_datatype(),
443 ArrowDataType::UInt8 => Self::uint8_datatype(),
444 ArrowDataType::UInt16 => Self::uint16_datatype(),
445 ArrowDataType::UInt32 => Self::uint32_datatype(),
446 ArrowDataType::UInt64 => Self::uint64_datatype(),
447 ArrowDataType::Int8 => Self::int8_datatype(),
448 ArrowDataType::Int16 => Self::int16_datatype(),
449 ArrowDataType::Int32 => Self::int32_datatype(),
450 ArrowDataType::Int64 => Self::int64_datatype(),
451 ArrowDataType::Float32 => Self::float32_datatype(),
452 ArrowDataType::Float64 => Self::float64_datatype(),
453 ArrowDataType::Date32 => Self::date_datatype(),
454 ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
455 ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
456 ArrowDataType::Binary | ArrowDataType::LargeBinary => {
457 Self::Binary(BinaryType::binary())
458 }
459 ArrowDataType::BinaryView => Self::Binary(BinaryType::binary_view()),
460 ArrowDataType::Utf8 => Self::String(StringType::utf8()),
461 ArrowDataType::Utf8View => Self::String(StringType::utf8_view()),
462 ArrowDataType::LargeUtf8 => Self::String(StringType::large_utf8()),
463 ArrowDataType::List(field) => Self::List(ListType::new(Arc::new(
464 ConcreteDataType::from_arrow_type(field.data_type()),
465 ))),
466 ArrowDataType::Dictionary(key_type, value_type) => {
467 let key_type = ConcreteDataType::from_arrow_type(key_type);
468 let value_type = ConcreteDataType::from_arrow_type(value_type);
469 Self::Dictionary(DictionaryType::new(key_type, value_type))
470 }
471 ArrowDataType::Time32(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
472 ArrowDataType::Time64(u) => ConcreteDataType::Time(TimeType::from_unit(u.into())),
473 ArrowDataType::Duration(u) => {
474 ConcreteDataType::Duration(DurationType::from_unit(u.into()))
475 }
476 ArrowDataType::Decimal128(precision, scale) => {
477 ConcreteDataType::decimal128_datatype(*precision, *scale)
478 }
479 ArrowDataType::Struct(fields) => ConcreteDataType::Struct(StructType::from(fields)),
480 ArrowDataType::Float16
481 | ArrowDataType::Date64
482 | ArrowDataType::FixedSizeBinary(_)
483 | ArrowDataType::ListView(_)
484 | ArrowDataType::FixedSizeList(_, _)
485 | ArrowDataType::LargeList(_)
486 | ArrowDataType::LargeListView(_)
487 | ArrowDataType::Union(_, _)
488 | ArrowDataType::Decimal256(_, _)
489 | ArrowDataType::Map(_, _)
490 | ArrowDataType::RunEndEncoded(_, _)
491 | ArrowDataType::Decimal32(_, _)
492 | ArrowDataType::Decimal64(_, _) => {
493 return error::UnsupportedArrowTypeSnafu {
494 arrow_type: dt.clone(),
495 }
496 .fail();
497 }
498 };
499
500 Ok(concrete_type)
501 }
502}
503
504macro_rules! impl_new_concrete_type_functions {
505 ($($Type: ident), +) => {
506 paste! {
507 impl ConcreteDataType {
508 $(
509 pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
510 ConcreteDataType::$Type([<$Type Type>]::default())
511 }
512 )+
513 }
514 }
515 }
516}
517
518impl_new_concrete_type_functions!(
519 Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
520 Binary, Date, String, Json
521);
522
523impl ConcreteDataType {
524 pub fn large_string_datatype() -> Self {
525 ConcreteDataType::String(StringType::large_utf8())
526 }
527
528 pub fn utf8_view_datatype() -> Self {
529 ConcreteDataType::String(StringType::utf8_view())
530 }
531
532 pub fn binary_view_datatype() -> Self {
533 ConcreteDataType::Binary(BinaryType::binary_view())
534 }
535
536 pub fn timestamp_second_datatype() -> Self {
537 ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
538 }
539
540 pub fn timestamp_millisecond_datatype() -> Self {
541 ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType))
542 }
543
544 pub fn timestamp_microsecond_datatype() -> Self {
545 ConcreteDataType::Timestamp(TimestampType::Microsecond(TimestampMicrosecondType))
546 }
547
548 pub fn timestamp_nanosecond_datatype() -> Self {
549 ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType))
550 }
551
552 pub fn time_datatype(unit: TimeUnit) -> Self {
554 ConcreteDataType::Time(TimeType::from_unit(unit))
555 }
556
557 pub fn time_second_datatype() -> Self {
559 Self::time_datatype(TimeUnit::Second)
560 }
561
562 pub fn time_millisecond_datatype() -> Self {
564 Self::time_datatype(TimeUnit::Millisecond)
565 }
566
567 pub fn time_microsecond_datatype() -> Self {
569 Self::time_datatype(TimeUnit::Microsecond)
570 }
571
572 pub fn time_nanosecond_datatype() -> Self {
574 Self::time_datatype(TimeUnit::Nanosecond)
575 }
576
577 pub fn duration_second_datatype() -> Self {
579 ConcreteDataType::Duration(DurationType::Second(DurationSecondType))
580 }
581
582 pub fn duration_millisecond_datatype() -> Self {
584 ConcreteDataType::Duration(DurationType::Millisecond(DurationMillisecondType))
585 }
586
587 pub fn duration_microsecond_datatype() -> Self {
589 ConcreteDataType::Duration(DurationType::Microsecond(DurationMicrosecondType))
590 }
591
592 pub fn duration_nanosecond_datatype() -> Self {
594 ConcreteDataType::Duration(DurationType::Nanosecond(DurationNanosecondType))
595 }
596
597 pub fn interval_month_day_nano_datatype() -> Self {
599 ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType))
600 }
601
602 pub fn interval_year_month_datatype() -> Self {
604 ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType))
605 }
606
607 pub fn interval_day_time_datatype() -> Self {
609 ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType))
610 }
611
612 pub fn timestamp_datatype(unit: TimeUnit) -> Self {
613 match unit {
614 TimeUnit::Second => Self::timestamp_second_datatype(),
615 TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
616 TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
617 TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
618 }
619 }
620
621 pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
623 match t {
624 ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
625 ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
626 ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
627 ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
628 }
629 }
630
631 pub fn duration_datatype(unit: TimeUnit) -> Self {
632 match unit {
633 TimeUnit::Second => Self::duration_second_datatype(),
634 TimeUnit::Millisecond => Self::duration_millisecond_datatype(),
635 TimeUnit::Microsecond => Self::duration_microsecond_datatype(),
636 TimeUnit::Nanosecond => Self::duration_nanosecond_datatype(),
637 }
638 }
639
640 pub fn interval_datatype(unit: IntervalUnit) -> Self {
641 match unit {
642 IntervalUnit::YearMonth => Self::interval_year_month_datatype(),
643 IntervalUnit::DayTime => Self::interval_day_time_datatype(),
644 IntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
645 }
646 }
647
648 pub fn from_arrow_interval_unit(u: &ArrowIntervalUnit) -> Self {
649 match u {
650 ArrowIntervalUnit::YearMonth => Self::interval_year_month_datatype(),
651 ArrowIntervalUnit::DayTime => Self::interval_day_time_datatype(),
652 ArrowIntervalUnit::MonthDayNano => Self::interval_month_day_nano_datatype(),
653 }
654 }
655
656 pub fn list_datatype(item_type: Arc<ConcreteDataType>) -> ConcreteDataType {
657 ConcreteDataType::List(ListType::new(item_type))
658 }
659
660 pub fn struct_datatype(fields: StructType) -> ConcreteDataType {
661 ConcreteDataType::Struct(fields)
662 }
663
664 pub fn dictionary_datatype(
665 key_type: ConcreteDataType,
666 value_type: ConcreteDataType,
667 ) -> ConcreteDataType {
668 ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
669 }
670
671 pub fn decimal128_datatype(precision: u8, scale: i8) -> ConcreteDataType {
672 ConcreteDataType::Decimal128(Decimal128Type::new(precision, scale))
673 }
674
675 pub fn decimal128_default_datatype() -> ConcreteDataType {
676 Self::decimal128_datatype(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
677 }
678
679 pub fn vector_datatype(dim: u32) -> ConcreteDataType {
680 ConcreteDataType::Vector(VectorType::new(dim))
681 }
682
683 pub fn vector_default_datatype() -> ConcreteDataType {
684 Self::vector_datatype(0)
685 }
686
687 pub fn json2(native_type: JsonNativeType) -> ConcreteDataType {
688 ConcreteDataType::Json(JsonType::new_json2(native_type))
689 }
690}
691
692#[enum_dispatch::enum_dispatch]
694pub trait DataType: std::fmt::Debug + Send + Sync {
695 fn name(&self) -> String;
697
698 fn logical_type_id(&self) -> LogicalTypeId;
700
701 fn default_value(&self) -> Value;
703
704 fn as_arrow_type(&self) -> ArrowDataType;
706
707 fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
709
710 fn try_cast(&self, from: Value) -> Option<Value>;
713}
714
715pub type DataTypeRef = Arc<dyn DataType>;
716
717#[cfg(test)]
718mod tests {
719 use arrow::datatypes::Field;
720
721 use super::*;
722
723 #[test]
724 fn test_concrete_type_as_datatype_trait() {
725 let concrete_type = ConcreteDataType::boolean_datatype();
726
727 assert_eq!("Boolean", concrete_type.to_string());
728 assert_eq!(Value::Boolean(false), concrete_type.default_value());
729 assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
730 assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
731 }
732
733 #[test]
734 fn test_from_arrow_type() {
735 assert!(matches!(
736 ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
737 ConcreteDataType::Null(_)
738 ));
739 assert!(matches!(
740 ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
741 ConcreteDataType::Boolean(_)
742 ));
743 assert!(matches!(
744 ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
745 ConcreteDataType::Binary(_)
746 ));
747 assert!(matches!(
748 ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
749 ConcreteDataType::Binary(_)
750 ));
751 assert!(matches!(
752 ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
753 ConcreteDataType::Int8(_)
754 ));
755 assert!(matches!(
756 ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
757 ConcreteDataType::Int16(_)
758 ));
759 assert!(matches!(
760 ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
761 ConcreteDataType::Int32(_)
762 ));
763 assert!(matches!(
764 ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
765 ConcreteDataType::Int64(_)
766 ));
767 assert!(matches!(
768 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
769 ConcreteDataType::UInt8(_)
770 ));
771 assert!(matches!(
772 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
773 ConcreteDataType::UInt16(_)
774 ));
775 assert!(matches!(
776 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
777 ConcreteDataType::UInt32(_)
778 ));
779 assert!(matches!(
780 ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
781 ConcreteDataType::UInt64(_)
782 ));
783 assert!(matches!(
784 ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
785 ConcreteDataType::Float32(_)
786 ));
787 assert!(matches!(
788 ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
789 ConcreteDataType::Float64(_)
790 ));
791 assert!(matches!(
792 ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
793 ConcreteDataType::String(_)
794 ));
795 let utf8_view_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8View);
796 assert!(matches!(utf8_view_string_type, ConcreteDataType::String(_)));
797 assert_eq!(
798 ArrowDataType::Utf8View,
799 utf8_view_string_type.as_arrow_type()
800 );
801 let large_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
803 assert!(matches!(large_string_type, ConcreteDataType::String(_)));
804 if let ConcreteDataType::String(string_type) = &large_string_type {
805 assert!(string_type.is_large());
806 } else {
807 panic!("Expected a String type");
808 }
809 assert_eq!(
810 ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
811 "item",
812 ArrowDataType::Int32,
813 true,
814 )))),
815 ConcreteDataType::List(ListType::new(Arc::new(ConcreteDataType::int32_datatype())))
816 );
817 assert!(matches!(
818 ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
819 ConcreteDataType::Date(_)
820 ));
821 }
822
823 #[test]
824 fn test_view_round_trip() {
825 let utf8_view_arrow = ArrowDataType::Utf8View;
826 let concrete_type = ConcreteDataType::from_arrow_type(&utf8_view_arrow);
827 let back_to_arrow = concrete_type.as_arrow_type();
828 assert_eq!(utf8_view_arrow, back_to_arrow);
829
830 let binary_view_arrow = ArrowDataType::BinaryView;
831 let concrete_type = ConcreteDataType::from_arrow_type(&binary_view_arrow);
832 let back_to_arrow = concrete_type.as_arrow_type();
833 assert_eq!(binary_view_arrow, back_to_arrow);
834 }
835
836 #[test]
837 fn test_large_utf8_round_trip() {
838 let large_utf8_arrow = ArrowDataType::LargeUtf8;
840 let concrete_type = ConcreteDataType::from_arrow_type(&large_utf8_arrow);
841 let back_to_arrow = concrete_type.as_arrow_type();
842
843 assert!(matches!(concrete_type, ConcreteDataType::String(_)));
844 assert_eq!(large_utf8_arrow, back_to_arrow);
846
847 let utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8);
849 let large_utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
850
851 assert!(matches!(utf8_concrete, ConcreteDataType::String(_)));
852 assert!(matches!(large_utf8_concrete, ConcreteDataType::String(_)));
853
854 if let (ConcreteDataType::String(utf8_type), ConcreteDataType::String(large_type)) =
856 (&utf8_concrete, &large_utf8_concrete)
857 {
858 assert!(!utf8_type.is_large());
859 assert!(large_type.is_large());
860 } else {
861 panic!("Expected both to be String types");
862 }
863
864 let view_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8View);
866 assert_ne!(utf8_concrete, view_concrete);
867 assert_ne!(large_utf8_concrete, view_concrete);
868
869 assert_ne!(utf8_concrete, large_utf8_concrete);
871 }
872
873 #[test]
874 fn test_from_arrow_timestamp() {
875 assert_eq!(
876 ConcreteDataType::timestamp_millisecond_datatype(),
877 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
878 );
879 assert_eq!(
880 ConcreteDataType::timestamp_microsecond_datatype(),
881 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
882 );
883 assert_eq!(
884 ConcreteDataType::timestamp_nanosecond_datatype(),
885 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
886 );
887 assert_eq!(
888 ConcreteDataType::timestamp_second_datatype(),
889 ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
890 );
891 }
892
893 #[test]
894 fn test_is_null() {
895 assert!(ConcreteDataType::null_datatype().is_null());
896 assert!(!ConcreteDataType::int32_datatype().is_null());
897 }
898
899 #[test]
900 fn test_is_float() {
901 assert!(!ConcreteDataType::int32_datatype().is_float());
902 assert!(ConcreteDataType::float32_datatype().is_float());
903 assert!(ConcreteDataType::float64_datatype().is_float());
904 }
905
906 #[test]
907 fn test_is_boolean() {
908 assert!(!ConcreteDataType::int32_datatype().is_boolean());
909 assert!(!ConcreteDataType::float32_datatype().is_boolean());
910 assert!(ConcreteDataType::boolean_datatype().is_boolean());
911 }
912
913 #[test]
914 fn test_is_decimal() {
915 assert!(!ConcreteDataType::int32_datatype().is_decimal());
916 assert!(!ConcreteDataType::float32_datatype().is_decimal());
917 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_decimal());
918 assert!(ConcreteDataType::decimal128_datatype(18, 6).is_decimal());
919 }
920
921 #[test]
922 fn test_is_stringifiable() {
923 assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
924 assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
925 assert!(ConcreteDataType::string_datatype().is_stringifiable());
926 assert!(ConcreteDataType::binary_datatype().is_stringifiable());
927 assert!(ConcreteDataType::date_datatype().is_stringifiable());
928 assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
929 assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
930 assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
931 assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
932 assert!(ConcreteDataType::time_second_datatype().is_stringifiable());
933 assert!(ConcreteDataType::time_millisecond_datatype().is_stringifiable());
934 assert!(ConcreteDataType::time_microsecond_datatype().is_stringifiable());
935 assert!(ConcreteDataType::time_nanosecond_datatype().is_stringifiable());
936
937 assert!(ConcreteDataType::interval_year_month_datatype().is_stringifiable());
938 assert!(ConcreteDataType::interval_day_time_datatype().is_stringifiable());
939 assert!(ConcreteDataType::interval_month_day_nano_datatype().is_stringifiable());
940
941 assert!(ConcreteDataType::duration_second_datatype().is_stringifiable());
942 assert!(ConcreteDataType::duration_millisecond_datatype().is_stringifiable());
943 assert!(ConcreteDataType::duration_microsecond_datatype().is_stringifiable());
944 assert!(ConcreteDataType::duration_nanosecond_datatype().is_stringifiable());
945 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_stringifiable());
946 assert!(ConcreteDataType::vector_default_datatype().is_stringifiable());
947 }
948
949 #[test]
950 fn test_is_signed() {
951 assert!(ConcreteDataType::int8_datatype().is_signed());
952 assert!(ConcreteDataType::int16_datatype().is_signed());
953 assert!(ConcreteDataType::int32_datatype().is_signed());
954 assert!(ConcreteDataType::int64_datatype().is_signed());
955 assert!(ConcreteDataType::date_datatype().is_signed());
956 assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
957 assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
958 assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
959 assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
960 assert!(ConcreteDataType::time_second_datatype().is_signed());
961 assert!(ConcreteDataType::time_millisecond_datatype().is_signed());
962 assert!(ConcreteDataType::time_microsecond_datatype().is_signed());
963 assert!(ConcreteDataType::time_nanosecond_datatype().is_signed());
964 assert!(ConcreteDataType::interval_year_month_datatype().is_signed());
965 assert!(ConcreteDataType::interval_day_time_datatype().is_signed());
966 assert!(ConcreteDataType::interval_month_day_nano_datatype().is_signed());
967 assert!(ConcreteDataType::duration_second_datatype().is_signed());
968 assert!(ConcreteDataType::duration_millisecond_datatype().is_signed());
969 assert!(ConcreteDataType::duration_microsecond_datatype().is_signed());
970 assert!(ConcreteDataType::duration_nanosecond_datatype().is_signed());
971
972 assert!(!ConcreteDataType::uint8_datatype().is_signed());
973 assert!(!ConcreteDataType::uint16_datatype().is_signed());
974 assert!(!ConcreteDataType::uint32_datatype().is_signed());
975 assert!(!ConcreteDataType::uint64_datatype().is_signed());
976
977 assert!(!ConcreteDataType::float32_datatype().is_signed());
978 assert!(!ConcreteDataType::float64_datatype().is_signed());
979
980 assert!(ConcreteDataType::decimal128_datatype(10, 2).is_signed());
981 }
982
983 #[test]
984 fn test_is_unsigned() {
985 assert!(!ConcreteDataType::int8_datatype().is_unsigned());
986 assert!(!ConcreteDataType::int16_datatype().is_unsigned());
987 assert!(!ConcreteDataType::int32_datatype().is_unsigned());
988 assert!(!ConcreteDataType::int64_datatype().is_unsigned());
989 assert!(!ConcreteDataType::date_datatype().is_unsigned());
990 assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
991 assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
992 assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
993 assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
994 assert!(!ConcreteDataType::time_second_datatype().is_unsigned());
995 assert!(!ConcreteDataType::time_millisecond_datatype().is_unsigned());
996 assert!(!ConcreteDataType::time_microsecond_datatype().is_unsigned());
997 assert!(!ConcreteDataType::time_nanosecond_datatype().is_unsigned());
998 assert!(!ConcreteDataType::interval_year_month_datatype().is_unsigned());
999 assert!(!ConcreteDataType::interval_day_time_datatype().is_unsigned());
1000 assert!(!ConcreteDataType::interval_month_day_nano_datatype().is_unsigned());
1001 assert!(!ConcreteDataType::duration_second_datatype().is_unsigned());
1002 assert!(!ConcreteDataType::duration_millisecond_datatype().is_unsigned());
1003 assert!(!ConcreteDataType::duration_microsecond_datatype().is_unsigned());
1004 assert!(!ConcreteDataType::duration_nanosecond_datatype().is_unsigned());
1005 assert!(!ConcreteDataType::decimal128_datatype(10, 2).is_unsigned());
1006
1007 assert!(ConcreteDataType::uint8_datatype().is_unsigned());
1008 assert!(ConcreteDataType::uint16_datatype().is_unsigned());
1009 assert!(ConcreteDataType::uint32_datatype().is_unsigned());
1010 assert!(ConcreteDataType::uint64_datatype().is_unsigned());
1011
1012 assert!(!ConcreteDataType::float32_datatype().is_unsigned());
1013 assert!(!ConcreteDataType::float64_datatype().is_unsigned());
1014 }
1015
1016 #[test]
1017 fn test_numerics() {
1018 let nums = ConcreteDataType::numerics();
1019 assert_eq!(10, nums.len());
1020 }
1021
1022 #[test]
1023 fn test_as_list() {
1024 let list_type =
1025 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()));
1026 assert_eq!(
1027 ListType::new(Arc::new(ConcreteDataType::int32_datatype())),
1028 *list_type.as_list().unwrap()
1029 );
1030 assert!(ConcreteDataType::int32_datatype().as_list().is_none());
1031 }
1032
1033 #[test]
1034 fn test_display_concrete_data_type() {
1035 assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
1036 assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
1037 assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
1038 assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
1039 assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
1040 assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
1041 assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
1042 assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
1043 assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
1044 assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
1045 assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
1046 assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
1047 assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
1048 assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
1049 assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
1050 assert_eq!(
1051 ConcreteDataType::timestamp_millisecond_datatype().to_string(),
1052 "TimestampMillisecond"
1053 );
1054 assert_eq!(
1055 ConcreteDataType::time_millisecond_datatype().to_string(),
1056 "TimeMillisecond"
1057 );
1058 assert_eq!(
1059 ConcreteDataType::interval_month_day_nano_datatype().to_string(),
1060 "IntervalMonthDayNano"
1061 );
1062 assert_eq!(
1063 ConcreteDataType::duration_second_datatype().to_string(),
1064 "DurationSecond"
1065 );
1066 assert_eq!(
1067 ConcreteDataType::decimal128_datatype(10, 2).to_string(),
1068 "Decimal(10, 2)"
1069 );
1070 assert_eq!(
1072 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1073 .to_string(),
1074 "List<Int32>"
1075 );
1076 assert_eq!(
1077 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::Dictionary(
1078 DictionaryType::new(
1079 ConcreteDataType::int32_datatype(),
1080 ConcreteDataType::string_datatype()
1081 )
1082 )))
1083 .to_string(),
1084 "List<Dictionary<Int32, String>>"
1085 );
1086 assert_eq!(
1087 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(Arc::new(
1088 ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
1089 ))))
1090 .to_string(),
1091 "List<List<List<Int32>>>"
1092 );
1093 assert_eq!(
1094 ConcreteDataType::dictionary_datatype(
1095 ConcreteDataType::int32_datatype(),
1096 ConcreteDataType::string_datatype()
1097 )
1098 .to_string(),
1099 "Dictionary<Int32, String>"
1100 );
1101 assert_eq!(
1102 ConcreteDataType::vector_datatype(3).to_string(),
1103 "Vector(3)"
1104 );
1105 }
1106}