datatypes/vectors/
helper.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Vector helper functions, inspired by databend Series mod
16
17use std::any::Any;
18use std::sync::Arc;
19
20use arrow::array::{Array, ArrayRef, StringArray};
21use arrow::compute;
22use arrow::compute::kernels::comparison;
23use arrow::datatypes::{
24    DataType as ArrowDataType, Int8Type, Int16Type, Int32Type, Int64Type, TimeUnit, UInt8Type,
25    UInt16Type, UInt32Type, UInt64Type,
26};
27use arrow_array::{DictionaryArray, StructArray};
28use arrow_schema::IntervalUnit;
29use datafusion_common::ScalarValue;
30use snafu::{OptionExt, ResultExt};
31
32use crate::data_type::ConcreteDataType;
33use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Result};
34use crate::prelude::DataType;
35use crate::scalars::{Scalar, ScalarVectorBuilder};
36use crate::types::StructType;
37use crate::value::{ListValue, ListValueRef, Value};
38use crate::vectors::struct_vector::StructVector;
39use crate::vectors::{
40    BinaryVector, BooleanVector, ConstantVector, DateVector, Decimal128Vector, DictionaryVector,
41    DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
42    DurationSecondVector, Float32Vector, Float64Vector, Int8Vector, Int16Vector, Int32Vector,
43    Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector,
44    ListVector, ListVectorBuilder, MutableVector, NullVector, StringVector, TimeMicrosecondVector,
45    TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
46    TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt8Vector,
47    UInt16Vector, UInt32Vector, UInt64Vector, Vector, VectorRef,
48};
49
/// Helper functions for `Vector`.
///
/// `Helper` is a field-less unit struct used purely as a namespace: everything it offers
/// is exposed as associated functions (`Helper::...`) for downcasting vectors and for
/// converting arrow arrays / scalar values into vectors.
pub struct Helper;
52
53impl Helper {
54    /// Get a pointer to the underlying data of this vectors.
55    /// Can be useful for fast comparisons.
56    /// # Safety
57    /// Assumes that the `vector` is  T.
58    pub unsafe fn static_cast<T: Any>(vector: &VectorRef) -> &T {
59        let object = vector.as_ref();
60        debug_assert!(object.as_any().is::<T>());
61        unsafe { &*(object as *const dyn Vector as *const T) }
62    }
63
64    pub fn check_get_scalar<T: Scalar>(vector: &VectorRef) -> Result<&<T as Scalar>::VectorType> {
65        vector
66            .as_any()
67            .downcast_ref::<<T as Scalar>::VectorType>()
68            .with_context(|| error::UnknownVectorSnafu {
69                msg: format!(
70                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
71                    vector.vector_type_name(),
72                    std::any::type_name::<T>(),
73                ),
74            })
75    }
76
77    pub fn check_get<T: 'static + Vector>(vector: &VectorRef) -> Result<&T> {
78        vector
79            .as_any()
80            .downcast_ref::<T>()
81            .with_context(|| error::UnknownVectorSnafu {
82                msg: format!(
83                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
84                    vector.vector_type_name(),
85                    std::any::type_name::<T>(),
86                ),
87            })
88    }
89
90    pub fn check_get_mutable_vector<T: 'static + MutableVector>(
91        vector: &mut dyn MutableVector,
92    ) -> Result<&mut T> {
93        let ty = vector.data_type();
94        vector
95            .as_mut_any()
96            .downcast_mut()
97            .with_context(|| error::UnknownVectorSnafu {
98                msg: format!(
99                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
100                    ty,
101                    std::any::type_name::<T>(),
102                ),
103            })
104    }
105
106    pub fn check_get_scalar_vector<T: Scalar>(
107        vector: &VectorRef,
108    ) -> Result<&<T as Scalar>::VectorType> {
109        vector
110            .as_any()
111            .downcast_ref::<<T as Scalar>::VectorType>()
112            .with_context(|| error::UnknownVectorSnafu {
113                msg: format!(
114                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
115                    vector.vector_type_name(),
116                    std::any::type_name::<T>(),
117                ),
118            })
119    }
120
    /// Try to cast an arrow scalar value into vector
    ///
    /// Every supported scalar is first placed into a single-element vector of the matching
    /// type, which is then wrapped into a [`ConstantVector`] created with the given `length`.
    ///
    /// # Errors
    /// - Returns the error built from `error::ConversionSnafu` for the scalar variants
    ///   listed in the last match arm (e.g. `Decimal256`, `LargeList`, `Dictionary`,
    ///   `Map`, `Date64`).
    /// - Propagates any error raised while converting a list's item type or items,
    ///   while applying precision/scale to a decimal, or while building a struct vector.
    pub fn try_from_scalar_value(value: ScalarValue, length: usize) -> Result<VectorRef> {
        let vector = match value {
            ScalarValue::Null => ConstantVector::new(Arc::new(NullVector::new(1)), length),
            ScalarValue::Boolean(v) => {
                ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
            }
            // There is no dedicated half-precision vector here: widen the value to `f32`.
            ScalarValue::Float16(v) => ConstantVector::new(
                Arc::new(Float32Vector::from(vec![v.map(f32::from)])),
                length,
            ),
            ScalarValue::Float32(v) => {
                ConstantVector::new(Arc::new(Float32Vector::from(vec![v])), length)
            }
            ScalarValue::Float64(v) => {
                ConstantVector::new(Arc::new(Float64Vector::from(vec![v])), length)
            }
            ScalarValue::Int8(v) => {
                ConstantVector::new(Arc::new(Int8Vector::from(vec![v])), length)
            }
            ScalarValue::Int16(v) => {
                ConstantVector::new(Arc::new(Int16Vector::from(vec![v])), length)
            }
            ScalarValue::Int32(v) => {
                ConstantVector::new(Arc::new(Int32Vector::from(vec![v])), length)
            }
            ScalarValue::Int64(v) => {
                ConstantVector::new(Arc::new(Int64Vector::from(vec![v])), length)
            }
            ScalarValue::UInt8(v) => {
                ConstantVector::new(Arc::new(UInt8Vector::from(vec![v])), length)
            }
            ScalarValue::UInt16(v) => {
                ConstantVector::new(Arc::new(UInt16Vector::from(vec![v])), length)
            }
            ScalarValue::UInt32(v) => {
                ConstantVector::new(Arc::new(UInt32Vector::from(vec![v])), length)
            }
            ScalarValue::UInt64(v) => {
                ConstantVector::new(Arc::new(UInt64Vector::from(vec![v])), length)
            }
            // Both string flavors end up in the same `StringVector`.
            ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => {
                ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
            }
            // All binary flavors end up in a `BinaryVector`; the fixed size is not kept.
            ScalarValue::Binary(v)
            | ScalarValue::LargeBinary(v)
            | ScalarValue::FixedSizeBinary(_, v) => {
                ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
            }
            ScalarValue::List(array) => {
                let item_type = Arc::new(ConcreteDataType::try_from(&array.value_type())?);
                let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
                // Turn the arrow list array back into scalars, flatten them into one
                // sequence and convert each scalar into a `Value`.
                let values = ScalarValue::convert_array_to_scalar_vec(array.as_ref())
                    .context(ConvertArrowArrayToScalarsSnafu)?
                    .into_iter()
                    .flatten()
                    .map(ScalarValue::try_into)
                    .collect::<Result<Vec<Value>>>()?;
                // Push the collected values as the single list entry of the inner vector.
                builder.push(Some(ListValueRef::Ref {
                    val: &ListValue::new(values, item_type),
                }));
                let list_vector = builder.to_vector();
                ConstantVector::new(list_vector, length)
            }
            ScalarValue::Date32(v) => {
                ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
            }
            ScalarValue::TimestampSecond(v, _) => {
                // Timezone is unimplemented now.
                ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
            }
            ScalarValue::TimestampMillisecond(v, _) => {
                // Timezone is unimplemented now.
                ConstantVector::new(Arc::new(TimestampMillisecondVector::from(vec![v])), length)
            }
            ScalarValue::TimestampMicrosecond(v, _) => {
                // Timezone is unimplemented now.
                ConstantVector::new(Arc::new(TimestampMicrosecondVector::from(vec![v])), length)
            }
            ScalarValue::TimestampNanosecond(v, _) => {
                // Timezone is unimplemented now.
                ConstantVector::new(Arc::new(TimestampNanosecondVector::from(vec![v])), length)
            }
            ScalarValue::Time32Second(v) => {
                ConstantVector::new(Arc::new(TimeSecondVector::from(vec![v])), length)
            }
            ScalarValue::Time32Millisecond(v) => {
                ConstantVector::new(Arc::new(TimeMillisecondVector::from(vec![v])), length)
            }
            ScalarValue::Time64Microsecond(v) => {
                ConstantVector::new(Arc::new(TimeMicrosecondVector::from(vec![v])), length)
            }
            ScalarValue::Time64Nanosecond(v) => {
                ConstantVector::new(Arc::new(TimeNanosecondVector::from(vec![v])), length)
            }
            ScalarValue::IntervalYearMonth(v) => {
                ConstantVector::new(Arc::new(IntervalYearMonthVector::from(vec![v])), length)
            }
            ScalarValue::IntervalDayTime(v) => {
                ConstantVector::new(Arc::new(IntervalDayTimeVector::from(vec![v])), length)
            }
            ScalarValue::IntervalMonthDayNano(v) => {
                ConstantVector::new(Arc::new(IntervalMonthDayNanoVector::from(vec![v])), length)
            }
            ScalarValue::DurationSecond(v) => {
                ConstantVector::new(Arc::new(DurationSecondVector::from(vec![v])), length)
            }
            ScalarValue::DurationMillisecond(v) => {
                ConstantVector::new(Arc::new(DurationMillisecondVector::from(vec![v])), length)
            }
            ScalarValue::DurationMicrosecond(v) => {
                ConstantVector::new(Arc::new(DurationMicrosecondVector::from(vec![v])), length)
            }
            ScalarValue::DurationNanosecond(v) => {
                ConstantVector::new(Arc::new(DurationNanosecondVector::from(vec![v])), length)
            }
            ScalarValue::Decimal128(v, p, s) => {
                // Carry the scalar's precision and scale over to the vector.
                let vector = Decimal128Vector::from(vec![v]).with_precision_and_scale(p, s)?;
                ConstantVector::new(Arc::new(vector), length)
            }
            ScalarValue::Struct(v) => {
                let struct_type = StructType::try_from(v.fields())?;
                ConstantVector::new(
                    Arc::new(StructVector::try_new(struct_type, (*v).clone())?),
                    length,
                )
            }
            // Scalars without a vector counterpart here are rejected with a conversion error.
            ScalarValue::Decimal32(_, _, _)
            | ScalarValue::Decimal64(_, _, _)
            | ScalarValue::Decimal256(_, _, _)
            | ScalarValue::FixedSizeList(_)
            | ScalarValue::LargeList(_)
            | ScalarValue::Dictionary(_, _)
            | ScalarValue::Union(_, _, _)
            | ScalarValue::Utf8View(_)
            | ScalarValue::BinaryView(_)
            | ScalarValue::Map(_)
            | ScalarValue::Date64(_) => {
                return error::ConversionSnafu {
                    from: format!("Unsupported scalar value: {value}"),
                }
                .fail();
            }
        };

        Ok(Arc::new(vector))
    }
268
    /// Try to cast an arrow array into vector
    ///
    /// The vector type is selected from the array's arrow data type. `LargeBinary`,
    /// `FixedSizeBinary` and `BinaryView` arrays are cast to `Binary` first, and `Utf8View`
    /// arrays are cast to `Utf8` first.
    ///
    /// # Errors
    /// - Returns the error built from `error::UnsupportedArrowTypeSnafu` for the arrow types
    ///   listed in the last match arm (e.g. `Float16`, `LargeList`, `Map`, `Date64`) and for
    ///   dictionary arrays whose key type is not one of the 8/16/32/64-bit signed or
    ///   unsigned integers.
    /// - Propagates arrow cast errors and any error returned by the per-type conversions.
    ///
    /// # Panics
    /// - Hits `unreachable!` for a `Time32` array whose unit is not second/millisecond, or a
    ///   `Time64` array whose unit is not microsecond/nanosecond.
    /// - The `unwrap` on the dictionary/struct downcast panics if the array's concrete type
    ///   does not match its reported data type.
    pub fn try_into_vector(array: impl AsRef<dyn Array>) -> Result<VectorRef> {
        Ok(match array.as_ref().data_type() {
            ArrowDataType::Null => Arc::new(NullVector::try_from_arrow_array(array)?),
            ArrowDataType::Boolean => Arc::new(BooleanVector::try_from_arrow_array(array)?),
            ArrowDataType::Binary => Arc::new(BinaryVector::try_from_arrow_array(array)?),
            // Normalize the other binary layouts to plain `Binary` before converting.
            ArrowDataType::LargeBinary
            | ArrowDataType::FixedSizeBinary(_)
            | ArrowDataType::BinaryView => {
                let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Binary)
                    .context(crate::error::ArrowComputeSnafu)?;
                Arc::new(BinaryVector::try_from_arrow_array(array)?)
            }
            ArrowDataType::Int8 => Arc::new(Int8Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int16 => Arc::new(Int16Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int32 => Arc::new(Int32Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int64 => Arc::new(Int64Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt8 => Arc::new(UInt8Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt16 => Arc::new(UInt16Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt32 => Arc::new(UInt32Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt64 => Arc::new(UInt64Vector::try_from_arrow_array(array)?),
            ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?),
            ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?),
            ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
            ArrowDataType::LargeUtf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
            // String views are cast to `Utf8` before converting.
            ArrowDataType::Utf8View => {
                let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Utf8)
                    .context(crate::error::ArrowComputeSnafu)?;
                Arc::new(StringVector::try_from_arrow_array(array)?)
            }
            ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
            ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
            // The timezone part of the timestamp type is ignored when picking the vector type.
            ArrowDataType::Timestamp(unit, _) => match unit {
                TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Microsecond => {
                    Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Time32(unit) => match unit {
                TimeUnit::Second => Arc::new(TimeSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(TimeMillisecondVector::try_from_arrow_array(array)?)
                }
                // Arrow use time32 for second/millisecond.
                _ => unreachable!(
                    "unexpected arrow array datatype: {:?}",
                    array.as_ref().data_type()
                ),
            },
            ArrowDataType::Time64(unit) => match unit {
                TimeUnit::Microsecond => {
                    Arc::new(TimeMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(TimeNanosecondVector::try_from_arrow_array(array)?)
                }
                // Arrow use time64 for microsecond/nanosecond.
                _ => unreachable!(
                    "unexpected arrow array datatype: {:?}",
                    array.as_ref().data_type()
                ),
            },
            ArrowDataType::Interval(unit) => match unit {
                IntervalUnit::YearMonth => {
                    Arc::new(IntervalYearMonthVector::try_from_arrow_array(array)?)
                }
                IntervalUnit::DayTime => {
                    Arc::new(IntervalDayTimeVector::try_from_arrow_array(array)?)
                }
                IntervalUnit::MonthDayNano => {
                    Arc::new(IntervalMonthDayNanoVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Duration(unit) => match unit {
                TimeUnit::Second => Arc::new(DurationSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(DurationMillisecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Microsecond => {
                    Arc::new(DurationMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(DurationNanosecondVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Decimal128(_, _) => {
                Arc::new(Decimal128Vector::try_from_arrow_array(array)?)
            }
            ArrowDataType::Dictionary(key, value) => {
                // Downcasts to `DictionaryArray<$key_type>` and wraps a clone of it in a
                // `DictionaryVector` whose item type is derived from the dictionary's value type.
                macro_rules! handle_dictionary_key_type {
                    ($key_type:ident) => {{
                        let array = array
                            .as_ref()
                            .as_any()
                            .downcast_ref::<DictionaryArray<$key_type>>()
                            .unwrap(); // Safety: the type is guarded by match arm condition
                        Arc::new(DictionaryVector::new(
                            array.clone(),
                            ConcreteDataType::try_from(value.as_ref())?,
                        )?)
                    }};
                }

                // Only integer key types are handled; any other key type is reported as
                // an unsupported arrow type.
                match key.as_ref() {
                    ArrowDataType::Int8 => handle_dictionary_key_type!(Int8Type),
                    ArrowDataType::Int16 => handle_dictionary_key_type!(Int16Type),
                    ArrowDataType::Int32 => handle_dictionary_key_type!(Int32Type),
                    ArrowDataType::Int64 => handle_dictionary_key_type!(Int64Type),
                    ArrowDataType::UInt8 => handle_dictionary_key_type!(UInt8Type),
                    ArrowDataType::UInt16 => handle_dictionary_key_type!(UInt16Type),
                    ArrowDataType::UInt32 => handle_dictionary_key_type!(UInt32Type),
                    ArrowDataType::UInt64 => handle_dictionary_key_type!(UInt64Type),
                    _ => {
                        return error::UnsupportedArrowTypeSnafu {
                            arrow_type: array.as_ref().data_type().clone(),
                        }
                        .fail();
                    }
                }
            }

            ArrowDataType::Struct(fields) => {
                // The data type is `Struct`, so the array is expected to be a `StructArray`;
                // the `unwrap` panics otherwise.
                let array = array
                    .as_ref()
                    .as_any()
                    .downcast_ref::<StructArray>()
                    .unwrap();
                Arc::new(StructVector::try_new(
                    StructType::try_from(fields)?,
                    array.clone(),
                )?)
            }
            // Arrow types without a vector counterpart here are rejected with an error.
            ArrowDataType::Float16
            | ArrowDataType::LargeList(_)
            | ArrowDataType::FixedSizeList(_, _)
            | ArrowDataType::Union(_, _)
            | ArrowDataType::Decimal256(_, _)
            | ArrowDataType::Map(_, _)
            | ArrowDataType::RunEndEncoded(_, _)
            | ArrowDataType::ListView(_)
            | ArrowDataType::LargeListView(_)
            | ArrowDataType::Date64
            | ArrowDataType::Decimal32(_, _)
            | ArrowDataType::Decimal64(_, _) => {
                return error::UnsupportedArrowTypeSnafu {
                    arrow_type: array.as_ref().data_type().clone(),
                }
                .fail();
            }
        })
    }
429
430    /// Try to cast an vec of values into vector, fail if type is not the same across all values.
431    pub fn try_from_row_into_vector(row: &[Value], dt: &ConcreteDataType) -> Result<VectorRef> {
432        let mut builder = dt.create_mutable_vector(row.len());
433        for val in row {
434            builder.try_push_value_ref(&val.as_value_ref())?;
435        }
436        let vector = builder.to_vector();
437        Ok(vector)
438    }
439
440    /// Try to cast slice of `arrays` to vectors.
441    pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
442        arrays.iter().map(Self::try_into_vector).collect()
443    }
444
445    /// Perform SQL like operation on `names` and a scalar `s`.
446    pub fn like_utf8(names: Vec<String>, s: &str) -> Result<VectorRef> {
447        let array = StringArray::from(names);
448
449        let s = StringArray::new_scalar(s);
450        let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
451
452        let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
453        Helper::try_into_vector(result)
454    }
455
456    pub fn like_utf8_filter(names: Vec<String>, s: &str) -> Result<(VectorRef, BooleanVector)> {
457        let array = StringArray::from(names);
458        let s = StringArray::new_scalar(s);
459        let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
460        let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
461        let vector = Helper::try_into_vector(result)?;
462
463        Ok((vector, BooleanVector::from(filter)))
464    }
465}
466
/// Test-only helper: renders `vector` as a single-column text table, using the vector's
/// type name as the column name. If formatting fails, the error text is returned instead.
#[cfg(test)]
pub(crate) fn pretty_print(vector: VectorRef) -> String {
    let columns = [vector.to_arrow_array()];
    let column_name = vector.vector_type_name();
    match arrow::util::pretty::pretty_format_columns(&column_name, &columns) {
        Ok(table) => table.to_string(),
        Err(e) => e.to_string(),
    }
}
474
475#[cfg(test)]
476mod tests {
477    use arrow::array::{
478        ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, Int8Array, Int16Array,
479        Int32Array, Int64Array, LargeBinaryArray, ListArray, NullArray, Time32MillisecondArray,
480        Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
481        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
482        TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array,
483    };
484    use arrow::buffer::Buffer;
485    use arrow::datatypes::{Int32Type, IntervalMonthDayNano};
486    use arrow_array::{BinaryArray, DictionaryArray, FixedSizeBinaryArray, LargeStringArray};
487    use arrow_schema::DataType;
488    use common_decimal::Decimal128;
489    use common_time::time::Time;
490    use common_time::timestamp::TimeUnit;
491    use common_time::{Date, Duration};
492
493    use super::*;
494    use crate::value::Value;
495    use crate::vectors::ConcreteDataType;
496
497    #[test]
498    fn test_try_into_vectors() {
499        let arrays: Vec<ArrayRef> = vec![
500            Arc::new(Int32Array::from(vec![1])),
501            Arc::new(Int32Array::from(vec![2])),
502            Arc::new(Int32Array::from(vec![3])),
503        ];
504        let vectors = Helper::try_into_vectors(&arrays).unwrap();
505        vectors.iter().for_each(|v| assert_eq!(1, v.len()));
506        assert_eq!(Value::Int32(1), vectors[0].get(0));
507        assert_eq!(Value::Int32(2), vectors[1].get(0));
508        assert_eq!(Value::Int32(3), vectors[2].get(0));
509    }
510
511    #[test]
512    fn test_try_into_date_vector() {
513        let vector = DateVector::from(vec![Some(1), Some(2), None]);
514        let arrow_array = vector.to_arrow_array();
515        assert_eq!(&ArrowDataType::Date32, arrow_array.data_type());
516        let vector_converted = Helper::try_into_vector(arrow_array).unwrap();
517        assert_eq!(vector.len(), vector_converted.len());
518        for i in 0..vector_converted.len() {
519            assert_eq!(vector.get(i), vector_converted.get(i));
520        }
521    }
522
523    #[test]
524    fn test_try_from_scalar_date_value() {
525        let vector = Helper::try_from_scalar_value(ScalarValue::Date32(Some(42)), 3).unwrap();
526        assert_eq!(ConcreteDataType::date_datatype(), vector.data_type());
527        assert_eq!(3, vector.len());
528        for i in 0..vector.len() {
529            assert_eq!(Value::Date(Date::new(42)), vector.get(i));
530        }
531    }
532
533    #[test]
534    fn test_try_from_scalar_duration_value() {
535        let vector =
536            Helper::try_from_scalar_value(ScalarValue::DurationSecond(Some(42)), 3).unwrap();
537        assert_eq!(
538            ConcreteDataType::duration_second_datatype(),
539            vector.data_type()
540        );
541        assert_eq!(3, vector.len());
542        for i in 0..vector.len() {
543            assert_eq!(
544                Value::Duration(Duration::new(42, TimeUnit::Second)),
545                vector.get(i)
546            );
547        }
548    }
549
550    #[test]
551    fn test_try_from_scalar_decimal128_value() {
552        let vector =
553            Helper::try_from_scalar_value(ScalarValue::Decimal128(Some(42), 3, 1), 3).unwrap();
554        assert_eq!(
555            ConcreteDataType::decimal128_datatype(3, 1),
556            vector.data_type()
557        );
558        assert_eq!(3, vector.len());
559        for i in 0..vector.len() {
560            assert_eq!(Value::Decimal128(Decimal128::new(42, 3, 1)), vector.get(i));
561        }
562    }
563
564    #[test]
565    fn test_try_from_list_value() {
566        let value = ScalarValue::List(ScalarValue::new_list(
567            &[ScalarValue::Int32(Some(1)), ScalarValue::Int32(Some(2))],
568            &ArrowDataType::Int32,
569            true,
570        ));
571        let vector = Helper::try_from_scalar_value(value, 3).unwrap();
572        assert_eq!(
573            ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype())),
574            vector.data_type()
575        );
576        assert_eq!(3, vector.len());
577        for i in 0..vector.len() {
578            let v = vector.get(i);
579            let items = v.as_list().unwrap().unwrap().items();
580            assert_eq!(vec![Value::Int32(1), Value::Int32(2)], items);
581        }
582    }
583
584    #[test]
585    fn test_like_utf8() {
586        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
587            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
588            assert_eq!(*actual, StringVector::from(expected));
589        }
590
591        let names: Vec<String> = vec!["greptime", "hello", "public", "world"]
592            .into_iter()
593            .map(|x| x.to_string())
594            .collect();
595
596        let ret = Helper::like_utf8(names.clone(), "%ll%").unwrap();
597        assert_vector(vec!["hello"], &ret);
598
599        let ret = Helper::like_utf8(names.clone(), "%time").unwrap();
600        assert_vector(vec!["greptime"], &ret);
601
602        let ret = Helper::like_utf8(names.clone(), "%ld").unwrap();
603        assert_vector(vec!["world"], &ret);
604
605        let ret = Helper::like_utf8(names, "%").unwrap();
606        assert_vector(vec!["greptime", "hello", "public", "world"], &ret);
607    }
608
609    #[test]
610    fn test_like_utf8_filter() {
611        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
612            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
613            assert_eq!(*actual, StringVector::from(expected));
614        }
615
616        fn assert_filter(array: Vec<String>, s: &str, expected_filter: &BooleanVector) {
617            let array = StringArray::from(array);
618            let s = StringArray::new_scalar(s);
619            let actual_filter = comparison::like(&array, &s).unwrap();
620            assert_eq!(BooleanVector::from(actual_filter), *expected_filter);
621        }
622
623        let names: Vec<String> = vec!["greptime", "timeseries", "cloud", "database"]
624            .into_iter()
625            .map(|x| x.to_string())
626            .collect();
627
628        let (table, filter) = Helper::like_utf8_filter(names.clone(), "%ti%").unwrap();
629        assert_vector(vec!["greptime", "timeseries"], &table);
630        assert_filter(names.clone(), "%ti%", &filter);
631
632        let (tables, filter) = Helper::like_utf8_filter(names.clone(), "%lou").unwrap();
633        assert_vector(vec![], &tables);
634        assert_filter(names.clone(), "%lou", &filter);
635
636        let (tables, filter) = Helper::like_utf8_filter(names.clone(), "%d%").unwrap();
637        assert_vector(vec!["cloud", "database"], &tables);
638        assert_filter(names.clone(), "%d%", &filter);
639    }
640
641    fn check_try_into_vector(array: impl Array + 'static) {
642        let array: ArrayRef = Arc::new(array);
643        let vector = Helper::try_into_vector(array.clone()).unwrap();
644        assert_eq!(&array, &vector.to_arrow_array());
645    }
646
647    #[test]
648    fn test_try_into_vector() {
649        check_try_into_vector(NullArray::new(2));
650        check_try_into_vector(BooleanArray::from(vec![true, false]));
651        check_try_into_vector(Int8Array::from(vec![1, 2, 3]));
652        check_try_into_vector(Int16Array::from(vec![1, 2, 3]));
653        check_try_into_vector(Int32Array::from(vec![1, 2, 3]));
654        check_try_into_vector(Int64Array::from(vec![1, 2, 3]));
655        check_try_into_vector(UInt8Array::from(vec![1, 2, 3]));
656        check_try_into_vector(UInt16Array::from(vec![1, 2, 3]));
657        check_try_into_vector(UInt32Array::from(vec![1, 2, 3]));
658        check_try_into_vector(UInt64Array::from(vec![1, 2, 3]));
659        check_try_into_vector(Float32Array::from(vec![1.0, 2.0, 3.0]));
660        check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
661        check_try_into_vector(StringArray::from(vec!["hello", "world"]));
662        check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
663        let data = vec![None, Some(vec![Some(6), Some(7)])];
664        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
665        check_try_into_vector(list_array);
666        check_try_into_vector(TimestampSecondArray::from(vec![1, 2, 3]));
667        check_try_into_vector(TimestampMillisecondArray::from(vec![1, 2, 3]));
668        check_try_into_vector(TimestampMicrosecondArray::from(vec![1, 2, 3]));
669        check_try_into_vector(TimestampNanosecondArray::from(vec![1, 2, 3]));
670        check_try_into_vector(Time32SecondArray::from(vec![1, 2, 3]));
671        check_try_into_vector(Time32MillisecondArray::from(vec![1, 2, 3]));
672        check_try_into_vector(Time64MicrosecondArray::from(vec![1, 2, 3]));
673        check_try_into_vector(Time64NanosecondArray::from(vec![1, 2, 3]));
674
675        // Test dictionary arrays with different key types
676        let values = StringArray::from_iter_values(["a", "b", "c"]);
677
678        // Test Int8 keys
679        let keys = Int8Array::from_iter_values([0, 0, 1, 2]);
680        let array: ArrayRef =
681            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
682        Helper::try_into_vector(array).unwrap();
683
684        // Test Int16 keys
685        let keys = Int16Array::from_iter_values([0, 0, 1, 2]);
686        let array: ArrayRef =
687            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
688        Helper::try_into_vector(array).unwrap();
689
690        // Test Int32 keys
691        let keys = Int32Array::from_iter_values([0, 0, 1, 2]);
692        let array: ArrayRef =
693            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
694        Helper::try_into_vector(array).unwrap();
695
696        // Test Int64 keys
697        let keys = Int64Array::from_iter_values([0, 0, 1, 2]);
698        let array: ArrayRef =
699            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
700        Helper::try_into_vector(array).unwrap();
701
702        // Test UInt8 keys
703        let keys = UInt8Array::from_iter_values([0, 0, 1, 2]);
704        let array: ArrayRef =
705            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
706        Helper::try_into_vector(array).unwrap();
707
708        // Test UInt16 keys
709        let keys = UInt16Array::from_iter_values([0, 0, 1, 2]);
710        let array: ArrayRef =
711            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
712        Helper::try_into_vector(array).unwrap();
713
714        // Test UInt32 keys
715        let keys = UInt32Array::from_iter_values([0, 0, 1, 2]);
716        let array: ArrayRef =
717            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
718        Helper::try_into_vector(array).unwrap();
719
720        // Test UInt64 keys
721        let keys = UInt64Array::from_iter_values([0, 0, 1, 2]);
722        let array: ArrayRef = Arc::new(DictionaryArray::try_new(keys, Arc::new(values)).unwrap());
723        Helper::try_into_vector(array).unwrap();
724    }
725
726    #[test]
727    fn test_try_binary_array_into_vector() {
728        let input_vec: Vec<&[u8]> = vec!["hello".as_bytes(), "world".as_bytes()];
729        let assertion_vector = BinaryVector::from(input_vec.clone());
730
731        let input_arrays: Vec<ArrayRef> = vec![
732            Arc::new(LargeBinaryArray::from(input_vec.clone())) as ArrayRef,
733            Arc::new(BinaryArray::from(input_vec.clone())) as ArrayRef,
734            Arc::new(FixedSizeBinaryArray::new(
735                5,
736                Buffer::from_vec("helloworld".as_bytes().to_vec()),
737                None,
738            )) as ArrayRef,
739        ];
740
741        for input_array in input_arrays {
742            let vector = Helper::try_into_vector(input_array).unwrap();
743
744            assert_eq!(2, vector.len());
745            assert_eq!(0, vector.null_count());
746
747            let output_arrow_array: ArrayRef = vector.to_arrow_array();
748            assert_eq!(&DataType::Binary, output_arrow_array.data_type());
749            assert_eq!(&assertion_vector.to_arrow_array(), &output_arrow_array);
750        }
751    }
752
753    #[test]
754    fn test_large_string_array_into_vector() {
755        let input_vec = vec!["a", "b"];
756        let assertion_array = LargeStringArray::from(input_vec.clone());
757
758        let large_string_array: ArrayRef = Arc::new(LargeStringArray::from(input_vec));
759        let vector = Helper::try_into_vector(large_string_array).unwrap();
760        assert_eq!(2, vector.len());
761        assert_eq!(0, vector.null_count());
762
763        let output_arrow_array: LargeStringArray = vector
764            .to_arrow_array()
765            .as_any()
766            .downcast_ref::<LargeStringArray>()
767            .unwrap()
768            .clone();
769        assert_eq!(&assertion_array, &output_arrow_array);
770    }
771
772    #[test]
773    fn test_try_from_scalar_time_value() {
774        let vector = Helper::try_from_scalar_value(ScalarValue::Time32Second(Some(42)), 3).unwrap();
775        assert_eq!(ConcreteDataType::time_second_datatype(), vector.data_type());
776        assert_eq!(3, vector.len());
777        for i in 0..vector.len() {
778            assert_eq!(Value::Time(Time::new_second(42)), vector.get(i));
779        }
780    }
781
782    #[test]
783    fn test_try_from_scalar_interval_value() {
784        let vector = Helper::try_from_scalar_value(
785            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new(1, 1, 2000))),
786            3,
787        )
788        .unwrap();
789
790        assert_eq!(
791            ConcreteDataType::interval_month_day_nano_datatype(),
792            vector.data_type()
793        );
794        assert_eq!(3, vector.len());
795        for i in 0..vector.len() {
796            assert_eq!(
797                Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 2000).into()),
798                vector.get(i)
799            );
800        }
801    }
802
803    fn check_try_from_row_to_vector(row: Vec<Value>, dt: &ConcreteDataType) {
804        let vector = Helper::try_from_row_into_vector(&row, dt).unwrap();
805        for (i, item) in row.iter().enumerate().take(vector.len()) {
806            assert_eq!(*item, vector.get(i));
807        }
808    }
809
810    fn check_into_and_from(array: impl Array + 'static) {
811        let array: ArrayRef = Arc::new(array);
812        let vector = Helper::try_into_vector(array.clone()).unwrap();
813        assert_eq!(&array, &vector.to_arrow_array());
814        let row: Vec<Value> = (0..array.len()).map(|i| vector.get(i)).collect();
815        let dt = vector.data_type();
816        check_try_from_row_to_vector(row, &dt);
817    }
818
819    #[test]
820    fn test_try_from_row_to_vector() {
821        check_into_and_from(NullArray::new(2));
822        check_into_and_from(BooleanArray::from(vec![true, false]));
823        check_into_and_from(Int8Array::from(vec![1, 2, 3]));
824        check_into_and_from(Int16Array::from(vec![1, 2, 3]));
825        check_into_and_from(Int32Array::from(vec![1, 2, 3]));
826        check_into_and_from(Int64Array::from(vec![1, 2, 3]));
827        check_into_and_from(UInt8Array::from(vec![1, 2, 3]));
828        check_into_and_from(UInt16Array::from(vec![1, 2, 3]));
829        check_into_and_from(UInt32Array::from(vec![1, 2, 3]));
830        check_into_and_from(UInt64Array::from(vec![1, 2, 3]));
831        check_into_and_from(Float32Array::from(vec![1.0, 2.0, 3.0]));
832        check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
833        check_into_and_from(StringArray::from(vec!["hello", "world"]));
834        check_into_and_from(Date32Array::from(vec![1, 2, 3]));
835
836        check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
837        check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));
838        check_into_and_from(TimestampMicrosecondArray::from(vec![1, 2, 3]));
839        check_into_and_from(TimestampNanosecondArray::from(vec![1, 2, 3]));
840        check_into_and_from(Time32SecondArray::from(vec![1, 2, 3]));
841        check_into_and_from(Time32MillisecondArray::from(vec![1, 2, 3]));
842        check_into_and_from(Time64MicrosecondArray::from(vec![1, 2, 3]));
843        check_into_and_from(Time64NanosecondArray::from(vec![1, 2, 3]));
844    }
845}