datatypes/vectors/
helper.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Vector helper functions, inspired by databend Series mod
16
17use std::any::Any;
18use std::sync::Arc;
19
20use arrow::array::{Array, ArrayRef, StringArray};
21use arrow::compute;
22use arrow::compute::kernels::comparison;
23use arrow::datatypes::{DataType as ArrowDataType, Int64Type, TimeUnit};
24use arrow_array::DictionaryArray;
25use arrow_schema::IntervalUnit;
26use datafusion_common::ScalarValue;
27use snafu::{OptionExt, ResultExt};
28
29use crate::data_type::ConcreteDataType;
30use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Result};
31use crate::prelude::DataType;
32use crate::scalars::{Scalar, ScalarVectorBuilder};
33use crate::value::{ListValue, ListValueRef, Value};
34use crate::vectors::{
35    BinaryVector, BooleanVector, ConstantVector, DateVector, Decimal128Vector, DictionaryVector,
36    DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
37    DurationSecondVector, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector,
38    Int8Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector,
39    ListVector, ListVectorBuilder, MutableVector, NullVector, StringVector, TimeMicrosecondVector,
40    TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
41    TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
42    UInt32Vector, UInt64Vector, UInt8Vector, Vector, VectorRef,
43};
44
/// Helper functions for `Vector`.
///
/// `Helper` is a unit struct used purely as a namespace: every helper in its `impl`
/// block is an associated function (e.g. `Helper::try_into_vector`), so no value of
/// this type ever needs to be constructed.
pub struct Helper;
47
48impl Helper {
49    /// Get a pointer to the underlying data of this vectors.
50    /// Can be useful for fast comparisons.
51    /// # Safety
52    /// Assumes that the `vector` is  T.
53    pub unsafe fn static_cast<T: Any>(vector: &VectorRef) -> &T {
54        let object = vector.as_ref();
55        debug_assert!(object.as_any().is::<T>());
56        &*(object as *const dyn Vector as *const T)
57    }
58
59    pub fn check_get_scalar<T: Scalar>(vector: &VectorRef) -> Result<&<T as Scalar>::VectorType> {
60        let arr = vector
61            .as_any()
62            .downcast_ref::<<T as Scalar>::VectorType>()
63            .with_context(|| error::UnknownVectorSnafu {
64                msg: format!(
65                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
66                    vector.vector_type_name(),
67                    std::any::type_name::<T>(),
68                ),
69            });
70        arr
71    }
72
73    pub fn check_get<T: 'static + Vector>(vector: &VectorRef) -> Result<&T> {
74        let arr = vector
75            .as_any()
76            .downcast_ref::<T>()
77            .with_context(|| error::UnknownVectorSnafu {
78                msg: format!(
79                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
80                    vector.vector_type_name(),
81                    std::any::type_name::<T>(),
82                ),
83            });
84        arr
85    }
86
87    pub fn check_get_mutable_vector<T: 'static + MutableVector>(
88        vector: &mut dyn MutableVector,
89    ) -> Result<&mut T> {
90        let ty = vector.data_type();
91        let arr = vector
92            .as_mut_any()
93            .downcast_mut()
94            .with_context(|| error::UnknownVectorSnafu {
95                msg: format!(
96                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
97                    ty,
98                    std::any::type_name::<T>(),
99                ),
100            });
101        arr
102    }
103
104    pub fn check_get_scalar_vector<T: Scalar>(
105        vector: &VectorRef,
106    ) -> Result<&<T as Scalar>::VectorType> {
107        let arr = vector
108            .as_any()
109            .downcast_ref::<<T as Scalar>::VectorType>()
110            .with_context(|| error::UnknownVectorSnafu {
111                msg: format!(
112                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
113                    vector.vector_type_name(),
114                    std::any::type_name::<T>(),
115                ),
116            });
117        arr
118    }
119
120    /// Try to cast an arrow scalar value into vector
121    pub fn try_from_scalar_value(value: ScalarValue, length: usize) -> Result<VectorRef> {
122        let vector = match value {
123            ScalarValue::Null => ConstantVector::new(Arc::new(NullVector::new(1)), length),
124            ScalarValue::Boolean(v) => {
125                ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
126            }
127            ScalarValue::Float32(v) => {
128                ConstantVector::new(Arc::new(Float32Vector::from(vec![v])), length)
129            }
130            ScalarValue::Float64(v) => {
131                ConstantVector::new(Arc::new(Float64Vector::from(vec![v])), length)
132            }
133            ScalarValue::Int8(v) => {
134                ConstantVector::new(Arc::new(Int8Vector::from(vec![v])), length)
135            }
136            ScalarValue::Int16(v) => {
137                ConstantVector::new(Arc::new(Int16Vector::from(vec![v])), length)
138            }
139            ScalarValue::Int32(v) => {
140                ConstantVector::new(Arc::new(Int32Vector::from(vec![v])), length)
141            }
142            ScalarValue::Int64(v) => {
143                ConstantVector::new(Arc::new(Int64Vector::from(vec![v])), length)
144            }
145            ScalarValue::UInt8(v) => {
146                ConstantVector::new(Arc::new(UInt8Vector::from(vec![v])), length)
147            }
148            ScalarValue::UInt16(v) => {
149                ConstantVector::new(Arc::new(UInt16Vector::from(vec![v])), length)
150            }
151            ScalarValue::UInt32(v) => {
152                ConstantVector::new(Arc::new(UInt32Vector::from(vec![v])), length)
153            }
154            ScalarValue::UInt64(v) => {
155                ConstantVector::new(Arc::new(UInt64Vector::from(vec![v])), length)
156            }
157            ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => {
158                ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
159            }
160            ScalarValue::Binary(v)
161            | ScalarValue::LargeBinary(v)
162            | ScalarValue::FixedSizeBinary(_, v) => {
163                ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
164            }
165            ScalarValue::List(array) => {
166                let item_type = ConcreteDataType::try_from(&array.value_type())?;
167                let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
168                let values = ScalarValue::convert_array_to_scalar_vec(array.as_ref())
169                    .context(ConvertArrowArrayToScalarsSnafu)?
170                    .into_iter()
171                    .flatten()
172                    .map(ScalarValue::try_into)
173                    .collect::<Result<Vec<Value>>>()?;
174                builder.push(Some(ListValueRef::Ref {
175                    val: &ListValue::new(values, item_type),
176                }));
177                let list_vector = builder.to_vector();
178                ConstantVector::new(list_vector, length)
179            }
180            ScalarValue::Date32(v) => {
181                ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
182            }
183            ScalarValue::TimestampSecond(v, _) => {
184                // Timezone is unimplemented now.
185                ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
186            }
187            ScalarValue::TimestampMillisecond(v, _) => {
188                // Timezone is unimplemented now.
189                ConstantVector::new(Arc::new(TimestampMillisecondVector::from(vec![v])), length)
190            }
191            ScalarValue::TimestampMicrosecond(v, _) => {
192                // Timezone is unimplemented now.
193                ConstantVector::new(Arc::new(TimestampMicrosecondVector::from(vec![v])), length)
194            }
195            ScalarValue::TimestampNanosecond(v, _) => {
196                // Timezone is unimplemented now.
197                ConstantVector::new(Arc::new(TimestampNanosecondVector::from(vec![v])), length)
198            }
199            ScalarValue::Time32Second(v) => {
200                ConstantVector::new(Arc::new(TimeSecondVector::from(vec![v])), length)
201            }
202            ScalarValue::Time32Millisecond(v) => {
203                ConstantVector::new(Arc::new(TimeMillisecondVector::from(vec![v])), length)
204            }
205            ScalarValue::Time64Microsecond(v) => {
206                ConstantVector::new(Arc::new(TimeMicrosecondVector::from(vec![v])), length)
207            }
208            ScalarValue::Time64Nanosecond(v) => {
209                ConstantVector::new(Arc::new(TimeNanosecondVector::from(vec![v])), length)
210            }
211            ScalarValue::IntervalYearMonth(v) => {
212                ConstantVector::new(Arc::new(IntervalYearMonthVector::from(vec![v])), length)
213            }
214            ScalarValue::IntervalDayTime(v) => {
215                ConstantVector::new(Arc::new(IntervalDayTimeVector::from(vec![v])), length)
216            }
217            ScalarValue::IntervalMonthDayNano(v) => {
218                ConstantVector::new(Arc::new(IntervalMonthDayNanoVector::from(vec![v])), length)
219            }
220            ScalarValue::DurationSecond(v) => {
221                ConstantVector::new(Arc::new(DurationSecondVector::from(vec![v])), length)
222            }
223            ScalarValue::DurationMillisecond(v) => {
224                ConstantVector::new(Arc::new(DurationMillisecondVector::from(vec![v])), length)
225            }
226            ScalarValue::DurationMicrosecond(v) => {
227                ConstantVector::new(Arc::new(DurationMicrosecondVector::from(vec![v])), length)
228            }
229            ScalarValue::DurationNanosecond(v) => {
230                ConstantVector::new(Arc::new(DurationNanosecondVector::from(vec![v])), length)
231            }
232            ScalarValue::Decimal128(v, p, s) => {
233                let vector = Decimal128Vector::from(vec![v]).with_precision_and_scale(p, s)?;
234                ConstantVector::new(Arc::new(vector), length)
235            }
236            ScalarValue::Decimal256(_, _, _)
237            | ScalarValue::Struct(_)
238            | ScalarValue::FixedSizeList(_)
239            | ScalarValue::LargeList(_)
240            | ScalarValue::Dictionary(_, _)
241            | ScalarValue::Union(_, _, _)
242            | ScalarValue::Float16(_)
243            | ScalarValue::Utf8View(_)
244            | ScalarValue::BinaryView(_)
245            | ScalarValue::Map(_)
246            | ScalarValue::Date64(_) => {
247                return error::ConversionSnafu {
248                    from: format!("Unsupported scalar value: {value}"),
249                }
250                .fail()
251            }
252        };
253
254        Ok(Arc::new(vector))
255    }
256
257    /// Try to cast an arrow array into vector
258    ///
259    /// # Panics
260    /// Panic if given arrow data type is not supported.
261    pub fn try_into_vector(array: impl AsRef<dyn Array>) -> Result<VectorRef> {
262        Ok(match array.as_ref().data_type() {
263            ArrowDataType::Null => Arc::new(NullVector::try_from_arrow_array(array)?),
264            ArrowDataType::Boolean => Arc::new(BooleanVector::try_from_arrow_array(array)?),
265            ArrowDataType::Binary => Arc::new(BinaryVector::try_from_arrow_array(array)?),
266            ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
267                let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Binary)
268                    .context(crate::error::ArrowComputeSnafu)?;
269                Arc::new(BinaryVector::try_from_arrow_array(array)?)
270            }
271            ArrowDataType::Int8 => Arc::new(Int8Vector::try_from_arrow_array(array)?),
272            ArrowDataType::Int16 => Arc::new(Int16Vector::try_from_arrow_array(array)?),
273            ArrowDataType::Int32 => Arc::new(Int32Vector::try_from_arrow_array(array)?),
274            ArrowDataType::Int64 => Arc::new(Int64Vector::try_from_arrow_array(array)?),
275            ArrowDataType::UInt8 => Arc::new(UInt8Vector::try_from_arrow_array(array)?),
276            ArrowDataType::UInt16 => Arc::new(UInt16Vector::try_from_arrow_array(array)?),
277            ArrowDataType::UInt32 => Arc::new(UInt32Vector::try_from_arrow_array(array)?),
278            ArrowDataType::UInt64 => Arc::new(UInt64Vector::try_from_arrow_array(array)?),
279            ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?),
280            ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?),
281            ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
282            ArrowDataType::LargeUtf8 => {
283                let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Utf8)
284                    .context(crate::error::ArrowComputeSnafu)?;
285                Arc::new(StringVector::try_from_arrow_array(array)?)
286            }
287            ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
288            ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
289            ArrowDataType::Timestamp(unit, _) => match unit {
290                TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
291                TimeUnit::Millisecond => {
292                    Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
293                }
294                TimeUnit::Microsecond => {
295                    Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
296                }
297                TimeUnit::Nanosecond => {
298                    Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
299                }
300            },
301            ArrowDataType::Time32(unit) => match unit {
302                TimeUnit::Second => Arc::new(TimeSecondVector::try_from_arrow_array(array)?),
303                TimeUnit::Millisecond => {
304                    Arc::new(TimeMillisecondVector::try_from_arrow_array(array)?)
305                }
306                // Arrow use time32 for second/millisecond.
307                _ => unreachable!(
308                    "unexpected arrow array datatype: {:?}",
309                    array.as_ref().data_type()
310                ),
311            },
312            ArrowDataType::Time64(unit) => match unit {
313                TimeUnit::Microsecond => {
314                    Arc::new(TimeMicrosecondVector::try_from_arrow_array(array)?)
315                }
316                TimeUnit::Nanosecond => {
317                    Arc::new(TimeNanosecondVector::try_from_arrow_array(array)?)
318                }
319                // Arrow use time64 for microsecond/nanosecond.
320                _ => unreachable!(
321                    "unexpected arrow array datatype: {:?}",
322                    array.as_ref().data_type()
323                ),
324            },
325            ArrowDataType::Interval(unit) => match unit {
326                IntervalUnit::YearMonth => {
327                    Arc::new(IntervalYearMonthVector::try_from_arrow_array(array)?)
328                }
329                IntervalUnit::DayTime => {
330                    Arc::new(IntervalDayTimeVector::try_from_arrow_array(array)?)
331                }
332                IntervalUnit::MonthDayNano => {
333                    Arc::new(IntervalMonthDayNanoVector::try_from_arrow_array(array)?)
334                }
335            },
336            ArrowDataType::Duration(unit) => match unit {
337                TimeUnit::Second => Arc::new(DurationSecondVector::try_from_arrow_array(array)?),
338                TimeUnit::Millisecond => {
339                    Arc::new(DurationMillisecondVector::try_from_arrow_array(array)?)
340                }
341                TimeUnit::Microsecond => {
342                    Arc::new(DurationMicrosecondVector::try_from_arrow_array(array)?)
343                }
344                TimeUnit::Nanosecond => {
345                    Arc::new(DurationNanosecondVector::try_from_arrow_array(array)?)
346                }
347            },
348            ArrowDataType::Decimal128(_, _) => {
349                Arc::new(Decimal128Vector::try_from_arrow_array(array)?)
350            }
351            ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int64) => {
352                let array = array
353                    .as_ref()
354                    .as_any()
355                    .downcast_ref::<DictionaryArray<Int64Type>>()
356                    .unwrap(); // Safety: the type is guarded by match arm condition
357                Arc::new(DictionaryVector::new(
358                    array.clone(),
359                    ConcreteDataType::try_from(value.as_ref())?,
360                )?)
361            }
362            ArrowDataType::Float16
363            | ArrowDataType::LargeList(_)
364            | ArrowDataType::FixedSizeList(_, _)
365            | ArrowDataType::Struct(_)
366            | ArrowDataType::Union(_, _)
367            | ArrowDataType::Dictionary(_, _)
368            | ArrowDataType::Decimal256(_, _)
369            | ArrowDataType::Map(_, _)
370            | ArrowDataType::RunEndEncoded(_, _)
371            | ArrowDataType::BinaryView
372            | ArrowDataType::Utf8View
373            | ArrowDataType::ListView(_)
374            | ArrowDataType::LargeListView(_)
375            | ArrowDataType::Date64 => {
376                return error::UnsupportedArrowTypeSnafu {
377                    arrow_type: array.as_ref().data_type().clone(),
378                }
379                .fail()
380            }
381        })
382    }
383
384    /// Try to cast an vec of values into vector, fail if type is not the same across all values.
385    pub fn try_from_row_into_vector(row: &[Value], dt: &ConcreteDataType) -> Result<VectorRef> {
386        let mut builder = dt.create_mutable_vector(row.len());
387        for val in row {
388            builder.try_push_value_ref(val.as_value_ref())?;
389        }
390        let vector = builder.to_vector();
391        Ok(vector)
392    }
393
394    /// Try to cast slice of `arrays` to vectors.
395    pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
396        arrays.iter().map(Self::try_into_vector).collect()
397    }
398
399    /// Perform SQL like operation on `names` and a scalar `s`.
400    pub fn like_utf8(names: Vec<String>, s: &str) -> Result<VectorRef> {
401        let array = StringArray::from(names);
402
403        let s = StringArray::new_scalar(s);
404        let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
405
406        let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
407        Helper::try_into_vector(result)
408    }
409
410    pub fn like_utf8_filter(names: Vec<String>, s: &str) -> Result<(VectorRef, BooleanVector)> {
411        let array = StringArray::from(names);
412        let s = StringArray::new_scalar(s);
413        let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
414        let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
415        let vector = Helper::try_into_vector(result)?;
416
417        Ok((vector, BooleanVector::from(filter)))
418    }
419}
420
421#[cfg(test)]
422mod tests {
423    use arrow::array::{
424        ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, Int32Array,
425        Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray, Time32MillisecondArray,
426        Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
427        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
428        TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
429    };
430    use arrow::buffer::Buffer;
431    use arrow::datatypes::{Int32Type, IntervalMonthDayNano};
432    use arrow_array::{BinaryArray, DictionaryArray, FixedSizeBinaryArray, LargeStringArray};
433    use arrow_schema::DataType;
434    use common_decimal::Decimal128;
435    use common_time::time::Time;
436    use common_time::timestamp::TimeUnit;
437    use common_time::{Date, Duration};
438
439    use super::*;
440    use crate::value::Value;
441    use crate::vectors::ConcreteDataType;
442
443    #[test]
444    fn test_try_into_vectors() {
445        let arrays: Vec<ArrayRef> = vec![
446            Arc::new(Int32Array::from(vec![1])),
447            Arc::new(Int32Array::from(vec![2])),
448            Arc::new(Int32Array::from(vec![3])),
449        ];
450        let vectors = Helper::try_into_vectors(&arrays).unwrap();
451        vectors.iter().for_each(|v| assert_eq!(1, v.len()));
452        assert_eq!(Value::Int32(1), vectors[0].get(0));
453        assert_eq!(Value::Int32(2), vectors[1].get(0));
454        assert_eq!(Value::Int32(3), vectors[2].get(0));
455    }
456
457    #[test]
458    fn test_try_into_date_vector() {
459        let vector = DateVector::from(vec![Some(1), Some(2), None]);
460        let arrow_array = vector.to_arrow_array();
461        assert_eq!(&ArrowDataType::Date32, arrow_array.data_type());
462        let vector_converted = Helper::try_into_vector(arrow_array).unwrap();
463        assert_eq!(vector.len(), vector_converted.len());
464        for i in 0..vector_converted.len() {
465            assert_eq!(vector.get(i), vector_converted.get(i));
466        }
467    }
468
469    #[test]
470    fn test_try_from_scalar_date_value() {
471        let vector = Helper::try_from_scalar_value(ScalarValue::Date32(Some(42)), 3).unwrap();
472        assert_eq!(ConcreteDataType::date_datatype(), vector.data_type());
473        assert_eq!(3, vector.len());
474        for i in 0..vector.len() {
475            assert_eq!(Value::Date(Date::new(42)), vector.get(i));
476        }
477    }
478
479    #[test]
480    fn test_try_from_scalar_duration_value() {
481        let vector =
482            Helper::try_from_scalar_value(ScalarValue::DurationSecond(Some(42)), 3).unwrap();
483        assert_eq!(
484            ConcreteDataType::duration_second_datatype(),
485            vector.data_type()
486        );
487        assert_eq!(3, vector.len());
488        for i in 0..vector.len() {
489            assert_eq!(
490                Value::Duration(Duration::new(42, TimeUnit::Second)),
491                vector.get(i)
492            );
493        }
494    }
495
496    #[test]
497    fn test_try_from_scalar_decimal128_value() {
498        let vector =
499            Helper::try_from_scalar_value(ScalarValue::Decimal128(Some(42), 3, 1), 3).unwrap();
500        assert_eq!(
501            ConcreteDataType::decimal128_datatype(3, 1),
502            vector.data_type()
503        );
504        assert_eq!(3, vector.len());
505        for i in 0..vector.len() {
506            assert_eq!(Value::Decimal128(Decimal128::new(42, 3, 1)), vector.get(i));
507        }
508    }
509
510    #[test]
511    fn test_try_from_list_value() {
512        let value = ScalarValue::List(ScalarValue::new_list(
513            &[ScalarValue::Int32(Some(1)), ScalarValue::Int32(Some(2))],
514            &ArrowDataType::Int32,
515            true,
516        ));
517        let vector = Helper::try_from_scalar_value(value, 3).unwrap();
518        assert_eq!(
519            ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
520            vector.data_type()
521        );
522        assert_eq!(3, vector.len());
523        for i in 0..vector.len() {
524            let v = vector.get(i);
525            let items = v.as_list().unwrap().unwrap().items();
526            assert_eq!(vec![Value::Int32(1), Value::Int32(2)], items);
527        }
528    }
529
530    #[test]
531    fn test_like_utf8() {
532        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
533            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
534            assert_eq!(*actual, StringVector::from(expected));
535        }
536
537        let names: Vec<String> = vec!["greptime", "hello", "public", "world"]
538            .into_iter()
539            .map(|x| x.to_string())
540            .collect();
541
542        let ret = Helper::like_utf8(names.clone(), "%ll%").unwrap();
543        assert_vector(vec!["hello"], &ret);
544
545        let ret = Helper::like_utf8(names.clone(), "%time").unwrap();
546        assert_vector(vec!["greptime"], &ret);
547
548        let ret = Helper::like_utf8(names.clone(), "%ld").unwrap();
549        assert_vector(vec!["world"], &ret);
550
551        let ret = Helper::like_utf8(names, "%").unwrap();
552        assert_vector(vec!["greptime", "hello", "public", "world"], &ret);
553    }
554
555    #[test]
556    fn test_like_utf8_filter() {
557        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
558            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
559            assert_eq!(*actual, StringVector::from(expected));
560        }
561
562        fn assert_filter(array: Vec<String>, s: &str, expected_filter: &BooleanVector) {
563            let array = StringArray::from(array);
564            let s = StringArray::new_scalar(s);
565            let actual_filter = comparison::like(&array, &s).unwrap();
566            assert_eq!(BooleanVector::from(actual_filter), *expected_filter);
567        }
568
569        let names: Vec<String> = vec!["greptime", "timeseries", "cloud", "database"]
570            .into_iter()
571            .map(|x| x.to_string())
572            .collect();
573
574        let (table, filter) = Helper::like_utf8_filter(names.clone(), "%ti%").unwrap();
575        assert_vector(vec!["greptime", "timeseries"], &table);
576        assert_filter(names.clone(), "%ti%", &filter);
577
578        let (tables, filter) = Helper::like_utf8_filter(names.clone(), "%lou").unwrap();
579        assert_vector(vec![], &tables);
580        assert_filter(names.clone(), "%lou", &filter);
581
582        let (tables, filter) = Helper::like_utf8_filter(names.clone(), "%d%").unwrap();
583        assert_vector(vec!["cloud", "database"], &tables);
584        assert_filter(names.clone(), "%d%", &filter);
585    }
586
587    fn check_try_into_vector(array: impl Array + 'static) {
588        let array: ArrayRef = Arc::new(array);
589        let vector = Helper::try_into_vector(array.clone()).unwrap();
590        assert_eq!(&array, &vector.to_arrow_array());
591    }
592
    /// Round-trips one array of each listed arrow type through `Helper::try_into_vector`
    /// and checks that a dictionary array with `Int8` keys is rejected.
    #[test]
    fn test_try_into_vector() {
        // Null, boolean, numeric, string and date arrays.
        check_try_into_vector(NullArray::new(2));
        check_try_into_vector(BooleanArray::from(vec![true, false]));
        check_try_into_vector(Int8Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int16Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int32Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int64Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt8Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt16Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt32Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt64Array::from(vec![1, 2, 3]));
        check_try_into_vector(Float32Array::from(vec![1.0, 2.0, 3.0]));
        check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
        check_try_into_vector(StringArray::from(vec!["hello", "world"]));
        check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
        // List array containing a null entry and a two-item list.
        let data = vec![None, Some(vec![Some(6), Some(7)])];
        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
        check_try_into_vector(list_array);
        // Timestamp and time arrays for every unit handled by the helper.
        check_try_into_vector(TimestampSecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampMillisecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampMicrosecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampNanosecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(Time32SecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(Time32MillisecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(Time64MicrosecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(Time64NanosecondArray::from(vec![1, 2, 3]));

        // Only dictionaries keyed by Int64 are supported, so Int8 keys must be an error.
        let values = StringArray::from_iter_values(["a", "b", "c"]);
        let keys = Int8Array::from_iter_values([0, 0, 1, 2]);
        let array: ArrayRef = Arc::new(DictionaryArray::try_new(keys, Arc::new(values)).unwrap());
        Helper::try_into_vector(array).unwrap_err();
    }
626
627    #[test]
628    fn test_try_binary_array_into_vector() {
629        let input_vec: Vec<&[u8]> = vec!["hello".as_bytes(), "world".as_bytes()];
630        let assertion_vector = BinaryVector::from(input_vec.clone());
631
632        let input_arrays: Vec<ArrayRef> = vec![
633            Arc::new(LargeBinaryArray::from(input_vec.clone())) as ArrayRef,
634            Arc::new(BinaryArray::from(input_vec.clone())) as ArrayRef,
635            Arc::new(FixedSizeBinaryArray::new(
636                5,
637                Buffer::from_vec("helloworld".as_bytes().to_vec()),
638                None,
639            )) as ArrayRef,
640        ];
641
642        for input_array in input_arrays {
643            let vector = Helper::try_into_vector(input_array).unwrap();
644
645            assert_eq!(2, vector.len());
646            assert_eq!(0, vector.null_count());
647
648            let output_arrow_array: ArrayRef = vector.to_arrow_array();
649            assert_eq!(&DataType::Binary, output_arrow_array.data_type());
650            assert_eq!(&assertion_vector.to_arrow_array(), &output_arrow_array);
651        }
652    }
653
654    #[test]
655    fn test_large_string_array_into_vector() {
656        let input_vec = vec!["a", "b"];
657        let assertion_array = StringArray::from(input_vec.clone());
658
659        let large_string_array: ArrayRef = Arc::new(LargeStringArray::from(input_vec));
660        let vector = Helper::try_into_vector(large_string_array).unwrap();
661        assert_eq!(2, vector.len());
662        assert_eq!(0, vector.null_count());
663
664        let output_arrow_array: StringArray = vector
665            .to_arrow_array()
666            .as_any()
667            .downcast_ref::<StringArray>()
668            .unwrap()
669            .clone();
670        assert_eq!(&assertion_array, &output_arrow_array);
671    }
672
673    #[test]
674    fn test_try_from_scalar_time_value() {
675        let vector = Helper::try_from_scalar_value(ScalarValue::Time32Second(Some(42)), 3).unwrap();
676        assert_eq!(ConcreteDataType::time_second_datatype(), vector.data_type());
677        assert_eq!(3, vector.len());
678        for i in 0..vector.len() {
679            assert_eq!(Value::Time(Time::new_second(42)), vector.get(i));
680        }
681    }
682
683    #[test]
684    fn test_try_from_scalar_interval_value() {
685        let vector = Helper::try_from_scalar_value(
686            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new(1, 1, 2000))),
687            3,
688        )
689        .unwrap();
690
691        assert_eq!(
692            ConcreteDataType::interval_month_day_nano_datatype(),
693            vector.data_type()
694        );
695        assert_eq!(3, vector.len());
696        for i in 0..vector.len() {
697            assert_eq!(
698                Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 2000).into()),
699                vector.get(i)
700            );
701        }
702    }
703
704    fn check_try_from_row_to_vector(row: Vec<Value>, dt: &ConcreteDataType) {
705        let vector = Helper::try_from_row_into_vector(&row, dt).unwrap();
706        for (i, item) in row.iter().enumerate().take(vector.len()) {
707            assert_eq!(*item, vector.get(i));
708        }
709    }
710
711    fn check_into_and_from(array: impl Array + 'static) {
712        let array: ArrayRef = Arc::new(array);
713        let vector = Helper::try_into_vector(array.clone()).unwrap();
714        assert_eq!(&array, &vector.to_arrow_array());
715        let row: Vec<Value> = (0..array.len()).map(|i| vector.get(i)).collect();
716        let dt = vector.data_type();
717        check_try_from_row_to_vector(row, &dt);
718    }
719
    /// For one array of each listed arrow type, checks the array -> vector -> row values
    /// -> vector path via `check_into_and_from`.
    #[test]
    fn test_try_from_row_to_vector() {
        // Null, boolean, numeric, string and date arrays.
        check_into_and_from(NullArray::new(2));
        check_into_and_from(BooleanArray::from(vec![true, false]));
        check_into_and_from(Int8Array::from(vec![1, 2, 3]));
        check_into_and_from(Int16Array::from(vec![1, 2, 3]));
        check_into_and_from(Int32Array::from(vec![1, 2, 3]));
        check_into_and_from(Int64Array::from(vec![1, 2, 3]));
        check_into_and_from(UInt8Array::from(vec![1, 2, 3]));
        check_into_and_from(UInt16Array::from(vec![1, 2, 3]));
        check_into_and_from(UInt32Array::from(vec![1, 2, 3]));
        check_into_and_from(UInt64Array::from(vec![1, 2, 3]));
        check_into_and_from(Float32Array::from(vec![1.0, 2.0, 3.0]));
        check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
        check_into_and_from(StringArray::from(vec!["hello", "world"]));
        check_into_and_from(Date32Array::from(vec![1, 2, 3]));

        // Timestamp and time arrays for every unit handled by the helper.
        check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
        check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));
        check_into_and_from(TimestampMicrosecondArray::from(vec![1, 2, 3]));
        check_into_and_from(TimestampNanosecondArray::from(vec![1, 2, 3]));
        check_into_and_from(Time32SecondArray::from(vec![1, 2, 3]));
        check_into_and_from(Time32MillisecondArray::from(vec![1, 2, 3]));
        check_into_and_from(Time64MicrosecondArray::from(vec![1, 2, 3]));
        check_into_and_from(Time64NanosecondArray::from(vec![1, 2, 3]));
    }
746}