datatypes/vectors/
helper.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Vector helper functions, inspired by databend Series mod
16
17use std::any::Any;
18use std::sync::Arc;
19
20use arrow::array::{Array, ArrayRef, StringArray};
21use arrow::compute;
22use arrow::compute::kernels::comparison;
23use arrow::datatypes::{DataType as ArrowDataType, Int64Type, TimeUnit};
24use arrow_array::{DictionaryArray, StructArray};
25use arrow_schema::IntervalUnit;
26use datafusion_common::ScalarValue;
27use snafu::{OptionExt, ResultExt};
28
29use crate::data_type::ConcreteDataType;
30use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Result};
31use crate::prelude::DataType;
32use crate::scalars::{Scalar, ScalarVectorBuilder};
33use crate::value::{ListValue, ListValueRef, Value};
34use crate::vectors::struct_vector::StructVector;
35use crate::vectors::{
36    BinaryVector, BooleanVector, ConstantVector, DateVector, Decimal128Vector, DictionaryVector,
37    DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
38    DurationSecondVector, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector,
39    Int8Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector,
40    ListVector, ListVectorBuilder, MutableVector, NullVector, StringVector, TimeMicrosecondVector,
41    TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
42    TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
43    UInt32Vector, UInt64Vector, UInt8Vector, Vector, VectorRef,
44};
45
/// Helper functions for `Vector`.
///
/// `Helper` is a unit struct used as a namespace: none of the functions in its
/// `impl` block take `self`, so they are called as `Helper::function(..)`.
pub struct Helper;
48
49impl Helper {
    /// Reinterprets the vector behind `vector` as a reference to the concrete type `T`
    /// without performing a checked downcast.
    ///
    /// Can be useful for fast comparisons.
    ///
    /// # Safety
    /// The caller must guarantee that the concrete type of `vector` is exactly `T`.
    /// The type is only verified through `debug_assert!`, so a mismatch is not detected
    /// when debug assertions are disabled.
    pub unsafe fn static_cast<T: Any>(vector: &VectorRef) -> &T {
        let object = vector.as_ref();
        debug_assert!(object.as_any().is::<T>());
        // SAFETY: the caller guarantees that `object` is a `T`, so the trait object's
        // data pointer can be read as a `*const T`.
        &*(object as *const dyn Vector as *const T)
    }
59
60    pub fn check_get_scalar<T: Scalar>(vector: &VectorRef) -> Result<&<T as Scalar>::VectorType> {
61        let arr = vector
62            .as_any()
63            .downcast_ref::<<T as Scalar>::VectorType>()
64            .with_context(|| error::UnknownVectorSnafu {
65                msg: format!(
66                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
67                    vector.vector_type_name(),
68                    std::any::type_name::<T>(),
69                ),
70            });
71        arr
72    }
73
74    pub fn check_get<T: 'static + Vector>(vector: &VectorRef) -> Result<&T> {
75        let arr = vector
76            .as_any()
77            .downcast_ref::<T>()
78            .with_context(|| error::UnknownVectorSnafu {
79                msg: format!(
80                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
81                    vector.vector_type_name(),
82                    std::any::type_name::<T>(),
83                ),
84            });
85        arr
86    }
87
88    pub fn check_get_mutable_vector<T: 'static + MutableVector>(
89        vector: &mut dyn MutableVector,
90    ) -> Result<&mut T> {
91        let ty = vector.data_type();
92        let arr = vector
93            .as_mut_any()
94            .downcast_mut()
95            .with_context(|| error::UnknownVectorSnafu {
96                msg: format!(
97                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
98                    ty,
99                    std::any::type_name::<T>(),
100                ),
101            });
102        arr
103    }
104
105    pub fn check_get_scalar_vector<T: Scalar>(
106        vector: &VectorRef,
107    ) -> Result<&<T as Scalar>::VectorType> {
108        let arr = vector
109            .as_any()
110            .downcast_ref::<<T as Scalar>::VectorType>()
111            .with_context(|| error::UnknownVectorSnafu {
112                msg: format!(
113                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
114                    vector.vector_type_name(),
115                    std::any::type_name::<T>(),
116                ),
117            });
118        arr
119    }
120
121    /// Try to cast an arrow scalar value into vector
122    pub fn try_from_scalar_value(value: ScalarValue, length: usize) -> Result<VectorRef> {
123        let vector = match value {
124            ScalarValue::Null => ConstantVector::new(Arc::new(NullVector::new(1)), length),
125            ScalarValue::Boolean(v) => {
126                ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
127            }
128            ScalarValue::Float32(v) => {
129                ConstantVector::new(Arc::new(Float32Vector::from(vec![v])), length)
130            }
131            ScalarValue::Float64(v) => {
132                ConstantVector::new(Arc::new(Float64Vector::from(vec![v])), length)
133            }
134            ScalarValue::Int8(v) => {
135                ConstantVector::new(Arc::new(Int8Vector::from(vec![v])), length)
136            }
137            ScalarValue::Int16(v) => {
138                ConstantVector::new(Arc::new(Int16Vector::from(vec![v])), length)
139            }
140            ScalarValue::Int32(v) => {
141                ConstantVector::new(Arc::new(Int32Vector::from(vec![v])), length)
142            }
143            ScalarValue::Int64(v) => {
144                ConstantVector::new(Arc::new(Int64Vector::from(vec![v])), length)
145            }
146            ScalarValue::UInt8(v) => {
147                ConstantVector::new(Arc::new(UInt8Vector::from(vec![v])), length)
148            }
149            ScalarValue::UInt16(v) => {
150                ConstantVector::new(Arc::new(UInt16Vector::from(vec![v])), length)
151            }
152            ScalarValue::UInt32(v) => {
153                ConstantVector::new(Arc::new(UInt32Vector::from(vec![v])), length)
154            }
155            ScalarValue::UInt64(v) => {
156                ConstantVector::new(Arc::new(UInt64Vector::from(vec![v])), length)
157            }
158            ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => {
159                ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
160            }
161            ScalarValue::Binary(v)
162            | ScalarValue::LargeBinary(v)
163            | ScalarValue::FixedSizeBinary(_, v) => {
164                ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
165            }
166            ScalarValue::List(array) => {
167                let item_type = ConcreteDataType::try_from(&array.value_type())?;
168                let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
169                let values = ScalarValue::convert_array_to_scalar_vec(array.as_ref())
170                    .context(ConvertArrowArrayToScalarsSnafu)?
171                    .into_iter()
172                    .flatten()
173                    .map(ScalarValue::try_into)
174                    .collect::<Result<Vec<Value>>>()?;
175                builder.push(Some(ListValueRef::Ref {
176                    val: &ListValue::new(values, item_type),
177                }));
178                let list_vector = builder.to_vector();
179                ConstantVector::new(list_vector, length)
180            }
181            ScalarValue::Date32(v) => {
182                ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
183            }
184            ScalarValue::TimestampSecond(v, _) => {
185                // Timezone is unimplemented now.
186                ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
187            }
188            ScalarValue::TimestampMillisecond(v, _) => {
189                // Timezone is unimplemented now.
190                ConstantVector::new(Arc::new(TimestampMillisecondVector::from(vec![v])), length)
191            }
192            ScalarValue::TimestampMicrosecond(v, _) => {
193                // Timezone is unimplemented now.
194                ConstantVector::new(Arc::new(TimestampMicrosecondVector::from(vec![v])), length)
195            }
196            ScalarValue::TimestampNanosecond(v, _) => {
197                // Timezone is unimplemented now.
198                ConstantVector::new(Arc::new(TimestampNanosecondVector::from(vec![v])), length)
199            }
200            ScalarValue::Time32Second(v) => {
201                ConstantVector::new(Arc::new(TimeSecondVector::from(vec![v])), length)
202            }
203            ScalarValue::Time32Millisecond(v) => {
204                ConstantVector::new(Arc::new(TimeMillisecondVector::from(vec![v])), length)
205            }
206            ScalarValue::Time64Microsecond(v) => {
207                ConstantVector::new(Arc::new(TimeMicrosecondVector::from(vec![v])), length)
208            }
209            ScalarValue::Time64Nanosecond(v) => {
210                ConstantVector::new(Arc::new(TimeNanosecondVector::from(vec![v])), length)
211            }
212            ScalarValue::IntervalYearMonth(v) => {
213                ConstantVector::new(Arc::new(IntervalYearMonthVector::from(vec![v])), length)
214            }
215            ScalarValue::IntervalDayTime(v) => {
216                ConstantVector::new(Arc::new(IntervalDayTimeVector::from(vec![v])), length)
217            }
218            ScalarValue::IntervalMonthDayNano(v) => {
219                ConstantVector::new(Arc::new(IntervalMonthDayNanoVector::from(vec![v])), length)
220            }
221            ScalarValue::DurationSecond(v) => {
222                ConstantVector::new(Arc::new(DurationSecondVector::from(vec![v])), length)
223            }
224            ScalarValue::DurationMillisecond(v) => {
225                ConstantVector::new(Arc::new(DurationMillisecondVector::from(vec![v])), length)
226            }
227            ScalarValue::DurationMicrosecond(v) => {
228                ConstantVector::new(Arc::new(DurationMicrosecondVector::from(vec![v])), length)
229            }
230            ScalarValue::DurationNanosecond(v) => {
231                ConstantVector::new(Arc::new(DurationNanosecondVector::from(vec![v])), length)
232            }
233            ScalarValue::Decimal128(v, p, s) => {
234                let vector = Decimal128Vector::from(vec![v]).with_precision_and_scale(p, s)?;
235                ConstantVector::new(Arc::new(vector), length)
236            }
237            ScalarValue::Decimal256(_, _, _)
238            | ScalarValue::Struct(_)
239            | ScalarValue::FixedSizeList(_)
240            | ScalarValue::LargeList(_)
241            | ScalarValue::Dictionary(_, _)
242            | ScalarValue::Union(_, _, _)
243            | ScalarValue::Float16(_)
244            | ScalarValue::Utf8View(_)
245            | ScalarValue::BinaryView(_)
246            | ScalarValue::Map(_)
247            | ScalarValue::Date64(_) => {
248                return error::ConversionSnafu {
249                    from: format!("Unsupported scalar value: {value}"),
250                }
251                .fail()
252            }
253        };
254
255        Ok(Arc::new(vector))
256    }
257
    /// Try to cast an arrow array into vector
    ///
    /// `LargeBinary` and `FixedSizeBinary` arrays are cast to `Binary`, and `LargeUtf8`
    /// arrays are cast to `Utf8`, before being wrapped. Dictionary arrays are only
    /// accepted when their key type is `Int64`.
    ///
    /// # Errors
    /// Returns an error built from `error::UnsupportedArrowTypeSnafu` for arrow data types
    /// listed in the final match arm, and propagates failures of the underlying casts
    /// and vector conversions.
    ///
    /// # Panics
    /// Panics if a `Time32` array carries a unit other than second/millisecond, or a
    /// `Time64` array carries a unit other than microsecond/nanosecond.
    pub fn try_into_vector(array: impl AsRef<dyn Array>) -> Result<VectorRef> {
        Ok(match array.as_ref().data_type() {
            ArrowDataType::Null => Arc::new(NullVector::try_from_arrow_array(array)?),
            ArrowDataType::Boolean => Arc::new(BooleanVector::try_from_arrow_array(array)?),
            ArrowDataType::Binary => Arc::new(BinaryVector::try_from_arrow_array(array)?),
            // Other binary layouts are normalized to `Binary` first.
            ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
                let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Binary)
                    .context(crate::error::ArrowComputeSnafu)?;
                Arc::new(BinaryVector::try_from_arrow_array(array)?)
            }
            ArrowDataType::Int8 => Arc::new(Int8Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int16 => Arc::new(Int16Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int32 => Arc::new(Int32Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int64 => Arc::new(Int64Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt8 => Arc::new(UInt8Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt16 => Arc::new(UInt16Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt32 => Arc::new(UInt32Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt64 => Arc::new(UInt64Vector::try_from_arrow_array(array)?),
            ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?),
            ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?),
            ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
            // `LargeUtf8` is normalized to `Utf8` first.
            ArrowDataType::LargeUtf8 => {
                let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Utf8)
                    .context(crate::error::ArrowComputeSnafu)?;
                Arc::new(StringVector::try_from_arrow_array(array)?)
            }
            ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
            ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
            // The timezone part of the timestamp type is ignored; only the unit is used.
            ArrowDataType::Timestamp(unit, _) => match unit {
                TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Microsecond => {
                    Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Time32(unit) => match unit {
                TimeUnit::Second => Arc::new(TimeSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(TimeMillisecondVector::try_from_arrow_array(array)?)
                }
                // Arrow use time32 for second/millisecond.
                _ => unreachable!(
                    "unexpected arrow array datatype: {:?}",
                    array.as_ref().data_type()
                ),
            },
            ArrowDataType::Time64(unit) => match unit {
                TimeUnit::Microsecond => {
                    Arc::new(TimeMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(TimeNanosecondVector::try_from_arrow_array(array)?)
                }
                // Arrow use time64 for microsecond/nanosecond.
                _ => unreachable!(
                    "unexpected arrow array datatype: {:?}",
                    array.as_ref().data_type()
                ),
            },
            ArrowDataType::Interval(unit) => match unit {
                IntervalUnit::YearMonth => {
                    Arc::new(IntervalYearMonthVector::try_from_arrow_array(array)?)
                }
                IntervalUnit::DayTime => {
                    Arc::new(IntervalDayTimeVector::try_from_arrow_array(array)?)
                }
                IntervalUnit::MonthDayNano => {
                    Arc::new(IntervalMonthDayNanoVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Duration(unit) => match unit {
                TimeUnit::Second => Arc::new(DurationSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(DurationMillisecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Microsecond => {
                    Arc::new(DurationMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(DurationNanosecondVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Decimal128(_, _) => {
                Arc::new(Decimal128Vector::try_from_arrow_array(array)?)
            }
            // Only dictionaries keyed by `Int64` are supported; other key types fall
            // through to the unsupported arm below.
            ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int64) => {
                let array = array
                    .as_ref()
                    .as_any()
                    .downcast_ref::<DictionaryArray<Int64Type>>()
                    .unwrap(); // Safety: the type is guarded by match arm condition
                Arc::new(DictionaryVector::new(
                    array.clone(),
                    ConcreteDataType::try_from(value.as_ref())?,
                )?)
            }

            ArrowDataType::Struct(_fields) => {
                // The match arm guarantees the data type is `Struct`; the `unwrap` relies on
                // such arrays being `StructArray`s.
                let array = array
                    .as_ref()
                    .as_any()
                    .downcast_ref::<StructArray>()
                    .unwrap();
                Arc::new(StructVector::new(array.clone())?)
            }
            // Arrow types without a vector counterpart are reported as an error.
            ArrowDataType::Float16
            | ArrowDataType::LargeList(_)
            | ArrowDataType::FixedSizeList(_, _)
            | ArrowDataType::Union(_, _)
            | ArrowDataType::Dictionary(_, _)
            | ArrowDataType::Decimal256(_, _)
            | ArrowDataType::Map(_, _)
            | ArrowDataType::RunEndEncoded(_, _)
            | ArrowDataType::BinaryView
            | ArrowDataType::Utf8View
            | ArrowDataType::ListView(_)
            | ArrowDataType::LargeListView(_)
            | ArrowDataType::Date64 => {
                return error::UnsupportedArrowTypeSnafu {
                    arrow_type: array.as_ref().data_type().clone(),
                }
                .fail()
            }
        })
    }
392
393    /// Try to cast an vec of values into vector, fail if type is not the same across all values.
394    pub fn try_from_row_into_vector(row: &[Value], dt: &ConcreteDataType) -> Result<VectorRef> {
395        let mut builder = dt.create_mutable_vector(row.len());
396        for val in row {
397            builder.try_push_value_ref(val.as_value_ref())?;
398        }
399        let vector = builder.to_vector();
400        Ok(vector)
401    }
402
403    /// Try to cast slice of `arrays` to vectors.
404    pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
405        arrays.iter().map(Self::try_into_vector).collect()
406    }
407
408    /// Perform SQL like operation on `names` and a scalar `s`.
409    pub fn like_utf8(names: Vec<String>, s: &str) -> Result<VectorRef> {
410        let array = StringArray::from(names);
411
412        let s = StringArray::new_scalar(s);
413        let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
414
415        let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
416        Helper::try_into_vector(result)
417    }
418
419    pub fn like_utf8_filter(names: Vec<String>, s: &str) -> Result<(VectorRef, BooleanVector)> {
420        let array = StringArray::from(names);
421        let s = StringArray::new_scalar(s);
422        let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
423        let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
424        let vector = Helper::try_into_vector(result)?;
425
426        Ok((vector, BooleanVector::from(filter)))
427    }
428}
429
430#[cfg(test)]
431mod tests {
432    use arrow::array::{
433        ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, Int32Array,
434        Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray, Time32MillisecondArray,
435        Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
436        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
437        TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
438    };
439    use arrow::buffer::Buffer;
440    use arrow::datatypes::{Int32Type, IntervalMonthDayNano};
441    use arrow_array::{BinaryArray, DictionaryArray, FixedSizeBinaryArray, LargeStringArray};
442    use arrow_schema::DataType;
443    use common_decimal::Decimal128;
444    use common_time::time::Time;
445    use common_time::timestamp::TimeUnit;
446    use common_time::{Date, Duration};
447
448    use super::*;
449    use crate::value::Value;
450    use crate::vectors::ConcreteDataType;
451
452    #[test]
453    fn test_try_into_vectors() {
454        let arrays: Vec<ArrayRef> = vec![
455            Arc::new(Int32Array::from(vec![1])),
456            Arc::new(Int32Array::from(vec![2])),
457            Arc::new(Int32Array::from(vec![3])),
458        ];
459        let vectors = Helper::try_into_vectors(&arrays).unwrap();
460        vectors.iter().for_each(|v| assert_eq!(1, v.len()));
461        assert_eq!(Value::Int32(1), vectors[0].get(0));
462        assert_eq!(Value::Int32(2), vectors[1].get(0));
463        assert_eq!(Value::Int32(3), vectors[2].get(0));
464    }
465
466    #[test]
467    fn test_try_into_date_vector() {
468        let vector = DateVector::from(vec![Some(1), Some(2), None]);
469        let arrow_array = vector.to_arrow_array();
470        assert_eq!(&ArrowDataType::Date32, arrow_array.data_type());
471        let vector_converted = Helper::try_into_vector(arrow_array).unwrap();
472        assert_eq!(vector.len(), vector_converted.len());
473        for i in 0..vector_converted.len() {
474            assert_eq!(vector.get(i), vector_converted.get(i));
475        }
476    }
477
478    #[test]
479    fn test_try_from_scalar_date_value() {
480        let vector = Helper::try_from_scalar_value(ScalarValue::Date32(Some(42)), 3).unwrap();
481        assert_eq!(ConcreteDataType::date_datatype(), vector.data_type());
482        assert_eq!(3, vector.len());
483        for i in 0..vector.len() {
484            assert_eq!(Value::Date(Date::new(42)), vector.get(i));
485        }
486    }
487
488    #[test]
489    fn test_try_from_scalar_duration_value() {
490        let vector =
491            Helper::try_from_scalar_value(ScalarValue::DurationSecond(Some(42)), 3).unwrap();
492        assert_eq!(
493            ConcreteDataType::duration_second_datatype(),
494            vector.data_type()
495        );
496        assert_eq!(3, vector.len());
497        for i in 0..vector.len() {
498            assert_eq!(
499                Value::Duration(Duration::new(42, TimeUnit::Second)),
500                vector.get(i)
501            );
502        }
503    }
504
505    #[test]
506    fn test_try_from_scalar_decimal128_value() {
507        let vector =
508            Helper::try_from_scalar_value(ScalarValue::Decimal128(Some(42), 3, 1), 3).unwrap();
509        assert_eq!(
510            ConcreteDataType::decimal128_datatype(3, 1),
511            vector.data_type()
512        );
513        assert_eq!(3, vector.len());
514        for i in 0..vector.len() {
515            assert_eq!(Value::Decimal128(Decimal128::new(42, 3, 1)), vector.get(i));
516        }
517    }
518
519    #[test]
520    fn test_try_from_list_value() {
521        let value = ScalarValue::List(ScalarValue::new_list(
522            &[ScalarValue::Int32(Some(1)), ScalarValue::Int32(Some(2))],
523            &ArrowDataType::Int32,
524            true,
525        ));
526        let vector = Helper::try_from_scalar_value(value, 3).unwrap();
527        assert_eq!(
528            ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
529            vector.data_type()
530        );
531        assert_eq!(3, vector.len());
532        for i in 0..vector.len() {
533            let v = vector.get(i);
534            let items = v.as_list().unwrap().unwrap().items();
535            assert_eq!(vec![Value::Int32(1), Value::Int32(2)], items);
536        }
537    }
538
539    #[test]
540    fn test_like_utf8() {
541        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
542            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
543            assert_eq!(*actual, StringVector::from(expected));
544        }
545
546        let names: Vec<String> = vec!["greptime", "hello", "public", "world"]
547            .into_iter()
548            .map(|x| x.to_string())
549            .collect();
550
551        let ret = Helper::like_utf8(names.clone(), "%ll%").unwrap();
552        assert_vector(vec!["hello"], &ret);
553
554        let ret = Helper::like_utf8(names.clone(), "%time").unwrap();
555        assert_vector(vec!["greptime"], &ret);
556
557        let ret = Helper::like_utf8(names.clone(), "%ld").unwrap();
558        assert_vector(vec!["world"], &ret);
559
560        let ret = Helper::like_utf8(names, "%").unwrap();
561        assert_vector(vec!["greptime", "hello", "public", "world"], &ret);
562    }
563
564    #[test]
565    fn test_like_utf8_filter() {
566        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
567            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
568            assert_eq!(*actual, StringVector::from(expected));
569        }
570
571        fn assert_filter(array: Vec<String>, s: &str, expected_filter: &BooleanVector) {
572            let array = StringArray::from(array);
573            let s = StringArray::new_scalar(s);
574            let actual_filter = comparison::like(&array, &s).unwrap();
575            assert_eq!(BooleanVector::from(actual_filter), *expected_filter);
576        }
577
578        let names: Vec<String> = vec!["greptime", "timeseries", "cloud", "database"]
579            .into_iter()
580            .map(|x| x.to_string())
581            .collect();
582
583        let (table, filter) = Helper::like_utf8_filter(names.clone(), "%ti%").unwrap();
584        assert_vector(vec!["greptime", "timeseries"], &table);
585        assert_filter(names.clone(), "%ti%", &filter);
586
587        let (tables, filter) = Helper::like_utf8_filter(names.clone(), "%lou").unwrap();
588        assert_vector(vec![], &tables);
589        assert_filter(names.clone(), "%lou", &filter);
590
591        let (tables, filter) = Helper::like_utf8_filter(names.clone(), "%d%").unwrap();
592        assert_vector(vec!["cloud", "database"], &tables);
593        assert_filter(names.clone(), "%d%", &filter);
594    }
595
596    fn check_try_into_vector(array: impl Array + 'static) {
597        let array: ArrayRef = Arc::new(array);
598        let vector = Helper::try_into_vector(array.clone()).unwrap();
599        assert_eq!(&array, &vector.to_arrow_array());
600    }
601
    /// Round-trips one array of each supported arrow type through `try_into_vector`,
    /// then checks that a dictionary array with `Int8` keys is rejected.
    #[test]
    fn test_try_into_vector() {
        check_try_into_vector(NullArray::new(2));
        check_try_into_vector(BooleanArray::from(vec![true, false]));
        check_try_into_vector(Int8Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int16Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int32Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int64Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt8Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt16Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt32Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt64Array::from(vec![1, 2, 3]));
        check_try_into_vector(Float32Array::from(vec![1.0, 2.0, 3.0]));
        check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
        check_try_into_vector(StringArray::from(vec!["hello", "world"]));
        check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
        // A list array containing a null row and a two-element row.
        let data = vec![None, Some(vec![Some(6), Some(7)])];
        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
        check_try_into_vector(list_array);
        check_try_into_vector(TimestampSecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampMillisecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampMicrosecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampNanosecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(Time32SecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(Time32MillisecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(Time64MicrosecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(Time64NanosecondArray::from(vec![1, 2, 3]));

        // Dictionaries keyed by anything other than `Int64` must be reported as an error.
        let values = StringArray::from_iter_values(["a", "b", "c"]);
        let keys = Int8Array::from_iter_values([0, 0, 1, 2]);
        let array: ArrayRef = Arc::new(DictionaryArray::try_new(keys, Arc::new(values)).unwrap());
        Helper::try_into_vector(array).unwrap_err();
    }
635
636    #[test]
637    fn test_try_binary_array_into_vector() {
638        let input_vec: Vec<&[u8]> = vec!["hello".as_bytes(), "world".as_bytes()];
639        let assertion_vector = BinaryVector::from(input_vec.clone());
640
641        let input_arrays: Vec<ArrayRef> = vec![
642            Arc::new(LargeBinaryArray::from(input_vec.clone())) as ArrayRef,
643            Arc::new(BinaryArray::from(input_vec.clone())) as ArrayRef,
644            Arc::new(FixedSizeBinaryArray::new(
645                5,
646                Buffer::from_vec("helloworld".as_bytes().to_vec()),
647                None,
648            )) as ArrayRef,
649        ];
650
651        for input_array in input_arrays {
652            let vector = Helper::try_into_vector(input_array).unwrap();
653
654            assert_eq!(2, vector.len());
655            assert_eq!(0, vector.null_count());
656
657            let output_arrow_array: ArrayRef = vector.to_arrow_array();
658            assert_eq!(&DataType::Binary, output_arrow_array.data_type());
659            assert_eq!(&assertion_vector.to_arrow_array(), &output_arrow_array);
660        }
661    }
662
663    #[test]
664    fn test_large_string_array_into_vector() {
665        let input_vec = vec!["a", "b"];
666        let assertion_array = StringArray::from(input_vec.clone());
667
668        let large_string_array: ArrayRef = Arc::new(LargeStringArray::from(input_vec));
669        let vector = Helper::try_into_vector(large_string_array).unwrap();
670        assert_eq!(2, vector.len());
671        assert_eq!(0, vector.null_count());
672
673        let output_arrow_array: StringArray = vector
674            .to_arrow_array()
675            .as_any()
676            .downcast_ref::<StringArray>()
677            .unwrap()
678            .clone();
679        assert_eq!(&assertion_array, &output_arrow_array);
680    }
681
682    #[test]
683    fn test_try_from_scalar_time_value() {
684        let vector = Helper::try_from_scalar_value(ScalarValue::Time32Second(Some(42)), 3).unwrap();
685        assert_eq!(ConcreteDataType::time_second_datatype(), vector.data_type());
686        assert_eq!(3, vector.len());
687        for i in 0..vector.len() {
688            assert_eq!(Value::Time(Time::new_second(42)), vector.get(i));
689        }
690    }
691
692    #[test]
693    fn test_try_from_scalar_interval_value() {
694        let vector = Helper::try_from_scalar_value(
695            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new(1, 1, 2000))),
696            3,
697        )
698        .unwrap();
699
700        assert_eq!(
701            ConcreteDataType::interval_month_day_nano_datatype(),
702            vector.data_type()
703        );
704        assert_eq!(3, vector.len());
705        for i in 0..vector.len() {
706            assert_eq!(
707                Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 2000).into()),
708                vector.get(i)
709            );
710        }
711    }
712
713    fn check_try_from_row_to_vector(row: Vec<Value>, dt: &ConcreteDataType) {
714        let vector = Helper::try_from_row_into_vector(&row, dt).unwrap();
715        for (i, item) in row.iter().enumerate().take(vector.len()) {
716            assert_eq!(*item, vector.get(i));
717        }
718    }
719
720    fn check_into_and_from(array: impl Array + 'static) {
721        let array: ArrayRef = Arc::new(array);
722        let vector = Helper::try_into_vector(array.clone()).unwrap();
723        assert_eq!(&array, &vector.to_arrow_array());
724        let row: Vec<Value> = (0..array.len()).map(|i| vector.get(i)).collect();
725        let dt = vector.data_type();
726        check_try_from_row_to_vector(row, &dt);
727    }
728
    /// Runs `check_into_and_from` over one array of each primitive, string, date,
    /// timestamp and time type.
    #[test]
    fn test_try_from_row_to_vector() {
        check_into_and_from(NullArray::new(2));
        check_into_and_from(BooleanArray::from(vec![true, false]));
        check_into_and_from(Int8Array::from(vec![1, 2, 3]));
        check_into_and_from(Int16Array::from(vec![1, 2, 3]));
        check_into_and_from(Int32Array::from(vec![1, 2, 3]));
        check_into_and_from(Int64Array::from(vec![1, 2, 3]));
        check_into_and_from(UInt8Array::from(vec![1, 2, 3]));
        check_into_and_from(UInt16Array::from(vec![1, 2, 3]));
        check_into_and_from(UInt32Array::from(vec![1, 2, 3]));
        check_into_and_from(UInt64Array::from(vec![1, 2, 3]));
        check_into_and_from(Float32Array::from(vec![1.0, 2.0, 3.0]));
        check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
        check_into_and_from(StringArray::from(vec!["hello", "world"]));
        check_into_and_from(Date32Array::from(vec![1, 2, 3]));

        // Timestamp and time-of-day arrays, one per supported unit.
        check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
        check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));
        check_into_and_from(TimestampMicrosecondArray::from(vec![1, 2, 3]));
        check_into_and_from(TimestampNanosecondArray::from(vec![1, 2, 3]));
        check_into_and_from(Time32SecondArray::from(vec![1, 2, 3]));
        check_into_and_from(Time32MillisecondArray::from(vec![1, 2, 3]));
        check_into_and_from(Time64MicrosecondArray::from(vec![1, 2, 3]));
        check_into_and_from(Time64NanosecondArray::from(vec![1, 2, 3]));
    }
755}