datatypes/vectors/
helper.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Vector helper functions, inspired by databend Series mod
16
17use std::any::Any;
18use std::sync::Arc;
19
20use arrow::array::{Array, ArrayRef, StringArray};
21use arrow::compute;
22use arrow::compute::kernels::comparison;
23use arrow::datatypes::{
24    DataType as ArrowDataType, Int16Type, Int32Type, Int64Type, Int8Type, TimeUnit, UInt16Type,
25    UInt32Type, UInt64Type, UInt8Type,
26};
27use arrow_array::{DictionaryArray, StructArray};
28use arrow_schema::IntervalUnit;
29use datafusion_common::ScalarValue;
30use snafu::{OptionExt, ResultExt};
31
32use crate::data_type::ConcreteDataType;
33use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Result};
34use crate::prelude::DataType;
35use crate::scalars::{Scalar, ScalarVectorBuilder};
36use crate::value::{ListValue, ListValueRef, Value};
37use crate::vectors::struct_vector::StructVector;
38use crate::vectors::{
39    BinaryVector, BooleanVector, ConstantVector, DateVector, Decimal128Vector, DictionaryVector,
40    DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
41    DurationSecondVector, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector,
42    Int8Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector,
43    ListVector, ListVectorBuilder, MutableVector, NullVector, StringVector, TimeMicrosecondVector,
44    TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
45    TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
46    UInt32Vector, UInt64Vector, UInt8Vector, Vector, VectorRef,
47};
48
/// Helper functions for `Vector`.
///
/// `Helper` is a unit struct used purely as a namespace: every helper is an
/// associated function that takes no `self`.
pub struct Helper;
51
52impl Helper {
53    /// Get a pointer to the underlying data of this vectors.
54    /// Can be useful for fast comparisons.
55    /// # Safety
56    /// Assumes that the `vector` is  T.
57    pub unsafe fn static_cast<T: Any>(vector: &VectorRef) -> &T {
58        let object = vector.as_ref();
59        debug_assert!(object.as_any().is::<T>());
60        &*(object as *const dyn Vector as *const T)
61    }
62
63    pub fn check_get_scalar<T: Scalar>(vector: &VectorRef) -> Result<&<T as Scalar>::VectorType> {
64        let arr = vector
65            .as_any()
66            .downcast_ref::<<T as Scalar>::VectorType>()
67            .with_context(|| error::UnknownVectorSnafu {
68                msg: format!(
69                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
70                    vector.vector_type_name(),
71                    std::any::type_name::<T>(),
72                ),
73            });
74        arr
75    }
76
77    pub fn check_get<T: 'static + Vector>(vector: &VectorRef) -> Result<&T> {
78        let arr = vector
79            .as_any()
80            .downcast_ref::<T>()
81            .with_context(|| error::UnknownVectorSnafu {
82                msg: format!(
83                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
84                    vector.vector_type_name(),
85                    std::any::type_name::<T>(),
86                ),
87            });
88        arr
89    }
90
91    pub fn check_get_mutable_vector<T: 'static + MutableVector>(
92        vector: &mut dyn MutableVector,
93    ) -> Result<&mut T> {
94        let ty = vector.data_type();
95        let arr = vector
96            .as_mut_any()
97            .downcast_mut()
98            .with_context(|| error::UnknownVectorSnafu {
99                msg: format!(
100                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
101                    ty,
102                    std::any::type_name::<T>(),
103                ),
104            });
105        arr
106    }
107
108    pub fn check_get_scalar_vector<T: Scalar>(
109        vector: &VectorRef,
110    ) -> Result<&<T as Scalar>::VectorType> {
111        let arr = vector
112            .as_any()
113            .downcast_ref::<<T as Scalar>::VectorType>()
114            .with_context(|| error::UnknownVectorSnafu {
115                msg: format!(
116                    "downcast vector error, vector type: {:?}, expected vector: {:?}",
117                    vector.vector_type_name(),
118                    std::any::type_name::<T>(),
119                ),
120            });
121        arr
122    }
123
    /// Try to cast an arrow scalar value into vector
    ///
    /// For every supported variant the scalar is placed into a one-element inner vector,
    /// which is then wrapped into a [`ConstantVector`] created with the given `length`.
    ///
    /// Notable conversions visible in the match below:
    /// - `Float16` is converted to `f32` and stored in a `Float32Vector`.
    /// - `Utf8` and `LargeUtf8` both become a `StringVector`.
    /// - `Binary`, `LargeBinary` and `FixedSizeBinary` all become a `BinaryVector`.
    /// - The timezone carried by timestamp scalars is ignored.
    ///
    /// # Errors
    /// Returns a conversion error for the scalar variants listed in the last match arm,
    /// and propagates any error raised while converting a list scalar's item type or
    /// elements, or while applying precision and scale to a decimal.
    pub fn try_from_scalar_value(value: ScalarValue, length: usize) -> Result<VectorRef> {
        let vector = match value {
            ScalarValue::Null => ConstantVector::new(Arc::new(NullVector::new(1)), length),
            ScalarValue::Boolean(v) => {
                ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
            }
            // Half-precision floats are widened to `f32`.
            ScalarValue::Float16(v) => ConstantVector::new(
                Arc::new(Float32Vector::from(vec![v.map(f32::from)])),
                length,
            ),
            ScalarValue::Float32(v) => {
                ConstantVector::new(Arc::new(Float32Vector::from(vec![v])), length)
            }
            ScalarValue::Float64(v) => {
                ConstantVector::new(Arc::new(Float64Vector::from(vec![v])), length)
            }
            ScalarValue::Int8(v) => {
                ConstantVector::new(Arc::new(Int8Vector::from(vec![v])), length)
            }
            ScalarValue::Int16(v) => {
                ConstantVector::new(Arc::new(Int16Vector::from(vec![v])), length)
            }
            ScalarValue::Int32(v) => {
                ConstantVector::new(Arc::new(Int32Vector::from(vec![v])), length)
            }
            ScalarValue::Int64(v) => {
                ConstantVector::new(Arc::new(Int64Vector::from(vec![v])), length)
            }
            ScalarValue::UInt8(v) => {
                ConstantVector::new(Arc::new(UInt8Vector::from(vec![v])), length)
            }
            ScalarValue::UInt16(v) => {
                ConstantVector::new(Arc::new(UInt16Vector::from(vec![v])), length)
            }
            ScalarValue::UInt32(v) => {
                ConstantVector::new(Arc::new(UInt32Vector::from(vec![v])), length)
            }
            ScalarValue::UInt64(v) => {
                ConstantVector::new(Arc::new(UInt64Vector::from(vec![v])), length)
            }
            // Both string flavors share the same vector type.
            ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => {
                ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
            }
            // All binary flavors share the same vector type; the fixed size is dropped.
            ScalarValue::Binary(v)
            | ScalarValue::LargeBinary(v)
            | ScalarValue::FixedSizeBinary(_, v) => {
                ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
            }
            ScalarValue::List(array) => {
                let item_type = ConcreteDataType::try_from(&array.value_type())?;
                let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
                // Convert the arrow array back to scalars, flatten the nested result and
                // turn every scalar into a `Value`; the first failure aborts the conversion.
                let values = ScalarValue::convert_array_to_scalar_vec(array.as_ref())
                    .context(ConvertArrowArrayToScalarsSnafu)?
                    .into_iter()
                    .flatten()
                    .map(ScalarValue::try_into)
                    .collect::<Result<Vec<Value>>>()?;
                // The collected values form the single list element of the inner vector.
                builder.push(Some(ListValueRef::Ref {
                    val: &ListValue::new(values, item_type),
                }));
                let list_vector = builder.to_vector();
                ConstantVector::new(list_vector, length)
            }
            ScalarValue::Date32(v) => {
                ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
            }
            ScalarValue::TimestampSecond(v, _) => {
                // Timezone is unimplemented now.
                ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
            }
            ScalarValue::TimestampMillisecond(v, _) => {
                // Timezone is unimplemented now.
                ConstantVector::new(Arc::new(TimestampMillisecondVector::from(vec![v])), length)
            }
            ScalarValue::TimestampMicrosecond(v, _) => {
                // Timezone is unimplemented now.
                ConstantVector::new(Arc::new(TimestampMicrosecondVector::from(vec![v])), length)
            }
            ScalarValue::TimestampNanosecond(v, _) => {
                // Timezone is unimplemented now.
                ConstantVector::new(Arc::new(TimestampNanosecondVector::from(vec![v])), length)
            }
            ScalarValue::Time32Second(v) => {
                ConstantVector::new(Arc::new(TimeSecondVector::from(vec![v])), length)
            }
            ScalarValue::Time32Millisecond(v) => {
                ConstantVector::new(Arc::new(TimeMillisecondVector::from(vec![v])), length)
            }
            ScalarValue::Time64Microsecond(v) => {
                ConstantVector::new(Arc::new(TimeMicrosecondVector::from(vec![v])), length)
            }
            ScalarValue::Time64Nanosecond(v) => {
                ConstantVector::new(Arc::new(TimeNanosecondVector::from(vec![v])), length)
            }
            ScalarValue::IntervalYearMonth(v) => {
                ConstantVector::new(Arc::new(IntervalYearMonthVector::from(vec![v])), length)
            }
            ScalarValue::IntervalDayTime(v) => {
                ConstantVector::new(Arc::new(IntervalDayTimeVector::from(vec![v])), length)
            }
            ScalarValue::IntervalMonthDayNano(v) => {
                ConstantVector::new(Arc::new(IntervalMonthDayNanoVector::from(vec![v])), length)
            }
            ScalarValue::DurationSecond(v) => {
                ConstantVector::new(Arc::new(DurationSecondVector::from(vec![v])), length)
            }
            ScalarValue::DurationMillisecond(v) => {
                ConstantVector::new(Arc::new(DurationMillisecondVector::from(vec![v])), length)
            }
            ScalarValue::DurationMicrosecond(v) => {
                ConstantVector::new(Arc::new(DurationMicrosecondVector::from(vec![v])), length)
            }
            ScalarValue::DurationNanosecond(v) => {
                ConstantVector::new(Arc::new(DurationNanosecondVector::from(vec![v])), length)
            }
            ScalarValue::Decimal128(v, p, s) => {
                // Carry the scalar's precision and scale over to the vector.
                let vector = Decimal128Vector::from(vec![v]).with_precision_and_scale(p, s)?;
                ConstantVector::new(Arc::new(vector), length)
            }
            // No conversion is implemented for these variants; report them as an error.
            ScalarValue::Decimal256(_, _, _)
            | ScalarValue::Struct(_)
            | ScalarValue::FixedSizeList(_)
            | ScalarValue::LargeList(_)
            | ScalarValue::Dictionary(_, _)
            | ScalarValue::Union(_, _, _)
            | ScalarValue::Utf8View(_)
            | ScalarValue::BinaryView(_)
            | ScalarValue::Map(_)
            | ScalarValue::Date64(_) => {
                return error::ConversionSnafu {
                    from: format!("Unsupported scalar value: {value}"),
                }
                .fail()
            }
        };

        Ok(Arc::new(vector))
    }
263
    /// Try to cast an arrow array into vector
    ///
    /// The vector type is selected from the arrow data type of `array`. A few types are
    /// first cast with `arrow::compute::cast` because they share a vector type:
    /// `LargeBinary`, `FixedSizeBinary` and `BinaryView` are cast to `Binary`, while
    /// `LargeUtf8` and `Utf8View` are cast to `Utf8`.
    ///
    /// # Errors
    /// Returns an unsupported-arrow-type error for the data types listed in the last
    /// match arm and for dictionary arrays whose key type is not one of the eight
    /// integer types. Errors from the arrow cast kernel and from the individual vector
    /// constructors are propagated.
    ///
    /// # Panics
    /// Panics (`unreachable!`) if a `Time32` array carries a unit other than
    /// second/millisecond, or a `Time64` array a unit other than microsecond/nanosecond.
    /// Also panics if a dictionary or struct array cannot be downcast to the array type
    /// implied by its data type.
    pub fn try_into_vector(array: impl AsRef<dyn Array>) -> Result<VectorRef> {
        Ok(match array.as_ref().data_type() {
            ArrowDataType::Null => Arc::new(NullVector::try_from_arrow_array(array)?),
            ArrowDataType::Boolean => Arc::new(BooleanVector::try_from_arrow_array(array)?),
            ArrowDataType::Binary => Arc::new(BinaryVector::try_from_arrow_array(array)?),
            // Normalize the other binary layouts to `Binary` before building the vector.
            ArrowDataType::LargeBinary
            | ArrowDataType::FixedSizeBinary(_)
            | ArrowDataType::BinaryView => {
                let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Binary)
                    .context(crate::error::ArrowComputeSnafu)?;
                Arc::new(BinaryVector::try_from_arrow_array(array)?)
            }
            ArrowDataType::Int8 => Arc::new(Int8Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int16 => Arc::new(Int16Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int32 => Arc::new(Int32Vector::try_from_arrow_array(array)?),
            ArrowDataType::Int64 => Arc::new(Int64Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt8 => Arc::new(UInt8Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt16 => Arc::new(UInt16Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt32 => Arc::new(UInt32Vector::try_from_arrow_array(array)?),
            ArrowDataType::UInt64 => Arc::new(UInt64Vector::try_from_arrow_array(array)?),
            ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?),
            ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?),
            ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
            // Normalize the other string layouts to `Utf8` before building the vector.
            ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
                let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Utf8)
                    .context(crate::error::ArrowComputeSnafu)?;
                Arc::new(StringVector::try_from_arrow_array(array)?)
            }
            ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
            ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
            // The timezone part of the timestamp type is not inspected here.
            ArrowDataType::Timestamp(unit, _) => match unit {
                TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Microsecond => {
                    Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Time32(unit) => match unit {
                TimeUnit::Second => Arc::new(TimeSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(TimeMillisecondVector::try_from_arrow_array(array)?)
                }
                // Arrow use time32 for second/millisecond.
                _ => unreachable!(
                    "unexpected arrow array datatype: {:?}",
                    array.as_ref().data_type()
                ),
            },
            ArrowDataType::Time64(unit) => match unit {
                TimeUnit::Microsecond => {
                    Arc::new(TimeMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(TimeNanosecondVector::try_from_arrow_array(array)?)
                }
                // Arrow use time64 for microsecond/nanosecond.
                _ => unreachable!(
                    "unexpected arrow array datatype: {:?}",
                    array.as_ref().data_type()
                ),
            },
            ArrowDataType::Interval(unit) => match unit {
                IntervalUnit::YearMonth => {
                    Arc::new(IntervalYearMonthVector::try_from_arrow_array(array)?)
                }
                IntervalUnit::DayTime => {
                    Arc::new(IntervalDayTimeVector::try_from_arrow_array(array)?)
                }
                IntervalUnit::MonthDayNano => {
                    Arc::new(IntervalMonthDayNanoVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Duration(unit) => match unit {
                TimeUnit::Second => Arc::new(DurationSecondVector::try_from_arrow_array(array)?),
                TimeUnit::Millisecond => {
                    Arc::new(DurationMillisecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Microsecond => {
                    Arc::new(DurationMicrosecondVector::try_from_arrow_array(array)?)
                }
                TimeUnit::Nanosecond => {
                    Arc::new(DurationNanosecondVector::try_from_arrow_array(array)?)
                }
            },
            ArrowDataType::Decimal128(_, _) => {
                Arc::new(Decimal128Vector::try_from_arrow_array(array)?)
            }
            ArrowDataType::Dictionary(key, value) => {
                // Downcasts the array to a `DictionaryArray` with the given key type and
                // wraps a clone of it, together with the converted value type, into a
                // `DictionaryVector`.
                macro_rules! handle_dictionary_key_type {
                    ($key_type:ident) => {{
                        let array = array
                            .as_ref()
                            .as_any()
                            .downcast_ref::<DictionaryArray<$key_type>>()
                            .unwrap(); // Safety: the type is guarded by match arm condition
                        Arc::new(DictionaryVector::new(
                            array.clone(),
                            ConcreteDataType::try_from(value.as_ref())?,
                        )?)
                    }};
                }

                // Only integer key types are handled; anything else is reported as an
                // unsupported arrow type.
                match key.as_ref() {
                    ArrowDataType::Int8 => handle_dictionary_key_type!(Int8Type),
                    ArrowDataType::Int16 => handle_dictionary_key_type!(Int16Type),
                    ArrowDataType::Int32 => handle_dictionary_key_type!(Int32Type),
                    ArrowDataType::Int64 => handle_dictionary_key_type!(Int64Type),
                    ArrowDataType::UInt8 => handle_dictionary_key_type!(UInt8Type),
                    ArrowDataType::UInt16 => handle_dictionary_key_type!(UInt16Type),
                    ArrowDataType::UInt32 => handle_dictionary_key_type!(UInt32Type),
                    ArrowDataType::UInt64 => handle_dictionary_key_type!(UInt64Type),
                    _ => {
                        return error::UnsupportedArrowTypeSnafu {
                            arrow_type: array.as_ref().data_type().clone(),
                        }
                        .fail()
                    }
                }
            }

            ArrowDataType::Struct(_fields) => {
                // The downcast is expected to succeed because this arm only matches
                // arrays whose data type is `Struct`.
                let array = array
                    .as_ref()
                    .as_any()
                    .downcast_ref::<StructArray>()
                    .unwrap();
                Arc::new(StructVector::new(array.clone())?)
            }
            // No vector conversion is implemented for these arrow types.
            ArrowDataType::Float16
            | ArrowDataType::LargeList(_)
            | ArrowDataType::FixedSizeList(_, _)
            | ArrowDataType::Union(_, _)
            | ArrowDataType::Decimal256(_, _)
            | ArrowDataType::Map(_, _)
            | ArrowDataType::RunEndEncoded(_, _)
            | ArrowDataType::ListView(_)
            | ArrowDataType::LargeListView(_)
            | ArrowDataType::Date64
            | ArrowDataType::Decimal32(_, _)
            | ArrowDataType::Decimal64(_, _) => {
                return error::UnsupportedArrowTypeSnafu {
                    arrow_type: array.as_ref().data_type().clone(),
                }
                .fail()
            }
        })
    }
420
421    /// Try to cast an vec of values into vector, fail if type is not the same across all values.
422    pub fn try_from_row_into_vector(row: &[Value], dt: &ConcreteDataType) -> Result<VectorRef> {
423        let mut builder = dt.create_mutable_vector(row.len());
424        for val in row {
425            builder.try_push_value_ref(val.as_value_ref())?;
426        }
427        let vector = builder.to_vector();
428        Ok(vector)
429    }
430
431    /// Try to cast slice of `arrays` to vectors.
432    pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
433        arrays.iter().map(Self::try_into_vector).collect()
434    }
435
436    /// Perform SQL like operation on `names` and a scalar `s`.
437    pub fn like_utf8(names: Vec<String>, s: &str) -> Result<VectorRef> {
438        let array = StringArray::from(names);
439
440        let s = StringArray::new_scalar(s);
441        let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
442
443        let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
444        Helper::try_into_vector(result)
445    }
446
447    pub fn like_utf8_filter(names: Vec<String>, s: &str) -> Result<(VectorRef, BooleanVector)> {
448        let array = StringArray::from(names);
449        let s = StringArray::new_scalar(s);
450        let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
451        let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
452        let vector = Helper::try_into_vector(result)?;
453
454        Ok((vector, BooleanVector::from(filter)))
455    }
456}
457
458#[cfg(test)]
459mod tests {
460    use arrow::array::{
461        ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, Int32Array,
462        Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray, Time32MillisecondArray,
463        Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
464        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
465        TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
466    };
467    use arrow::buffer::Buffer;
468    use arrow::datatypes::{Int32Type, IntervalMonthDayNano};
469    use arrow_array::{BinaryArray, DictionaryArray, FixedSizeBinaryArray, LargeStringArray};
470    use arrow_schema::DataType;
471    use common_decimal::Decimal128;
472    use common_time::time::Time;
473    use common_time::timestamp::TimeUnit;
474    use common_time::{Date, Duration};
475
476    use super::*;
477    use crate::value::Value;
478    use crate::vectors::ConcreteDataType;
479
480    #[test]
481    fn test_try_into_vectors() {
482        let arrays: Vec<ArrayRef> = vec![
483            Arc::new(Int32Array::from(vec![1])),
484            Arc::new(Int32Array::from(vec![2])),
485            Arc::new(Int32Array::from(vec![3])),
486        ];
487        let vectors = Helper::try_into_vectors(&arrays).unwrap();
488        vectors.iter().for_each(|v| assert_eq!(1, v.len()));
489        assert_eq!(Value::Int32(1), vectors[0].get(0));
490        assert_eq!(Value::Int32(2), vectors[1].get(0));
491        assert_eq!(Value::Int32(3), vectors[2].get(0));
492    }
493
494    #[test]
495    fn test_try_into_date_vector() {
496        let vector = DateVector::from(vec![Some(1), Some(2), None]);
497        let arrow_array = vector.to_arrow_array();
498        assert_eq!(&ArrowDataType::Date32, arrow_array.data_type());
499        let vector_converted = Helper::try_into_vector(arrow_array).unwrap();
500        assert_eq!(vector.len(), vector_converted.len());
501        for i in 0..vector_converted.len() {
502            assert_eq!(vector.get(i), vector_converted.get(i));
503        }
504    }
505
506    #[test]
507    fn test_try_from_scalar_date_value() {
508        let vector = Helper::try_from_scalar_value(ScalarValue::Date32(Some(42)), 3).unwrap();
509        assert_eq!(ConcreteDataType::date_datatype(), vector.data_type());
510        assert_eq!(3, vector.len());
511        for i in 0..vector.len() {
512            assert_eq!(Value::Date(Date::new(42)), vector.get(i));
513        }
514    }
515
516    #[test]
517    fn test_try_from_scalar_duration_value() {
518        let vector =
519            Helper::try_from_scalar_value(ScalarValue::DurationSecond(Some(42)), 3).unwrap();
520        assert_eq!(
521            ConcreteDataType::duration_second_datatype(),
522            vector.data_type()
523        );
524        assert_eq!(3, vector.len());
525        for i in 0..vector.len() {
526            assert_eq!(
527                Value::Duration(Duration::new(42, TimeUnit::Second)),
528                vector.get(i)
529            );
530        }
531    }
532
533    #[test]
534    fn test_try_from_scalar_decimal128_value() {
535        let vector =
536            Helper::try_from_scalar_value(ScalarValue::Decimal128(Some(42), 3, 1), 3).unwrap();
537        assert_eq!(
538            ConcreteDataType::decimal128_datatype(3, 1),
539            vector.data_type()
540        );
541        assert_eq!(3, vector.len());
542        for i in 0..vector.len() {
543            assert_eq!(Value::Decimal128(Decimal128::new(42, 3, 1)), vector.get(i));
544        }
545    }
546
547    #[test]
548    fn test_try_from_list_value() {
549        let value = ScalarValue::List(ScalarValue::new_list(
550            &[ScalarValue::Int32(Some(1)), ScalarValue::Int32(Some(2))],
551            &ArrowDataType::Int32,
552            true,
553        ));
554        let vector = Helper::try_from_scalar_value(value, 3).unwrap();
555        assert_eq!(
556            ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
557            vector.data_type()
558        );
559        assert_eq!(3, vector.len());
560        for i in 0..vector.len() {
561            let v = vector.get(i);
562            let items = v.as_list().unwrap().unwrap().items();
563            assert_eq!(vec![Value::Int32(1), Value::Int32(2)], items);
564        }
565    }
566
567    #[test]
568    fn test_like_utf8() {
569        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
570            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
571            assert_eq!(*actual, StringVector::from(expected));
572        }
573
574        let names: Vec<String> = vec!["greptime", "hello", "public", "world"]
575            .into_iter()
576            .map(|x| x.to_string())
577            .collect();
578
579        let ret = Helper::like_utf8(names.clone(), "%ll%").unwrap();
580        assert_vector(vec!["hello"], &ret);
581
582        let ret = Helper::like_utf8(names.clone(), "%time").unwrap();
583        assert_vector(vec!["greptime"], &ret);
584
585        let ret = Helper::like_utf8(names.clone(), "%ld").unwrap();
586        assert_vector(vec!["world"], &ret);
587
588        let ret = Helper::like_utf8(names, "%").unwrap();
589        assert_vector(vec!["greptime", "hello", "public", "world"], &ret);
590    }
591
592    #[test]
593    fn test_like_utf8_filter() {
594        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
595            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
596            assert_eq!(*actual, StringVector::from(expected));
597        }
598
599        fn assert_filter(array: Vec<String>, s: &str, expected_filter: &BooleanVector) {
600            let array = StringArray::from(array);
601            let s = StringArray::new_scalar(s);
602            let actual_filter = comparison::like(&array, &s).unwrap();
603            assert_eq!(BooleanVector::from(actual_filter), *expected_filter);
604        }
605
606        let names: Vec<String> = vec!["greptime", "timeseries", "cloud", "database"]
607            .into_iter()
608            .map(|x| x.to_string())
609            .collect();
610
611        let (table, filter) = Helper::like_utf8_filter(names.clone(), "%ti%").unwrap();
612        assert_vector(vec!["greptime", "timeseries"], &table);
613        assert_filter(names.clone(), "%ti%", &filter);
614
615        let (tables, filter) = Helper::like_utf8_filter(names.clone(), "%lou").unwrap();
616        assert_vector(vec![], &tables);
617        assert_filter(names.clone(), "%lou", &filter);
618
619        let (tables, filter) = Helper::like_utf8_filter(names.clone(), "%d%").unwrap();
620        assert_vector(vec!["cloud", "database"], &tables);
621        assert_filter(names.clone(), "%d%", &filter);
622    }
623
624    fn check_try_into_vector(array: impl Array + 'static) {
625        let array: ArrayRef = Arc::new(array);
626        let vector = Helper::try_into_vector(array.clone()).unwrap();
627        assert_eq!(&array, &vector.to_arrow_array());
628    }
629
630    #[test]
631    fn test_try_into_vector() {
632        check_try_into_vector(NullArray::new(2));
633        check_try_into_vector(BooleanArray::from(vec![true, false]));
634        check_try_into_vector(Int8Array::from(vec![1, 2, 3]));
635        check_try_into_vector(Int16Array::from(vec![1, 2, 3]));
636        check_try_into_vector(Int32Array::from(vec![1, 2, 3]));
637        check_try_into_vector(Int64Array::from(vec![1, 2, 3]));
638        check_try_into_vector(UInt8Array::from(vec![1, 2, 3]));
639        check_try_into_vector(UInt16Array::from(vec![1, 2, 3]));
640        check_try_into_vector(UInt32Array::from(vec![1, 2, 3]));
641        check_try_into_vector(UInt64Array::from(vec![1, 2, 3]));
642        check_try_into_vector(Float32Array::from(vec![1.0, 2.0, 3.0]));
643        check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
644        check_try_into_vector(StringArray::from(vec!["hello", "world"]));
645        check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
646        let data = vec![None, Some(vec![Some(6), Some(7)])];
647        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
648        check_try_into_vector(list_array);
649        check_try_into_vector(TimestampSecondArray::from(vec![1, 2, 3]));
650        check_try_into_vector(TimestampMillisecondArray::from(vec![1, 2, 3]));
651        check_try_into_vector(TimestampMicrosecondArray::from(vec![1, 2, 3]));
652        check_try_into_vector(TimestampNanosecondArray::from(vec![1, 2, 3]));
653        check_try_into_vector(Time32SecondArray::from(vec![1, 2, 3]));
654        check_try_into_vector(Time32MillisecondArray::from(vec![1, 2, 3]));
655        check_try_into_vector(Time64MicrosecondArray::from(vec![1, 2, 3]));
656        check_try_into_vector(Time64NanosecondArray::from(vec![1, 2, 3]));
657
658        // Test dictionary arrays with different key types
659        let values = StringArray::from_iter_values(["a", "b", "c"]);
660
661        // Test Int8 keys
662        let keys = Int8Array::from_iter_values([0, 0, 1, 2]);
663        let array: ArrayRef =
664            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
665        Helper::try_into_vector(array).unwrap();
666
667        // Test Int16 keys
668        let keys = Int16Array::from_iter_values([0, 0, 1, 2]);
669        let array: ArrayRef =
670            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
671        Helper::try_into_vector(array).unwrap();
672
673        // Test Int32 keys
674        let keys = Int32Array::from_iter_values([0, 0, 1, 2]);
675        let array: ArrayRef =
676            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
677        Helper::try_into_vector(array).unwrap();
678
679        // Test Int64 keys
680        let keys = Int64Array::from_iter_values([0, 0, 1, 2]);
681        let array: ArrayRef =
682            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
683        Helper::try_into_vector(array).unwrap();
684
685        // Test UInt8 keys
686        let keys = UInt8Array::from_iter_values([0, 0, 1, 2]);
687        let array: ArrayRef =
688            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
689        Helper::try_into_vector(array).unwrap();
690
691        // Test UInt16 keys
692        let keys = UInt16Array::from_iter_values([0, 0, 1, 2]);
693        let array: ArrayRef =
694            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
695        Helper::try_into_vector(array).unwrap();
696
697        // Test UInt32 keys
698        let keys = UInt32Array::from_iter_values([0, 0, 1, 2]);
699        let array: ArrayRef =
700            Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
701        Helper::try_into_vector(array).unwrap();
702
703        // Test UInt64 keys
704        let keys = UInt64Array::from_iter_values([0, 0, 1, 2]);
705        let array: ArrayRef = Arc::new(DictionaryArray::try_new(keys, Arc::new(values)).unwrap());
706        Helper::try_into_vector(array).unwrap();
707    }
708
709    #[test]
710    fn test_try_binary_array_into_vector() {
711        let input_vec: Vec<&[u8]> = vec!["hello".as_bytes(), "world".as_bytes()];
712        let assertion_vector = BinaryVector::from(input_vec.clone());
713
714        let input_arrays: Vec<ArrayRef> = vec![
715            Arc::new(LargeBinaryArray::from(input_vec.clone())) as ArrayRef,
716            Arc::new(BinaryArray::from(input_vec.clone())) as ArrayRef,
717            Arc::new(FixedSizeBinaryArray::new(
718                5,
719                Buffer::from_vec("helloworld".as_bytes().to_vec()),
720                None,
721            )) as ArrayRef,
722        ];
723
724        for input_array in input_arrays {
725            let vector = Helper::try_into_vector(input_array).unwrap();
726
727            assert_eq!(2, vector.len());
728            assert_eq!(0, vector.null_count());
729
730            let output_arrow_array: ArrayRef = vector.to_arrow_array();
731            assert_eq!(&DataType::Binary, output_arrow_array.data_type());
732            assert_eq!(&assertion_vector.to_arrow_array(), &output_arrow_array);
733        }
734    }
735
736    #[test]
737    fn test_large_string_array_into_vector() {
738        let input_vec = vec!["a", "b"];
739        let assertion_array = StringArray::from(input_vec.clone());
740
741        let large_string_array: ArrayRef = Arc::new(LargeStringArray::from(input_vec));
742        let vector = Helper::try_into_vector(large_string_array).unwrap();
743        assert_eq!(2, vector.len());
744        assert_eq!(0, vector.null_count());
745
746        let output_arrow_array: StringArray = vector
747            .to_arrow_array()
748            .as_any()
749            .downcast_ref::<StringArray>()
750            .unwrap()
751            .clone();
752        assert_eq!(&assertion_array, &output_arrow_array);
753    }
754
755    #[test]
756    fn test_try_from_scalar_time_value() {
757        let vector = Helper::try_from_scalar_value(ScalarValue::Time32Second(Some(42)), 3).unwrap();
758        assert_eq!(ConcreteDataType::time_second_datatype(), vector.data_type());
759        assert_eq!(3, vector.len());
760        for i in 0..vector.len() {
761            assert_eq!(Value::Time(Time::new_second(42)), vector.get(i));
762        }
763    }
764
765    #[test]
766    fn test_try_from_scalar_interval_value() {
767        let vector = Helper::try_from_scalar_value(
768            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new(1, 1, 2000))),
769            3,
770        )
771        .unwrap();
772
773        assert_eq!(
774            ConcreteDataType::interval_month_day_nano_datatype(),
775            vector.data_type()
776        );
777        assert_eq!(3, vector.len());
778        for i in 0..vector.len() {
779            assert_eq!(
780                Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 2000).into()),
781                vector.get(i)
782            );
783        }
784    }
785
786    fn check_try_from_row_to_vector(row: Vec<Value>, dt: &ConcreteDataType) {
787        let vector = Helper::try_from_row_into_vector(&row, dt).unwrap();
788        for (i, item) in row.iter().enumerate().take(vector.len()) {
789            assert_eq!(*item, vector.get(i));
790        }
791    }
792
793    fn check_into_and_from(array: impl Array + 'static) {
794        let array: ArrayRef = Arc::new(array);
795        let vector = Helper::try_into_vector(array.clone()).unwrap();
796        assert_eq!(&array, &vector.to_arrow_array());
797        let row: Vec<Value> = (0..array.len()).map(|i| vector.get(i)).collect();
798        let dt = vector.data_type();
799        check_try_from_row_to_vector(row, &dt);
800    }
801
802    #[test]
803    fn test_try_from_row_to_vector() {
804        check_into_and_from(NullArray::new(2));
805        check_into_and_from(BooleanArray::from(vec![true, false]));
806        check_into_and_from(Int8Array::from(vec![1, 2, 3]));
807        check_into_and_from(Int16Array::from(vec![1, 2, 3]));
808        check_into_and_from(Int32Array::from(vec![1, 2, 3]));
809        check_into_and_from(Int64Array::from(vec![1, 2, 3]));
810        check_into_and_from(UInt8Array::from(vec![1, 2, 3]));
811        check_into_and_from(UInt16Array::from(vec![1, 2, 3]));
812        check_into_and_from(UInt32Array::from(vec![1, 2, 3]));
813        check_into_and_from(UInt64Array::from(vec![1, 2, 3]));
814        check_into_and_from(Float32Array::from(vec![1.0, 2.0, 3.0]));
815        check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
816        check_into_and_from(StringArray::from(vec!["hello", "world"]));
817        check_into_and_from(Date32Array::from(vec![1, 2, 3]));
818
819        check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
820        check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));
821        check_into_and_from(TimestampMicrosecondArray::from(vec![1, 2, 3]));
822        check_into_and_from(TimestampNanosecondArray::from(vec![1, 2, 3]));
823        check_into_and_from(Time32SecondArray::from(vec![1, 2, 3]));
824        check_into_and_from(Time32MillisecondArray::from(vec![1, 2, 3]));
825        check_into_and_from(Time64MicrosecondArray::from(vec![1, 2, 3]));
826        check_into_and_from(Time64NanosecondArray::from(vec![1, 2, 3]));
827    }
828}