1use std::any::Any;
16use std::fmt::Debug;
17use std::sync::Arc;
18
19use arrow::array::{Array, ArrayRef};
20use snafu::ensure;
21
22use crate::data_type::ConcreteDataType;
23use crate::error::{self, Result};
24use crate::serialize::Serializable;
25use crate::value::{Value, ValueRef};
26use crate::vectors::operations::VectorOp;
27
28mod binary;
29mod boolean;
30mod constant;
31mod date;
32mod decimal;
33mod dictionary;
34mod duration;
35mod eq;
36mod helper;
37mod interval;
38mod list;
39mod null;
40pub(crate) mod operations;
41mod primitive;
42mod string;
43mod struct_vector;
44mod time;
45mod timestamp;
46mod validity;
47
48pub use binary::{BinaryVector, BinaryVectorBuilder};
49pub use boolean::{BooleanVector, BooleanVectorBuilder};
50pub use constant::ConstantVector;
51pub use date::{DateVector, DateVectorBuilder};
52pub use decimal::{Decimal128Vector, Decimal128VectorBuilder};
53pub use dictionary::{DictionaryIter, DictionaryVector};
54pub use duration::{
55 DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector,
56 DurationMillisecondVectorBuilder, DurationNanosecondVector, DurationNanosecondVectorBuilder,
57 DurationSecondVector, DurationSecondVectorBuilder,
58};
59pub use helper::Helper;
60pub use interval::{
61 IntervalDayTimeVector, IntervalDayTimeVectorBuilder, IntervalMonthDayNanoVector,
62 IntervalMonthDayNanoVectorBuilder, IntervalYearMonthVector, IntervalYearMonthVectorBuilder,
63};
64pub use list::{ListIter, ListVector, ListVectorBuilder};
65pub use null::{NullVector, NullVectorBuilder};
66pub use primitive::{
67 Float32Vector, Float32VectorBuilder, Float64Vector, Float64VectorBuilder, Int16Vector,
68 Int16VectorBuilder, Int32Vector, Int32VectorBuilder, Int64Vector, Int64VectorBuilder,
69 Int8Vector, Int8VectorBuilder, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder,
70 UInt16Vector, UInt16VectorBuilder, UInt32Vector, UInt32VectorBuilder, UInt64Vector,
71 UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder,
72};
73pub use string::{StringVector, StringVectorBuilder};
74pub use time::{
75 TimeMicrosecondVector, TimeMicrosecondVectorBuilder, TimeMillisecondVector,
76 TimeMillisecondVectorBuilder, TimeNanosecondVector, TimeNanosecondVectorBuilder,
77 TimeSecondVector, TimeSecondVectorBuilder,
78};
79pub use timestamp::{
80 TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder, TimestampMillisecondVector,
81 TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampNanosecondVectorBuilder,
82 TimestampSecondVector, TimestampSecondVectorBuilder,
83};
84pub use validity::Validity;
85
86pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
90 fn data_type(&self) -> ConcreteDataType;
94
95 fn vector_type_name(&self) -> String;
96
97 fn as_any(&self) -> &dyn Any;
100
101 fn len(&self) -> usize;
103
104 fn is_empty(&self) -> bool {
106 self.len() == 0
107 }
108
109 fn to_arrow_array(&self) -> ArrayRef;
111
112 fn to_boxed_arrow_array(&self) -> Box<dyn Array>;
114
115 fn validity(&self) -> Validity;
117
118 fn memory_size(&self) -> usize;
120
121 fn null_count(&self) -> usize;
125
126 fn is_const(&self) -> bool {
128 false
129 }
130
131 fn is_null(&self, row: usize) -> bool;
133
134 fn only_null(&self) -> bool {
136 self.null_count() == self.len()
137 }
138
139 fn slice(&self, offset: usize, length: usize) -> VectorRef;
144
145 fn get(&self, index: usize) -> Value;
150
151 fn try_get(&self, index: usize) -> Result<Value> {
154 ensure!(
155 index < self.len(),
156 error::BadArrayAccessSnafu {
157 index,
158 size: self.len()
159 }
160 );
161 Ok(self.get(index))
162 }
163
164 fn get_ref(&self, index: usize) -> ValueRef;
169}
170
/// Shared, reference-counted handle to a type-erased [`Vector`].
pub type VectorRef = Arc<dyn Vector>;
172
173pub trait MutableVector: Send + Sync {
175 fn data_type(&self) -> ConcreteDataType;
177
178 fn len(&self) -> usize;
180
181 fn is_empty(&self) -> bool {
183 self.len() == 0
184 }
185
186 fn as_any(&self) -> &dyn Any;
188
189 fn as_mut_any(&mut self) -> &mut dyn Any;
191
192 fn to_vector(&mut self) -> VectorRef;
194
195 fn to_vector_cloned(&self) -> VectorRef;
197
198 fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()>;
200
201 fn push_value_ref(&mut self, value: ValueRef) {
206 self.try_push_value_ref(value).unwrap_or_else(|_| {
207 panic!(
208 "expecting pushing value of datatype {:?}, actual {:?}",
209 self.data_type(),
210 value
211 );
212 });
213 }
214
215 fn push_null(&mut self);
217
218 fn push_nulls(&mut self, num_nulls: usize) {
220 for _ in 0..num_nulls {
221 self.push_null();
222 }
223 }
224
225 fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
232}
233
/// Generates an inherent `try_from_arrow_array` constructor on `$Vector`.
///
/// The generated function downcasts the given arrow array to `$Array`, clones it and
/// converts the clone with `$Vector::from`. When the downcast fails, it returns a
/// `Conversion` error whose `from` field is the debug rendering of the array's arrow
/// data type.
macro_rules! impl_try_from_arrow_array_for_vector {
    ($Array: ident, $Vector: ident) => {
        impl $Vector {
            pub fn try_from_arrow_array(
                array: impl AsRef<dyn arrow::array::Array>,
            ) -> crate::error::Result<$Vector> {
                use snafu::OptionExt;

                // Borrow the trait object once; both the downcast and the error
                // message read from the same reference.
                let source: &dyn arrow::array::Array = array.as_ref();
                let concrete = source
                    .as_any()
                    .downcast_ref::<$Array>()
                    .with_context(|| crate::error::ConversionSnafu {
                        from: std::format!("{:?}", source.data_type()),
                    })?;

                Ok($Vector::from(concrete.clone()))
            }
        }
    };
}
257
/// Expands to a `Validity` built from the arrow data of `$array`
/// (`$array.to_data()`).
///
/// `Validity` is referenced unqualified, so it must be in scope at the call site.
macro_rules! impl_validity_for_vector {
    ($array: expr) => {
        Validity::from_array_data($array.to_data())
    };
}
263
/// Expands to an expression that yields the element of `$array` at `$index`
/// converted with `.into()`, or `Value::Null` when `is_valid` reports the slot
/// as not valid.
///
/// `Value` is referenced unqualified, so it must be in scope at the call site.
macro_rules! impl_get_for_vector {
    ($array: expr, $index: ident) => {
        if $array.is_valid($index) {
            // SAFETY: NOTE(review) — this relies on the preceding `is_valid($index)`
            // call having rejected out-of-range indices; confirm against the array
            // type used at each call site.
            unsafe { $array.value_unchecked($index).into() }
        } else {
            Value::Null
        }
    };
}
274
/// Expands to an expression that yields the element of `$array` at `$index`
/// converted with `.into()`, or `ValueRef::Null` when `is_valid` reports the slot
/// as not valid.
///
/// `ValueRef` is referenced unqualified, so it must be in scope at the call site.
macro_rules! impl_get_ref_for_vector {
    ($array: expr, $index: ident) => {
        if $array.is_valid($index) {
            // SAFETY: NOTE(review) — this relies on the preceding `is_valid($index)`
            // call having rejected out-of-range indices; confirm against the array
            // type used at each call site.
            unsafe { $array.value_unchecked($index).into() }
        } else {
            ValueRef::Null
        }
    };
}
285
/// Expands to the body of an "extend from a slice of another vector" routine.
///
/// The expansion slices `$vector` by `$offset`/`$length`, downcasts the slice to
/// `$VectorType` (returning a `CastType` error when that fails), pushes every item
/// produced by `iter_data()` into `$mutable_vector`, and finally evaluates to `Ok(())`.
/// Because it uses `?`, it must be expanded inside a function returning a compatible
/// `Result`.
macro_rules! impl_extend_for_builder {
    ($mutable_vector: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
        use snafu::OptionExt;

        // Keep the sliced vector alive in its own binding: the downcast below
        // borrows from it.
        let window = $vector.slice($offset, $length);
        let downcast = window.as_any().downcast_ref::<$VectorType>();
        let typed = downcast.with_context(|| crate::error::CastTypeSnafu {
            msg: format!(
                "Failed to cast vector from {} to {}",
                $vector.vector_type_name(),
                stringify!($VectorType)
            ),
        })?;

        for item in typed.iter_data() {
            $mutable_vector.push(item);
        }
        Ok(())
    }};
}
307
308pub(crate) use {
309 impl_extend_for_builder, impl_get_for_vector, impl_get_ref_for_vector,
310 impl_try_from_arrow_array_for_vector, impl_validity_for_vector,
311};
312
313#[cfg(test)]
314pub mod tests {
315 use arrow::array::{Array, Int32Array, UInt8Array};
316 use paste::paste;
317 use serde_json;
318
319 use super::*;
320 use crate::data_type::DataType;
321 use crate::prelude::ScalarVectorBuilder;
322 use crate::types::{Int32Type, LogicalPrimitiveType};
323 use crate::vectors::helper::Helper;
324
325 #[test]
326 fn test_df_columns_to_vector() {
327 let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
328 let vector = Helper::try_into_vector(df_column).unwrap();
329 assert_eq!(
330 Int32Type::build_data_type().as_arrow_type(),
331 vector.data_type().as_arrow_type()
332 );
333 }
334
335 #[test]
336 fn test_serialize_i32_vector() {
337 let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
338 let json_value = Helper::try_into_vector(df_column)
339 .unwrap()
340 .serialize_to_json()
341 .unwrap();
342 assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
343 }
344
345 #[test]
346 fn test_serialize_i8_vector() {
347 let df_column: Arc<dyn Array> = Arc::new(UInt8Array::from(vec![1, 2, 3]));
348 let json_value = Helper::try_into_vector(df_column)
349 .unwrap()
350 .serialize_to_json()
351 .unwrap();
352 assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
353 }
354
355 #[test]
356 fn test_mutable_vector_data_type() {
357 macro_rules! mutable_primitive_data_type_eq_with_lower {
358 ($($type: ident),*) => {
359 $(
360 paste! {
361 let mutable_vector = [<$type VectorBuilder>]::with_capacity(1024);
362 assert_eq!(mutable_vector.data_type(), ConcreteDataType::[<$type:lower _datatype>]());
363 }
364 )*
365 };
366 }
367
368 macro_rules! mutable_time_data_type_eq_with_snake {
369 ($($type: ident),*) => {
370 $(
371 paste! {
372 let mutable_vector = [<$type VectorBuilder>]::with_capacity(1024);
373 assert_eq!(mutable_vector.data_type(), ConcreteDataType::[<$type:snake _datatype>]());
374 }
375 )*
376 };
377 }
378 mutable_primitive_data_type_eq_with_lower!(
380 Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64,
381 Date, Binary, String
382 );
383
384 mutable_time_data_type_eq_with_snake!(
386 TimeSecond,
387 TimeMillisecond,
388 TimeMicrosecond,
389 TimeNanosecond,
390 TimestampSecond,
391 TimestampMillisecond,
392 TimestampMicrosecond,
393 TimestampNanosecond,
394 DurationSecond,
395 DurationMillisecond,
396 DurationMicrosecond,
397 DurationNanosecond,
398 IntervalYearMonth,
399 IntervalDayTime,
400 IntervalMonthDayNano
401 );
402
403 let builder = NullVectorBuilder::default();
405 assert_eq!(builder.data_type(), ConcreteDataType::null_datatype());
406
407 let builder = Decimal128VectorBuilder::with_capacity(1024);
409 assert_eq!(
410 builder.data_type(),
411 ConcreteDataType::decimal128_datatype(38, 10)
412 );
413
414 let builder = Decimal128VectorBuilder::with_capacity(1024)
415 .with_precision_and_scale(3, 2)
416 .unwrap();
417 assert_eq!(
418 builder.data_type(),
419 ConcreteDataType::decimal128_datatype(3, 2)
420 );
421 }
422
423 #[test]
424 #[should_panic(expected = "Must use ListVectorBuilder::with_type_capacity()")]
425 fn test_mutable_vector_list_data_type() {
426 let builder =
428 ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1024);
429 assert_eq!(
430 builder.data_type(),
431 ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
432 );
433
434 let _ = ListVectorBuilder::with_capacity(1024);
436 }
437
438 #[test]
439 fn test_mutable_vector_to_vector_cloned() {
440 let mut builder = ConcreteDataType::string_datatype().create_mutable_vector(1024);
442 builder.push_value_ref(ValueRef::String("hello"));
443 builder.push_value_ref(ValueRef::String("world"));
444 builder.push_value_ref(ValueRef::String("!"));
445
446 let vector = builder.to_vector_cloned();
448 assert_eq!(vector.len(), 3);
449 assert_eq!(builder.len(), 3);
450 }
451}