1use std::any::Any;
16use std::fmt::Debug;
17use std::sync::Arc;
18
19use arrow::array::{Array, ArrayRef};
20use snafu::ensure;
21
22use crate::data_type::ConcreteDataType;
23use crate::error::{self, Result};
24use crate::serialize::Serializable;
25use crate::value::{Value, ValueRef};
26use crate::vectors::operations::VectorOp;
27
28mod binary;
29mod boolean;
30mod constant;
31mod date;
32mod decimal;
33mod dictionary;
34mod duration;
35mod eq;
36mod helper;
37mod interval;
38pub(crate) mod json;
39mod list;
40mod null;
41pub(crate) mod operations;
42mod primitive;
43mod string;
44mod struct_vector;
45mod time;
46mod timestamp;
47mod validity;
48
49pub use binary::{BinaryVector, BinaryVectorBuilder};
50pub use boolean::{BooleanVector, BooleanVectorBuilder};
51pub use constant::ConstantVector;
52pub use date::{DateVector, DateVectorBuilder};
53pub use decimal::{Decimal128Vector, Decimal128VectorBuilder};
54pub use dictionary::{DictionaryIter, DictionaryVector};
55pub use duration::{
56 DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector,
57 DurationMillisecondVectorBuilder, DurationNanosecondVector, DurationNanosecondVectorBuilder,
58 DurationSecondVector, DurationSecondVectorBuilder,
59};
60pub use helper::Helper;
61pub use interval::{
62 IntervalDayTimeVector, IntervalDayTimeVectorBuilder, IntervalMonthDayNanoVector,
63 IntervalMonthDayNanoVectorBuilder, IntervalYearMonthVector, IntervalYearMonthVectorBuilder,
64};
65pub use list::{ListIter, ListVector, ListVectorBuilder};
66pub use null::{NullVector, NullVectorBuilder};
67pub use primitive::{
68 Float32Vector, Float32VectorBuilder, Float64Vector, Float64VectorBuilder, Int8Vector,
69 Int8VectorBuilder, Int16Vector, Int16VectorBuilder, Int32Vector, Int32VectorBuilder,
70 Int64Vector, Int64VectorBuilder, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder,
71 UInt8Vector, UInt8VectorBuilder, UInt16Vector, UInt16VectorBuilder, UInt32Vector,
72 UInt32VectorBuilder, UInt64Vector, UInt64VectorBuilder,
73};
74pub use string::{StringVector, StringVectorBuilder};
75pub use struct_vector::{StructVector, StructVectorBuilder};
76pub use time::{
77 TimeMicrosecondVector, TimeMicrosecondVectorBuilder, TimeMillisecondVector,
78 TimeMillisecondVectorBuilder, TimeNanosecondVector, TimeNanosecondVectorBuilder,
79 TimeSecondVector, TimeSecondVectorBuilder,
80};
81pub use timestamp::{
82 TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder, TimestampMillisecondVector,
83 TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampNanosecondVectorBuilder,
84 TimestampSecondVector, TimestampSecondVectorBuilder,
85};
86pub use validity::Validity;
87
88pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
92 fn data_type(&self) -> ConcreteDataType;
96
97 fn vector_type_name(&self) -> String;
98
99 fn as_any(&self) -> &dyn Any;
102
103 fn len(&self) -> usize;
105
106 fn is_empty(&self) -> bool {
108 self.len() == 0
109 }
110
111 fn to_arrow_array(&self) -> ArrayRef;
113
114 fn to_boxed_arrow_array(&self) -> Box<dyn Array>;
116
117 fn validity(&self) -> Validity;
119
120 fn memory_size(&self) -> usize;
122
123 fn null_count(&self) -> usize;
127
128 fn is_const(&self) -> bool {
130 false
131 }
132
133 fn is_null(&self, row: usize) -> bool;
135
136 fn only_null(&self) -> bool {
138 self.null_count() == self.len()
139 }
140
141 fn slice(&self, offset: usize, length: usize) -> VectorRef;
146
147 fn get(&self, index: usize) -> Value;
152
153 fn try_get(&self, index: usize) -> Result<Value> {
156 ensure!(
157 index < self.len(),
158 error::BadArrayAccessSnafu {
159 index,
160 size: self.len()
161 }
162 );
163 Ok(self.get(index))
164 }
165
166 fn get_ref(&self, index: usize) -> ValueRef<'_>;
171}
172
/// Shared, reference-counted handle to a type-erased [`Vector`].
pub type VectorRef = Arc<dyn Vector>;
174
175pub trait MutableVector: Send + Sync {
177 fn data_type(&self) -> ConcreteDataType;
179
180 fn len(&self) -> usize;
182
183 fn is_empty(&self) -> bool {
185 self.len() == 0
186 }
187
188 fn as_any(&self) -> &dyn Any;
190
191 fn as_mut_any(&mut self) -> &mut dyn Any;
193
194 fn to_vector(&mut self) -> VectorRef;
196
197 fn to_vector_cloned(&self) -> VectorRef;
199
200 fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()>;
202
203 fn push_value_ref(&mut self, value: &ValueRef) {
208 self.try_push_value_ref(value).unwrap_or_else(|_| {
209 panic!(
210 "expecting pushing value of datatype {:?}, actual {:?}",
211 self.data_type(),
212 value
213 );
214 });
215 }
216
217 fn push_null(&mut self);
219
220 fn push_nulls(&mut self, num_nulls: usize) {
222 for _ in 0..num_nulls {
223 self.push_null();
224 }
225 }
226
227 fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
234}
235
/// Generates an inherent `try_from_arrow_array` constructor on `$Vector` that accepts any
/// arrow array handle and succeeds only if the array's concrete type is `$Array`.
macro_rules! impl_try_from_arrow_array_for_vector {
    ($Array: ident, $Vector: ident) => {
        impl $Vector {
            /// Downcasts `array` to the expected arrow array type, clones it and converts
            /// the clone into this vector type via `From`.
            ///
            /// # Errors
            ///
            /// Fails with the error built from `ConversionSnafu`, carrying the debug
            /// rendering of the array's data type, when the downcast does not succeed.
            pub fn try_from_arrow_array(
                array: impl AsRef<dyn arrow::array::Array>,
            ) -> crate::error::Result<$Vector> {
                use snafu::OptionExt;

                let source = array.as_ref();
                let typed = source
                    .as_any()
                    .downcast_ref::<$Array>()
                    .with_context(|| crate::error::ConversionSnafu {
                        from: std::format!("{:?}", source.data_type()),
                    })?;

                Ok($Vector::from(typed.clone()))
            }
        }
    };
}
259
/// Expands to an expression that builds a `Validity` from the array data returned by
/// `$array.to_data()`.
macro_rules! impl_validity_for_vector {
    ($array: expr) => {{
        let array_data = $array.to_data();
        Validity::from_array_data(array_data)
    }};
}
265
/// Expands to an expression that reads element `$index` of `$array` as an owned `Value`.
///
/// Null slots yield `Value::Null`; valid slots are converted with `Into`.
///
/// # Panics
///
/// The expansion panics when `$index` is not smaller than `$array.len()`.
macro_rules! impl_get_for_vector {
    ($array: expr, $index: ident) => {{
        // `is_valid` only consults the validity bitmap and is not guaranteed to reject an
        // out-of-range index (an array without a null bitmap reports every index as valid),
        // so the bounds are checked explicitly before the unchecked read below.
        assert!(
            $index < $array.len(),
            "index out of bounds: the len is {} but the index is {}",
            $array.len(),
            $index
        );
        if $array.is_valid($index) {
            // SAFETY: `$index < $array.len()` was asserted above.
            unsafe { $array.value_unchecked($index).into() }
        } else {
            Value::Null
        }
    }};
}
276
/// Expands to an expression that reads element `$index` of `$array` as a borrowed
/// `ValueRef`.
///
/// Null slots yield `ValueRef::Null`; valid slots are converted with `Into`.
///
/// # Panics
///
/// The expansion panics when `$index` is not smaller than `$array.len()`.
macro_rules! impl_get_ref_for_vector {
    ($array: expr, $index: ident) => {{
        // `is_valid` only consults the validity bitmap and is not guaranteed to reject an
        // out-of-range index (an array without a null bitmap reports every index as valid),
        // so the bounds are checked explicitly before the unchecked read below.
        assert!(
            $index < $array.len(),
            "index out of bounds: the len is {} but the index is {}",
            $array.len(),
            $index
        );
        if $array.is_valid($index) {
            // SAFETY: `$index < $array.len()` was asserted above.
            unsafe { $array.value_unchecked($index).into() }
        } else {
            ValueRef::Null
        }
    }};
}
287
/// Expands to the body of an `extend_slice_of`-style method.
///
/// The expansion slices `$vector` to `[$offset, $offset + $length)`, downcasts the slice to
/// `$VectorType`, pushes every item yielded by its `iter_data()` into `$mutable_vector`, and
/// evaluates to `Ok(())`.
///
/// If the downcast fails, the enclosing function returns early (via `?`) with the error
/// built from `CastTypeSnafu`, whose message names the actual and the expected vector type.
macro_rules! impl_extend_for_builder {
    ($mutable_vector: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
        use snafu::OptionExt;

        let window = $vector.slice($offset, $length);
        let typed = window.as_any().downcast_ref::<$VectorType>().with_context(|| {
            crate::error::CastTypeSnafu {
                msg: format!(
                    "Failed to cast vector from {} to {}",
                    $vector.vector_type_name(),
                    stringify!($VectorType)
                ),
            }
        })?;

        typed.iter_data().for_each(|item| {
            $mutable_vector.push(item);
        });
        Ok(())
    }};
}
309
310pub(crate) use {
311 impl_extend_for_builder, impl_get_for_vector, impl_get_ref_for_vector,
312 impl_try_from_arrow_array_for_vector, impl_validity_for_vector,
313};
314
/// Unit tests for the vector and builder abstractions declared in this module.
#[cfg(test)]
pub mod tests {
    use arrow::array::{Array, Int32Array, UInt8Array};
    use paste::paste;
    use serde_json;

    use super::*;
    use crate::data_type::DataType;
    use crate::prelude::ScalarVectorBuilder;
    use crate::types::{Int32Type, LogicalPrimitiveType};
    use crate::vectors::helper::Helper;

    /// An arrow `Int32Array` converted through `Helper::try_into_vector` must report the same
    /// arrow type as `Int32Type`'s data type.
    #[test]
    fn test_df_columns_to_vector() {
        let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let vector = Helper::try_into_vector(df_column).unwrap();
        assert_eq!(
            Int32Type::build_data_type().as_arrow_type(),
            vector.data_type().as_arrow_type()
        );
    }

    /// A vector built from an `Int32Array` serializes to a plain JSON array of numbers.
    #[test]
    fn test_serialize_i32_vector() {
        let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let json_value = Helper::try_into_vector(df_column)
            .unwrap()
            .serialize_to_json()
            .unwrap();
        assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
    }

    /// Same JSON check as above for a vector built from a `UInt8Array`.
    ///
    /// NOTE(review): the test name says `i8`, but the input is a `UInt8Array` (u8) — consider
    /// renaming the test.
    #[test]
    fn test_serialize_i8_vector() {
        let df_column: Arc<dyn Array> = Arc::new(UInt8Array::from(vec![1, 2, 3]));
        let json_value = Helper::try_into_vector(df_column)
            .unwrap()
            .serialize_to_json()
            .unwrap();
        assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
    }

    /// Every builder must report the `ConcreteDataType` that matches its element type.
    #[test]
    fn test_mutable_vector_data_type() {
        // For each `$type`, builds `<$type>VectorBuilder` and compares its data type with
        // `ConcreteDataType::<lowercased $type>_datatype()` (e.g. `Int8` -> `int8_datatype`).
        macro_rules! mutable_primitive_data_type_eq_with_lower {
            ($($type: ident),*) => {
                $(
                    paste! {
                        let mutable_vector = [<$type VectorBuilder>]::with_capacity(1024);
                        assert_eq!(mutable_vector.data_type(), ConcreteDataType::[<$type:lower _datatype>]());
                    }
                )*
            };
        }

        // Same check, but the constructor name is the snake_case form of `$type`
        // (e.g. `TimeSecond` -> `time_second_datatype`).
        macro_rules! mutable_time_data_type_eq_with_snake {
            ($($type: ident),*) => {
                $(
                    paste! {
                        let mutable_vector = [<$type VectorBuilder>]::with_capacity(1024);
                        assert_eq!(mutable_vector.data_type(), ConcreteDataType::[<$type:snake _datatype>]());
                    }
                )*
            };
        }
        // Types whose constructor name is simply the lowercased type name.
        mutable_primitive_data_type_eq_with_lower!(
            Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64,
            Date, Binary, String
        );

        // Multi-word types whose constructor name is the snake_case type name.
        mutable_time_data_type_eq_with_snake!(
            TimeSecond,
            TimeMillisecond,
            TimeMicrosecond,
            TimeNanosecond,
            TimestampSecond,
            TimestampMillisecond,
            TimestampMicrosecond,
            TimestampNanosecond,
            DurationSecond,
            DurationMillisecond,
            DurationMicrosecond,
            DurationNanosecond,
            IntervalYearMonth,
            IntervalDayTime,
            IntervalMonthDayNano
        );

        // The null builder is created through `Default` rather than `with_capacity`.
        let builder = NullVectorBuilder::default();
        assert_eq!(builder.data_type(), ConcreteDataType::null_datatype());

        // Without an explicit precision/scale the decimal builder is expected to report
        // precision 38 and scale 10.
        let builder = Decimal128VectorBuilder::with_capacity(1024);
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::decimal128_datatype(38, 10)
        );

        // An explicitly configured precision/scale must be reflected in the data type.
        let builder = Decimal128VectorBuilder::with_capacity(1024)
            .with_precision_and_scale(3, 2)
            .unwrap();
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::decimal128_datatype(3, 2)
        );
    }

    /// A list builder created with an item type reports the matching list data type, while
    /// the type-less `with_capacity` constructor is expected to panic with the message in
    /// `should_panic` below.
    #[test]
    #[should_panic(expected = "Must use ListVectorBuilder::with_type_capacity()")]
    fn test_mutable_vector_list_data_type() {
        let item_type = Arc::new(ConcreteDataType::int32_datatype());
        let builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1024);
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::list_datatype(item_type)
        );

        // The expected panic comes from this call; the assertion above must pass first.
        let _ = ListVectorBuilder::with_capacity(1024);
    }

    /// `to_vector_cloned` yields a vector with all pushed values and leaves the builder's
    /// length unchanged.
    #[test]
    fn test_mutable_vector_to_vector_cloned() {
        let mut builder = ConcreteDataType::string_datatype().create_mutable_vector(1024);
        builder.push_value_ref(&ValueRef::String("hello"));
        builder.push_value_ref(&ValueRef::String("world"));
        builder.push_value_ref(&ValueRef::String("!"));

        let vector = builder.to_vector_cloned();
        assert_eq!(vector.len(), 3);
        // The builder still holds its three values after the clone.
        assert_eq!(builder.len(), 3);
    }
}