1use std::any::Any;
16use std::fmt::Debug;
17use std::sync::Arc;
18
19use arrow::array::{Array, ArrayRef};
20use snafu::ensure;
21
22use crate::data_type::ConcreteDataType;
23use crate::error::{self, Result};
24use crate::serialize::Serializable;
25use crate::value::{Value, ValueRef};
26use crate::vectors::operations::VectorOp;
27
28mod binary;
29mod boolean;
30mod constant;
31mod date;
32mod decimal;
33mod dictionary;
34mod duration;
35mod eq;
36mod helper;
37mod interval;
38pub(crate) mod json;
39mod list;
40mod null;
41pub(crate) mod operations;
42mod primitive;
43mod string;
44mod struct_vector;
45mod time;
46mod timestamp;
47mod validity;
48
49pub use binary::{BinaryVector, BinaryVectorBuilder};
50pub use boolean::{BooleanVector, BooleanVectorBuilder};
51pub use constant::ConstantVector;
52pub use date::{DateVector, DateVectorBuilder};
53pub use decimal::{Decimal128Vector, Decimal128VectorBuilder};
54pub use dictionary::{DictionaryIter, DictionaryVector};
55pub use duration::{
56 DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector,
57 DurationMillisecondVectorBuilder, DurationNanosecondVector, DurationNanosecondVectorBuilder,
58 DurationSecondVector, DurationSecondVectorBuilder,
59};
60pub use helper::Helper;
61pub use interval::{
62 IntervalDayTimeVector, IntervalDayTimeVectorBuilder, IntervalMonthDayNanoVector,
63 IntervalMonthDayNanoVectorBuilder, IntervalYearMonthVector, IntervalYearMonthVectorBuilder,
64};
65pub use list::{ListIter, ListVector, ListVectorBuilder};
66pub use null::{NullVector, NullVectorBuilder};
67pub use primitive::{
68 Float32Vector, Float32VectorBuilder, Float64Vector, Float64VectorBuilder, Int8Vector,
69 Int8VectorBuilder, Int16Vector, Int16VectorBuilder, Int32Vector, Int32VectorBuilder,
70 Int64Vector, Int64VectorBuilder, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder,
71 UInt8Vector, UInt8VectorBuilder, UInt16Vector, UInt16VectorBuilder, UInt32Vector,
72 UInt32VectorBuilder, UInt64Vector, UInt64VectorBuilder,
73};
74pub use string::{StringVector, StringVectorBuilder};
75pub use struct_vector::{StructVector, StructVectorBuilder};
76pub use time::{
77 TimeMicrosecondVector, TimeMicrosecondVectorBuilder, TimeMillisecondVector,
78 TimeMillisecondVectorBuilder, TimeNanosecondVector, TimeNanosecondVectorBuilder,
79 TimeSecondVector, TimeSecondVectorBuilder,
80};
81pub use timestamp::{
82 TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder, TimestampMillisecondVector,
83 TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampNanosecondVectorBuilder,
84 TimestampSecondVector, TimestampSecondVectorBuilder,
85};
86pub use validity::Validity;
87
88pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
92 fn data_type(&self) -> ConcreteDataType;
96
97 fn vector_type_name(&self) -> String;
98
99 fn as_any(&self) -> &dyn Any;
102
103 fn len(&self) -> usize;
105
106 fn is_empty(&self) -> bool {
108 self.len() == 0
109 }
110
111 fn to_arrow_array(&self) -> ArrayRef;
113
114 fn to_boxed_arrow_array(&self) -> Box<dyn Array>;
116
117 fn validity(&self) -> Validity;
119
120 fn memory_size(&self) -> usize;
122
123 fn null_count(&self) -> usize;
127
128 fn is_const(&self) -> bool {
130 false
131 }
132
133 fn is_null(&self, row: usize) -> bool;
135
136 fn only_null(&self) -> bool {
138 self.null_count() == self.len()
139 }
140
141 fn slice(&self, offset: usize, length: usize) -> VectorRef;
146
147 fn get(&self, index: usize) -> Value;
152
153 fn try_get(&self, index: usize) -> Result<Value> {
156 ensure!(
157 index < self.len(),
158 error::BadArrayAccessSnafu {
159 index,
160 size: self.len()
161 }
162 );
163 Ok(self.get(index))
164 }
165
166 fn get_ref(&self, index: usize) -> ValueRef<'_>;
171}
172
173pub type VectorRef = Arc<dyn Vector>;
174
175pub trait MutableVector: Send + Sync {
177 fn data_type(&self) -> ConcreteDataType;
179
180 fn len(&self) -> usize;
182
183 fn is_empty(&self) -> bool {
185 self.len() == 0
186 }
187
188 fn as_any(&self) -> &dyn Any;
190
191 fn as_mut_any(&mut self) -> &mut dyn Any;
193
194 fn to_vector(&mut self) -> VectorRef;
196
197 fn to_vector_cloned(&self) -> VectorRef;
199
200 fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()>;
202
203 fn push_value_ref(&mut self, value: &ValueRef) {
208 self.try_push_value_ref(value).unwrap_or_else(|_| {
209 panic!(
210 "expecting pushing value of datatype {:?}, actual {:?}",
211 self.data_type(),
212 value
213 );
214 });
215 }
216
217 fn push_null(&mut self);
219
220 fn push_nulls(&mut self, num_nulls: usize) {
222 for _ in 0..num_nulls {
223 self.push_null();
224 }
225 }
226
227 fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
234}
235
/// Generates `$Vector::try_from_arrow_array`, a fallible constructor that builds a
/// `$Vector` from any arrow array whose concrete type is `$Array`.
macro_rules! impl_try_from_arrow_array_for_vector {
    ($Array: ident, $Vector: ident) => {
        impl $Vector {
            /// Builds the vector from `array`.
            ///
            /// # Errors
            /// Returns a `Conversion` error, carrying the array's arrow data type,
            /// when `array` is not of the expected concrete arrow array type.
            pub fn try_from_arrow_array(
                array: impl AsRef<dyn arrow::array::Array>,
            ) -> crate::error::Result<$Vector> {
                use snafu::OptionExt;

                let source = array.as_ref();
                let typed = source
                    .as_any()
                    .downcast_ref::<$Array>()
                    .with_context(|| crate::error::ConversionSnafu {
                        from: std::format!("{:?}", source.data_type()),
                    })?;

                // Clone only after the downcast succeeded.
                Ok($Vector::from(typed.clone()))
            }
        }
    };
}
259
/// Expands to an expression that builds a `Validity` from the array data of `$array`.
macro_rules! impl_validity_for_vector {
    ($array: expr) => {{
        let array_data = $array.to_data();
        Validity::from_array_data(array_data)
    }};
}
265
/// Expands to an expression yielding the owned `Value` stored at `$index` of `$array`,
/// or `Value::Null` when that slot is not valid.
///
/// The element is read through the bounds-checked `value()` accessor instead of
/// `unsafe { value_unchecked() }`. `is_valid()` only inspects the null buffer, and
/// arrow does not guarantee that it rejects an out-of-range index (an array without a
/// null buffer reports every index as valid), so it cannot serve as the safety proof
/// for an unchecked read.
///
/// # Panics
/// The expansion panics when `$index` is out of bounds.
macro_rules! impl_get_for_vector {
    ($array: expr, $index: ident) => {
        if $array.is_valid($index) {
            $array.value($index).into()
        } else {
            Value::Null
        }
    };
}
276
/// Expands to an expression yielding the borrowed `ValueRef` stored at `$index` of
/// `$array`, or `ValueRef::Null` when that slot is not valid.
///
/// The element is read through the bounds-checked `value()` accessor instead of
/// `unsafe { value_unchecked() }`. `is_valid()` only inspects the null buffer, and
/// arrow does not guarantee that it rejects an out-of-range index (an array without a
/// null buffer reports every index as valid), so it cannot serve as the safety proof
/// for an unchecked read.
///
/// # Panics
/// The expansion panics when `$index` is out of bounds.
macro_rules! impl_get_ref_for_vector {
    ($array: expr, $index: ident) => {
        if $array.is_valid($index) {
            $array.value($index).into()
        } else {
            ValueRef::Null
        }
    };
}
287
/// Expands to the body of an `extend_slice_of` implementation.
///
/// Slices `$vector` to `[$offset, $offset + $length)`, downcasts the slice to
/// `$VectorType` and pushes every item yielded by `iter_data()` into
/// `$mutable_vector`. Evaluates to `Ok(())` on success and returns early with a
/// `CastType` error when the downcast fails.
macro_rules! impl_extend_for_builder {
    ($mutable_vector: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
        use snafu::OptionExt;

        // The slice is taken before the type check, exactly like the original flow.
        let window = $vector.slice($offset, $length);
        let typed_window = window
            .as_any()
            .downcast_ref::<$VectorType>()
            .with_context(|| crate::error::CastTypeSnafu {
                msg: format!(
                    "Failed to cast vector from {} to {}",
                    $vector.vector_type_name(),
                    stringify!($VectorType)
                ),
            })?;

        typed_window.iter_data().for_each(|item| {
            $mutable_vector.push(item);
        });
        Ok(())
    }};
}
309
310pub(crate) use impl_extend_for_builder;
311pub(crate) use impl_get_for_vector;
312pub(crate) use impl_get_ref_for_vector;
313pub(crate) use impl_try_from_arrow_array_for_vector;
314pub(crate) use impl_validity_for_vector;
315
#[cfg(test)]
pub mod tests {
    use arrow::array::{Array, Int32Array, UInt8Array};
    use paste::paste;
    use serde_json;

    use super::*;
    use crate::data_type::DataType;
    use crate::prelude::ScalarVectorBuilder;
    use crate::types::{Int32Type, LogicalPrimitiveType};
    use crate::vectors::helper::Helper;

    /// Converts an arrow array into a vector and renders its JSON serialization as a
    /// string.
    fn to_json_string(column: Arc<dyn Array>) -> String {
        let json_value = Helper::try_into_vector(column)
            .unwrap()
            .serialize_to_json()
            .unwrap();
        serde_json::to_string(&json_value).unwrap()
    }

    #[test]
    fn test_df_columns_to_vector() {
        let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let vector = Helper::try_into_vector(df_column).unwrap();
        assert_eq!(
            Int32Type::build_data_type().as_arrow_type(),
            vector.data_type().as_arrow_type()
        );
    }

    #[test]
    fn test_serialize_i32_vector() {
        let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        assert_eq!("[1,2,3]", to_json_string(df_column));
    }

    // Renamed from `test_serialize_i8_vector`: the array under test is a `UInt8Array`.
    #[test]
    fn test_serialize_u8_vector() {
        let df_column: Arc<dyn Array> = Arc::new(UInt8Array::from(vec![1, 2, 3]));
        assert_eq!("[1,2,3]", to_json_string(df_column));
    }

    #[test]
    fn test_mutable_vector_data_type() {
        // Checks builders whose data type constructor is the lower-cased type name,
        // e.g. `Int32VectorBuilder` <-> `ConcreteDataType::int32_datatype()`.
        macro_rules! mutable_primitive_data_type_eq_with_lower {
            ($($type: ident),*) => {
                $(
                    paste! {
                        let mutable_vector = [<$type VectorBuilder>]::with_capacity(1024);
                        assert_eq!(mutable_vector.data_type(), ConcreteDataType::[<$type:lower _datatype>]());
                    }
                )*
            };
        }

        // Checks builders whose data type constructor is the snake-cased type name,
        // e.g. `TimeSecondVectorBuilder` <-> `ConcreteDataType::time_second_datatype()`.
        macro_rules! mutable_time_data_type_eq_with_snake {
            ($($type: ident),*) => {
                $(
                    paste! {
                        let mutable_vector = [<$type VectorBuilder>]::with_capacity(1024);
                        assert_eq!(mutable_vector.data_type(), ConcreteDataType::[<$type:snake _datatype>]());
                    }
                )*
            };
        }

        // Boolean, numeric, date, binary and string builders.
        mutable_primitive_data_type_eq_with_lower!(
            Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64,
            Date, Binary, String
        );

        // Time, timestamp, duration and interval builders.
        mutable_time_data_type_eq_with_snake!(
            TimeSecond,
            TimeMillisecond,
            TimeMicrosecond,
            TimeNanosecond,
            TimestampSecond,
            TimestampMillisecond,
            TimestampMicrosecond,
            TimestampNanosecond,
            DurationSecond,
            DurationMillisecond,
            DurationMicrosecond,
            DurationNanosecond,
            IntervalYearMonth,
            IntervalDayTime,
            IntervalMonthDayNano
        );

        // Null builder.
        let builder = NullVectorBuilder::default();
        assert_eq!(builder.data_type(), ConcreteDataType::null_datatype());

        // Decimal128 builder with the precision and scale it reports by default.
        let builder = Decimal128VectorBuilder::with_capacity(1024);
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::decimal128_datatype(38, 10)
        );

        // Decimal128 builder with an explicit precision and scale.
        let builder = Decimal128VectorBuilder::with_capacity(1024)
            .with_precision_and_scale(3, 2)
            .unwrap();
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::decimal128_datatype(3, 2)
        );
    }

    #[test]
    #[should_panic(expected = "Must use ListVectorBuilder::with_type_capacity()")]
    fn test_mutable_vector_list_data_type() {
        // A list builder created with an explicit item type reports the list type.
        let item_type = Arc::new(ConcreteDataType::int32_datatype());
        let builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1024);
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::list_datatype(item_type)
        );

        // Creating a list builder without an item type is expected to panic.
        let _ = ListVectorBuilder::with_capacity(1024);
    }

    #[test]
    fn test_mutable_vector_to_vector_cloned() {
        // Build a string vector with three elements.
        let mut builder = ConcreteDataType::string_datatype().create_mutable_vector(1024);
        builder.push_value_ref(&ValueRef::String("hello"));
        builder.push_value_ref(&ValueRef::String("world"));
        builder.push_value_ref(&ValueRef::String("!"));

        // `to_vector_cloned` must leave the builder's contents untouched.
        let vector = builder.to_vector_cloned();
        assert_eq!(vector.len(), 3);
        assert_eq!(builder.len(), 3);
    }
}