1use std::any::Any;
16use std::fmt::Debug;
17use std::sync::Arc;
18
19use arrow::array::{Array, ArrayRef};
20use snafu::ensure;
21
22use crate::data_type::ConcreteDataType;
23use crate::error::{self, Result};
24use crate::serialize::Serializable;
25use crate::value::{Value, ValueRef};
26use crate::vectors::operations::VectorOp;
27
28mod binary;
29mod boolean;
30mod constant;
31mod date;
32mod decimal;
33mod dictionary;
34mod duration;
35mod eq;
36mod helper;
37mod interval;
38mod list;
39mod null;
40pub(crate) mod operations;
41mod primitive;
42mod string;
43mod struct_vector;
44mod time;
45mod timestamp;
46mod validity;
47
48pub use binary::{BinaryVector, BinaryVectorBuilder};
49pub use boolean::{BooleanVector, BooleanVectorBuilder};
50pub use constant::ConstantVector;
51pub use date::{DateVector, DateVectorBuilder};
52pub use decimal::{Decimal128Vector, Decimal128VectorBuilder};
53pub use dictionary::{DictionaryIter, DictionaryVector};
54pub use duration::{
55 DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector,
56 DurationMillisecondVectorBuilder, DurationNanosecondVector, DurationNanosecondVectorBuilder,
57 DurationSecondVector, DurationSecondVectorBuilder,
58};
59pub use helper::Helper;
60pub use interval::{
61 IntervalDayTimeVector, IntervalDayTimeVectorBuilder, IntervalMonthDayNanoVector,
62 IntervalMonthDayNanoVectorBuilder, IntervalYearMonthVector, IntervalYearMonthVectorBuilder,
63};
64pub use list::{ListIter, ListVector, ListVectorBuilder};
65pub use null::{NullVector, NullVectorBuilder};
66pub use primitive::{
67 Float32Vector, Float32VectorBuilder, Float64Vector, Float64VectorBuilder, Int8Vector,
68 Int8VectorBuilder, Int16Vector, Int16VectorBuilder, Int32Vector, Int32VectorBuilder,
69 Int64Vector, Int64VectorBuilder, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder,
70 UInt8Vector, UInt8VectorBuilder, UInt16Vector, UInt16VectorBuilder, UInt32Vector,
71 UInt32VectorBuilder, UInt64Vector, UInt64VectorBuilder,
72};
73pub use string::{StringVector, StringVectorBuilder};
74pub use struct_vector::{StructVector, StructVectorBuilder};
75pub use time::{
76 TimeMicrosecondVector, TimeMicrosecondVectorBuilder, TimeMillisecondVector,
77 TimeMillisecondVectorBuilder, TimeNanosecondVector, TimeNanosecondVectorBuilder,
78 TimeSecondVector, TimeSecondVectorBuilder,
79};
80pub use timestamp::{
81 TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder, TimestampMillisecondVector,
82 TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampNanosecondVectorBuilder,
83 TimestampSecondVector, TimestampSecondVectorBuilder,
84};
85pub use validity::Validity;
86
87pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
91 fn data_type(&self) -> ConcreteDataType;
95
96 fn vector_type_name(&self) -> String;
97
98 fn as_any(&self) -> &dyn Any;
101
102 fn len(&self) -> usize;
104
105 fn is_empty(&self) -> bool {
107 self.len() == 0
108 }
109
110 fn to_arrow_array(&self) -> ArrayRef;
112
113 fn to_boxed_arrow_array(&self) -> Box<dyn Array>;
115
116 fn validity(&self) -> Validity;
118
119 fn memory_size(&self) -> usize;
121
122 fn null_count(&self) -> usize;
126
127 fn is_const(&self) -> bool {
129 false
130 }
131
132 fn is_null(&self, row: usize) -> bool;
134
135 fn only_null(&self) -> bool {
137 self.null_count() == self.len()
138 }
139
140 fn slice(&self, offset: usize, length: usize) -> VectorRef;
145
146 fn get(&self, index: usize) -> Value;
151
152 fn try_get(&self, index: usize) -> Result<Value> {
155 ensure!(
156 index < self.len(),
157 error::BadArrayAccessSnafu {
158 index,
159 size: self.len()
160 }
161 );
162 Ok(self.get(index))
163 }
164
165 fn get_ref(&self, index: usize) -> ValueRef<'_>;
170}
171
/// Shared, reference-counted handle to a type-erased [`Vector`].
pub type VectorRef = Arc<dyn Vector>;
173
174pub trait MutableVector: Send + Sync {
176 fn data_type(&self) -> ConcreteDataType;
178
179 fn len(&self) -> usize;
181
182 fn is_empty(&self) -> bool {
184 self.len() == 0
185 }
186
187 fn as_any(&self) -> &dyn Any;
189
190 fn as_mut_any(&mut self) -> &mut dyn Any;
192
193 fn to_vector(&mut self) -> VectorRef;
195
196 fn to_vector_cloned(&self) -> VectorRef;
198
199 fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()>;
201
202 fn push_value_ref(&mut self, value: &ValueRef) {
207 self.try_push_value_ref(value).unwrap_or_else(|_| {
208 panic!(
209 "expecting pushing value of datatype {:?}, actual {:?}",
210 self.data_type(),
211 value
212 );
213 });
214 }
215
216 fn push_null(&mut self);
218
219 fn push_nulls(&mut self, num_nulls: usize) {
221 for _ in 0..num_nulls {
222 self.push_null();
223 }
224 }
225
226 fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
233}
234
/// Generates an inherent `try_from_arrow_array` constructor for a vector type.
///
/// `$ArrowArray` is the concrete arrow array type to downcast to and `$VectorType` is
/// the vector type being built; `$VectorType` must implement `From<$ArrowArray>`.
macro_rules! impl_try_from_arrow_array_for_vector {
    ($ArrowArray: ident, $VectorType: ident) => {
        impl $VectorType {
            /// Downcasts `array` to the expected arrow array type and wraps a clone of it.
            ///
            /// Returns a `Conversion` error, carrying the debug form of the array's
            /// data type, when the downcast fails.
            pub fn try_from_arrow_array(
                array: impl AsRef<dyn arrow::array::Array>,
            ) -> crate::error::Result<$VectorType> {
                use snafu::OptionExt;

                let source = array.as_ref();
                let typed = source
                    .as_any()
                    .downcast_ref::<$ArrowArray>()
                    .with_context(|| crate::error::ConversionSnafu {
                        from: std::format!("{:?}", source.data_type()),
                    })?;

                Ok($VectorType::from(typed.clone()))
            }
        }
    };
}
258
/// Builds a `Validity` from the data returned by `to_data()` on the given array expression.
macro_rules! impl_validity_for_vector {
    ($arr: expr) => {{
        let array_data = $arr.to_data();
        Validity::from_array_data(array_data)
    }};
}
264
/// Expands to an expression yielding the owned `Value` stored at `$index` of `$array`.
///
/// Yields `Value::Null` when the slot is not valid; otherwise converts the element
/// with `Into`.
///
/// The element is read with the bounds-checked `value()` accessor rather than
/// `value_unchecked()`: `is_valid()` only consults the null buffer, so on an array
/// without nulls it does not reject an out-of-range index. Using `value()` turns
/// such an index into a panic instead of undefined behaviour.
///
/// # Panics
///
/// Panics if `$index` is out of bounds.
macro_rules! impl_get_for_vector {
    ($array: expr, $index: ident) => {
        if $array.is_valid($index) {
            $array.value($index).into()
        } else {
            Value::Null
        }
    };
}
275
/// Expands to an expression yielding the borrowed `ValueRef` stored at `$index` of `$array`.
///
/// Yields `ValueRef::Null` when the slot is not valid; otherwise converts the element
/// with `Into`.
///
/// The element is read with the bounds-checked `value()` accessor rather than
/// `value_unchecked()`: `is_valid()` only consults the null buffer, so on an array
/// without nulls it does not reject an out-of-range index. Using `value()` turns
/// such an index into a panic instead of undefined behaviour.
///
/// # Panics
///
/// Panics if `$index` is out of bounds.
macro_rules! impl_get_ref_for_vector {
    ($array: expr, $index: ident) => {
        if $array.is_valid($index) {
            $array.value($index).into()
        } else {
            ValueRef::Null
        }
    };
}
286
/// Expands to a block that appends a window of `$source` to `$builder`.
///
/// The window `[$start, $start + $count)` is obtained with `slice`, downcast to
/// `$Concrete`, and every item produced by its `iter_data()` is handed to
/// `$builder.push`. The block evaluates to `Ok(())`, or returns early (via `?`) with a
/// `CastType` error when the downcast fails, so it must be expanded inside a function
/// returning a compatible `Result`.
macro_rules! impl_extend_for_builder {
    ($builder: expr, $source: ident, $Concrete: ident, $start: ident, $count: ident) => {{
        use snafu::OptionExt;

        let window = $source.slice($start, $count);
        let typed = window
            .as_any()
            .downcast_ref::<$Concrete>()
            .with_context(|| crate::error::CastTypeSnafu {
                msg: format!(
                    "Failed to cast vector from {} to {}",
                    $source.vector_type_name(),
                    stringify!($Concrete)
                ),
            })?;
        typed.iter_data().for_each(|item| {
            $builder.push(item);
        });
        Ok(())
    }};
}
308
309pub(crate) use {
310 impl_extend_for_builder, impl_get_for_vector, impl_get_ref_for_vector,
311 impl_try_from_arrow_array_for_vector, impl_validity_for_vector,
312};
313
/// Unit tests for vector conversion, JSON serialization and builder data types.
#[cfg(test)]
pub mod tests {
    use arrow::array::{Array, Int32Array, UInt8Array};
    use paste::paste;

    use super::*;
    use crate::data_type::DataType;
    use crate::prelude::ScalarVectorBuilder;
    use crate::types::{Int32Type, LogicalPrimitiveType};
    use crate::vectors::helper::Helper;

    /// An arrow `Int32Array` converts into a vector whose arrow type is `Int32`.
    #[test]
    fn test_df_columns_to_vector() {
        let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let vector = Helper::try_into_vector(df_column).unwrap();
        assert_eq!(
            Int32Type::build_data_type().as_arrow_type(),
            vector.data_type().as_arrow_type()
        );
    }

    /// A vector built from an `Int32Array` serializes to a JSON array of numbers.
    #[test]
    fn test_serialize_i32_vector() {
        let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let json_value = Helper::try_into_vector(df_column)
            .unwrap()
            .serialize_to_json()
            .unwrap();
        assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
    }

    /// A vector built from a `UInt8Array` serializes to a JSON array of numbers.
    #[test]
    fn test_serialize_u8_vector() {
        let df_column: Arc<dyn Array> = Arc::new(UInt8Array::from(vec![1, 2, 3]));
        let json_value = Helper::try_into_vector(df_column)
            .unwrap()
            .serialize_to_json()
            .unwrap();
        assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
    }

    /// Every builder reports the data type matching its name.
    #[test]
    fn test_mutable_vector_data_type() {
        // Types whose `ConcreteDataType` constructor is the lower-cased type name,
        // e.g. `UInt8` -> `uint8_datatype`.
        macro_rules! mutable_primitive_data_type_eq_with_lower {
            ($($type: ident),*) => {
                $(
                    paste! {
                        let mutable_vector = [<$type VectorBuilder>]::with_capacity(1024);
                        assert_eq!(mutable_vector.data_type(), ConcreteDataType::[<$type:lower _datatype>]());
                    }
                )*
            };
        }

        // Types whose `ConcreteDataType` constructor is the snake-cased type name,
        // e.g. `TimeSecond` -> `time_second_datatype`.
        macro_rules! mutable_time_data_type_eq_with_snake {
            ($($type: ident),*) => {
                $(
                    paste! {
                        let mutable_vector = [<$type VectorBuilder>]::with_capacity(1024);
                        assert_eq!(mutable_vector.data_type(), ConcreteDataType::[<$type:snake _datatype>]());
                    }
                )*
            };
        }

        mutable_primitive_data_type_eq_with_lower!(
            Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64,
            Date, Binary, String
        );

        mutable_time_data_type_eq_with_snake!(
            TimeSecond,
            TimeMillisecond,
            TimeMicrosecond,
            TimeNanosecond,
            TimestampSecond,
            TimestampMillisecond,
            TimestampMicrosecond,
            TimestampNanosecond,
            DurationSecond,
            DurationMillisecond,
            DurationMicrosecond,
            DurationNanosecond,
            IntervalYearMonth,
            IntervalDayTime,
            IntervalMonthDayNano
        );

        // Null builder.
        let builder = NullVectorBuilder::default();
        assert_eq!(builder.data_type(), ConcreteDataType::null_datatype());

        // Decimal128 builder with its default precision and scale.
        let builder = Decimal128VectorBuilder::with_capacity(1024);
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::decimal128_datatype(38, 10)
        );

        // Decimal128 builder with an explicit precision and scale.
        let builder = Decimal128VectorBuilder::with_capacity(1024)
            .with_precision_and_scale(3, 2)
            .unwrap();
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::decimal128_datatype(3, 2)
        );
    }

    /// A list builder reports its list data type, and the plain `with_capacity`
    /// constructor panics with the expected message.
    #[test]
    #[should_panic(expected = "Must use ListVectorBuilder::with_type_capacity()")]
    fn test_mutable_vector_list_data_type() {
        let builder =
            ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1024);
        assert_eq!(
            builder.data_type(),
            ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
        );

        // This call is the one expected to panic.
        let _ = ListVectorBuilder::with_capacity(1024);
    }

    /// `to_vector_cloned` yields the pushed values and leaves the builder's length unchanged.
    #[test]
    fn test_mutable_vector_to_vector_cloned() {
        let mut builder = ConcreteDataType::string_datatype().create_mutable_vector(1024);
        builder.push_value_ref(&ValueRef::String("hello"));
        builder.push_value_ref(&ValueRef::String("world"));
        builder.push_value_ref(&ValueRef::String("!"));

        let vector = builder.to_vector_cloned();
        assert_eq!(vector.len(), 3);
        assert_eq!(builder.len(), 3);
    }
}