1use std::any::Any;
16use std::fmt::Debug;
17use std::sync::Arc;
18
19use arrow::array::{Array, ArrayRef};
20use snafu::ensure;
21
22use crate::data_type::ConcreteDataType;
23use crate::error::{self, Result};
24use crate::serialize::Serializable;
25use crate::value::{Value, ValueRef};
26use crate::vectors::operations::VectorOp;
27
28mod binary;
29mod boolean;
30mod constant;
31mod date;
32mod decimal;
33mod dictionary;
34mod duration;
35mod eq;
36mod helper;
37mod interval;
38mod list;
39mod null;
40pub(crate) mod operations;
41mod primitive;
42mod string;
43mod time;
44mod timestamp;
45mod validity;
46
47pub use binary::{BinaryVector, BinaryVectorBuilder};
48pub use boolean::{BooleanVector, BooleanVectorBuilder};
49pub use constant::ConstantVector;
50pub use date::{DateVector, DateVectorBuilder};
51pub use decimal::{Decimal128Vector, Decimal128VectorBuilder};
52pub use dictionary::{DictionaryIter, DictionaryVector};
53pub use duration::{
54 DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector,
55 DurationMillisecondVectorBuilder, DurationNanosecondVector, DurationNanosecondVectorBuilder,
56 DurationSecondVector, DurationSecondVectorBuilder,
57};
58pub use helper::Helper;
59pub use interval::{
60 IntervalDayTimeVector, IntervalDayTimeVectorBuilder, IntervalMonthDayNanoVector,
61 IntervalMonthDayNanoVectorBuilder, IntervalYearMonthVector, IntervalYearMonthVectorBuilder,
62};
63pub use list::{ListIter, ListVector, ListVectorBuilder};
64pub use null::{NullVector, NullVectorBuilder};
65pub use primitive::{
66 Float32Vector, Float32VectorBuilder, Float64Vector, Float64VectorBuilder, Int16Vector,
67 Int16VectorBuilder, Int32Vector, Int32VectorBuilder, Int64Vector, Int64VectorBuilder,
68 Int8Vector, Int8VectorBuilder, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder,
69 UInt16Vector, UInt16VectorBuilder, UInt32Vector, UInt32VectorBuilder, UInt64Vector,
70 UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder,
71};
72pub use string::{StringVector, StringVectorBuilder};
73pub use time::{
74 TimeMicrosecondVector, TimeMicrosecondVectorBuilder, TimeMillisecondVector,
75 TimeMillisecondVectorBuilder, TimeNanosecondVector, TimeNanosecondVectorBuilder,
76 TimeSecondVector, TimeSecondVectorBuilder,
77};
78pub use timestamp::{
79 TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder, TimestampMillisecondVector,
80 TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampNanosecondVectorBuilder,
81 TimestampSecondVector, TimestampSecondVectorBuilder,
82};
83pub use validity::Validity;
84
85pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
89 fn data_type(&self) -> ConcreteDataType;
93
94 fn vector_type_name(&self) -> String;
95
96 fn as_any(&self) -> &dyn Any;
99
100 fn len(&self) -> usize;
102
103 fn is_empty(&self) -> bool {
105 self.len() == 0
106 }
107
108 fn to_arrow_array(&self) -> ArrayRef;
110
111 fn to_boxed_arrow_array(&self) -> Box<dyn Array>;
113
114 fn validity(&self) -> Validity;
116
117 fn memory_size(&self) -> usize;
119
120 fn null_count(&self) -> usize;
124
125 fn is_const(&self) -> bool {
127 false
128 }
129
130 fn is_null(&self, row: usize) -> bool;
132
133 fn only_null(&self) -> bool {
135 self.null_count() == self.len()
136 }
137
138 fn slice(&self, offset: usize, length: usize) -> VectorRef;
143
144 fn get(&self, index: usize) -> Value;
149
150 fn try_get(&self, index: usize) -> Result<Value> {
153 ensure!(
154 index < self.len(),
155 error::BadArrayAccessSnafu {
156 index,
157 size: self.len()
158 }
159 );
160 Ok(self.get(index))
161 }
162
163 fn get_ref(&self, index: usize) -> ValueRef;
168}
169
/// Shared, reference-counted handle to a type-erased [`Vector`].
pub type VectorRef = Arc<dyn Vector>;
171
172pub trait MutableVector: Send + Sync {
174 fn data_type(&self) -> ConcreteDataType;
176
177 fn len(&self) -> usize;
179
180 fn is_empty(&self) -> bool {
182 self.len() == 0
183 }
184
185 fn as_any(&self) -> &dyn Any;
187
188 fn as_mut_any(&mut self) -> &mut dyn Any;
190
191 fn to_vector(&mut self) -> VectorRef;
193
194 fn to_vector_cloned(&self) -> VectorRef;
196
197 fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()>;
199
200 fn push_value_ref(&mut self, value: ValueRef) {
205 self.try_push_value_ref(value).unwrap_or_else(|_| {
206 panic!(
207 "expecting pushing value of datatype {:?}, actual {:?}",
208 self.data_type(),
209 value
210 );
211 });
212 }
213
214 fn push_null(&mut self);
216
217 fn push_nulls(&mut self, num_nulls: usize) {
219 for _ in 0..num_nulls {
220 self.push_null();
221 }
222 }
223
224 fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
231}
232
/// Generates an inherent `try_from_arrow_array` constructor on `$Vector` that
/// accepts any arrow array whose concrete type is `$Array`.
macro_rules! impl_try_from_arrow_array_for_vector {
    ($Array: ident, $Vector: ident) => {
        impl $Vector {
            /// Tries to build this vector from an arrow array.
            ///
            /// Returns a conversion error naming the array's arrow data type
            /// when the array is not of the expected concrete array type.
            pub fn try_from_arrow_array(
                array: impl AsRef<dyn arrow::array::Array>,
            ) -> crate::error::Result<$Vector> {
                use snafu::OptionExt;

                let source = array.as_ref();
                let concrete = source
                    .as_any()
                    .downcast_ref::<$Array>()
                    .with_context(|| crate::error::ConversionSnafu {
                        from: std::format!("{:?}", source.data_type()),
                    })?;

                // The downcast only yields a borrow, so clone the arrow array to own it.
                Ok($Vector::from(concrete.clone()))
            }
        }
    };
}
256
/// Expands to an expression that builds a `Validity` from the data returned by
/// `$array.to_data()`.
///
/// `Validity` is resolved at the expansion site, so it must be in scope there.
macro_rules! impl_validity_for_vector {
    ($array: expr) => {
        Validity::from_array_data($array.to_data())
    };
}
262
/// Expands to the body of a `get(index) -> Value` implementation backed by the
/// arrow array `$array`.
///
/// The expansion yields `Value::Null` for a null slot and the slot's value
/// converted with `Into` otherwise. `Value` is resolved at the expansion site.
///
/// # Panics
/// Panics when `$index` is not smaller than `$array.len()`.
macro_rules! impl_get_for_vector {
    ($array: expr, $index: ident) => {{
        // `is_valid` alone does not prove the index is in range: arrow answers
        // it from the null buffer only, so an array without nulls reports every
        // index as valid. Check the bound explicitly before the unchecked read.
        assert!(
            $index < $array.len(),
            "vector index {} is out of bounds (len {})",
            $index,
            $array.len()
        );
        if $array.is_valid($index) {
            // SAFETY: `$index < $array.len()` was asserted above.
            unsafe { $array.value_unchecked($index).into() }
        } else {
            Value::Null
        }
    }};
}
273
/// Expands to the body of a `get_ref(index) -> ValueRef` implementation backed
/// by the arrow array `$array`.
///
/// The expansion yields `ValueRef::Null` for a null slot and the slot's value
/// converted with `Into` otherwise. `ValueRef` is resolved at the expansion
/// site.
///
/// # Panics
/// Panics when `$index` is not smaller than `$array.len()`.
macro_rules! impl_get_ref_for_vector {
    ($array: expr, $index: ident) => {{
        // `is_valid` alone does not prove the index is in range: arrow answers
        // it from the null buffer only, so an array without nulls reports every
        // index as valid. Check the bound explicitly before the unchecked read.
        assert!(
            $index < $array.len(),
            "vector index {} is out of bounds (len {})",
            $index,
            $array.len()
        );
        if $array.is_valid($index) {
            // SAFETY: `$index < $array.len()` was asserted above.
            unsafe { $array.value_unchecked($index).into() }
        } else {
            ValueRef::Null
        }
    }};
}
284
/// Expands to the body of a builder's `extend_slice_of` implementation.
///
/// The expansion slices `$vector` to `[$offset, $offset + $length)`, downcasts
/// the slice to `$VectorType` and pushes every element yielded by `iter_data()`
/// into `$mutable_vector`. It evaluates to `Ok(())`, or returns a cast error
/// from the enclosing function when the downcast fails.
macro_rules! impl_extend_for_builder {
    ($mutable_vector: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
        use snafu::OptionExt;

        let window = $vector.slice($offset, $length);
        let typed_window = window
            .as_any()
            .downcast_ref::<$VectorType>()
            .with_context(|| crate::error::CastTypeSnafu {
                msg: format!(
                    "Failed to cast vector from {} to {}",
                    $vector.vector_type_name(),
                    stringify!($VectorType)
                ),
            })?;
        typed_window.iter_data().for_each(|item| {
            $mutable_vector.push(item);
        });
        Ok(())
    }};
}
306
307pub(crate) use {
308 impl_extend_for_builder, impl_get_for_vector, impl_get_ref_for_vector,
309 impl_try_from_arrow_array_for_vector, impl_validity_for_vector,
310};
311
#[cfg(test)]
pub mod tests {
    use arrow::array::{Array, Int32Array, UInt8Array};
    use paste::paste;
    use serde_json;

    use super::*;
    use crate::data_type::DataType;
    use crate::prelude::ScalarVectorBuilder;
    use crate::types::{Int32Type, LogicalPrimitiveType};
    use crate::vectors::helper::Helper;

    /// Converts an arrow array into a vector and renders the vector's JSON
    /// serialization as text.
    fn arrow_column_to_json(column: Arc<dyn Array>) -> String {
        let json_value = Helper::try_into_vector(column)
            .unwrap()
            .serialize_to_json()
            .unwrap();
        serde_json::to_string(&json_value).unwrap()
    }

    /// An arrow `Int32Array` converted by `Helper::try_into_vector` reports the
    /// same arrow type as `Int32Type`.
    #[test]
    fn test_df_columns_to_vector() {
        let arrow_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let converted = Helper::try_into_vector(arrow_column).unwrap();

        let expected = Int32Type::build_data_type();
        let actual = converted.data_type();
        assert_eq!(expected.as_arrow_type(), actual.as_arrow_type());
    }

    /// An `Int32Array`-backed vector serializes to a plain JSON array.
    #[test]
    fn test_serialize_i32_vector() {
        let arrow_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        assert_eq!("[1,2,3]", arrow_column_to_json(arrow_column));
    }

    /// A small-integer vector serializes to a plain JSON array.
    ///
    /// Despite the `i8` in its name, the input here is a `UInt8Array`.
    #[test]
    fn test_serialize_i8_vector() {
        let arrow_column: Arc<dyn Array> = Arc::new(UInt8Array::from(vec![1, 2, 3]));
        assert_eq!("[1,2,3]", arrow_column_to_json(arrow_column));
    }

    /// Every builder reports the `ConcreteDataType` matching its element type.
    #[test]
    fn test_mutable_vector_data_type() {
        // Builders whose `ConcreteDataType` constructor is the lower-cased type
        // name plus `_datatype`, e.g. `Int8` -> `int8_datatype()`.
        macro_rules! assert_builder_type_by_lower_name {
            ($($kind: ident),*) => {
                $(
                    paste! {
                        let builder = [<$kind VectorBuilder>]::with_capacity(1024);
                        let expected = ConcreteDataType::[<$kind:lower _datatype>]();
                        assert_eq!(builder.data_type(), expected);
                    }
                )*
            };
        }

        // Builders whose `ConcreteDataType` constructor is the snake-cased type
        // name plus `_datatype`, e.g. `TimeSecond` -> `time_second_datatype()`.
        macro_rules! assert_builder_type_by_snake_name {
            ($($kind: ident),*) => {
                $(
                    paste! {
                        let builder = [<$kind VectorBuilder>]::with_capacity(1024);
                        let expected = ConcreteDataType::[<$kind:snake _datatype>]();
                        assert_eq!(builder.data_type(), expected);
                    }
                )*
            };
        }

        assert_builder_type_by_lower_name!(
            Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64,
            Date, Binary, String
        );

        assert_builder_type_by_snake_name!(
            TimeSecond,
            TimeMillisecond,
            TimeMicrosecond,
            TimeNanosecond,
            TimestampSecond,
            TimestampMillisecond,
            TimestampMicrosecond,
            TimestampNanosecond,
            DurationSecond,
            DurationMillisecond,
            DurationMicrosecond,
            DurationNanosecond,
            IntervalYearMonth,
            IntervalDayTime,
            IntervalMonthDayNano
        );

        // The null builder is created through `Default` rather than a capacity.
        let null_builder = NullVectorBuilder::default();
        assert_eq!(null_builder.data_type(), ConcreteDataType::null_datatype());

        // Without an explicit precision and scale the decimal builder reports (38, 10).
        let default_decimal = Decimal128VectorBuilder::with_capacity(1024);
        assert_eq!(
            default_decimal.data_type(),
            ConcreteDataType::decimal128_datatype(38, 10)
        );

        // An explicit precision and scale is reflected in the data type.
        let custom_decimal = Decimal128VectorBuilder::with_capacity(1024)
            .with_precision_and_scale(3, 2)
            .unwrap();
        assert_eq!(
            custom_decimal.data_type(),
            ConcreteDataType::decimal128_datatype(3, 2)
        );
    }

    /// A list builder made with `with_type_capacity` reports a list data type,
    /// while `with_capacity` panics because it carries no item type.
    #[test]
    #[should_panic(expected = "Must use ListVectorBuilder::with_type_capacity()")]
    fn test_mutable_vector_list_data_type() {
        let item_type = ConcreteDataType::int32_datatype();
        let list_builder = ListVectorBuilder::with_type_capacity(item_type, 1024);
        let expected = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
        assert_eq!(list_builder.data_type(), expected);

        // This call is the one expected to panic.
        let _ = ListVectorBuilder::with_capacity(1024);
    }

    /// `to_vector_cloned` snapshots the builder without emptying it.
    #[test]
    fn test_mutable_vector_to_vector_cloned() {
        let mut builder = ConcreteDataType::string_datatype().create_mutable_vector(1024);
        for &text in &["hello", "world", "!"] {
            builder.push_value_ref(ValueRef::String(text));
        }

        let snapshot = builder.to_vector_cloned();
        assert_eq!(snapshot.len(), 3);
        // The builder still holds everything that was pushed.
        assert_eq!(builder.len(), 3);
    }
}