datatypes/
scalars.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16
17use common_decimal::Decimal128;
18use common_time::Date;
19
20use crate::types::{
21    Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type,
22    UInt32Type, UInt64Type,
23};
24use crate::value::{ListValue, ListValueRef, StructValue, StructValueRef, Value};
25use crate::vectors::{
26    BinaryVector, BooleanVector, DateVector, Decimal128Vector, ListVector, MutableVector,
27    NullVector, PrimitiveVector, StringVector, StructVector, Vector,
28};
29
/// Estimates how many elements `iter` will yield, for pre-sizing a builder.
///
/// The upper bound of `size_hint()` is used when the iterator reports one. Otherwise the
/// lower bound is used, except that a lower bound of zero falls back to 1024.
fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    const FALLBACK_CAPACITY: usize = 1024;

    let (lower_bound, upper_bound) = iter.size_hint();
    if let Some(exact_or_max) = upper_bound {
        return exact_or_max;
    }

    if lower_bound == 0 {
        FALLBACK_CAPACITY
    } else {
        lower_bound
    }
}
37
/// Owned scalar value
/// e.g. primitive types, bool, `Vec<u8>` ...
///
/// Every scalar names the vector type that stores it ([`Scalar::VectorType`]) and its
/// borrowed form ([`Scalar::RefType`]). The `where` clause requires, for every lifetime,
/// that the vector's `RefItem` is exactly this scalar's `RefType`.
pub trait Scalar: 'static + Sized + Default + Any
where
    for<'a> Self::VectorType: ScalarVector<RefItem<'a> = Self::RefType<'a>>,
{
    /// The vector type whose `OwnedItem` is this scalar.
    type VectorType: ScalarVector<OwnedItem = Self>;
    /// The borrowed form of this scalar; its `ScalarType` must be `Self`.
    type RefType<'a>: ScalarRef<'a, ScalarType = Self>
    where
        Self: 'a;
    /// Get a reference of the current value.
    fn as_scalar_ref(&self) -> Self::RefType<'_>;

    /// Upcast GAT type's lifetime.
    ///
    /// Takes a reference typed with the lifetime `'long` and returns it typed with the
    /// lifetime `'short`, where `'long` outlives `'short`.
    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short>;
}
54
/// Borrowed counterpart of a [`Scalar`], tied to the lifetime `'a`.
///
/// Implementors must be `Debug`, `Clone` and `Send`. The associated `ScalarType` must name
/// this type as its `RefType<'a>`, so the owned and borrowed types refer to each other.
pub trait ScalarRef<'a>: std::fmt::Debug + Clone + Send + 'a {
    /// The corresponding [`Scalar`] type.
    type ScalarType: Scalar<RefType<'a> = Self>;

    /// Convert the reference into an owned value.
    fn to_owned_scalar(&self) -> Self::ScalarType;
}
62
63/// A sub trait of Vector to add scalar operation support.
64// This implementation refers to Datebend's [ScalarColumn](https://github.com/datafuselabs/databend/blob/main/common/datavalues/src/scalars/type_.rs)
65// and skyzh's [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust).
66pub trait ScalarVector: Vector + Send + Sync + Sized + 'static
67where
68    for<'a> Self::OwnedItem: Scalar<RefType<'a> = Self::RefItem<'a>>,
69{
70    type OwnedItem: Scalar<VectorType = Self>;
71    /// The reference item of this vector.
72    type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem>
73    where
74        Self: 'a;
75
76    /// Iterator type of this vector.
77    type Iter<'a>: Iterator<Item = Option<Self::RefItem<'a>>>
78    where
79        Self: 'a;
80
81    /// Builder type to build this vector.
82    type Builder: ScalarVectorBuilder<VectorType = Self>;
83
84    /// Returns the reference to an element at given position.
85    ///
86    /// Note: `get()` has bad performance, avoid call this function inside loop.
87    ///
88    /// # Panics
89    /// Panics if `idx >= self.len()`.
90    fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;
91
92    /// Returns iterator of current vector.
93    fn iter_data(&self) -> Self::Iter<'_>;
94
95    fn from_slice(data: &[Self::RefItem<'_>]) -> Self {
96        let mut builder = Self::Builder::with_capacity(data.len());
97        for item in data {
98            builder.push(Some(item.clone()));
99        }
100        builder.finish()
101    }
102
103    fn from_iterator<'a>(it: impl Iterator<Item = Self::RefItem<'a>>) -> Self {
104        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
105        for item in it {
106            builder.push(Some(item));
107        }
108        builder.finish()
109    }
110
111    fn from_owned_iterator(it: impl Iterator<Item = Option<Self::OwnedItem>>) -> Self {
112        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
113        for item in it {
114            match item {
115                Some(item) => builder.push(Some(item.as_scalar_ref())),
116                None => builder.push(None),
117            }
118        }
119        builder.finish()
120    }
121
122    fn from_vec<I: Into<Self::OwnedItem>>(values: Vec<I>) -> Self {
123        let it = values.into_iter();
124        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
125        for item in it {
126            builder.push(Some(item.into().as_scalar_ref()));
127        }
128        builder.finish()
129    }
130}
131
/// A trait over all vector builders.
///
/// Each builder is tied to one [`ScalarVector`] type, and that vector type must name this
/// builder as its `Builder`.
pub trait ScalarVectorBuilder: MutableVector {
    /// The vector type produced by this builder.
    type VectorType: ScalarVector<Builder = Self>;

    /// Create a new builder with initial `capacity`.
    fn with_capacity(capacity: usize) -> Self;

    /// Push a value into the builder.
    ///
    /// `value` is optional; callers in this file pass `None` for absent elements.
    fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>);

    /// Build a new vector and reset `self`.
    fn finish(&mut self) -> Self::VectorType;

    /// Build a new vector without resetting `self`.
    fn finish_cloned(&self) -> Self::VectorType;
}
148
/// Implements [`Scalar`] and [`ScalarRef`] for a native primitive type.
///
/// `$Native` is the primitive itself and `$DataType` is the type parameter handed to
/// [`PrimitiveVector`]. The primitive is used as its own reference type.
macro_rules! impl_scalar_for_native {
    ($Native: ident, $DataType: ident) => {
        impl Scalar for $Native {
            type VectorType = PrimitiveVector<$DataType>;
            type RefType<'a> = Self;

            fn as_scalar_ref(&self) -> Self {
                *self
            }

            // The lifetimes are part of the trait signature even though the reference
            // type here does not borrow anything.
            #[allow(clippy::needless_lifetimes)]
            fn upcast_gat<'short, 'long: 'short>(long: Self) -> Self {
                long
            }
        }

        /// Implement [`ScalarRef`] for primitive types. Note that primitive types are both [`Scalar`] and [`ScalarRef`].
        impl<'a> ScalarRef<'a> for $Native {
            type ScalarType = Self;

            fn to_owned_scalar(&self) -> Self {
                *self
            }
        }
    };
}
175
// Implement `Scalar` and `ScalarRef` for the unsigned integer, signed integer and floating
// point primitives, pairing each one with the type that parameterizes its `PrimitiveVector`.
impl_scalar_for_native!(u8, UInt8Type);
impl_scalar_for_native!(u16, UInt16Type);
impl_scalar_for_native!(u32, UInt32Type);
impl_scalar_for_native!(u64, UInt64Type);
impl_scalar_for_native!(i8, Int8Type);
impl_scalar_for_native!(i16, Int16Type);
impl_scalar_for_native!(i32, Int32Type);
impl_scalar_for_native!(i64, Int64Type);
impl_scalar_for_native!(f32, Float32Type);
impl_scalar_for_native!(f64, Float64Type);
186
187impl Scalar for () {
188    type VectorType = NullVector;
189    type RefType<'a> = ();
190
191    fn as_scalar_ref(&self) {}
192
193    #[allow(clippy::needless_lifetimes)]
194    fn upcast_gat<'short, 'long: 'short>(long: ()) {
195        long
196    }
197}
198
199impl ScalarRef<'_> for () {
200    type ScalarType = ();
201
202    fn to_owned_scalar(&self) {}
203}
204
205impl Scalar for bool {
206    type VectorType = BooleanVector;
207    type RefType<'a> = bool;
208
209    fn as_scalar_ref(&self) -> bool {
210        *self
211    }
212
213    #[allow(clippy::needless_lifetimes)]
214    fn upcast_gat<'short, 'long: 'short>(long: bool) -> bool {
215        long
216    }
217}
218
219impl ScalarRef<'_> for bool {
220    type ScalarType = bool;
221
222    fn to_owned_scalar(&self) -> bool {
223        *self
224    }
225}
226
227impl Scalar for String {
228    type VectorType = StringVector;
229    type RefType<'a> = &'a str;
230
231    fn as_scalar_ref(&self) -> &str {
232        self
233    }
234
235    fn upcast_gat<'short, 'long: 'short>(long: &'long str) -> &'short str {
236        long
237    }
238}
239
240impl<'a> ScalarRef<'a> for &'a str {
241    type ScalarType = String;
242
243    fn to_owned_scalar(&self) -> String {
244        self.to_string()
245    }
246}
247
248impl Scalar for Vec<u8> {
249    type VectorType = BinaryVector;
250    type RefType<'a> = &'a [u8];
251
252    fn as_scalar_ref(&self) -> &[u8] {
253        self
254    }
255
256    fn upcast_gat<'short, 'long: 'short>(long: &'long [u8]) -> &'short [u8] {
257        long
258    }
259}
260
261impl<'a> ScalarRef<'a> for &'a [u8] {
262    type ScalarType = Vec<u8>;
263
264    fn to_owned_scalar(&self) -> Vec<u8> {
265        self.to_vec()
266    }
267}
268
269impl Scalar for Date {
270    type VectorType = DateVector;
271    type RefType<'a> = Date;
272
273    fn as_scalar_ref(&self) -> Self::RefType<'_> {
274        *self
275    }
276
277    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
278        long
279    }
280}
281
282impl ScalarRef<'_> for Date {
283    type ScalarType = Date;
284
285    fn to_owned_scalar(&self) -> Self::ScalarType {
286        *self
287    }
288}
289
290impl Scalar for Decimal128 {
291    type VectorType = Decimal128Vector;
292    type RefType<'a> = Decimal128;
293
294    fn as_scalar_ref(&self) -> Self::RefType<'_> {
295        *self
296    }
297
298    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
299        long
300    }
301}
302
303impl ScalarRef<'_> for Decimal128 {
304    type ScalarType = Decimal128;
305
306    fn to_owned_scalar(&self) -> Self::ScalarType {
307        *self
308    }
309}
310
311// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.
312
313impl Scalar for ListValue {
314    type VectorType = ListVector;
315    type RefType<'a> = ListValueRef<'a>;
316
317    fn as_scalar_ref(&self) -> Self::RefType<'_> {
318        ListValueRef::Ref { val: self }
319    }
320
321    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
322        long
323    }
324}
325
326impl<'a> ScalarRef<'a> for ListValueRef<'a> {
327    type ScalarType = ListValue;
328
329    fn to_owned_scalar(&self) -> Self::ScalarType {
330        match self {
331            ListValueRef::Indexed { vector, idx } => match vector.get(*idx) {
332                // Normally should not get `Value::Null` if the `ListValueRef` comes
333                // from the iterator of the ListVector, but we avoid panic and just
334                // returns a default list value in such case since `ListValueRef` may
335                // be constructed manually.
336                Value::Null => ListValue::default(),
337                Value::List(v) => v,
338                _ => unreachable!(),
339            },
340            ListValueRef::Ref { val } => (*val).clone(),
341            ListValueRef::RefList { val, item_datatype } => ListValue::new(
342                val.iter().map(|v| Value::from(v.clone())).collect(),
343                item_datatype.clone(),
344            ),
345        }
346    }
347}
348
349impl Scalar for StructValue {
350    type VectorType = StructVector;
351    type RefType<'a> = StructValueRef<'a>;
352
353    fn as_scalar_ref(&self) -> Self::RefType<'_> {
354        StructValueRef::Ref(self)
355    }
356
357    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
358        long
359    }
360}
361
362impl<'a> ScalarRef<'a> for StructValueRef<'a> {
363    type ScalarType = StructValue;
364
365    fn to_owned_scalar(&self) -> Self::ScalarType {
366        match self {
367            Self::Indexed { vector, idx } => match vector.get(*idx) {
368                Value::Null => StructValue::default(),
369                Value::Struct(v) => v,
370                _ => unreachable!(),
371            },
372            StructValueRef::Ref(val) => (*val).clone(),
373            StructValueRef::RefList { val, fields } => {
374                let items = val.iter().map(|v| Value::from(v.clone())).collect();
375                StructValue::try_new(items, fields.clone()).unwrap()
376            }
377        }
378    }
379}
380
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::ConcreteDataType;
    use crate::timestamp::TimestampSecond;
    use crate::vectors::{BinaryVector, Int32Vector, ListVectorBuilder, TimestampSecondVector};

    /// Builds a vector of type `T` by pushing every optional item through `T::Builder`.
    fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
        let mut builder = T::Builder::with_capacity(items.len());
        for item in items {
            builder.push(item.clone());
        }
        builder.finish()
    }

    /// Asserts that `vector` yields exactly the items in `expect`, in order.
    fn assert_vector_eq<'a, T: ScalarVector>(expect: &[Option<T::RefItem<'a>>], vector: &'a T)
    where
        T::RefItem<'a>: PartialEq + std::fmt::Debug,
    {
        // `zip` stops at the shorter side, so a vector with missing or extra elements
        // would otherwise pass unnoticed. Compare the lengths first.
        assert_eq!(
            expect.len(),
            vector.iter_data().count(),
            "vector length differs from the expected length"
        );
        for (a, b) in expect.iter().zip(vector.iter_data()) {
            assert_eq!(*a, b);
        }
    }

    #[test]
    fn test_build_i32_vector() {
        let expect = vec![Some(1), Some(2), Some(3), None, Some(5)];
        let vector: Int32Vector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_binary_vector() {
        let expect: Vec<Option<&'static [u8]>> = vec![
            Some(b"a"),
            Some(b"b"),
            Some(b"c"),
            None,
            Some(b"e"),
            Some(b""),
        ];
        let vector: BinaryVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_date_vector() {
        let expect: Vec<Option<Date>> = vec![
            Some(Date::new(0)),
            Some(Date::new(-1)),
            None,
            Some(Date::new(1)),
        ];
        let vector: DateVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_date_scalar() {
        let date = Date::new(1);
        assert_eq!(date, date.as_scalar_ref());
        assert_eq!(date, date.to_owned_scalar());
    }

    #[test]
    fn test_decimal_scalar() {
        let decimal = Decimal128::new(1, 1, 1);
        assert_eq!(decimal, decimal.as_scalar_ref());
        assert_eq!(decimal, decimal.to_owned_scalar());
    }

    #[test]
    fn test_list_value_scalar() {
        let list_value =
            ListValue::new(vec![Value::Int32(123)], ConcreteDataType::int32_datatype());
        let list_ref = ListValueRef::Ref { val: &list_value };
        assert_eq!(list_ref, list_value.as_scalar_ref());
        assert_eq!(list_value, list_ref.to_owned_scalar());

        let mut builder =
            ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1);
        builder.push(None);
        builder.push(Some(list_value.as_scalar_ref()));
        let vector = builder.finish();

        // Index 0 was pushed as `None`: an indexed reference to it must give the
        // default list value rather than panic.
        let ref_on_vec = ListValueRef::Indexed {
            vector: &vector,
            idx: 0,
        };
        assert_eq!(ListValue::default(), ref_on_vec.to_owned_scalar());
        // Index 1 holds the list pushed above.
        let ref_on_vec = ListValueRef::Indexed {
            vector: &vector,
            idx: 1,
        };
        assert_eq!(list_value, ref_on_vec.to_owned_scalar());
    }

    #[test]
    fn test_build_timestamp_vector() {
        let expect: Vec<Option<TimestampSecond>> = vec![Some(10.into()), None, Some(42.into())];
        let vector: TimestampSecondVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
        let val = vector.get_data(0).unwrap();
        assert_eq!(val, val.as_scalar_ref());
        assert_eq!(TimestampSecond::from(10), val.to_owned_scalar());
    }
}