datatypes/
scalars.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15use std::any::Any;
16
17use common_decimal::Decimal128;
18use common_time::Date;
19
20use crate::types::{
21    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
22    UInt64Type, UInt8Type,
23};
24use crate::value::{ListValue, ListValueRef, Value};
25use crate::vectors::{
26    BinaryVector, BooleanVector, DateVector, Decimal128Vector, ListVector, MutableVector,
27    PrimitiveVector, StringVector, Vector,
28};
29
/// Picks an initial builder capacity from an iterator's `size_hint()`.
///
/// - When the iterator reports an upper bound, that bound is returned.
/// - Otherwise the lower bound is returned, unless it is zero, in which case
///   a default of 1024 is used.
fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    match iter.size_hint() {
        // A known upper bound is the most we could ever need.
        (_lower, Some(upper)) => upper,
        // The iterator gives no information at all: fall back to a default.
        (0, None) => 1024,
        // Only a lower bound is known: reserve at least that much.
        (lower, None) => lower,
    }
}
37
38/// Owned scalar value
39/// e.g. primitive types, bool, `Vec<u8>` ...
40pub trait Scalar: 'static + Sized + Default + Any
41where
42    for<'a> Self::VectorType: ScalarVector<RefItem<'a> = Self::RefType<'a>>,
43{
44    type VectorType: ScalarVector<OwnedItem = Self>;
45    type RefType<'a>: ScalarRef<'a, ScalarType = Self>
46    where
47        Self: 'a;
48    /// Get a reference of the current value.
49    fn as_scalar_ref(&self) -> Self::RefType<'_>;
50
51    /// Upcast GAT type's lifetime.
52    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short>;
53}
54
55pub trait ScalarRef<'a>: std::fmt::Debug + Clone + Copy + Send + 'a {
56    /// The corresponding [`Scalar`] type.
57    type ScalarType: Scalar<RefType<'a> = Self>;
58
59    /// Convert the reference into an owned value.
60    fn to_owned_scalar(&self) -> Self::ScalarType;
61}
62
63/// A sub trait of Vector to add scalar operation support.
64// This implementation refers to Datebend's [ScalarColumn](https://github.com/datafuselabs/databend/blob/main/common/datavalues/src/scalars/type_.rs)
65// and skyzh's [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust).
66pub trait ScalarVector: Vector + Send + Sync + Sized + 'static
67where
68    for<'a> Self::OwnedItem: Scalar<RefType<'a> = Self::RefItem<'a>>,
69{
70    type OwnedItem: Scalar<VectorType = Self>;
71    /// The reference item of this vector.
72    type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem>
73    where
74        Self: 'a;
75
76    /// Iterator type of this vector.
77    type Iter<'a>: Iterator<Item = Option<Self::RefItem<'a>>>
78    where
79        Self: 'a;
80
81    /// Builder type to build this vector.
82    type Builder: ScalarVectorBuilder<VectorType = Self>;
83
84    /// Returns the reference to an element at given position.
85    ///
86    /// Note: `get()` has bad performance, avoid call this function inside loop.
87    ///
88    /// # Panics
89    /// Panics if `idx >= self.len()`.
90    fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;
91
92    /// Returns iterator of current vector.
93    fn iter_data(&self) -> Self::Iter<'_>;
94
95    fn from_slice(data: &[Self::RefItem<'_>]) -> Self {
96        let mut builder = Self::Builder::with_capacity(data.len());
97        for item in data {
98            builder.push(Some(*item));
99        }
100        builder.finish()
101    }
102
103    fn from_iterator<'a>(it: impl Iterator<Item = Self::RefItem<'a>>) -> Self {
104        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
105        for item in it {
106            builder.push(Some(item));
107        }
108        builder.finish()
109    }
110
111    fn from_owned_iterator(it: impl Iterator<Item = Option<Self::OwnedItem>>) -> Self {
112        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
113        for item in it {
114            match item {
115                Some(item) => builder.push(Some(item.as_scalar_ref())),
116                None => builder.push(None),
117            }
118        }
119        builder.finish()
120    }
121
122    fn from_vec<I: Into<Self::OwnedItem>>(values: Vec<I>) -> Self {
123        let it = values.into_iter();
124        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
125        for item in it {
126            builder.push(Some(item.into().as_scalar_ref()));
127        }
128        builder.finish()
129    }
130}
131
132/// A trait over all vector builders.
133pub trait ScalarVectorBuilder: MutableVector {
134    type VectorType: ScalarVector<Builder = Self>;
135
136    /// Create a new builder with initial `capacity`.
137    fn with_capacity(capacity: usize) -> Self;
138
139    /// Push a value into the builder.
140    fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>);
141
142    /// Build a new vector and reset `self`.
143    fn finish(&mut self) -> Self::VectorType;
144
145    /// Build a new vector without resetting `self`.
146    fn finish_cloned(&self) -> Self::VectorType;
147}
148
/// Implements [`Scalar`] and [`ScalarRef`] for a native numeric type.
///
/// `$Native` is the Rust primitive and `$DataType` is the type parameter of
/// the [`PrimitiveVector`] that stores it.
macro_rules! impl_scalar_for_native {
    ($Native: ident, $DataType: ident) => {
        /// Implement [`Scalar`] for primitive types; the reference type is the value itself.
        impl Scalar for $Native {
            type VectorType = PrimitiveVector<$DataType>;
            type RefType<'a> = $Native;

            #[inline]
            fn as_scalar_ref(&self) -> $Native {
                *self
            }

            // The lifetimes must be declared to match the trait signature even
            // though the native type borrows nothing.
            #[allow(clippy::needless_lifetimes)]
            #[inline]
            fn upcast_gat<'short, 'long: 'short>(long: $Native) -> $Native {
                long
            }
        }

        /// Implement [`ScalarRef`] for primitive types. Note that primitive types are both [`Scalar`] and [`ScalarRef`].
        impl ScalarRef<'_> for $Native {
            type ScalarType = $Native;

            #[inline]
            fn to_owned_scalar(&self) -> $Native {
                *self
            }
        }
    };
}
178
179impl_scalar_for_native!(u8, UInt8Type);
180impl_scalar_for_native!(u16, UInt16Type);
181impl_scalar_for_native!(u32, UInt32Type);
182impl_scalar_for_native!(u64, UInt64Type);
183impl_scalar_for_native!(i8, Int8Type);
184impl_scalar_for_native!(i16, Int16Type);
185impl_scalar_for_native!(i32, Int32Type);
186impl_scalar_for_native!(i64, Int64Type);
187impl_scalar_for_native!(f32, Float32Type);
188impl_scalar_for_native!(f64, Float64Type);
189
190impl Scalar for bool {
191    type VectorType = BooleanVector;
192    type RefType<'a> = bool;
193
194    #[inline]
195    fn as_scalar_ref(&self) -> bool {
196        *self
197    }
198
199    #[allow(clippy::needless_lifetimes)]
200    #[inline]
201    fn upcast_gat<'short, 'long: 'short>(long: bool) -> bool {
202        long
203    }
204}
205
206impl ScalarRef<'_> for bool {
207    type ScalarType = bool;
208
209    #[inline]
210    fn to_owned_scalar(&self) -> bool {
211        *self
212    }
213}
214
215impl Scalar for String {
216    type VectorType = StringVector;
217    type RefType<'a> = &'a str;
218
219    #[inline]
220    fn as_scalar_ref(&self) -> &str {
221        self
222    }
223
224    #[inline]
225    fn upcast_gat<'short, 'long: 'short>(long: &'long str) -> &'short str {
226        long
227    }
228}
229
230impl<'a> ScalarRef<'a> for &'a str {
231    type ScalarType = String;
232
233    #[inline]
234    fn to_owned_scalar(&self) -> String {
235        self.to_string()
236    }
237}
238
239impl Scalar for Vec<u8> {
240    type VectorType = BinaryVector;
241    type RefType<'a> = &'a [u8];
242
243    #[inline]
244    fn as_scalar_ref(&self) -> &[u8] {
245        self
246    }
247
248    #[inline]
249    fn upcast_gat<'short, 'long: 'short>(long: &'long [u8]) -> &'short [u8] {
250        long
251    }
252}
253
254impl<'a> ScalarRef<'a> for &'a [u8] {
255    type ScalarType = Vec<u8>;
256
257    #[inline]
258    fn to_owned_scalar(&self) -> Vec<u8> {
259        self.to_vec()
260    }
261}
262
263impl Scalar for Date {
264    type VectorType = DateVector;
265    type RefType<'a> = Date;
266
267    fn as_scalar_ref(&self) -> Self::RefType<'_> {
268        *self
269    }
270
271    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
272        long
273    }
274}
275
276impl ScalarRef<'_> for Date {
277    type ScalarType = Date;
278
279    fn to_owned_scalar(&self) -> Self::ScalarType {
280        *self
281    }
282}
283
284impl Scalar for Decimal128 {
285    type VectorType = Decimal128Vector;
286    type RefType<'a> = Decimal128;
287
288    fn as_scalar_ref(&self) -> Self::RefType<'_> {
289        *self
290    }
291
292    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
293        long
294    }
295}
296
297impl ScalarRef<'_> for Decimal128 {
298    type ScalarType = Decimal128;
299
300    fn to_owned_scalar(&self) -> Self::ScalarType {
301        *self
302    }
303}
304
// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.
306
307impl Scalar for ListValue {
308    type VectorType = ListVector;
309    type RefType<'a> = ListValueRef<'a>;
310
311    fn as_scalar_ref(&self) -> Self::RefType<'_> {
312        ListValueRef::Ref { val: self }
313    }
314
315    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
316        long
317    }
318}
319
320impl<'a> ScalarRef<'a> for ListValueRef<'a> {
321    type ScalarType = ListValue;
322
323    fn to_owned_scalar(&self) -> Self::ScalarType {
324        match self {
325            ListValueRef::Indexed { vector, idx } => match vector.get(*idx) {
326                // Normally should not get `Value::Null` if the `ListValueRef` comes
327                // from the iterator of the ListVector, but we avoid panic and just
328                // returns a default list value in such case since `ListValueRef` may
329                // be constructed manually.
330                Value::Null => ListValue::default(),
331                Value::List(v) => v,
332                _ => unreachable!(),
333            },
334            ListValueRef::Ref { val } => (*val).clone(),
335        }
336    }
337}
338
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::ConcreteDataType;
    use crate::timestamp::TimestampSecond;
    use crate::vectors::{BinaryVector, Int32Vector, ListVectorBuilder, TimestampSecondVector};

    /// Builds a vector of type `T` by pushing every (possibly null) item
    /// through `T`'s builder.
    fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
        let mut builder = T::Builder::with_capacity(items.len());
        for item in items {
            builder.push(*item);
        }
        builder.finish()
    }

    /// Asserts that `vector` holds exactly the items in `expect`, in order.
    fn assert_vector_eq<'a, T: ScalarVector>(expect: &[Option<T::RefItem<'a>>], vector: &'a T)
    where
        T::RefItem<'a>: PartialEq + std::fmt::Debug,
    {
        // `zip` stops at the shorter side, so without this check a vector
        // with missing or extra elements would still pass.
        assert_eq!(expect.len(), vector.iter_data().count());
        for (a, b) in expect.iter().zip(vector.iter_data()) {
            assert_eq!(*a, b);
        }
    }

    #[test]
    fn test_build_i32_vector() {
        let expect = vec![Some(1), Some(2), Some(3), None, Some(5)];
        let vector: Int32Vector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_binary_vector() {
        let expect: Vec<Option<&'static [u8]>> = vec![
            Some(b"a"),
            Some(b"b"),
            Some(b"c"),
            None,
            Some(b"e"),
            Some(b""),
        ];
        let vector: BinaryVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_date_vector() {
        let expect: Vec<Option<Date>> = vec![
            Some(Date::new(0)),
            Some(Date::new(-1)),
            None,
            Some(Date::new(1)),
        ];
        let vector: DateVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_date_scalar() {
        let date = Date::new(1);
        assert_eq!(date, date.as_scalar_ref());
        assert_eq!(date, date.to_owned_scalar());
    }

    #[test]
    fn test_decimal_scalar() {
        let decimal = Decimal128::new(1, 1, 1);
        assert_eq!(decimal, decimal.as_scalar_ref());
        assert_eq!(decimal, decimal.to_owned_scalar());
    }

    #[test]
    fn test_list_value_scalar() {
        let list_value =
            ListValue::new(vec![Value::Int32(123)], ConcreteDataType::int32_datatype());
        let list_ref = ListValueRef::Ref { val: &list_value };
        assert_eq!(list_ref, list_value.as_scalar_ref());
        assert_eq!(list_value, list_ref.to_owned_scalar());

        let mut builder =
            ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1);
        builder.push(None);
        builder.push(Some(list_value.as_scalar_ref()));
        let vector = builder.finish();

        // A null slot converts to the default list value instead of panicking.
        let ref_on_vec = ListValueRef::Indexed {
            vector: &vector,
            idx: 0,
        };
        assert_eq!(ListValue::default(), ref_on_vec.to_owned_scalar());
        let ref_on_vec = ListValueRef::Indexed {
            vector: &vector,
            idx: 1,
        };
        assert_eq!(list_value, ref_on_vec.to_owned_scalar());
    }

    #[test]
    fn test_build_timestamp_vector() {
        let expect: Vec<Option<TimestampSecond>> = vec![Some(10.into()), None, Some(42.into())];
        let vector: TimestampSecondVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
        let val = vector.get_data(0).unwrap();
        assert_eq!(val, val.as_scalar_ref());
        assert_eq!(TimestampSecond::from(10), val.to_owned_scalar());
    }
}