datatypes/vectors/
validity.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use arrow::array::ArrayData;
16use arrow::buffer::NullBuffer;
17
18#[derive(Debug, PartialEq)]
19enum ValidityKind {
20    /// Whether the array slot is valid or not (null).
21    Slots {
22        bitmap: NullBuffer,
23        len: usize,
24        null_count: usize,
25    },
26    /// All slots are valid.
27    AllValid { len: usize },
28    /// All slots are null.
29    AllNull { len: usize },
30}
31
32/// Validity of a vector.
33#[derive(Debug, PartialEq)]
34pub struct Validity {
35    kind: ValidityKind,
36}
37
38impl Validity {
39    /// Creates a `Validity` from [`ArrayData`].
40    pub fn from_array_data(data: ArrayData) -> Validity {
41        match data.nulls() {
42            Some(null_buf) => Validity {
43                kind: ValidityKind::Slots {
44                    bitmap: null_buf.clone(),
45                    len: data.len(),
46                    null_count: data.null_count(),
47                },
48            },
49            None => Validity::all_valid(data.len()),
50        }
51    }
52
53    /// Returns `Validity` that all elements are valid.
54    pub fn all_valid(len: usize) -> Validity {
55        Validity {
56            kind: ValidityKind::AllValid { len },
57        }
58    }
59
60    /// Returns `Validity` that all elements are null.
61    pub fn all_null(len: usize) -> Validity {
62        Validity {
63            kind: ValidityKind::AllNull { len },
64        }
65    }
66
67    /// Returns whether `i-th` bit is set.
68    pub fn is_set(&self, i: usize) -> bool {
69        match &self.kind {
70            ValidityKind::Slots { bitmap, .. } => bitmap.is_valid(i),
71            ValidityKind::AllValid { len } => i < *len,
72            ValidityKind::AllNull { .. } => false,
73        }
74    }
75
76    /// Returns true if all bits are null.
77    pub fn is_all_null(&self) -> bool {
78        match self.kind {
79            ValidityKind::Slots {
80                len, null_count, ..
81            } => len == null_count,
82            ValidityKind::AllValid { .. } => false,
83            ValidityKind::AllNull { .. } => true,
84        }
85    }
86
87    /// Returns true if all bits are valid.
88    pub fn is_all_valid(&self) -> bool {
89        match self.kind {
90            ValidityKind::Slots { null_count, .. } => null_count == 0,
91            ValidityKind::AllValid { .. } => true,
92            ValidityKind::AllNull { .. } => false,
93        }
94    }
95
96    /// The number of null slots.
97    pub fn null_count(&self) -> usize {
98        match self.kind {
99            ValidityKind::Slots { null_count, .. } => null_count,
100            ValidityKind::AllValid { .. } => 0,
101            ValidityKind::AllNull { len } => len,
102        }
103    }
104}
105
#[cfg(test)]
mod tests {
    use arrow::array::{Array, Int32Array};

    use super::*;

    /// An all-valid `Validity` reports every in-range slot as set and has no nulls.
    #[test]
    fn test_all_valid() {
        let all_valid = Validity::all_valid(5);

        assert_eq!(0, all_valid.null_count());
        assert!(all_valid.is_all_valid());
        assert!(!all_valid.is_all_null());

        assert!((0..5).all(|idx| all_valid.is_set(idx)));
        // One past the end is not a valid slot.
        assert!(!all_valid.is_set(5));
    }

    /// An all-null `Validity` reports no slot as set and counts every slot as null.
    #[test]
    fn test_all_null() {
        let all_null = Validity::all_null(5);

        assert_eq!(5, all_null.null_count());
        assert!(all_null.is_all_null());
        assert!(!all_null.is_all_valid());

        // Covers the five real slots as well as index 5 just past the end.
        assert!(!(0..=5).any(|idx| all_null.is_set(idx)));
    }

    /// Validity derived from arrow arrays mirrors the arrays' null layout.
    #[test]
    fn test_from_array_data() {
        let validity_of = |array: Int32Array| Validity::from_array_data(array.to_data());

        // Mixed nulls and values.
        let mixed = validity_of(Int32Array::from_iter([None, Some(1), None]));
        assert_eq!(2, mixed.null_count());
        let slots: Vec<bool> = (0..3).map(|idx| mixed.is_set(idx)).collect();
        assert_eq!(vec![false, true, false], slots);
        assert!(!mixed.is_all_null());
        assert!(!mixed.is_all_valid());

        // Only nulls.
        let only_nulls = validity_of(Int32Array::from_iter([None, None]));
        assert!(only_nulls.is_all_null());
        assert!(!only_nulls.is_all_valid());
        assert_eq!(2, only_nulls.null_count());

        // Only values.
        let only_values = validity_of(Int32Array::from_iter_values([1, 2]));
        assert!(!only_values.is_all_null());
        assert!(only_values.is_all_valid());
        assert_eq!(0, only_values.null_count());
    }
}