datatypes/
arrow_array.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use arrow::array::{ArrayRef, AsArray};
16use arrow::datatypes::{
17    DataType, DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType,
18    DurationSecondType, Int8Type, Int16Type, Int32Type, Int64Type, Time32MillisecondType,
19    Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit,
20    TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
21    TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
22};
23use arrow_array::Array;
24use common_time::time::Time;
25use common_time::{Duration, Timestamp};
26
27pub type BinaryArray = arrow::array::BinaryArray;
28pub type LargeBinaryArray = arrow::array::LargeBinaryArray;
29pub type MutableBinaryArray = arrow::array::BinaryBuilder;
30pub type BinaryViewArray = arrow::array::BinaryViewArray;
31pub type MutableBinaryViewArray = arrow::array::BinaryViewBuilder;
32pub type StringArray = arrow::array::StringArray;
33pub type MutableStringArray = arrow::array::StringBuilder;
34pub type LargeStringArray = arrow::array::LargeStringArray;
35pub type MutableLargeStringArray = arrow::array::LargeStringBuilder;
36pub type StringViewArray = arrow::array::StringViewArray;
37pub type MutableStringViewArray = arrow::array::StringViewBuilder;
38
39/// Get the [Timestamp] value at index `i` of the timestamp array.
40///
41/// Note: This method does not check for nulls and the value is arbitrary
42/// if [`is_null`](arrow::array::Array::is_null) returns true for the index.
43///
44/// # Panics
45/// 1. if index `i` is out of bounds;
46/// 2. or the array is not timestamp type.
47pub fn timestamp_array_value(array: &ArrayRef, i: usize) -> Timestamp {
48    let DataType::Timestamp(time_unit, _) = &array.data_type() else {
49        unreachable!()
50    };
51    let v = match time_unit {
52        TimeUnit::Second => {
53            let array = array.as_primitive::<TimestampSecondType>();
54            array.value(i)
55        }
56        TimeUnit::Millisecond => {
57            let array = array.as_primitive::<TimestampMillisecondType>();
58            array.value(i)
59        }
60        TimeUnit::Microsecond => {
61            let array = array.as_primitive::<TimestampMicrosecondType>();
62            array.value(i)
63        }
64        TimeUnit::Nanosecond => {
65            let array = array.as_primitive::<TimestampNanosecondType>();
66            array.value(i)
67        }
68    };
69    Timestamp::new(v, time_unit.into())
70}
71
72/// Get the [Time] value at index `i` of the time array.
73///
74/// Note: This method does not check for nulls and the value is arbitrary
75/// if [`is_null`](arrow::array::Array::is_null) returns true for the index.
76///
77/// # Panics
78/// 1. if index `i` is out of bounds;
79/// 2. or the array is not `Time32` or `Time64` type.
80pub fn time_array_value(array: &ArrayRef, i: usize) -> Time {
81    match array.data_type() {
82        DataType::Time32(time_unit) | DataType::Time64(time_unit) => match time_unit {
83            TimeUnit::Second => {
84                let array = array.as_primitive::<Time32SecondType>();
85                Time::new_second(array.value(i) as i64)
86            }
87            TimeUnit::Millisecond => {
88                let array = array.as_primitive::<Time32MillisecondType>();
89                Time::new_millisecond(array.value(i) as i64)
90            }
91            TimeUnit::Microsecond => {
92                let array = array.as_primitive::<Time64MicrosecondType>();
93                Time::new_microsecond(array.value(i))
94            }
95            TimeUnit::Nanosecond => {
96                let array = array.as_primitive::<Time64NanosecondType>();
97                Time::new_nanosecond(array.value(i))
98            }
99        },
100        _ => unreachable!(),
101    }
102}
103
104/// Get the [Duration] value at index `i` of the duration array.
105///
106/// Note: This method does not check for nulls and the value is arbitrary
107/// if [`is_null`](arrow::array::Array::is_null) returns true for the index.
108///
109/// # Panics
110/// 1. if index `i` is out of bounds;
111/// 2. or the array is not duration type.
112pub fn duration_array_value(array: &ArrayRef, i: usize) -> Duration {
113    let DataType::Duration(time_unit) = array.data_type() else {
114        unreachable!();
115    };
116    let v = match time_unit {
117        TimeUnit::Second => {
118            let array = array.as_primitive::<DurationSecondType>();
119            array.value(i)
120        }
121        TimeUnit::Millisecond => {
122            let array = array.as_primitive::<DurationMillisecondType>();
123            array.value(i)
124        }
125        TimeUnit::Microsecond => {
126            let array = array.as_primitive::<DurationMicrosecondType>();
127            array.value(i)
128        }
129        TimeUnit::Nanosecond => {
130            let array = array.as_primitive::<DurationNanosecondType>();
131            array.value(i)
132        }
133    };
134    Duration::new(v, time_unit.into())
135}
136
137/// Get the string value at index `i` for `Utf8`, `LargeUtf8`, or `Utf8View` arrays.
138///
139/// Returns `None` when the array type is not a string type or the value is null.
140///
141/// # Panics
142///
143/// If index `i` is out of bounds.
144pub fn string_array_value_at_index(array: &ArrayRef, i: usize) -> Option<&str> {
145    match array.data_type() {
146        DataType::Utf8 => {
147            let array = array.as_string::<i32>();
148            array.is_valid(i).then(|| array.value(i))
149        }
150        DataType::LargeUtf8 => {
151            let array = array.as_string::<i64>();
152            array.is_valid(i).then(|| array.value(i))
153        }
154        DataType::Utf8View => {
155            let array = array.as_string_view();
156            array.is_valid(i).then(|| array.value(i))
157        }
158        _ => None,
159    }
160}
161
162/// Get the string value at index `i` for `Utf8`, `LargeUtf8`, or `Utf8View` arrays.
163///
164/// Note: This method does not check for nulls and the value is arbitrary
165/// if [`is_null`](arrow::array::Array::is_null) returns true for the index.
166///
167/// # Panics
168/// 1. if index `i` is out of bounds;
169/// 2. or the array is not a string type.
170pub fn string_array_value(array: &ArrayRef, i: usize) -> &str {
171    match array.data_type() {
172        DataType::Utf8 => array.as_string::<i32>().value(i),
173        DataType::LargeUtf8 => array.as_string::<i64>().value(i),
174        DataType::Utf8View => array.as_string_view().value(i),
175        _ => unreachable!(),
176    }
177}
178
179/// Get the binary value at index `i` for `Binary`, `LargeBinary`, or `BinaryView` arrays.
180///
181/// Note: This method does not check for nulls and the value is arbitrary
182/// if [`is_null`](arrow::array::Array::is_null) returns true for the index.
183///
184/// # Panics
185/// 1. if index `i` is out of bounds;
186/// 2. or the array is not a binary type.
187pub fn binary_array_value(array: &ArrayRef, i: usize) -> &[u8] {
188    match array.data_type() {
189        DataType::Binary => array.as_binary::<i32>().value(i),
190        DataType::LargeBinary => array.as_binary::<i64>().value(i),
191        DataType::BinaryView => array.as_binary_view().value(i),
192        _ => unreachable!(),
193    }
194}
195
196/// Get the integer value (`i64`) at index `i` for any integer array.
197///
198/// Returns `None` when:
199///
200/// - the array type is not an integer type;
201/// - the value is larger than `i64::MAX`;
202/// - the value is null.
203///
204/// # Panics
205///
206/// If index `i` is out of bounds.
207pub fn int_array_value_at_index(array: &ArrayRef, i: usize) -> Option<i64> {
208    match array.data_type() {
209        DataType::Int8 => {
210            let array = array.as_primitive::<Int8Type>();
211            array.is_valid(i).then(|| array.value(i) as i64)
212        }
213        DataType::Int16 => {
214            let array = array.as_primitive::<Int16Type>();
215            array.is_valid(i).then(|| array.value(i) as i64)
216        }
217        DataType::Int32 => {
218            let array = array.as_primitive::<Int32Type>();
219            array.is_valid(i).then(|| array.value(i) as i64)
220        }
221        DataType::Int64 => {
222            let array = array.as_primitive::<Int64Type>();
223            array.is_valid(i).then(|| array.value(i))
224        }
225        DataType::UInt8 => {
226            let array = array.as_primitive::<UInt8Type>();
227            array.is_valid(i).then(|| array.value(i) as i64)
228        }
229        DataType::UInt16 => {
230            let array = array.as_primitive::<UInt16Type>();
231            array.is_valid(i).then(|| array.value(i) as i64)
232        }
233        DataType::UInt32 => {
234            let array = array.as_primitive::<UInt32Type>();
235            array.is_valid(i).then(|| array.value(i) as i64)
236        }
237        DataType::UInt64 => {
238            let array = array.as_primitive::<UInt64Type>();
239            array
240                .is_valid(i)
241                .then(|| {
242                    let i = array.value(i);
243                    if i <= i64::MAX as u64 {
244                        Some(i as i64)
245                    } else {
246                        None
247                    }
248                })
249                .flatten()
250        }
251        _ => None,
252    }
253}