datatypes/
arrow_array.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use arrow::array::{ArrayRef, AsArray};
16use arrow::datatypes::{
17    DataType, DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType,
18    DurationSecondType, Int8Type, Int16Type, Int32Type, Int64Type, Time32MillisecondType,
19    Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit,
20    TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
21    TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
22};
23use arrow_array::Array;
24use common_time::time::Time;
25use common_time::{Duration, Timestamp};
26
/// Alias for arrow's [`BinaryArray`](arrow::array::BinaryArray).
pub type BinaryArray = arrow::array::BinaryArray;
/// Alias for arrow's [`BinaryBuilder`](arrow::array::BinaryBuilder), used as the
/// mutable counterpart of [`BinaryArray`].
pub type MutableBinaryArray = arrow::array::BinaryBuilder;
/// Alias for arrow's [`StringArray`](arrow::array::StringArray).
pub type StringArray = arrow::array::StringArray;
/// Alias for arrow's [`StringBuilder`](arrow::array::StringBuilder), used as the
/// mutable counterpart of [`StringArray`].
pub type MutableStringArray = arrow::array::StringBuilder;
/// Alias for arrow's [`LargeStringArray`](arrow::array::LargeStringArray).
pub type LargeStringArray = arrow::array::LargeStringArray;
/// Alias for arrow's [`LargeStringBuilder`](arrow::array::LargeStringBuilder), used as
/// the mutable counterpart of [`LargeStringArray`].
pub type MutableLargeStringArray = arrow::array::LargeStringBuilder;
33
34/// Get the [Timestamp] value at index `i` of the timestamp array.
35///
36/// Note: This method does not check for nulls and the value is arbitrary
37/// if [`is_null`](arrow::array::Array::is_null) returns true for the index.
38///
39/// # Panics
40/// 1. if index `i` is out of bounds;
41/// 2. or the array is not timestamp type.
42pub fn timestamp_array_value(array: &ArrayRef, i: usize) -> Timestamp {
43    let DataType::Timestamp(time_unit, _) = &array.data_type() else {
44        unreachable!()
45    };
46    let v = match time_unit {
47        TimeUnit::Second => {
48            let array = array.as_primitive::<TimestampSecondType>();
49            array.value(i)
50        }
51        TimeUnit::Millisecond => {
52            let array = array.as_primitive::<TimestampMillisecondType>();
53            array.value(i)
54        }
55        TimeUnit::Microsecond => {
56            let array = array.as_primitive::<TimestampMicrosecondType>();
57            array.value(i)
58        }
59        TimeUnit::Nanosecond => {
60            let array = array.as_primitive::<TimestampNanosecondType>();
61            array.value(i)
62        }
63    };
64    Timestamp::new(v, time_unit.into())
65}
66
67/// Get the [Time] value at index `i` of the time array.
68///
69/// Note: This method does not check for nulls and the value is arbitrary
70/// if [`is_null`](arrow::array::Array::is_null) returns true for the index.
71///
72/// # Panics
73/// 1. if index `i` is out of bounds;
74/// 2. or the array is not `Time32` or `Time64` type.
75pub fn time_array_value(array: &ArrayRef, i: usize) -> Time {
76    match array.data_type() {
77        DataType::Time32(time_unit) | DataType::Time64(time_unit) => match time_unit {
78            TimeUnit::Second => {
79                let array = array.as_primitive::<Time32SecondType>();
80                Time::new_second(array.value(i) as i64)
81            }
82            TimeUnit::Millisecond => {
83                let array = array.as_primitive::<Time32MillisecondType>();
84                Time::new_millisecond(array.value(i) as i64)
85            }
86            TimeUnit::Microsecond => {
87                let array = array.as_primitive::<Time64MicrosecondType>();
88                Time::new_microsecond(array.value(i))
89            }
90            TimeUnit::Nanosecond => {
91                let array = array.as_primitive::<Time64NanosecondType>();
92                Time::new_nanosecond(array.value(i))
93            }
94        },
95        _ => unreachable!(),
96    }
97}
98
99/// Get the [Duration] value at index `i` of the duration array.
100///
101/// Note: This method does not check for nulls and the value is arbitrary
102/// if [`is_null`](arrow::array::Array::is_null) returns true for the index.
103///
104/// # Panics
105/// 1. if index `i` is out of bounds;
106/// 2. or the array is not duration type.
107pub fn duration_array_value(array: &ArrayRef, i: usize) -> Duration {
108    let DataType::Duration(time_unit) = array.data_type() else {
109        unreachable!();
110    };
111    let v = match time_unit {
112        TimeUnit::Second => {
113            let array = array.as_primitive::<DurationSecondType>();
114            array.value(i)
115        }
116        TimeUnit::Millisecond => {
117            let array = array.as_primitive::<DurationMillisecondType>();
118            array.value(i)
119        }
120        TimeUnit::Microsecond => {
121            let array = array.as_primitive::<DurationMicrosecondType>();
122            array.value(i)
123        }
124        TimeUnit::Nanosecond => {
125            let array = array.as_primitive::<DurationNanosecondType>();
126            array.value(i)
127        }
128    };
129    Duration::new(v, time_unit.into())
130}
131
132/// Get the string value at index `i` for `Utf8`, `LargeUtf8`, or `Utf8View` arrays.
133///
134/// Returns `None` when the array type is not a string type or the value is null.
135///
136/// # Panics
137///
138/// If index `i` is out of bounds.
139pub fn string_array_value_at_index(array: &ArrayRef, i: usize) -> Option<&str> {
140    match array.data_type() {
141        DataType::Utf8 => {
142            let array = array.as_string::<i32>();
143            array.is_valid(i).then(|| array.value(i))
144        }
145        DataType::LargeUtf8 => {
146            let array = array.as_string::<i64>();
147            array.is_valid(i).then(|| array.value(i))
148        }
149        DataType::Utf8View => {
150            let array = array.as_string_view();
151            array.is_valid(i).then(|| array.value(i))
152        }
153        _ => None,
154    }
155}
156
157/// Get the integer value (`i64`) at index `i` for any integer array.
158///
159/// Returns `None` when:
160///
161/// - the array type is not an integer type;
162/// - the value is larger than `i64::MAX`;
163/// - the value is null.
164///
165/// # Panics
166///
167/// If index `i` is out of bounds.
168pub fn int_array_value_at_index(array: &ArrayRef, i: usize) -> Option<i64> {
169    match array.data_type() {
170        DataType::Int8 => {
171            let array = array.as_primitive::<Int8Type>();
172            array.is_valid(i).then(|| array.value(i) as i64)
173        }
174        DataType::Int16 => {
175            let array = array.as_primitive::<Int16Type>();
176            array.is_valid(i).then(|| array.value(i) as i64)
177        }
178        DataType::Int32 => {
179            let array = array.as_primitive::<Int32Type>();
180            array.is_valid(i).then(|| array.value(i) as i64)
181        }
182        DataType::Int64 => {
183            let array = array.as_primitive::<Int64Type>();
184            array.is_valid(i).then(|| array.value(i))
185        }
186        DataType::UInt8 => {
187            let array = array.as_primitive::<UInt8Type>();
188            array.is_valid(i).then(|| array.value(i) as i64)
189        }
190        DataType::UInt16 => {
191            let array = array.as_primitive::<UInt16Type>();
192            array.is_valid(i).then(|| array.value(i) as i64)
193        }
194        DataType::UInt32 => {
195            let array = array.as_primitive::<UInt32Type>();
196            array.is_valid(i).then(|| array.value(i) as i64)
197        }
198        DataType::UInt64 => {
199            let array = array.as_primitive::<UInt64Type>();
200            array
201                .is_valid(i)
202                .then(|| {
203                    let i = array.value(i);
204                    if i <= i64::MAX as u64 {
205                        Some(i as i64)
206                    } else {
207                        None
208                    }
209                })
210                .flatten()
211        }
212        _ => None,
213    }
214}