common_function/scalars/string/
space.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! MySQL-compatible SPACE function implementation.
16//!
17//! SPACE(N) - Returns a string consisting of N space characters.
18
19use std::fmt;
20use std::sync::Arc;
21
22use datafusion_common::DataFusionError;
23use datafusion_common::arrow::array::{Array, AsArray, LargeStringBuilder};
24use datafusion_common::arrow::datatypes::DataType;
25use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, TypeSignature, Volatility};
26
27use crate::function::Function;
28use crate::function_registry::FunctionRegistry;
29
/// Name under which the function is registered.
const NAME: &str = "space";

/// Upper bound on the number of spaces a single `SPACE(N)` call may produce
/// (2^20 = 1,048,576 one-byte characters, i.e. 1 MiB per row).
const MAX_SPACE_COUNT: i64 = 1 << 20;
34
/// MySQL-compatible `SPACE` scalar function.
///
/// Syntax: `SPACE(N)`
///
/// - Returns a string consisting of `N` space characters, typed as `LargeUtf8`.
/// - Returns NULL if `N` is NULL.
/// - Returns an empty string if `N < 0`.
/// - Fails with an execution error if `N` exceeds `MAX_SPACE_COUNT`.
#[derive(Debug)]
pub struct SpaceFunction {
    /// Argument signatures accepted by the function.
    signature: Signature,
}
45
46impl SpaceFunction {
47    pub fn register(registry: &FunctionRegistry) {
48        registry.register_scalar(SpaceFunction::default());
49    }
50}
51
52impl Default for SpaceFunction {
53    fn default() -> Self {
54        Self {
55            signature: Signature::one_of(
56                vec![
57                    TypeSignature::Exact(vec![DataType::Int64]),
58                    TypeSignature::Exact(vec![DataType::Int32]),
59                    TypeSignature::Exact(vec![DataType::Int16]),
60                    TypeSignature::Exact(vec![DataType::Int8]),
61                    TypeSignature::Exact(vec![DataType::UInt64]),
62                    TypeSignature::Exact(vec![DataType::UInt32]),
63                    TypeSignature::Exact(vec![DataType::UInt16]),
64                    TypeSignature::Exact(vec![DataType::UInt8]),
65                ],
66                Volatility::Immutable,
67            ),
68        }
69    }
70}
71
72impl fmt::Display for SpaceFunction {
73    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74        write!(f, "{}", NAME.to_ascii_uppercase())
75    }
76}
77
78impl Function for SpaceFunction {
79    fn name(&self) -> &str {
80        NAME
81    }
82
83    fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
84        Ok(DataType::LargeUtf8)
85    }
86
87    fn signature(&self) -> &Signature {
88        &self.signature
89    }
90
91    fn invoke_with_args(
92        &self,
93        args: ScalarFunctionArgs,
94    ) -> datafusion_common::Result<ColumnarValue> {
95        if args.args.len() != 1 {
96            return Err(DataFusionError::Execution(
97                "SPACE requires exactly 1 argument: SPACE(N)".to_string(),
98            ));
99        }
100
101        let arrays = ColumnarValue::values_to_arrays(&args.args)?;
102        let len = arrays[0].len();
103        let n_array = &arrays[0];
104
105        let mut builder = LargeStringBuilder::with_capacity(len, len * 10);
106
107        for i in 0..len {
108            if n_array.is_null(i) {
109                builder.append_null();
110                continue;
111            }
112
113            let n = get_int_value(n_array, i)?;
114
115            if n < 0 {
116                // MySQL returns empty string for negative values
117                builder.append_value("");
118            } else if n > MAX_SPACE_COUNT {
119                return Err(DataFusionError::Execution(format!(
120                    "SPACE: requested {} spaces exceeds maximum allowed ({})",
121                    n, MAX_SPACE_COUNT
122                )));
123            } else {
124                let spaces = " ".repeat(n as usize);
125                builder.append_value(&spaces);
126            }
127        }
128
129        Ok(ColumnarValue::Array(Arc::new(builder.finish())))
130    }
131}
132
133/// Extract integer value from various integer types.
134fn get_int_value(
135    array: &datafusion_common::arrow::array::ArrayRef,
136    index: usize,
137) -> datafusion_common::Result<i64> {
138    use datafusion_common::arrow::datatypes as arrow_types;
139
140    match array.data_type() {
141        DataType::Int64 => Ok(array.as_primitive::<arrow_types::Int64Type>().value(index)),
142        DataType::Int32 => Ok(array.as_primitive::<arrow_types::Int32Type>().value(index) as i64),
143        DataType::Int16 => Ok(array.as_primitive::<arrow_types::Int16Type>().value(index) as i64),
144        DataType::Int8 => Ok(array.as_primitive::<arrow_types::Int8Type>().value(index) as i64),
145        DataType::UInt64 => {
146            let v = array.as_primitive::<arrow_types::UInt64Type>().value(index);
147            if v > i64::MAX as u64 {
148                Err(DataFusionError::Execution(format!(
149                    "SPACE: value {} exceeds maximum",
150                    v
151                )))
152            } else {
153                Ok(v as i64)
154            }
155        }
156        DataType::UInt32 => Ok(array.as_primitive::<arrow_types::UInt32Type>().value(index) as i64),
157        DataType::UInt16 => Ok(array.as_primitive::<arrow_types::UInt16Type>().value(index) as i64),
158        DataType::UInt8 => Ok(array.as_primitive::<arrow_types::UInt8Type>().value(index) as i64),
159        _ => Err(DataFusionError::Execution(format!(
160            "SPACE: unsupported type {:?}",
161            array.data_type()
162        ))),
163    }
164}
165
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datafusion_common::arrow::array::{ArrayRef, Int64Array};
    use datafusion_common::arrow::datatypes::Field;
    use datafusion_expr::ScalarFunctionArgs;

    use super::*;

    /// Wraps the given arrays into `ScalarFunctionArgs`, with one nullable
    /// field per argument named `arg_<position>`.
    fn create_args(arrays: Vec<ArrayRef>) -> ScalarFunctionArgs {
        let number_rows = arrays[0].len();

        let mut arg_fields = Vec::with_capacity(arrays.len());
        for (position, array) in arrays.iter().enumerate() {
            arg_fields.push(Arc::new(Field::new(
                format!("arg_{}", position),
                array.data_type().clone(),
                true,
            )));
        }

        ScalarFunctionArgs {
            args: arrays.into_iter().map(ColumnarValue::Array).collect(),
            arg_fields,
            return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)),
            number_rows,
            config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
        }
    }

    /// Runs `SPACE` over `input` and returns the resulting array, panicking
    /// if evaluation fails or does not yield an array.
    fn eval_space(input: Int64Array) -> ArrayRef {
        let function = SpaceFunction::default();
        let args = create_args(vec![Arc::new(input) as ArrayRef]);

        match function.invoke_with_args(args).unwrap() {
            ColumnarValue::Array(array) => array,
            _ => panic!("Expected array result"),
        }
    }

    #[test]
    fn test_space_basic() {
        let output = eval_space(Int64Array::from(vec![0, 1, 5]));
        let strings = output.as_string::<i64>();

        for (row, expected) in ["", " ", "     "].iter().enumerate() {
            assert_eq!(strings.value(row), *expected);
        }
    }

    #[test]
    fn test_space_negative() {
        let output = eval_space(Int64Array::from(vec![-1, -100]));
        let strings = output.as_string::<i64>();

        for row in 0..2 {
            assert_eq!(strings.value(row), "");
        }
    }

    #[test]
    fn test_space_with_nulls() {
        let output = eval_space(Int64Array::from(vec![Some(3), None]));
        let strings = output.as_string::<i64>();

        assert_eq!(strings.value(0), "   ");
        assert!(strings.is_null(1));
    }
}