common_function/scalars/string/
space.rs1use std::fmt;
20use std::sync::Arc;
21
22use datafusion_common::DataFusionError;
23use datafusion_common::arrow::array::{Array, AsArray, LargeStringBuilder};
24use datafusion_common::arrow::datatypes::DataType;
25use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, TypeSignature, Volatility};
26
27use crate::function::Function;
28use crate::function_registry::FunctionRegistry;
29
30const NAME: &str = "space";
31
32const MAX_SPACE_COUNT: i64 = 1024 * 1024; #[derive(Debug)]
42pub struct SpaceFunction {
43 signature: Signature,
44}
45
46impl SpaceFunction {
47 pub fn register(registry: &FunctionRegistry) {
48 registry.register_scalar(SpaceFunction::default());
49 }
50}
51
52impl Default for SpaceFunction {
53 fn default() -> Self {
54 Self {
55 signature: Signature::one_of(
56 vec![
57 TypeSignature::Exact(vec![DataType::Int64]),
58 TypeSignature::Exact(vec![DataType::Int32]),
59 TypeSignature::Exact(vec![DataType::Int16]),
60 TypeSignature::Exact(vec![DataType::Int8]),
61 TypeSignature::Exact(vec![DataType::UInt64]),
62 TypeSignature::Exact(vec![DataType::UInt32]),
63 TypeSignature::Exact(vec![DataType::UInt16]),
64 TypeSignature::Exact(vec![DataType::UInt8]),
65 ],
66 Volatility::Immutable,
67 ),
68 }
69 }
70}
71
72impl fmt::Display for SpaceFunction {
73 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74 write!(f, "{}", NAME.to_ascii_uppercase())
75 }
76}
77
78impl Function for SpaceFunction {
79 fn name(&self) -> &str {
80 NAME
81 }
82
83 fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
84 Ok(DataType::LargeUtf8)
85 }
86
87 fn signature(&self) -> &Signature {
88 &self.signature
89 }
90
91 fn invoke_with_args(
92 &self,
93 args: ScalarFunctionArgs,
94 ) -> datafusion_common::Result<ColumnarValue> {
95 if args.args.len() != 1 {
96 return Err(DataFusionError::Execution(
97 "SPACE requires exactly 1 argument: SPACE(N)".to_string(),
98 ));
99 }
100
101 let arrays = ColumnarValue::values_to_arrays(&args.args)?;
102 let len = arrays[0].len();
103 let n_array = &arrays[0];
104
105 let mut builder = LargeStringBuilder::with_capacity(len, len * 10);
106
107 for i in 0..len {
108 if n_array.is_null(i) {
109 builder.append_null();
110 continue;
111 }
112
113 let n = get_int_value(n_array, i)?;
114
115 if n < 0 {
116 builder.append_value("");
118 } else if n > MAX_SPACE_COUNT {
119 return Err(DataFusionError::Execution(format!(
120 "SPACE: requested {} spaces exceeds maximum allowed ({})",
121 n, MAX_SPACE_COUNT
122 )));
123 } else {
124 let spaces = " ".repeat(n as usize);
125 builder.append_value(&spaces);
126 }
127 }
128
129 Ok(ColumnarValue::Array(Arc::new(builder.finish())))
130 }
131}
132
133fn get_int_value(
135 array: &datafusion_common::arrow::array::ArrayRef,
136 index: usize,
137) -> datafusion_common::Result<i64> {
138 use datafusion_common::arrow::datatypes as arrow_types;
139
140 match array.data_type() {
141 DataType::Int64 => Ok(array.as_primitive::<arrow_types::Int64Type>().value(index)),
142 DataType::Int32 => Ok(array.as_primitive::<arrow_types::Int32Type>().value(index) as i64),
143 DataType::Int16 => Ok(array.as_primitive::<arrow_types::Int16Type>().value(index) as i64),
144 DataType::Int8 => Ok(array.as_primitive::<arrow_types::Int8Type>().value(index) as i64),
145 DataType::UInt64 => {
146 let v = array.as_primitive::<arrow_types::UInt64Type>().value(index);
147 if v > i64::MAX as u64 {
148 Err(DataFusionError::Execution(format!(
149 "SPACE: value {} exceeds maximum",
150 v
151 )))
152 } else {
153 Ok(v as i64)
154 }
155 }
156 DataType::UInt32 => Ok(array.as_primitive::<arrow_types::UInt32Type>().value(index) as i64),
157 DataType::UInt16 => Ok(array.as_primitive::<arrow_types::UInt16Type>().value(index) as i64),
158 DataType::UInt8 => Ok(array.as_primitive::<arrow_types::UInt8Type>().value(index) as i64),
159 _ => Err(DataFusionError::Execution(format!(
160 "SPACE: unsupported type {:?}",
161 array.data_type()
162 ))),
163 }
164}
165
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datafusion_common::arrow::array::Int64Array;
    use datafusion_common::arrow::datatypes::Field;
    use datafusion_expr::ScalarFunctionArgs;

    use super::*;

    /// Wraps `arrays` into `ScalarFunctionArgs`, deriving one nullable field per
    /// argument and using the first array's length as the row count.
    fn create_args(arrays: Vec<datafusion_common::arrow::array::ArrayRef>) -> ScalarFunctionArgs {
        let arg_fields: Vec<_> = arrays
            .iter()
            .enumerate()
            .map(|(i, arr)| {
                Arc::new(Field::new(
                    format!("arg_{}", i),
                    arr.data_type().clone(),
                    true,
                ))
            })
            .collect();

        ScalarFunctionArgs {
            args: arrays.iter().cloned().map(ColumnarValue::Array).collect(),
            arg_fields,
            return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)),
            number_rows: arrays[0].len(),
            config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
        }
    }

    /// Asserts that `actual` consists of exactly `expected_len` space characters.
    ///
    /// Checking length and content separately keeps the expectation explicit
    /// and immune to whitespace being collapsed inside string literals.
    fn assert_spaces(actual: &str, expected_len: usize) {
        assert_eq!(
            actual.len(),
            expected_len,
            "expected {} spaces, got {:?}",
            expected_len,
            actual
        );
        assert!(
            actual.bytes().all(|b| b == b' '),
            "expected only spaces, got {:?}",
            actual
        );
    }

    #[test]
    fn test_space_basic() {
        let function = SpaceFunction::default();

        let n = Arc::new(Int64Array::from(vec![0, 1, 5]));

        let args = create_args(vec![n]);
        let result = function.invoke_with_args(args).unwrap();

        if let ColumnarValue::Array(array) = result {
            let str_array = array.as_string::<i64>();
            assert_spaces(str_array.value(0), 0);
            assert_spaces(str_array.value(1), 1);
            assert_spaces(str_array.value(2), 5);
        } else {
            panic!("Expected array result");
        }
    }

    #[test]
    fn test_space_negative() {
        let function = SpaceFunction::default();

        let n = Arc::new(Int64Array::from(vec![-1, -100]));

        let args = create_args(vec![n]);
        let result = function.invoke_with_args(args).unwrap();

        if let ColumnarValue::Array(array) = result {
            let str_array = array.as_string::<i64>();
            // Negative counts yield an empty string rather than an error.
            assert_eq!(str_array.value(0), "");
            assert_eq!(str_array.value(1), "");
        } else {
            panic!("Expected array result");
        }
    }

    #[test]
    fn test_space_with_nulls() {
        let function = SpaceFunction::default();

        let n = Arc::new(Int64Array::from(vec![Some(3), None]));

        let args = create_args(vec![n]);
        let result = function.invoke_with_args(args).unwrap();

        if let ColumnarValue::Array(array) = result {
            let str_array = array.as_string::<i64>();
            assert_spaces(str_array.value(0), 3);
            assert!(str_array.is_null(1));
        } else {
            panic!("Expected array result");
        }
    }

    #[test]
    fn test_space_limit() {
        let function = SpaceFunction::default();

        // Exactly at the limit is accepted.
        let at_limit = Arc::new(Int64Array::from(vec![MAX_SPACE_COUNT]));
        let result = function.invoke_with_args(create_args(vec![at_limit])).unwrap();
        if let ColumnarValue::Array(array) = result {
            let str_array = array.as_string::<i64>();
            assert_spaces(str_array.value(0), MAX_SPACE_COUNT as usize);
        } else {
            panic!("Expected array result");
        }

        // One past the limit is rejected.
        let over_limit = Arc::new(Int64Array::from(vec![MAX_SPACE_COUNT + 1]));
        assert!(function
            .invoke_with_args(create_args(vec![over_limit]))
            .is_err());
    }
}