1use std::any::Any;
16use std::sync::Arc;
17
18use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef};
19use snafu::ResultExt;
20
21use crate::arrow_array::{MutableStringArray, StringArray};
22use crate::data_type::ConcreteDataType;
23use crate::error::{self, Result};
24use crate::scalars::{ScalarVector, ScalarVectorBuilder};
25use crate::serialize::Serializable;
26use crate::value::{Value, ValueRef};
27use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
28
29#[derive(Debug, PartialEq)]
31pub struct StringVector {
32 array: StringArray,
33}
34
35impl StringVector {
36 pub(crate) fn as_arrow(&self) -> &dyn Array {
37 &self.array
38 }
39}
40
41impl From<StringArray> for StringVector {
42 fn from(array: StringArray) -> Self {
43 Self { array }
44 }
45}
46
47impl From<Vec<Option<String>>> for StringVector {
48 fn from(data: Vec<Option<String>>) -> Self {
49 Self {
50 array: StringArray::from_iter(data),
51 }
52 }
53}
54
55impl From<Vec<Option<&str>>> for StringVector {
56 fn from(data: Vec<Option<&str>>) -> Self {
57 Self {
58 array: StringArray::from_iter(data),
59 }
60 }
61}
62
63impl From<&[Option<String>]> for StringVector {
64 fn from(data: &[Option<String>]) -> Self {
65 Self {
66 array: StringArray::from_iter(data),
67 }
68 }
69}
70
71impl From<&[Option<&str>]> for StringVector {
72 fn from(data: &[Option<&str>]) -> Self {
73 Self {
74 array: StringArray::from_iter(data),
75 }
76 }
77}
78
79impl From<Vec<String>> for StringVector {
80 fn from(data: Vec<String>) -> Self {
81 Self {
82 array: StringArray::from_iter(data.into_iter().map(Some)),
83 }
84 }
85}
86
87impl From<Vec<&str>> for StringVector {
88 fn from(data: Vec<&str>) -> Self {
89 Self {
90 array: StringArray::from_iter(data.into_iter().map(Some)),
91 }
92 }
93}
94
95impl Vector for StringVector {
96 fn data_type(&self) -> ConcreteDataType {
97 ConcreteDataType::string_datatype()
98 }
99
100 fn vector_type_name(&self) -> String {
101 "StringVector".to_string()
102 }
103
104 fn as_any(&self) -> &dyn Any {
105 self
106 }
107
108 fn len(&self) -> usize {
109 self.array.len()
110 }
111
112 fn to_arrow_array(&self) -> ArrayRef {
113 Arc::new(self.array.clone())
114 }
115
116 fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
117 Box::new(self.array.clone())
118 }
119
120 fn validity(&self) -> Validity {
121 vectors::impl_validity_for_vector!(self.array)
122 }
123
124 fn memory_size(&self) -> usize {
125 self.array.get_buffer_memory_size()
126 }
127
128 fn null_count(&self) -> usize {
129 self.array.null_count()
130 }
131
132 fn is_null(&self, row: usize) -> bool {
133 self.array.is_null(row)
134 }
135
136 fn slice(&self, offset: usize, length: usize) -> VectorRef {
137 Arc::new(Self::from(self.array.slice(offset, length)))
138 }
139
140 fn get(&self, index: usize) -> Value {
141 vectors::impl_get_for_vector!(self.array, index)
142 }
143
144 fn get_ref(&self, index: usize) -> ValueRef {
145 vectors::impl_get_ref_for_vector!(self.array, index)
146 }
147}
148
149impl ScalarVector for StringVector {
150 type OwnedItem = String;
151 type RefItem<'a> = &'a str;
152 type Iter<'a> = ArrayIter<&'a StringArray>;
153 type Builder = StringVectorBuilder;
154
155 fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
156 if self.array.is_valid(idx) {
157 Some(self.array.value(idx))
158 } else {
159 None
160 }
161 }
162
163 fn iter_data(&self) -> Self::Iter<'_> {
164 self.array.iter()
165 }
166}
167
168pub struct StringVectorBuilder {
169 pub mutable_array: MutableStringArray,
170}
171
172impl MutableVector for StringVectorBuilder {
173 fn data_type(&self) -> ConcreteDataType {
174 ConcreteDataType::string_datatype()
175 }
176
177 fn len(&self) -> usize {
178 self.mutable_array.len()
179 }
180
181 fn as_any(&self) -> &dyn Any {
182 self
183 }
184
185 fn as_mut_any(&mut self) -> &mut dyn Any {
186 self
187 }
188
189 fn to_vector(&mut self) -> VectorRef {
190 Arc::new(self.finish())
191 }
192
193 fn to_vector_cloned(&self) -> VectorRef {
194 Arc::new(self.finish_cloned())
195 }
196
197 fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()> {
198 match value.as_string()? {
199 Some(v) => self.mutable_array.append_value(v),
200 None => self.mutable_array.append_null(),
201 }
202 Ok(())
203 }
204
205 fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
206 vectors::impl_extend_for_builder!(self, vector, StringVector, offset, length)
207 }
208
209 fn push_null(&mut self) {
210 self.mutable_array.append_null()
211 }
212}
213
214impl ScalarVectorBuilder for StringVectorBuilder {
215 type VectorType = StringVector;
216
217 fn with_capacity(capacity: usize) -> Self {
218 Self {
219 mutable_array: MutableStringArray::with_capacity(capacity, 0),
220 }
221 }
222
223 fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
224 match value {
225 Some(v) => self.mutable_array.append_value(v),
226 None => self.mutable_array.append_null(),
227 }
228 }
229
230 fn finish(&mut self) -> Self::VectorType {
231 StringVector {
232 array: self.mutable_array.finish(),
233 }
234 }
235
236 fn finish_cloned(&self) -> Self::VectorType {
237 StringVector {
238 array: self.mutable_array.finish_cloned(),
239 }
240 }
241}
242
243impl Serializable for StringVector {
244 fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
245 self.iter_data()
246 .map(serde_json::to_value)
247 .collect::<serde_json::Result<_>>()
248 .context(error::SerializeSnafu)
249 }
250}
251
// Macro-generated glue between `StringArray` and `StringVector`.
// NOTE(review): presumably implements the fallible arrow-array -> vector
// conversion — confirm against the macro definition in `vectors`.
vectors::impl_try_from_arrow_array_for_vector!(StringArray, StringVector);
253
#[cfg(test)]
mod tests {

    use std::vec;

    use arrow::datatypes::DataType;

    use super::*;

    /// Values pushed through the builder are readable via every accessor.
    #[test]
    fn test_string_vector_build_get() {
        let rows = [Some("hello"), None, Some("world")];
        let mut builder = StringVectorBuilder::with_capacity(4);
        for row in rows {
            builder.push(row);
        }
        let vector = builder.finish();

        for (idx, want) in rows.iter().enumerate() {
            assert_eq!(*want, vector.get_data(idx));
        }

        // Only three rows were pushed, so index 3 must be rejected.
        assert!(vector.try_get(3).is_err());

        assert_eq!(Value::String("hello".into()), vector.get(0));
        assert_eq!(Value::Null, vector.get(1));
        assert_eq!(Value::String("world".into()), vector.get(2));

        let mut data_iter = vector.iter_data();
        assert_eq!(Some(Some("hello")), data_iter.next());
        assert_eq!(Some(None), data_iter.next());
        assert_eq!(Some(Some("world")), data_iter.next());
        assert_eq!(None, data_iter.next());
    }

    /// The `MutableVector` API accepts strings and rejects other types.
    #[test]
    fn test_string_vector_builder() {
        let mut builder = StringVectorBuilder::with_capacity(3);
        builder.push_value_ref(ValueRef::String("hello"));
        let wrong_value = builder.try_push_value_ref(ValueRef::Int32(123));
        assert!(wrong_value.is_err());

        let source = StringVector::from_slice(&["world", "one", "two"]);
        builder.extend_slice_of(&source, 1, 2).unwrap();

        let wrong_vector = crate::vectors::Int32Vector::from_slice([13]);
        let extended = builder.extend_slice_of(&wrong_vector, 0, 1);
        assert!(extended.is_err());

        let actual = builder.to_vector();
        let expect: VectorRef = Arc::new(StringVector::from_slice(&["hello", "one", "two"]));
        assert_eq!(expect, actual);
    }

    /// Miscellaneous `Vector` accessors on a vector without nulls.
    #[test]
    fn test_string_vector_misc() {
        let words = vec!["hello", "greptime", "rust"];
        let vector = StringVector::from(words.clone());
        assert_eq!(3, vector.len());
        assert_eq!("StringVector", vector.vector_type_name());
        assert!(!vector.is_const());
        assert!(vector.validity().is_all_valid());
        assert!(!vector.only_null());
        assert_eq!(1088, vector.memory_size());

        for (idx, word) in words.iter().copied().enumerate() {
            assert_eq!(Value::from(word), vector.get(idx));
            assert_eq!(ValueRef::from(word), vector.get_ref(idx));
            assert_eq!(Value::from(word), vector.try_get(idx).unwrap());
        }

        let arrow_array = vector.to_arrow_array();
        assert_eq!(3, arrow_array.len());
        assert_eq!(&DataType::Utf8, arrow_array.data_type());
    }

    /// Nulls serialize to JSON `null`, strings to JSON strings.
    #[test]
    fn test_serialize_string_vector() {
        let mut builder = StringVectorBuilder::with_capacity(3);
        for row in [Some("hello"), None, Some("world")] {
            builder.push(row);
        }
        let string_vector = builder.finish();

        let json_values = string_vector.serialize_to_json().unwrap();
        let serialized = serde_json::to_string(&json_values).unwrap();
        assert_eq!(r#"["hello",null,"world"]"#, serialized);
    }

    /// A finished arrow array converts into a vector with the same rows.
    #[test]
    fn test_from_arrow_array() {
        let mut array_builder = MutableStringArray::new();
        array_builder.append_option(Some("A"));
        array_builder.append_option(Some("B"));
        array_builder.append_null();
        array_builder.append_option(Some("D"));
        let string_array: StringArray = array_builder.finish();

        let vector = StringVector::from(string_array);
        let json_values = vector.serialize_to_json().unwrap();
        assert_eq!(
            r#"["A","B",null,"D"]"#,
            serde_json::to_string(&json_values).unwrap(),
        );
    }

    /// Non-optional inputs, including multi-byte characters and a NUL byte.
    #[test]
    fn test_from_non_option_string() {
        let nul = String::from_utf8(vec![0]).unwrap();
        let borrowed = vec!["😅😅😅", "😍😍😍😍", "🥵🥵", nul.as_str()];
        let vector = StringVector::from(borrowed);
        let json_values = vector.serialize_to_json().unwrap();
        let serialized = serde_json::to_string(&json_values).unwrap();
        assert_eq!(r#"["😅😅😅","😍😍😍😍","🥵🥵","\u0000"]"#, serialized);

        let owned: Vec<String> = ["🀀🀀🀀", "🀁🀁🀁", "🀂🀂🀂", "🀃🀃🀃", "🀆🀆"]
            .iter()
            .map(|tile| tile.to_string())
            .collect();
        let vector = StringVector::from(owned);
        let json_values = vector.serialize_to_json().unwrap();
        let serialized = serde_json::to_string(&json_values).unwrap();
        assert_eq!(r#"["🀀🀀🀀","🀁🀁🀁","🀂🀂🀂","🀃🀃🀃","🀆🀆"]"#, serialized);
    }

    /// `finish_cloned` yields the rows without consuming the builder's contents.
    #[test]
    fn test_string_vector_builder_finish_cloned() {
        let mut builder = StringVectorBuilder::with_capacity(1024);
        for digit in ["1", "2", "3"] {
            builder.push(Some(digit));
        }

        let vector = builder.finish_cloned();
        assert_eq!(vector.len(), 3);
        let json_values = vector.serialize_to_json().unwrap();
        assert_eq!(
            r#"["1","2","3"]"#,
            serde_json::to_string(&json_values).unwrap(),
        );
        // The builder still holds the three rows after the cloned finish.
        assert_eq!(builder.len(), 3);
    }
}