datatypes/vectors/
binary.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16use std::sync::Arc;
17
18use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef};
19use snafu::ResultExt;
20
21use crate::arrow_array::{
22    BinaryArray, BinaryViewArray, LargeBinaryArray, MutableBinaryArray, MutableBinaryViewArray,
23};
24use crate::data_type::ConcreteDataType;
25use crate::error::{self, InvalidVectorSnafu, Result};
26use crate::scalars::{ScalarVector, ScalarVectorBuilder};
27use crate::serialize::Serializable;
28use crate::types::parse_string_to_vector_type_value;
29use crate::value::{Value, ValueRef};
30use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
31
/// Arrow array backing a [`BinaryVector`].
///
/// Each variant wraps one of the arrow binary array types, so a vector keeps
/// whichever array it was built from instead of converting it.
#[derive(Debug, PartialEq)]
enum BinaryArrayData {
    /// Data held in a [`BinaryArray`].
    Binary(BinaryArray),
    /// Data held in a [`LargeBinaryArray`].
    LargeBinary(LargeBinaryArray),
    /// Data held in a [`BinaryViewArray`].
    BinaryView(BinaryViewArray),
}
38
/// Vector of binary strings.
///
/// The values live in one of the arrow arrays enumerated by
/// `BinaryArrayData`; accessors dispatch on the variant currently held.
#[derive(Debug, PartialEq)]
pub struct BinaryVector {
    // Underlying arrow array, tagged by its concrete type.
    array: BinaryArrayData,
}
44
45impl BinaryVector {
46    pub(crate) fn as_arrow(&self) -> &dyn Array {
47        match &self.array {
48            BinaryArrayData::Binary(array) => array,
49            BinaryArrayData::LargeBinary(array) => array,
50            BinaryArrayData::BinaryView(array) => array,
51        }
52    }
53
54    /// Creates a new binary vector of JSONB from a binary vector.
55    /// The binary vector must contain valid JSON strings.
56    pub fn convert_binary_to_json(&self) -> Result<BinaryVector> {
57        let mut vector = vec![];
58        for binary in self.iter_data() {
59            let jsonb = if let Some(binary) = binary {
60                match jsonb::from_slice(binary) {
61                    Ok(jsonb) => Some(jsonb.to_vec()),
62                    Err(_) => {
63                        let s = String::from_utf8_lossy(binary);
64                        return error::InvalidJsonSnafu {
65                            value: s.to_string(),
66                        }
67                        .fail();
68                    }
69                }
70            } else {
71                None
72            };
73            vector.push(jsonb);
74        }
75        Ok(BinaryVector::from(vector))
76    }
77
78    pub fn convert_binary_to_vector(&self, dim: u32) -> Result<BinaryVector> {
79        let mut vector = vec![];
80        for binary in self.iter_data() {
81            let Some(binary) = binary else {
82                vector.push(None);
83                continue;
84            };
85
86            if let Ok(s) = String::from_utf8(binary.to_vec())
87                && let Ok(v) = parse_string_to_vector_type_value(&s, Some(dim))
88            {
89                vector.push(Some(v));
90                continue;
91            }
92
93            let expected_bytes_size = dim as usize * std::mem::size_of::<f32>();
94            if binary.len() == expected_bytes_size {
95                vector.push(Some(binary.to_vec()));
96                continue;
97            } else {
98                return InvalidVectorSnafu {
99                    msg: format!(
100                        "Unexpected bytes size for vector value, expected {}, got {}",
101                        expected_bytes_size,
102                        binary.len()
103                    ),
104                }
105                .fail();
106            }
107        }
108        Ok(BinaryVector::from(vector))
109    }
110}
111
112impl From<BinaryArray> for BinaryVector {
113    fn from(array: BinaryArray) -> Self {
114        Self {
115            array: BinaryArrayData::Binary(array),
116        }
117    }
118}
119
120impl From<BinaryViewArray> for BinaryVector {
121    fn from(array: BinaryViewArray) -> Self {
122        Self {
123            array: BinaryArrayData::BinaryView(array),
124        }
125    }
126}
127
128impl From<LargeBinaryArray> for BinaryVector {
129    fn from(array: LargeBinaryArray) -> Self {
130        Self {
131            array: BinaryArrayData::LargeBinary(array),
132        }
133    }
134}
135
136impl From<Vec<Option<Vec<u8>>>> for BinaryVector {
137    fn from(data: Vec<Option<Vec<u8>>>) -> Self {
138        Self {
139            array: BinaryArrayData::Binary(BinaryArray::from_iter(data)),
140        }
141    }
142}
143
144impl From<Vec<&[u8]>> for BinaryVector {
145    fn from(data: Vec<&[u8]>) -> Self {
146        Self {
147            array: BinaryArrayData::Binary(BinaryArray::from_iter_values(data)),
148        }
149    }
150}
151
152impl Vector for BinaryVector {
153    fn data_type(&self) -> ConcreteDataType {
154        match &self.array {
155            BinaryArrayData::Binary(_) => ConcreteDataType::binary_datatype(),
156            BinaryArrayData::LargeBinary(_) => ConcreteDataType::binary_datatype(),
157            BinaryArrayData::BinaryView(_) => ConcreteDataType::binary_view_datatype(),
158        }
159    }
160
161    fn vector_type_name(&self) -> String {
162        "BinaryVector".to_string()
163    }
164
165    fn as_any(&self) -> &dyn Any {
166        self
167    }
168
169    fn len(&self) -> usize {
170        match &self.array {
171            BinaryArrayData::Binary(array) => array.len(),
172            BinaryArrayData::LargeBinary(array) => array.len(),
173            BinaryArrayData::BinaryView(array) => array.len(),
174        }
175    }
176
177    fn to_arrow_array(&self) -> ArrayRef {
178        match &self.array {
179            BinaryArrayData::Binary(array) => Arc::new(array.clone()),
180            BinaryArrayData::LargeBinary(array) => Arc::new(array.clone()),
181            BinaryArrayData::BinaryView(array) => Arc::new(array.clone()),
182        }
183    }
184
185    fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
186        match &self.array {
187            BinaryArrayData::Binary(array) => Box::new(array.clone()),
188            BinaryArrayData::LargeBinary(array) => Box::new(array.clone()),
189            BinaryArrayData::BinaryView(array) => Box::new(array.clone()),
190        }
191    }
192
193    fn validity(&self) -> Validity {
194        match &self.array {
195            BinaryArrayData::Binary(array) => vectors::impl_validity_for_vector!(array),
196            BinaryArrayData::LargeBinary(array) => vectors::impl_validity_for_vector!(array),
197            BinaryArrayData::BinaryView(array) => vectors::impl_validity_for_vector!(array),
198        }
199    }
200
201    fn memory_size(&self) -> usize {
202        match &self.array {
203            BinaryArrayData::Binary(array) => array.get_buffer_memory_size(),
204            BinaryArrayData::LargeBinary(array) => array.get_buffer_memory_size(),
205            BinaryArrayData::BinaryView(array) => array.get_buffer_memory_size(),
206        }
207    }
208
209    fn null_count(&self) -> usize {
210        match &self.array {
211            BinaryArrayData::Binary(array) => array.null_count(),
212            BinaryArrayData::LargeBinary(array) => array.null_count(),
213            BinaryArrayData::BinaryView(array) => array.null_count(),
214        }
215    }
216
217    fn is_null(&self, row: usize) -> bool {
218        match &self.array {
219            BinaryArrayData::Binary(array) => array.is_null(row),
220            BinaryArrayData::LargeBinary(array) => array.is_null(row),
221            BinaryArrayData::BinaryView(array) => array.is_null(row),
222        }
223    }
224
225    fn slice(&self, offset: usize, length: usize) -> VectorRef {
226        match &self.array {
227            BinaryArrayData::Binary(array) => {
228                let array = array.slice(offset, length);
229                Arc::new(Self {
230                    array: BinaryArrayData::Binary(array),
231                })
232            }
233            BinaryArrayData::LargeBinary(array) => {
234                let array = array.slice(offset, length);
235                Arc::new(Self {
236                    array: BinaryArrayData::LargeBinary(array),
237                })
238            }
239            BinaryArrayData::BinaryView(array) => {
240                let array = array.slice(offset, length);
241                Arc::new(Self {
242                    array: BinaryArrayData::BinaryView(array),
243                })
244            }
245        }
246    }
247
248    fn get(&self, index: usize) -> Value {
249        match &self.array {
250            BinaryArrayData::Binary(array) => vectors::impl_get_for_vector!(array, index),
251            BinaryArrayData::LargeBinary(array) => vectors::impl_get_for_vector!(array, index),
252            BinaryArrayData::BinaryView(array) => vectors::impl_get_for_vector!(array, index),
253        }
254    }
255
256    fn get_ref(&self, index: usize) -> ValueRef<'_> {
257        match &self.array {
258            BinaryArrayData::Binary(array) => vectors::impl_get_ref_for_vector!(array, index),
259            BinaryArrayData::LargeBinary(array) => vectors::impl_get_ref_for_vector!(array, index),
260            BinaryArrayData::BinaryView(array) => vectors::impl_get_ref_for_vector!(array, index),
261        }
262    }
263}
264
265impl From<Vec<Vec<u8>>> for BinaryVector {
266    fn from(data: Vec<Vec<u8>>) -> Self {
267        Self {
268            array: BinaryArrayData::Binary(BinaryArray::from_iter_values(data)),
269        }
270    }
271}
272
273impl ScalarVector for BinaryVector {
274    type OwnedItem = Vec<u8>;
275    type RefItem<'a> = &'a [u8];
276    type Iter<'a> = BinaryIter<'a>;
277    type Builder = BinaryVectorBuilder;
278
279    fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
280        match &self.array {
281            BinaryArrayData::Binary(array) => array.is_valid(idx).then(|| array.value(idx)),
282            BinaryArrayData::LargeBinary(array) => array.is_valid(idx).then(|| array.value(idx)),
283            BinaryArrayData::BinaryView(array) => array.is_valid(idx).then(|| array.value(idx)),
284        }
285    }
286
287    fn iter_data(&self) -> Self::Iter<'_> {
288        match &self.array {
289            BinaryArrayData::Binary(array) => BinaryIter::Binary(array.iter()),
290            BinaryArrayData::LargeBinary(array) => BinaryIter::LargeBinary(array.iter()),
291            BinaryArrayData::BinaryView(array) => BinaryIter::BinaryView(array.iter()),
292        }
293    }
294}
295
/// Iterator over the elements of a [`BinaryVector`].
///
/// Wraps the arrow iterator of whichever array type backs the vector, so all
/// three array types can be iterated through a single type.
pub enum BinaryIter<'a> {
    /// Iterates a [`BinaryArray`].
    Binary(ArrayIter<&'a BinaryArray>),
    /// Iterates a [`LargeBinaryArray`].
    LargeBinary(ArrayIter<&'a LargeBinaryArray>),
    /// Iterates a [`BinaryViewArray`].
    BinaryView(ArrayIter<&'a BinaryViewArray>),
}
301
302impl<'a> Iterator for BinaryIter<'a> {
303    type Item = Option<&'a [u8]>;
304
305    fn next(&mut self) -> Option<Self::Item> {
306        match self {
307            BinaryIter::Binary(iter) => iter.next(),
308            BinaryIter::LargeBinary(iter) => iter.next(),
309            BinaryIter::BinaryView(iter) => iter.next(),
310        }
311    }
312}
313
/// Arrow builder used by [`BinaryVectorBuilder`].
///
/// The variant chosen at construction time decides which array type the
/// finished [`BinaryVector`] is backed by.
enum MutableBinaryArrayData {
    /// Builder that finishes into a [`BinaryArray`].
    Binary(MutableBinaryArray),
    /// Builder that finishes into a [`BinaryViewArray`].
    BinaryView(MutableBinaryViewArray),
}
318
/// Builder for [`BinaryVector`].
///
/// Construct it with `ScalarVectorBuilder::with_capacity` for a plain binary
/// array, or with `with_view_capacity` for a binary view array.
pub struct BinaryVectorBuilder {
    // Underlying arrow builder, tagged by the array type it produces.
    mutable_array: MutableBinaryArrayData,
}
322
323impl BinaryVectorBuilder {
324    pub fn with_view_capacity(capacity: usize) -> Self {
325        Self {
326            mutable_array: MutableBinaryArrayData::BinaryView(
327                MutableBinaryViewArray::with_capacity(capacity),
328            ),
329        }
330    }
331}
332
333impl MutableVector for BinaryVectorBuilder {
334    fn data_type(&self) -> ConcreteDataType {
335        match &self.mutable_array {
336            MutableBinaryArrayData::Binary(_) => ConcreteDataType::binary_datatype(),
337            MutableBinaryArrayData::BinaryView(_) => ConcreteDataType::binary_view_datatype(),
338        }
339    }
340
341    fn len(&self) -> usize {
342        match &self.mutable_array {
343            MutableBinaryArrayData::Binary(array) => array.len(),
344            MutableBinaryArrayData::BinaryView(array) => array.len(),
345        }
346    }
347
348    fn as_any(&self) -> &dyn Any {
349        self
350    }
351
352    fn as_mut_any(&mut self) -> &mut dyn Any {
353        self
354    }
355
356    fn to_vector(&mut self) -> VectorRef {
357        Arc::new(self.finish())
358    }
359
360    fn to_vector_cloned(&self) -> VectorRef {
361        Arc::new(self.finish_cloned())
362    }
363
364    fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
365        let value = value.try_into_binary()?;
366        match &mut self.mutable_array {
367            MutableBinaryArrayData::Binary(array) => array.append_option(value),
368            MutableBinaryArrayData::BinaryView(array) => array.append_option(value),
369        };
370        Ok(())
371    }
372
373    fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
374        vectors::impl_extend_for_builder!(self, vector, BinaryVector, offset, length)
375    }
376
377    fn push_null(&mut self) {
378        match &mut self.mutable_array {
379            MutableBinaryArrayData::Binary(array) => array.append_null(),
380            MutableBinaryArrayData::BinaryView(array) => array.append_null(),
381        }
382    }
383}
384
385impl ScalarVectorBuilder for BinaryVectorBuilder {
386    type VectorType = BinaryVector;
387
388    fn with_capacity(capacity: usize) -> Self {
389        Self {
390            mutable_array: MutableBinaryArrayData::Binary(MutableBinaryArray::with_capacity(
391                capacity, 0,
392            )),
393        }
394    }
395
396    fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
397        match &mut self.mutable_array {
398            MutableBinaryArrayData::Binary(array) => array.append_option(value),
399            MutableBinaryArrayData::BinaryView(array) => array.append_option(value),
400        };
401    }
402
403    fn finish(&mut self) -> Self::VectorType {
404        match &mut self.mutable_array {
405            MutableBinaryArrayData::Binary(array) => BinaryVector {
406                array: BinaryArrayData::Binary(array.finish()),
407            },
408            MutableBinaryArrayData::BinaryView(array) => BinaryVector {
409                array: BinaryArrayData::BinaryView(array.finish()),
410            },
411        }
412    }
413
414    fn finish_cloned(&self) -> Self::VectorType {
415        match &self.mutable_array {
416            MutableBinaryArrayData::Binary(array) => BinaryVector {
417                array: BinaryArrayData::Binary(array.finish_cloned()),
418            },
419            MutableBinaryArrayData::BinaryView(array) => BinaryVector {
420                array: BinaryArrayData::BinaryView(array.finish_cloned()),
421            },
422        }
423    }
424}
425
426impl Serializable for BinaryVector {
427    fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
428        self.iter_data()
429            .map(|v| match v {
430                None => Ok(serde_json::Value::Null), // if binary vector not present, map to NULL
431                Some(vec) => serde_json::to_value(vec),
432            })
433            .collect::<serde_json::Result<_>>()
434            .context(error::SerializeSnafu)
435    }
436}
437
438impl BinaryVector {
439    pub fn try_from_arrow_array(
440        array: impl AsRef<dyn Array>,
441    ) -> crate::error::Result<BinaryVector> {
442        let array = array.as_ref();
443
444        if let Some(binary_array) = array.as_any().downcast_ref::<BinaryArray>() {
445            Ok(BinaryVector::from(binary_array.clone()))
446        } else if let Some(large_binary_array) = array.as_any().downcast_ref::<LargeBinaryArray>() {
447            Ok(BinaryVector::from(large_binary_array.clone()))
448        } else if let Some(binary_view_array) = array.as_any().downcast_ref::<BinaryViewArray>() {
449            Ok(BinaryVector::from(binary_view_array.clone()))
450        } else {
451            Err(crate::error::UnsupportedArrowTypeSnafu {
452                arrow_type: array.data_type().clone(),
453            }
454            .build())
455        }
456    }
457}
458
#[cfg(test)]
mod tests {
    use std::assert_matches::assert_matches;

    use arrow::datatypes::DataType as ArrowDataType;
    use common_base::bytes::Bytes;
    use serde_json;

    use super::*;
    use crate::arrow_array::{BinaryArray, LargeBinaryArray};
    use crate::data_type::DataType;
    use crate::serialize::Serializable;
    use crate::types::BinaryType;

    // Basic metadata and element access on a vector built from a `BinaryArray`.
    #[test]
    fn test_binary_vector_misc() {
        let v = BinaryVector::from(BinaryArray::from_iter_values([
            vec![1, 2, 3],
            vec![1, 2, 3],
        ]));

        assert_eq!(2, v.len());
        assert_eq!("BinaryVector", v.vector_type_name());
        assert!(!v.is_const());
        assert!(v.validity().is_all_valid());
        assert!(!v.only_null());
        assert_eq!(128, v.memory_size());

        for i in 0..2 {
            assert!(!v.is_null(i));
            assert_eq!(Value::Binary(Bytes::from(vec![1, 2, 3])), v.get(i));
            assert_eq!(ValueRef::Binary(&[1, 2, 3]), v.get_ref(i));
        }

        let arrow_arr = v.to_arrow_array();
        assert_eq!(2, arrow_arr.len());
        assert_eq!(&ArrowDataType::Binary, arrow_arr.data_type());
    }

    // Building through the view builder yields a binary-view backed vector
    // whose accessors behave like the plain binary ones.
    #[test]
    fn test_binary_view_vector_build_get() {
        let mut builder = BinaryVectorBuilder::with_view_capacity(4);
        builder.push(Some(b"hello"));
        builder.push(None);
        builder.push(Some(b"world"));
        let vector = builder.finish();

        assert_eq!(ConcreteDataType::binary_view_datatype(), vector.data_type());
        assert_eq!(b"hello", vector.get_data(0).unwrap());
        assert_eq!(None, vector.get_data(1));
        assert_eq!(b"world", vector.get_data(2).unwrap());

        assert_eq!(Value::Binary(b"hello".as_slice().into()), vector.get(0));
        assert_eq!(Value::Null, vector.get(1));
        assert_eq!(Value::Binary(b"world".as_slice().into()), vector.get(2));

        let mut iter = vector.iter_data();
        assert_eq!(b"hello", iter.next().unwrap().unwrap());
        assert_eq!(None, iter.next().unwrap());
        assert_eq!(b"world", iter.next().unwrap().unwrap());
        assert_eq!(None, iter.next());

        let arrow_arr = vector.to_arrow_array();
        assert_eq!(&ArrowDataType::BinaryView, arrow_arr.data_type());
    }

    #[test]
    fn test_serialize_binary_vector_to_json() {
        let vector = BinaryVector::from(BinaryArray::from_iter_values([
            vec![1, 2, 3],
            vec![1, 2, 3],
        ]));

        let json_value = vector.serialize_to_json().unwrap();
        assert_eq!(
            "[[1,2,3],[1,2,3]]",
            serde_json::to_string(&json_value).unwrap()
        );
    }

    // Null elements must serialize to JSON `null`.
    #[test]
    fn test_serialize_binary_vector_with_null_to_json() {
        let mut builder = BinaryVectorBuilder::with_capacity(4);
        builder.push(Some(&[1, 2, 3]));
        builder.push(None);
        builder.push(Some(&[4, 5, 6]));
        let vector = builder.finish();

        let json_value = vector.serialize_to_json().unwrap();
        assert_eq!(
            "[[1,2,3],null,[4,5,6]]",
            serde_json::to_string(&json_value).unwrap()
        );
    }

    #[test]
    fn test_from_arrow_array() {
        let arrow_array = BinaryArray::from_iter_values([vec![1, 2, 3], vec![1, 2, 3]]);
        let original = BinaryArray::from(arrow_array.to_data());
        let vector = BinaryVector::from(arrow_array);
        let BinaryArrayData::Binary(array) = &vector.array else {
            panic!("Expected BinaryArray");
        };
        assert_eq!(&original, array);
    }

    #[test]
    fn test_from_large_binary_arrow_array() {
        let arrow_array = LargeBinaryArray::from_iter_values([vec![1, 2, 3], vec![1, 2, 3]]);
        let original = LargeBinaryArray::from(arrow_array.to_data());
        let vector = BinaryVector::from(arrow_array);
        let BinaryArrayData::LargeBinary(array) = &vector.array else {
            panic!("Expected LargeBinaryArray");
        };
        assert_eq!(&original, array);
    }

    #[test]
    fn test_binary_vector_build_get() {
        let mut builder = BinaryVectorBuilder::with_capacity(4);
        builder.push(Some(b"hello"));
        builder.push(Some(b"happy"));
        builder.push(Some(b"world"));
        builder.push(None);

        let vector = builder.finish();
        assert_eq!(b"hello", vector.get_data(0).unwrap());
        assert_eq!(None, vector.get_data(3));

        assert_eq!(Value::Binary(b"hello".as_slice().into()), vector.get(0));
        assert_eq!(Value::Null, vector.get(3));

        let mut iter = vector.iter_data();
        assert_eq!(b"hello", iter.next().unwrap().unwrap());
        assert_eq!(b"happy", iter.next().unwrap().unwrap());
        assert_eq!(b"world", iter.next().unwrap().unwrap());
        assert_eq!(None, iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_binary_vector_validity() {
        let mut builder = BinaryVectorBuilder::with_capacity(4);
        builder.push(Some(b"hello"));
        builder.push(Some(b"world"));
        let vector = builder.finish();
        assert_eq!(0, vector.null_count());
        assert!(vector.validity().is_all_valid());

        let mut builder = BinaryVectorBuilder::with_capacity(3);
        builder.push(Some(b"hello"));
        builder.push(None);
        builder.push(Some(b"world"));
        let vector = builder.finish();
        assert_eq!(1, vector.null_count());
        let validity = vector.validity();
        assert!(!validity.is_set(1));

        assert_eq!(1, validity.null_count());
        // The slots around the null must still be reported as valid.
        assert!(validity.is_set(0));
        assert!(validity.is_set(2));
    }

    // Exercises the type-erased builder: mismatched values and vectors are
    // rejected, matching ones are appended.
    #[test]
    fn test_binary_vector_builder() {
        let input = BinaryVector::from_slice(&[b"world", b"one", b"two"]);

        let mut builder = BinaryType::default().create_mutable_vector(3);
        builder.push_value_ref(&ValueRef::Binary("hello".as_bytes()));
        assert!(builder.try_push_value_ref(&ValueRef::Int32(123)).is_err());
        builder.extend_slice_of(&input, 1, 2).unwrap();
        assert!(
            builder
                .extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
                .is_err()
        );
        let vector = builder.to_vector();

        let expect: VectorRef = Arc::new(BinaryVector::from_slice(&[b"hello", b"one", b"two"]));
        assert_eq!(expect, vector);
    }

    // `finish_cloned` must leave the builder usable for further pushes.
    #[test]
    fn test_binary_vector_builder_finish_cloned() {
        let mut builder = BinaryVectorBuilder::with_capacity(1024);
        builder.push(Some(b"one"));
        builder.push(Some(b"two"));
        builder.push(Some(b"three"));
        let vector = builder.finish_cloned();
        assert_eq!(b"one", vector.get_data(0).unwrap());
        assert_eq!(vector.len(), 3);
        assert_eq!(builder.len(), 3);

        builder.push(Some(b"four"));
        let vector = builder.finish_cloned();
        assert_eq!(b"four", vector.get_data(3).unwrap());
        assert_eq!(builder.len(), 4);
    }

    #[test]
    fn test_binary_json_conversion() {
        // json strings
        let json_strings = vec![
            b"{\"hello\": \"world\"}".to_vec(),
            b"{\"foo\": 1}".to_vec(),
            b"123".to_vec(),
        ];
        let json_vector = BinaryVector::from(json_strings.clone())
            .convert_binary_to_json()
            .unwrap();
        let jsonbs = json_strings
            .iter()
            .map(|v| jsonb::parse_value(v).unwrap().to_vec())
            .collect::<Vec<_>>();
        for i in 0..3 {
            assert_eq!(
                json_vector.get_ref(i).try_into_binary().unwrap().unwrap(),
                jsonbs.get(i).unwrap().as_slice()
            );
        }

        // jsonb
        let json_vector = BinaryVector::from(jsonbs.clone())
            .convert_binary_to_json()
            .unwrap();
        for i in 0..3 {
            assert_eq!(
                json_vector.get_ref(i).try_into_binary().unwrap().unwrap(),
                jsonbs.get(i).unwrap().as_slice()
            );
        }

        // binary with jsonb header (0x80, 0x40, 0x20)
        let binary_with_jsonb_header: Vec<u8> = [0x80, 0x23, 0x40, 0x22].to_vec();
        let error = BinaryVector::from(vec![binary_with_jsonb_header])
            .convert_binary_to_json()
            .unwrap_err();
        assert_matches!(error, error::Error::InvalidJson { .. });

        // invalid json string
        let json_strings = vec![b"{\"hello\": \"world\"".to_vec()];
        let error = BinaryVector::from(json_strings)
            .convert_binary_to_json()
            .unwrap_err();
        assert_matches!(error, error::Error::InvalidJson { .. });

        // corrupted jsonb
        let jsonb = jsonb::parse_value("{\"hello\": \"world\"}".as_bytes())
            .unwrap()
            .to_vec();
        let corrupted_jsonb = jsonb[0..jsonb.len() - 1].to_vec();
        let error = BinaryVector::from(vec![corrupted_jsonb])
            .convert_binary_to_json()
            .unwrap_err();
        assert_matches!(error, error::Error::InvalidJson { .. });
    }

    #[test]
    fn test_binary_vector_conversion() {
        let dim = 3;
        let vector = BinaryVector::from(vec![
            Some(b"[1,2,3]".to_vec()),
            Some(b"[4,5,6]".to_vec()),
            Some(b"[7,8,9]".to_vec()),
            None,
        ]);
        let expected = BinaryVector::from(vec![
            Some(
                [1.0f32, 2.0, 3.0]
                    .iter()
                    .flat_map(|v| v.to_le_bytes())
                    .collect(),
            ),
            Some(
                [4.0f32, 5.0, 6.0]
                    .iter()
                    .flat_map(|v| v.to_le_bytes())
                    .collect(),
            ),
            Some(
                [7.0f32, 8.0, 9.0]
                    .iter()
                    .flat_map(|v| v.to_le_bytes())
                    .collect(),
            ),
            None,
        ]);

        let converted = vector.convert_binary_to_vector(dim).unwrap();
        assert_eq!(converted.len(), expected.len());
        for i in 0..3 {
            assert_eq!(
                converted.get_ref(i).try_into_binary().unwrap().unwrap(),
                expected.get_ref(i).try_into_binary().unwrap().unwrap()
            );
        }
        // The trailing null row must be carried over as a null, not dropped
        // or turned into an empty value.
        assert!(expected.is_null(3));
        assert!(converted.is_null(3));
    }
}