1use std::sync::Arc;
16
17use bytes::Buf;
18use common_base::bytes::Bytes;
19use common_decimal::Decimal128;
20use common_recordbatch::filter::SimpleFilterEvaluator;
21use common_time::time::Time;
22use common_time::{Date, Duration, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
23use datatypes::data_type::ConcreteDataType;
24use datatypes::prelude::Value;
25use datatypes::types::IntervalType;
26use datatypes::value::ValueRef;
27use memcomparable::{Deserializer, Serializer};
28use paste::paste;
29use serde::{Deserialize, Serialize};
30use snafu::ResultExt;
31use store_api::codec::PrimaryKeyEncoding;
32use store_api::metadata::{RegionMetadata, RegionMetadataRef};
33use store_api::storage::ColumnId;
34
35use crate::error::{
36 self, FieldTypeMismatchSnafu, NotSupportedFieldSnafu, Result, SerializeFieldSnafu,
37};
38use crate::key_values::KeyValue;
39use crate::primary_key_filter::DensePrimaryKeyFilter;
40use crate::row_converter::{
41 CompositeValues, PrimaryKeyCodec, PrimaryKeyCodecExt, PrimaryKeyFilter,
42};
43
44#[derive(Debug, Clone, PartialEq, Eq)]
46pub struct SortField {
47 data_type: ConcreteDataType,
48}
49
50impl SortField {
51 pub fn new(data_type: ConcreteDataType) -> Self {
52 Self { data_type }
53 }
54
55 pub fn data_type(&self) -> &ConcreteDataType {
57 &self.data_type
58 }
59
60 pub fn encode_data_type(&self) -> &ConcreteDataType {
64 match &self.data_type {
65 ConcreteDataType::Dictionary(dict_type) => dict_type.value_type(),
66 _ => &self.data_type,
67 }
68 }
69
70 pub fn estimated_size(&self) -> usize {
71 Self::estimated_size_by_type(self.encode_data_type())
72 }
73
74 fn estimated_size_by_type(data_type: &ConcreteDataType) -> usize {
75 match data_type {
76 ConcreteDataType::Boolean(_) => 2,
77 ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => 2,
78 ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => 3,
79 ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => 5,
80 ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
81 ConcreteDataType::Float32(_) => 5,
82 ConcreteDataType::Float64(_) => 9,
83 ConcreteDataType::Binary(_)
84 | ConcreteDataType::Json(_)
85 | ConcreteDataType::Vector(_) => 11,
86 ConcreteDataType::String(_) => 11, ConcreteDataType::Date(_) => 5,
88 ConcreteDataType::Timestamp(_) => 10,
89 ConcreteDataType::Time(_) => 10,
90 ConcreteDataType::Duration(_) => 10,
91 ConcreteDataType::Interval(_) => 18,
92 ConcreteDataType::Decimal128(_) => 19,
93 ConcreteDataType::Null(_)
94 | ConcreteDataType::List(_)
95 | ConcreteDataType::Struct(_)
96 | ConcreteDataType::Dictionary(_) => 0,
97 }
98 }
99
100 pub fn serialize(
102 &self,
103 serializer: &mut Serializer<&mut Vec<u8>>,
104 value: &ValueRef,
105 ) -> Result<()> {
106 Self::serialize_by_type(self.encode_data_type(), serializer, value)
107 }
108
109 fn serialize_by_type(
110 data_type: &ConcreteDataType,
111 serializer: &mut Serializer<&mut Vec<u8>>,
112 value: &ValueRef,
113 ) -> Result<()> {
114 macro_rules! cast_value_and_serialize {
115 (
116 $data_type: ident;
117 $serializer: ident;
118 $(
119 $ty: ident, $f: ident
120 ),*
121 ) => {
122 match $data_type {
123 $(
124 ConcreteDataType::$ty(_) => {
125 paste!{
126 value
127 .[<try_into_ $f>]()
128 .context(FieldTypeMismatchSnafu)?
129 .serialize($serializer)
130 .context(SerializeFieldSnafu)?;
131 }
132 }
133 )*
134 ConcreteDataType::Timestamp(_) => {
135 let timestamp = value.try_into_timestamp().context(FieldTypeMismatchSnafu)?;
136 timestamp
137 .map(|t|t.value())
138 .serialize($serializer)
139 .context(SerializeFieldSnafu)?;
140 }
141 ConcreteDataType::Interval(IntervalType::YearMonth(_)) => {
142 let interval = value.try_into_interval_year_month().context(FieldTypeMismatchSnafu)?;
143 interval.map(|i| i.to_i32())
144 .serialize($serializer)
145 .context(SerializeFieldSnafu)?;
146 }
147 ConcreteDataType::Interval(IntervalType::DayTime(_)) => {
148 let interval = value.try_into_interval_day_time().context(FieldTypeMismatchSnafu)?;
149 interval.map(|i| i.to_i64())
150 .serialize($serializer)
151 .context(SerializeFieldSnafu)?;
152 }
153 ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => {
154 let interval = value.try_into_interval_month_day_nano().context(FieldTypeMismatchSnafu)?;
155 interval.map(|i| i.to_i128())
156 .serialize($serializer)
157 .context(SerializeFieldSnafu)?;
158 }
159 ConcreteDataType::List(_) |
160 ConcreteDataType::Struct(_) |
161 ConcreteDataType::Dictionary(_) |
162 ConcreteDataType::Null(_) => {
163 return error::NotSupportedFieldSnafu {
164 data_type: $data_type.clone()
165 }.fail()
166 }
167 }
168 };
169 }
170 cast_value_and_serialize!(data_type; serializer;
171 Boolean, boolean,
172 Binary, binary,
173 Int8, i8,
174 UInt8, u8,
175 Int16, i16,
176 UInt16, u16,
177 Int32, i32,
178 UInt32, u32,
179 Int64, i64,
180 UInt64, u64,
181 Float32, f32,
182 Float64, f64,
183 String, string,
184 Date, date,
185 Time, time,
186 Duration, duration,
187 Decimal128, decimal128,
188 Json, binary,
189 Vector, binary
190 );
191
192 Ok(())
193 }
194
195 pub fn deserialize<B: Buf>(&self, deserializer: &mut Deserializer<B>) -> Result<Value> {
197 Self::deserialize_by_type(self.encode_data_type(), deserializer)
198 }
199
200 fn deserialize_by_type<B: Buf>(
201 data_type: &ConcreteDataType,
202 deserializer: &mut Deserializer<B>,
203 ) -> Result<Value> {
204 macro_rules! deserialize_and_build_value {
205 (
206 $data_type: ident;
207 $serializer: ident;
208 $(
209 $ty: ident, $f: ident
210 ),*
211 ) => {
212
213 match $data_type {
214 $(
215 ConcreteDataType::$ty(_) => {
216 Ok(Value::from(Option::<$f>::deserialize(deserializer).context(error::DeserializeFieldSnafu)?))
217 }
218 )*
219 ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) | ConcreteDataType::Vector(_) => Ok(Value::from(
220 Option::<Vec<u8>>::deserialize(deserializer)
221 .context(error::DeserializeFieldSnafu)?
222 .map(Bytes::from),
223 )),
224 ConcreteDataType::Timestamp(ty) => {
225 let timestamp = Option::<i64>::deserialize(deserializer)
226 .context(error::DeserializeFieldSnafu)?
227 .map(|t|ty.create_timestamp(t));
228 Ok(Value::from(timestamp))
229 }
230 ConcreteDataType::Interval(IntervalType::YearMonth(_)) => {
231 let interval = Option::<i32>::deserialize(deserializer)
232 .context(error::DeserializeFieldSnafu)?
233 .map(IntervalYearMonth::from_i32);
234 Ok(Value::from(interval))
235 }
236 ConcreteDataType::Interval(IntervalType::DayTime(_)) => {
237 let interval = Option::<i64>::deserialize(deserializer)
238 .context(error::DeserializeFieldSnafu)?
239 .map(IntervalDayTime::from_i64);
240 Ok(Value::from(interval))
241 }
242 ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => {
243 let interval = Option::<i128>::deserialize(deserializer)
244 .context(error::DeserializeFieldSnafu)?
245 .map(IntervalMonthDayNano::from_i128);
246 Ok(Value::from(interval))
247 }
248 ConcreteDataType::List(l) => NotSupportedFieldSnafu {
249 data_type: ConcreteDataType::List(l.clone()),
250 }
251 .fail(),
252 ConcreteDataType::Struct(f) => NotSupportedFieldSnafu {
253 data_type: ConcreteDataType::Struct(f.clone()),
254 }
255 .fail(),
256 ConcreteDataType::Dictionary(d) => NotSupportedFieldSnafu {
257 data_type: ConcreteDataType::Dictionary(d.clone()),
258 }
259 .fail(),
260 ConcreteDataType::Null(n) => NotSupportedFieldSnafu {
261 data_type: ConcreteDataType::Null(n.clone()),
262 }
263 .fail(),
264 }
265 };
266 }
267 deserialize_and_build_value!(data_type; deserializer;
268 Boolean, bool,
269 Int8, i8,
270 Int16, i16,
271 Int32, i32,
272 Int64, i64,
273 UInt8, u8,
274 UInt16, u16,
275 UInt32, u32,
276 UInt64, u64,
277 Float32, f32,
278 Float64, f64,
279 String, String,
280 Date, Date,
281 Time, Time,
282 Duration, Duration,
283 Decimal128, Decimal128
284 )
285 }
286
287 pub(crate) fn skip_deserialize(
289 &self,
290 bytes: &[u8],
291 deserializer: &mut Deserializer<&[u8]>,
292 ) -> Result<usize> {
293 let pos = deserializer.position();
294 if bytes[pos] == 0 {
295 deserializer.advance(1);
296 return Ok(1);
297 }
298
299 Self::skip_deserialize_by_type(self.encode_data_type(), bytes, deserializer)
300 }
301
302 fn skip_deserialize_by_type(
303 data_type: &ConcreteDataType,
304 bytes: &[u8],
305 deserializer: &mut Deserializer<&[u8]>,
306 ) -> Result<usize> {
307 let to_skip = match data_type {
308 ConcreteDataType::Boolean(_) => 2,
309 ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => 2,
310 ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => 3,
311 ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => 5,
312 ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
313 ConcreteDataType::Float32(_) => 5,
314 ConcreteDataType::Float64(_) => 9,
315 ConcreteDataType::Binary(_)
316 | ConcreteDataType::Json(_)
317 | ConcreteDataType::Vector(_) => {
318 let pos_before = deserializer.position();
321 let mut current = pos_before + 1;
322 while bytes[current] == 1 {
323 current += 2;
324 }
325 let to_skip = current - pos_before + 1;
326 deserializer.advance(to_skip);
327 return Ok(to_skip);
328 }
329 ConcreteDataType::String(_) => {
330 let pos_before = deserializer.position();
331 deserializer.advance(1);
332 deserializer
333 .skip_bytes()
334 .context(error::DeserializeFieldSnafu)?;
335 return Ok(deserializer.position() - pos_before);
336 }
337 ConcreteDataType::Date(_) => 5,
338 ConcreteDataType::Timestamp(_) => 9, ConcreteDataType::Time(_) => 10, ConcreteDataType::Duration(_) => 10,
341 ConcreteDataType::Interval(IntervalType::YearMonth(_)) => 5,
342 ConcreteDataType::Interval(IntervalType::DayTime(_)) => 9,
343 ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => 17,
344 ConcreteDataType::Decimal128(_) => 19,
345 ConcreteDataType::Null(_)
346 | ConcreteDataType::List(_)
347 | ConcreteDataType::Struct(_)
348 | ConcreteDataType::Dictionary(_) => 0,
349 };
350 deserializer.advance(to_skip);
351 Ok(to_skip)
352 }
353}
354
355impl PrimaryKeyCodecExt for DensePrimaryKeyCodec {
356 fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
357 where
358 I: Iterator<Item = ValueRef<'a>>,
359 {
360 self.encode_dense(row, buffer)
361 }
362}
363
364#[derive(Clone, Debug)]
366pub struct DensePrimaryKeyCodec {
367 ordered_primary_key_columns: Arc<Vec<(ColumnId, SortField)>>,
369}
370
371impl DensePrimaryKeyCodec {
372 pub fn new(metadata: &RegionMetadata) -> Self {
373 let ordered_primary_key_columns = metadata
374 .primary_key_columns()
375 .map(|c| {
376 (
377 c.column_id,
378 SortField::new(c.column_schema.data_type.clone()),
379 )
380 })
381 .collect::<Vec<_>>();
382
383 Self::with_fields(ordered_primary_key_columns)
384 }
385
386 pub fn with_fields(fields: Vec<(ColumnId, SortField)>) -> Self {
387 Self {
388 ordered_primary_key_columns: Arc::new(fields),
389 }
390 }
391
392 fn encode_dense<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
393 where
394 I: Iterator<Item = ValueRef<'a>>,
395 {
396 let mut serializer = Serializer::new(buffer);
397 for (idx, value) in row.enumerate() {
398 self.field_at(idx).serialize(&mut serializer, &value)?;
399 }
400 Ok(())
401 }
402
403 pub fn decode_dense(&self, bytes: &[u8]) -> Result<Vec<(ColumnId, Value)>> {
405 let mut deserializer = Deserializer::new(bytes);
406 let mut values = Vec::with_capacity(self.ordered_primary_key_columns.len());
407 for (column_id, field) in self.ordered_primary_key_columns.iter() {
408 let value = field.deserialize(&mut deserializer)?;
409 values.push((*column_id, value));
410 }
411 Ok(values)
412 }
413
414 pub fn decode_dense_without_column_id(&self, bytes: &[u8]) -> Result<Vec<Value>> {
416 let mut deserializer = Deserializer::new(bytes);
417 let mut values = Vec::with_capacity(self.ordered_primary_key_columns.len());
418 for (_, field) in self.ordered_primary_key_columns.iter() {
419 let value = field.deserialize(&mut deserializer)?;
420 values.push(value);
421 }
422 Ok(values)
423 }
424
425 fn field_at(&self, pos: usize) -> &SortField {
430 &self.ordered_primary_key_columns[pos].1
431 }
432
433 fn advance_to_value_at(
437 &self,
438 bytes: &[u8],
439 pos: usize,
440 offsets_buf: &mut Vec<usize>,
441 deserializer: &mut Deserializer<&[u8]>,
442 ) -> Result<usize> {
443 if pos < offsets_buf.len() {
444 let offset = offsets_buf[pos];
446 deserializer.advance(offset);
447 return Ok(offset);
448 }
449
450 if offsets_buf.is_empty() {
451 let mut offset = 0;
452 for i in 0..pos {
454 offsets_buf.push(offset);
456 let skip = self.field_at(i).skip_deserialize(bytes, deserializer)?;
457 offset += skip;
458 }
459 offsets_buf.push(offset);
461 Ok(offset)
462 } else {
463 let value_start = offsets_buf.len() - 1;
465 let mut offset = offsets_buf[value_start];
467 deserializer.advance(offset);
468 for i in value_start..pos {
469 let skip = self.field_at(i).skip_deserialize(bytes, deserializer)?;
471 offset += skip;
473 offsets_buf.push(offset);
474 }
475 Ok(offset)
476 }
477 }
478
479 pub fn decode_value_at(
483 &self,
484 bytes: &[u8],
485 pos: usize,
486 offsets_buf: &mut Vec<usize>,
487 ) -> Result<Value> {
488 let mut deserializer = Deserializer::new(bytes);
489 self.advance_to_value_at(bytes, pos, offsets_buf, &mut deserializer)?;
490
491 self.field_at(pos).deserialize(&mut deserializer)
492 }
493
494 pub fn encoded_value_at<'a>(
499 &self,
500 bytes: &'a [u8],
501 pos: usize,
502 offsets_buf: &mut Vec<usize>,
503 ) -> Result<&'a [u8]> {
504 let mut deserializer = Deserializer::new(bytes);
505 let offset = self.advance_to_value_at(bytes, pos, offsets_buf, &mut deserializer)?;
506
507 let len = self
508 .field_at(pos)
509 .skip_deserialize(bytes, &mut deserializer)?;
510 Ok(&bytes[offset..offset + len])
511 }
512
513 pub fn estimated_size(&self) -> usize {
514 self.ordered_primary_key_columns
515 .iter()
516 .map(|(_, f)| f.estimated_size())
517 .sum()
518 }
519
520 pub fn num_fields(&self) -> usize {
521 self.ordered_primary_key_columns.len()
522 }
523}
524
525impl PrimaryKeyCodec for DensePrimaryKeyCodec {
526 fn encode_key_value(&self, key_value: &KeyValue, buffer: &mut Vec<u8>) -> Result<()> {
527 self.encode_dense(key_value.primary_keys(), buffer)
528 }
529
530 fn encode_values(&self, values: &[(ColumnId, Value)], buffer: &mut Vec<u8>) -> Result<()> {
531 self.encode_dense(values.iter().map(|(_, v)| v.as_value_ref()), buffer)
532 }
533
534 fn encode_value_refs(
535 &self,
536 values: &[(ColumnId, ValueRef)],
537 buffer: &mut Vec<u8>,
538 ) -> Result<()> {
539 let iter = values.iter().map(|(_, v)| v.clone());
540 self.encode_dense(iter, buffer)
541 }
542
543 fn estimated_size(&self) -> Option<usize> {
544 Some(self.estimated_size())
545 }
546
547 fn num_fields(&self) -> Option<usize> {
548 Some(self.num_fields())
549 }
550
551 fn encoding(&self) -> PrimaryKeyEncoding {
552 PrimaryKeyEncoding::Dense
553 }
554
555 fn primary_key_filter(
556 &self,
557 metadata: &RegionMetadataRef,
558 filters: Arc<Vec<SimpleFilterEvaluator>>,
559 skip_partition_column: bool,
560 ) -> Box<dyn PrimaryKeyFilter> {
561 Box::new(DensePrimaryKeyFilter::new(
562 metadata.clone(),
563 filters,
564 self.clone(),
565 skip_partition_column,
566 ))
567 }
568
569 fn decode(&self, bytes: &[u8]) -> Result<CompositeValues> {
570 Ok(CompositeValues::Dense(self.decode_dense(bytes)?))
571 }
572
573 fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
574 let mut values = self.decode_dense(bytes)?;
576 Ok(values.pop().map(|(_, v)| v))
577 }
578}
579
#[cfg(test)]
mod tests {
    use common_base::bytes::StringBytes;
    use common_time::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp};
    use datatypes::value::Value;

    use super::*;

    /// Encodes `row` with a codec built from `data_types` and checks that both
    /// full decoding and value-by-value decoding give back `row`.
    fn check_encode_and_decode(data_types: &[ConcreteDataType], row: Vec<Value>) {
        let fields: Vec<_> = data_types
            .iter()
            .cloned()
            .map(|data_type| (0, SortField::new(data_type)))
            .collect();
        let encoder = DensePrimaryKeyCodec::with_fields(fields);

        let result = encoder
            .encode(row.iter().map(|value| value.as_value_ref()))
            .unwrap();
        assert_eq!(encoder.decode(&result).unwrap().into_dense(), row);

        // Two rounds: the first one fills the offsets buffer, the second one
        // decodes with the offsets already cached.
        let mut offsets = Vec::new();
        for _ in 0..2 {
            let decoded: Vec<Value> = (0..data_types.len())
                .map(|pos| encoder.decode_value_at(&result, pos, &mut offsets).unwrap())
                .collect();
            assert_eq!(data_types.len(), offsets.len(), "offsets: {offsets:?}");
            assert_eq!(decoded, row);
        }
    }

    #[test]
    fn test_memcmp() {
        let fields = vec![
            (0, SortField::new(ConcreteDataType::string_datatype())),
            (1, SortField::new(ConcreteDataType::int64_datatype())),
        ];
        let encoder = DensePrimaryKeyCodec::with_fields(fields);

        let values = [Value::String("abcdefgh".into()), Value::Int64(128)];
        let result = encoder
            .encode(values.iter().map(|value| value.as_value_ref()))
            .unwrap();

        let decoded = encoder.decode(&result).unwrap().into_dense();
        assert_eq!(values.as_slice(), decoded.as_slice());
    }

    #[test]
    fn test_memcmp_timestamp() {
        let types = [
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::int64_datatype(),
        ];
        let row = vec![
            Value::Timestamp(Timestamp::new_millisecond(42)),
            Value::Int64(43),
        ];
        check_encode_and_decode(&types, row);
    }

    #[test]
    fn test_memcmp_duration() {
        let types = [
            ConcreteDataType::duration_millisecond_datatype(),
            ConcreteDataType::int64_datatype(),
        ];
        let row = vec![
            Value::Duration(Duration::new_millisecond(44)),
            Value::Int64(45),
        ];
        check_encode_and_decode(&types, row);
    }

    #[test]
    fn test_memcmp_binary() {
        let types = [
            ConcreteDataType::binary_datatype(),
            ConcreteDataType::int64_datatype(),
        ];
        let row = vec![
            Value::Binary(Bytes::from("hello".as_bytes())),
            Value::Int64(43),
        ];
        check_encode_and_decode(&types, row);
    }

    #[test]
    fn test_memcmp_string() {
        let types = [ConcreteDataType::string_datatype()];

        check_encode_and_decode(&types, vec![Value::String(StringBytes::from("hello"))]);
        // Null string.
        check_encode_and_decode(&types, vec![Value::Null]);
        // Empty string.
        check_encode_and_decode(&types, vec![Value::String("".into())]);
        check_encode_and_decode(&types, vec![Value::String("world".into())]);
    }

    #[test]
    fn test_encode_null() {
        let types = [
            ConcreteDataType::string_datatype(),
            ConcreteDataType::int32_datatype(),
        ];
        let row = vec![Value::String(StringBytes::from("abcd")), Value::Null];
        check_encode_and_decode(&types, row);
    }

    #[test]
    fn test_encoded_value_at() {
        let data_types = [
            ConcreteDataType::string_datatype(),
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::string_datatype(),
        ];
        let fields: Vec<_> = data_types
            .iter()
            .enumerate()
            .map(|(idx, data_type)| (idx as ColumnId, SortField::new(data_type.clone())))
            .collect();
        let encoder = DensePrimaryKeyCodec::with_fields(fields);

        let row = [Value::String("hello".into()), Value::Int32(42), Value::Null];
        let encoded_pk = encoder
            .encode(row.iter().map(|value| value.as_value_ref()))
            .unwrap();

        // Encodes a single value on its own with a fresh field of the type at `pos`.
        let encode_alone = |pos: usize, value: &Value| -> Vec<u8> {
            let field = SortField::new(data_types[pos].clone());
            let mut expected = Vec::new();
            {
                let mut serializer = Serializer::new(&mut expected);
                field
                    .serialize(&mut serializer, &value.as_value_ref())
                    .unwrap();
            }
            expected
        };

        // Forward pass: fills the offsets buffer; the pieces must add up to
        // the whole encoded key.
        let mut offsets = Vec::new();
        let mut combined = Vec::new();
        for (pos, value) in row.iter().enumerate() {
            let encoded_value = encoder
                .encoded_value_at(&encoded_pk, pos, &mut offsets)
                .unwrap();
            combined.extend_from_slice(encoded_value);
            assert_eq!(encoded_value, encode_alone(pos, value).as_slice());
        }
        assert_eq!(combined, encoded_pk);
        assert_eq!(offsets.len(), row.len());

        // Backward pass: every lookup is served from the cached offsets.
        for (pos, value) in row.iter().enumerate().rev() {
            let encoded_value = encoder
                .encoded_value_at(&encoded_pk, pos, &mut offsets)
                .unwrap();
            assert_eq!(encoded_value, encode_alone(pos, value).as_slice());
        }
    }

    #[test]
    fn test_memcmp_dictionary() {
        // Dictionary with int32 keys and string values.
        let string_dict = || {
            ConcreteDataType::dictionary_datatype(
                ConcreteDataType::int32_datatype(),
                ConcreteDataType::string_datatype(),
            )
        };

        check_encode_and_decode(&[string_dict()], vec![Value::String("hello".into())]);

        // Dictionary with int32 keys and int64 values.
        let int_dict = ConcreteDataType::dictionary_datatype(
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::int64_datatype(),
        );
        check_encode_and_decode(&[int_dict], vec![Value::Int64(42)]);

        // Null value in a dictionary column.
        check_encode_and_decode(&[string_dict()], vec![Value::Null]);

        // Several dictionary columns in one key.
        let mixed = [
            string_dict(),
            ConcreteDataType::dictionary_datatype(
                ConcreteDataType::int16_datatype(),
                ConcreteDataType::int64_datatype(),
            ),
        ];
        check_encode_and_decode(
            &mixed,
            vec![Value::String("world".into()), Value::Int64(123)],
        );
    }

    #[test]
    fn test_encode_multiple_rows() {
        let basic_types = [
            ConcreteDataType::string_datatype(),
            ConcreteDataType::int64_datatype(),
            ConcreteDataType::boolean_datatype(),
        ];

        let rows = [
            vec![
                Value::String("hello".into()),
                Value::Int64(42),
                Value::Boolean(false),
            ],
            vec![
                Value::String("world".into()),
                Value::Int64(43),
                Value::Boolean(true),
            ],
            vec![Value::Null, Value::Int64(43), Value::Boolean(true)],
        ];
        for row in rows {
            check_encode_and_decode(&basic_types, row);
        }

        // One column of every supported type.
        let all_types = [
            ConcreteDataType::boolean_datatype(),
            ConcreteDataType::int8_datatype(),
            ConcreteDataType::uint8_datatype(),
            ConcreteDataType::int16_datatype(),
            ConcreteDataType::uint16_datatype(),
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::uint32_datatype(),
            ConcreteDataType::int64_datatype(),
            ConcreteDataType::uint64_datatype(),
            ConcreteDataType::float32_datatype(),
            ConcreteDataType::float64_datatype(),
            ConcreteDataType::binary_datatype(),
            ConcreteDataType::string_datatype(),
            ConcreteDataType::date_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::time_millisecond_datatype(),
            ConcreteDataType::duration_millisecond_datatype(),
            ConcreteDataType::interval_year_month_datatype(),
            ConcreteDataType::interval_day_time_datatype(),
            ConcreteDataType::interval_month_day_nano_datatype(),
            ConcreteDataType::decimal128_default_datatype(),
            ConcreteDataType::vector_datatype(3),
            ConcreteDataType::dictionary_datatype(
                ConcreteDataType::int32_datatype(),
                ConcreteDataType::string_datatype(),
            ),
        ];
        let all_values = vec![
            Value::Boolean(true),
            Value::Int8(8),
            Value::UInt8(8),
            Value::Int16(16),
            Value::UInt16(16),
            Value::Int32(32),
            Value::UInt32(32),
            Value::Int64(64),
            Value::UInt64(64),
            Value::Float32(1.0.into()),
            Value::Float64(1.0.into()),
            Value::Binary(b"hello"[..].into()),
            Value::String("world".into()),
            Value::Date(Date::new(10)),
            Value::Timestamp(Timestamp::new_millisecond(12)),
            Value::Time(Time::new_millisecond(13)),
            Value::Duration(Duration::new_millisecond(14)),
            Value::IntervalYearMonth(IntervalYearMonth::new(1)),
            Value::IntervalDayTime(IntervalDayTime::new(1, 15)),
            Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 15)),
            Value::Decimal128(Decimal128::from(16)),
            Value::Binary(Bytes::from(vec![0; 12])),
            Value::String("dict_value".into()),
        ];
        check_encode_and_decode(&all_types, all_values);
    }
}