1use std::collections::{HashMap, HashSet};
16use std::sync::Arc;
17
18use bytes::BufMut;
19use common_recordbatch::filter::SimpleFilterEvaluator;
20use datatypes::prelude::ConcreteDataType;
21use datatypes::value::{Value, ValueRef};
22use memcomparable::{Deserializer, Serializer};
23use serde::{Deserialize, Serialize};
24use snafu::ResultExt;
25use store_api::codec::PrimaryKeyEncoding;
26use store_api::metadata::RegionMetadataRef;
27use store_api::storage::ColumnId;
28use store_api::storage::consts::ReservedColumnId;
29
30use crate::error::{DeserializeFieldSnafu, Result, SerializeFieldSnafu, UnsupportedOperationSnafu};
31use crate::key_values::KeyValue;
32use crate::primary_key_filter::SparsePrimaryKeyFilter;
33use crate::row_converter::dense::SortField;
34use crate::row_converter::{CompositeValues, PrimaryKeyCodec, PrimaryKeyFilter};
35
36#[derive(Clone, Debug)]
40pub struct SparsePrimaryKeyCodec {
41 inner: Arc<SparsePrimaryKeyCodecInner>,
42}
43
44#[derive(Debug)]
45struct SparsePrimaryKeyCodecInner {
46 table_id_field: SortField,
48 tsid_field: SortField,
50 label_field: SortField,
52 columns: Option<HashSet<ColumnId>>,
56}
57
58#[derive(Debug, Clone, PartialEq, Eq)]
62pub struct SparseValues {
63 values: HashMap<ColumnId, Value>,
64}
65
66impl SparseValues {
67 pub fn new(values: HashMap<ColumnId, Value>) -> Self {
69 Self { values }
70 }
71
72 pub fn get_or_null(&self, column_id: ColumnId) -> &Value {
74 self.values.get(&column_id).unwrap_or(&Value::Null)
75 }
76
77 pub fn get(&self, column_id: &ColumnId) -> Option<&Value> {
79 self.values.get(column_id)
80 }
81
82 pub fn insert(&mut self, column_id: ColumnId, value: Value) {
84 self.values.insert(column_id, value);
85 }
86
87 pub fn iter(&self) -> impl Iterator<Item = (&ColumnId, &Value)> {
89 self.values.iter()
90 }
91}
92
93pub const RESERVED_COLUMN_ID_TSID: ColumnId = ReservedColumnId::tsid();
95pub const RESERVED_COLUMN_ID_TABLE_ID: ColumnId = ReservedColumnId::table_id();
97pub const COLUMN_ID_ENCODE_SIZE: usize = 4;
99
100impl SparsePrimaryKeyCodec {
101 pub fn from_columns(columns_ids: impl Iterator<Item = ColumnId>) -> Self {
103 let columns = columns_ids.collect();
104 Self {
105 inner: Arc::new(SparsePrimaryKeyCodecInner {
106 table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
107 tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
108 label_field: SortField::new(ConcreteDataType::string_datatype()),
109 columns: Some(columns),
110 }),
111 }
112 }
113
114 pub fn new(region_metadata: &RegionMetadataRef) -> Self {
116 Self::from_columns(region_metadata.primary_key_columns().map(|c| c.column_id))
117 }
118
119 pub fn schemaless() -> Self {
123 Self {
124 inner: Arc::new(SparsePrimaryKeyCodecInner {
125 table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
126 tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
127 label_field: SortField::new(ConcreteDataType::string_datatype()),
128 columns: None,
129 }),
130 }
131 }
132
133 pub fn with_fields(fields: Vec<(ColumnId, SortField)>) -> Self {
135 Self {
136 inner: Arc::new(SparsePrimaryKeyCodecInner {
137 columns: Some(fields.iter().map(|f| f.0).collect()),
138 table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
139 tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
140 label_field: SortField::new(ConcreteDataType::string_datatype()),
141 }),
142 }
143 }
144
145 fn get_field(&self, column_id: ColumnId) -> Option<&SortField> {
147 if let Some(columns) = &self.inner.columns
149 && !columns.contains(&column_id)
150 {
151 return None;
152 }
153
154 match column_id {
155 RESERVED_COLUMN_ID_TABLE_ID => Some(&self.inner.table_id_field),
156 RESERVED_COLUMN_ID_TSID => Some(&self.inner.tsid_field),
157 _ => Some(&self.inner.label_field),
158 }
159 }
160
161 pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
163 where
164 I: Iterator<Item = (ColumnId, ValueRef<'a>)>,
165 {
166 let mut serializer = Serializer::new(buffer);
167 for (column_id, value) in row {
168 if value.is_null() {
169 continue;
170 }
171
172 if let Some(field) = self.get_field(column_id) {
173 column_id
174 .serialize(&mut serializer)
175 .context(SerializeFieldSnafu)?;
176 field.serialize(&mut serializer, &value)?;
177 } else {
178 common_telemetry::warn!("Column {} is not in primary key, skipping", column_id);
180 }
181 }
182 Ok(())
183 }
184
185 pub fn encode_raw_tag_value<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
186 where
187 I: Iterator<Item = (ColumnId, &'a [u8])>,
188 {
189 for (tag_column_id, tag_value) in row {
190 let value_len = tag_value.len();
191 buffer.reserve(6 + value_len / 8 * 9);
192 buffer.put_u32(tag_column_id);
193 buffer.put_u8(1);
194 buffer.put_u8(!tag_value.is_empty() as u8);
195
196 let mut len = 0;
199 let num_chucks = value_len / 8;
200 let remainder = value_len % 8;
201
202 for idx in 0..num_chucks {
203 buffer.extend_from_slice(&tag_value[idx * 8..idx * 8 + 8]);
204 len += 8;
205 let extra = if len == value_len { 8 } else { 9 };
209 buffer.put_u8(extra);
210 }
211
212 if remainder != 0 {
213 buffer.extend_from_slice(&tag_value[len..value_len]);
214 buffer.put_bytes(0, 8 - remainder);
215 buffer.put_u8(remainder as u8);
216 }
217 }
218 Ok(())
219 }
220
221 pub fn encode_internal(&self, table_id: u32, tsid: u64, buffer: &mut Vec<u8>) -> Result<()> {
223 buffer.reserve_exact(22);
224 buffer.put_u32(RESERVED_COLUMN_ID_TABLE_ID);
225 buffer.put_u8(1);
226 buffer.put_u32(table_id);
227 buffer.put_u32(RESERVED_COLUMN_ID_TSID);
228 buffer.put_u8(1);
229 buffer.put_u64(tsid);
230 Ok(())
231 }
232
233 fn decode_sparse(&self, bytes: &[u8]) -> Result<SparseValues> {
235 let mut deserializer = Deserializer::new(bytes);
236 let mut values = SparseValues::new(HashMap::new());
237
238 let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
239 let value = self.inner.table_id_field.deserialize(&mut deserializer)?;
240 values.insert(column_id, value);
241
242 let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
243 let value = self.inner.tsid_field.deserialize(&mut deserializer)?;
244 values.insert(column_id, value);
245 while deserializer.has_remaining() {
246 let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
247 let value = self.inner.label_field.deserialize(&mut deserializer)?;
248 values.insert(column_id, value);
249 }
250
251 Ok(values)
252 }
253
254 fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
256 let mut deserializer = Deserializer::new(bytes);
257 deserializer.advance(COLUMN_ID_ENCODE_SIZE);
259 let value = self.inner.table_id_field.deserialize(&mut deserializer)?;
260 Ok(Some(value))
261 }
262
263 pub fn has_column(
265 &self,
266 pk: &[u8],
267 offsets_map: &mut HashMap<u32, usize>,
268 column_id: ColumnId,
269 ) -> Option<usize> {
270 if offsets_map.is_empty() {
271 let mut deserializer = Deserializer::new(pk);
272 let mut offset = 0;
273 while deserializer.has_remaining() {
274 let column_id = u32::deserialize(&mut deserializer).unwrap();
275 offset += 4;
276 offsets_map.insert(column_id, offset);
277 let Some(field) = self.get_field(column_id) else {
278 break;
279 };
280
281 let skip = field.skip_deserialize(pk, &mut deserializer).unwrap();
282 offset += skip;
283 }
284
285 offsets_map.get(&column_id).copied()
286 } else {
287 offsets_map.get(&column_id).copied()
288 }
289 }
290
291 pub fn decode_value_at(&self, pk: &[u8], offset: usize, column_id: ColumnId) -> Result<Value> {
293 let mut deserializer = Deserializer::new(pk);
294 deserializer.advance(offset);
295 let field = self.get_field(column_id).unwrap();
297 field.deserialize(&mut deserializer)
298 }
299}
300
301impl PrimaryKeyCodec for SparsePrimaryKeyCodec {
302 fn encode_key_value(&self, _key_value: &KeyValue, _buffer: &mut Vec<u8>) -> Result<()> {
303 UnsupportedOperationSnafu {
304 err_msg: "The encode_key_value method is not supported in SparsePrimaryKeyCodec.",
305 }
306 .fail()
307 }
308
309 fn encode_values(&self, values: &[(ColumnId, Value)], buffer: &mut Vec<u8>) -> Result<()> {
310 self.encode_to_vec(values.iter().map(|v| (v.0, v.1.as_value_ref())), buffer)
311 }
312
313 fn encode_value_refs(
314 &self,
315 values: &[(ColumnId, ValueRef)],
316 buffer: &mut Vec<u8>,
317 ) -> Result<()> {
318 self.encode_to_vec(values.iter().map(|v| (v.0, v.1.clone())), buffer)
319 }
320
321 fn estimated_size(&self) -> Option<usize> {
322 None
323 }
324
325 fn num_fields(&self) -> Option<usize> {
326 None
327 }
328
329 fn encoding(&self) -> PrimaryKeyEncoding {
330 PrimaryKeyEncoding::Sparse
331 }
332
333 fn primary_key_filter(
334 &self,
335 metadata: &RegionMetadataRef,
336 filters: Arc<Vec<SimpleFilterEvaluator>>,
337 ) -> Box<dyn PrimaryKeyFilter> {
338 Box::new(SparsePrimaryKeyFilter::new(
339 metadata.clone(),
340 filters,
341 self.clone(),
342 ))
343 }
344
345 fn decode(&self, bytes: &[u8]) -> Result<CompositeValues> {
346 Ok(CompositeValues::Sparse(self.decode_sparse(bytes)?))
347 }
348
349 fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
350 self.decode_leftmost(bytes)
351 }
352}
353
354pub struct FieldWithId {
356 pub field: SortField,
357 pub column_id: ColumnId,
358}
359
360pub struct SparseEncoder {
362 fields: Vec<FieldWithId>,
363}
364
365impl SparseEncoder {
366 pub fn new(fields: Vec<FieldWithId>) -> Self {
367 Self { fields }
368 }
369
370 pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
371 where
372 I: Iterator<Item = ValueRef<'a>>,
373 {
374 let mut serializer = Serializer::new(buffer);
375 for (value, field) in row.zip(self.fields.iter()) {
376 if !value.is_null() {
377 field
378 .column_id
379 .serialize(&mut serializer)
380 .context(SerializeFieldSnafu)?;
381 field.field.serialize(&mut serializer, &value)?;
382 }
383 }
384 Ok(())
385 }
386}
387
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use api::v1::SemanticType;
    use common_query::prelude::{greptime_timestamp, greptime_value};
    use common_time::Timestamp;
    use common_time::timestamp::TimeUnit;
    use datatypes::schema::ColumnSchema;
    use datatypes::value::{OrderedFloat, Value};
    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
    use store_api::metric_engine_consts::{
        DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
    };
    use store_api::storage::{ColumnId, RegionId};

    use super::*;

    /// Primary key columns of the test region, in key order.
    const PRIMARY_KEY_COLUMN_IDS: [ColumnId; 7] = [
        RESERVED_COLUMN_ID_TABLE_ID,
        RESERVED_COLUMN_ID_TSID,
        1,
        2,
        3,
        4,
        5,
    ];

    /// Builds region metadata with the two reserved columns, five string tags
    /// (ids 1..=5), one float field (id 6) and a timestamp column (id 7).
    fn test_region_metadata() -> RegionMetadataRef {
        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
        builder
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
                    ConcreteDataType::uint32_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Tag,
                column_id: ReservedColumnId::table_id(),
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    DATA_SCHEMA_TSID_COLUMN_NAME,
                    ConcreteDataType::uint64_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Tag,
                column_id: ReservedColumnId::tsid(),
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new("pod", ConcreteDataType::string_datatype(), true),
                semantic_type: SemanticType::Tag,
                column_id: 1,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "namespace",
                    ConcreteDataType::string_datatype(),
                    true,
                ),
                semantic_type: SemanticType::Tag,
                column_id: 2,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "container",
                    ConcreteDataType::string_datatype(),
                    true,
                ),
                semantic_type: SemanticType::Tag,
                column_id: 3,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "pod_name",
                    ConcreteDataType::string_datatype(),
                    true,
                ),
                semantic_type: SemanticType::Tag,
                column_id: 4,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "pod_ip",
                    ConcreteDataType::string_datatype(),
                    true,
                ),
                semantic_type: SemanticType::Tag,
                column_id: 5,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    greptime_value(),
                    ConcreteDataType::float64_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Field,
                column_id: 6,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    greptime_timestamp(),
                    ConcreteDataType::timestamp_nanosecond_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Timestamp,
                column_id: 7,
            })
            .primary_key(vec![
                ReservedColumnId::table_id(),
                ReservedColumnId::tsid(),
                1,
                2,
                3,
                4,
                5,
            ]);
        let metadata = builder.build().unwrap();
        Arc::new(metadata)
    }

    #[test]
    fn test_sparse_value_new_and_get_or_null() {
        let mut values = HashMap::new();
        values.insert(1, Value::Int32(42));
        let sparse_value = SparseValues::new(values);

        assert_eq!(sparse_value.get_or_null(1), &Value::Int32(42));
        assert_eq!(sparse_value.get_or_null(2), &Value::Null);
    }

    #[test]
    fn test_sparse_value_insert() {
        let mut sparse_value = SparseValues::new(HashMap::new());
        sparse_value.insert(1, Value::Int32(42));

        assert_eq!(sparse_value.get_or_null(1), &Value::Int32(42));
    }

    /// A full row of the test region, including the non-key columns 6 and 7.
    fn test_row() -> Vec<(ColumnId, ValueRef<'static>)> {
        vec![
            (RESERVED_COLUMN_ID_TABLE_ID, ValueRef::UInt32(42)),
            (
                RESERVED_COLUMN_ID_TSID,
                ValueRef::UInt64(123843349035232323),
            ),
            (1, ValueRef::String("greptime-frontend-6989d9899-22222")),
            (2, ValueRef::String("greptime-cluster")),
            (3, ValueRef::String("greptime-frontend-6989d9899-22222")),
            (4, ValueRef::String("greptime-frontend-6989d9899-22222")),
            (5, ValueRef::String("10.10.10.10")),
            (6, ValueRef::Float64(OrderedFloat(1.0))),
            (
                7,
                ValueRef::Timestamp(Timestamp::new(1618876800000000000, TimeUnit::Nanosecond)),
            ),
        ]
    }

    /// Builds a codec over the test metadata and encodes [`test_row`] with it.
    fn codec_and_encoded_row() -> (SparsePrimaryKeyCodec, Vec<u8>) {
        let region_metadata = test_region_metadata();
        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
        let mut buffer = Vec::new();
        codec
            .encode_to_vec(test_row().into_iter(), &mut buffer)
            .unwrap();
        assert!(!buffer.is_empty());
        (codec, buffer)
    }

    /// The raw shortcuts must produce the same bytes as the serializer path.
    #[test]
    fn test_encode_by_short_cuts() {
        let region_metadata = test_region_metadata();
        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
        let mut buffer = Vec::new();
        let internal_columns = [
            (RESERVED_COLUMN_ID_TABLE_ID, ValueRef::UInt32(1024)),
            (RESERVED_COLUMN_ID_TSID, ValueRef::UInt64(42)),
        ];
        let tags = [
            (1, "greptime-frontend-6989d9899-22222"),
            (2, "greptime-cluster"),
            (3, "greptime-frontend-6989d9899-22222"),
            (4, "greptime-frontend-6989d9899-22222"),
            (5, "10.10.10.10"),
        ];
        codec
            .encode_to_vec(internal_columns.into_iter(), &mut buffer)
            .unwrap();
        codec
            .encode_to_vec(
                tags.iter()
                    .map(|(col_id, tag_value)| (*col_id, ValueRef::String(tag_value))),
                &mut buffer,
            )
            .unwrap();

        let mut buffer_by_raw_encoding = Vec::new();
        codec
            .encode_internal(1024, 42, &mut buffer_by_raw_encoding)
            .unwrap();
        let tags: Vec<_> = tags
            .into_iter()
            .map(|(col_id, tag_value)| (col_id, tag_value.as_bytes()))
            .collect();
        codec
            .encode_raw_tag_value(
                tags.iter().map(|(c, b)| (*c, *b)),
                &mut buffer_by_raw_encoding,
            )
            .unwrap();
        assert_eq!(buffer, buffer_by_raw_encoding);
    }

    #[test]
    fn test_encode_to_vec() {
        let (codec, buffer) = codec_and_encoded_row();

        let sparse_value = codec.decode_sparse(&buffer).unwrap();
        assert_eq!(
            sparse_value.get_or_null(RESERVED_COLUMN_ID_TABLE_ID),
            &Value::UInt32(42)
        );
        assert_eq!(
            sparse_value.get_or_null(1),
            &Value::String("greptime-frontend-6989d9899-22222".into())
        );
        assert_eq!(
            sparse_value.get_or_null(2),
            &Value::String("greptime-cluster".into())
        );
        assert_eq!(
            sparse_value.get_or_null(3),
            &Value::String("greptime-frontend-6989d9899-22222".into())
        );
        assert_eq!(
            sparse_value.get_or_null(4),
            &Value::String("greptime-frontend-6989d9899-22222".into())
        );
        assert_eq!(
            sparse_value.get_or_null(5),
            &Value::String("10.10.10.10".into())
        );
    }

    #[test]
    fn test_decode_leftmost() {
        let (codec, buffer) = codec_and_encoded_row();

        let result = codec.decode_leftmost(&buffer).unwrap().unwrap();
        assert_eq!(result, Value::UInt32(42));
    }

    #[test]
    fn test_has_column() {
        let (codec, buffer) = codec_and_encoded_row();

        let mut offsets_map = HashMap::new();
        for column_id in PRIMARY_KEY_COLUMN_IDS {
            let offset = codec.has_column(&buffer, &mut offsets_map, column_id);
            assert!(offset.is_some());
        }

        // Column 6 is a field, not part of the primary key, so it is never encoded.
        let offset = codec.has_column(&buffer, &mut offsets_map, 6);
        assert!(offset.is_none());
    }

    #[test]
    fn test_decode_value_at() {
        let (codec, buffer) = codec_and_encoded_row();

        let row = test_row();
        let mut offsets_map = HashMap::new();
        for column_id in PRIMARY_KEY_COLUMN_IDS {
            let offset = codec
                .has_column(&buffer, &mut offsets_map, column_id)
                .unwrap();
            let value = codec.decode_value_at(&buffer, offset, column_id).unwrap();
            let expected_value = row
                .iter()
                .find(|(id, _)| *id == column_id)
                .unwrap()
                .1
                .clone();
            assert_eq!(value.as_value_ref(), expected_value);
        }
    }
}