1use std::collections::{BTreeMap, BTreeSet};
16use std::ops::Range;
17
18use index::inverted_index::search::index_apply::ApplyOutput;
19use itertools::Itertools;
20use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
21
22#[derive(Debug, Clone, Default)]
24pub struct RowGroupSelection {
25 selection_in_rg: BTreeMap<usize, RowSelectionWithCount>,
27 row_count: usize,
29 selector_len: usize,
31}
32
33#[derive(Debug, Clone, Default)]
35struct RowSelectionWithCount {
36 selection: RowSelection,
38 row_count: usize,
40 selector_len: usize,
42}
43
44impl RowGroupSelection {
45 pub fn new(row_group_size: usize, total_row_count: usize) -> Self {
51 let mut selection_in_rg = BTreeMap::new();
52
53 let row_group_count = total_row_count.div_ceil(row_group_size);
54 for rg_id in 0..row_group_count {
55 let row_group_size = if rg_id == row_group_count - 1 {
57 total_row_count - (row_group_count - 1) * row_group_size
58 } else {
59 row_group_size
60 };
61
62 let selection = RowSelection::from(vec![RowSelector::select(row_group_size)]);
63 selection_in_rg.insert(
64 rg_id,
65 RowSelectionWithCount {
66 selection,
67 row_count: row_group_size,
68 selector_len: 1,
69 },
70 );
71 }
72
73 Self {
74 selection_in_rg,
75 row_count: total_row_count,
76 selector_len: row_group_count,
77 }
78 }
79
80 pub fn from_full_row_group_ids<I>(
85 row_group_ids: I,
86 row_group_size: usize,
87 total_row_count: usize,
88 ) -> Self
89 where
90 I: IntoIterator<Item = usize>,
91 {
92 if row_group_size == 0 || total_row_count == 0 {
93 return Self::default();
94 }
95
96 let row_group_count = total_row_count.div_ceil(row_group_size);
97 if row_group_count == 0 {
98 return Self::default();
99 }
100
101 let last_row_group_size = total_row_count - (row_group_count - 1) * row_group_size;
102
103 let mut selection_in_rg = BTreeMap::new();
104 let mut row_count = 0usize;
105 let mut selector_len = 0usize;
106
107 for rg_id in row_group_ids {
108 if rg_id >= row_group_count {
109 continue;
110 }
111
112 let rg_row_count = if rg_id == row_group_count - 1 {
113 last_row_group_size
114 } else {
115 row_group_size
116 };
117
118 let selection = RowSelection::from(vec![RowSelector::select(rg_row_count)]);
119 if selection_in_rg
120 .insert(
121 rg_id,
122 RowSelectionWithCount {
123 selection,
124 row_count: rg_row_count,
125 selector_len: 1,
126 },
127 )
128 .is_none()
129 {
130 row_count += rg_row_count;
131 selector_len += 1;
132 }
133 }
134
135 Self {
136 selection_in_rg,
137 row_count,
138 selector_len,
139 }
140 }
141
142 pub fn get(&self, rg_id: usize) -> Option<&RowSelection> {
146 self.selection_in_rg.get(&rg_id).map(|x| &x.selection)
147 }
148
149 pub fn from_inverted_index_apply_output(
159 row_group_size: usize,
160 num_row_groups: usize,
161 apply_output: ApplyOutput,
162 ) -> Self {
163 let segment_row_count = apply_output.segment_row_count;
166 let row_group_ranges = apply_output.matched_segment_ids.iter_ones().map(|seg_id| {
167 let begin_row_id = seg_id * segment_row_count;
169 let row_group_id = begin_row_id / row_group_size;
171 let rg_begin_row_id = begin_row_id % row_group_size;
173 let rg_end_row_id = (rg_begin_row_id + segment_row_count).min(row_group_size);
175
176 (row_group_id, rg_begin_row_id..rg_end_row_id)
177 });
178
179 let mut total_row_count = 0;
181 let mut total_selector_len = 0;
182 let mut selection_in_rg = row_group_ranges
183 .chunk_by(|(row_group_id, _)| *row_group_id)
184 .into_iter()
185 .map(|(row_group_id, group)| {
186 let ranges = group.map(|(_, ranges)| ranges);
188 let selection = row_selection_from_row_ranges(ranges, row_group_size);
194 let row_count = selection.row_count();
195 let selector_len = selector_len(&selection);
196 total_row_count += row_count;
197 total_selector_len += selector_len;
198 (
199 row_group_id,
200 RowSelectionWithCount {
201 selection,
202 row_count,
203 selector_len,
204 },
205 )
206 })
207 .collect::<BTreeMap<_, _>>();
208
209 Self::fill_missing_row_groups(&mut selection_in_rg, num_row_groups);
210
211 Self {
212 selection_in_rg,
213 row_count: total_row_count,
214 selector_len: total_selector_len,
215 }
216 }
217
218 pub fn from_row_ids(
230 row_ids: BTreeSet<u32>,
231 row_group_size: usize,
232 num_row_groups: usize,
233 ) -> Self {
234 let row_group_to_row_ids =
236 Self::group_row_ids_by_row_group(row_ids, row_group_size, num_row_groups);
237
238 let mut total_row_count = 0;
240 let mut total_selector_len = 0;
241 let mut selection_in_rg = row_group_to_row_ids
242 .into_iter()
243 .map(|(row_group_id, row_ids)| {
244 let selection =
245 row_selection_from_sorted_row_ids(row_ids.into_iter(), row_group_size);
246 let row_count = selection.row_count();
247 let selector_len = selector_len(&selection);
248 total_row_count += row_count;
249 total_selector_len += selector_len;
250 (
251 row_group_id,
252 RowSelectionWithCount {
253 selection,
254 row_count,
255 selector_len,
256 },
257 )
258 })
259 .collect::<BTreeMap<_, _>>();
260
261 Self::fill_missing_row_groups(&mut selection_in_rg, num_row_groups);
262
263 Self {
264 selection_in_rg,
265 row_count: total_row_count,
266 selector_len: total_selector_len,
267 }
268 }
269
270 pub fn from_row_ranges(
283 row_ranges: Vec<(usize, Vec<Range<usize>>)>,
284 row_group_size: usize,
285 ) -> Self {
286 let mut total_row_count = 0;
287 let mut total_selector_len = 0;
288 let selection_in_rg = row_ranges
289 .into_iter()
290 .map(|(row_group_id, ranges)| {
291 let selection = row_selection_from_row_ranges(ranges.into_iter(), row_group_size);
292 let row_count = selection.row_count();
293 let selector_len = selector_len(&selection);
294 total_row_count += row_count;
295 total_selector_len += selector_len;
296 (
297 row_group_id,
298 RowSelectionWithCount {
299 selection,
300 row_count,
301 selector_len,
302 },
303 )
304 })
305 .collect();
306
307 Self {
308 selection_in_rg,
309 row_count: total_row_count,
310 selector_len: total_selector_len,
311 }
312 }
313
314 fn group_row_ids_by_row_group(
324 row_ids: BTreeSet<u32>,
325 row_group_size: usize,
326 num_row_groups: usize,
327 ) -> Vec<(usize, Vec<usize>)> {
328 let est_rows_per_group = row_ids.len() / num_row_groups;
329 let mut row_group_to_row_ids: Vec<(usize, Vec<usize>)> = Vec::with_capacity(num_row_groups);
330
331 for row_id in row_ids {
332 let row_group_id = row_id as usize / row_group_size;
333 let row_id_in_group = row_id as usize % row_group_size;
334
335 if let Some((rg_id, row_ids)) = row_group_to_row_ids.last_mut()
336 && *rg_id == row_group_id
337 {
338 row_ids.push(row_id_in_group);
339 } else {
340 let mut row_ids = Vec::with_capacity(est_rows_per_group);
341 row_ids.push(row_id_in_group);
342 row_group_to_row_ids.push((row_group_id, row_ids));
343 }
344 }
345
346 row_group_to_row_ids
347 }
348
349 pub fn intersect(&self, other: &Self) -> Self {
351 let mut res = BTreeMap::new();
352 let mut total_row_count = 0;
353 let mut total_selector_len = 0;
354
355 for (rg_id, x) in other.selection_in_rg.iter() {
356 let Some(y) = self.selection_in_rg.get(rg_id) else {
357 continue;
358 };
359 let selection = intersect_row_selections(&x.selection, &y.selection);
360 let row_count = selection.row_count();
361 let selector_len = selector_len(&selection);
362 if row_count > 0 {
363 total_row_count += row_count;
364 total_selector_len += selector_len;
365 res.insert(
366 *rg_id,
367 RowSelectionWithCount {
368 selection,
369 row_count,
370 selector_len,
371 },
372 );
373 }
374 }
375
376 Self {
377 selection_in_rg: res,
378 row_count: total_row_count,
379 selector_len: total_selector_len,
380 }
381 }
382
383 pub fn row_group_count(&self) -> usize {
385 self.selection_in_rg.len()
386 }
387
388 pub fn row_count(&self) -> usize {
390 self.row_count
391 }
392
393 pub fn pop_first(&mut self) -> Option<(usize, RowSelection)> {
397 while let Some((
398 row_group_id,
399 RowSelectionWithCount {
400 selection,
401 row_count,
402 selector_len,
403 },
404 )) = self.selection_in_rg.pop_first()
405 {
406 if row_count > 0 {
407 self.row_count -= row_count;
408 self.selector_len -= selector_len;
409 return Some((row_group_id, selection));
410 }
411 }
412
413 None
414 }
415
416 pub fn remove_row_group(&mut self, row_group_id: usize) {
418 let Some(RowSelectionWithCount {
419 row_count,
420 selector_len,
421 ..
422 }) = self.selection_in_rg.remove(&row_group_id)
423 else {
424 return;
425 };
426 self.row_count -= row_count;
427 self.selector_len -= selector_len;
428 }
429
430 pub fn is_empty(&self) -> bool {
432 self.selection_in_rg.is_empty()
433 }
434
435 pub fn contains_row_group(&self, row_group_id: usize) -> bool {
437 self.selection_in_rg.contains_key(&row_group_id)
438 }
439
440 pub fn contains_non_empty_row_group(&self, row_group_id: usize) -> bool {
442 self.selection_in_rg
443 .get(&row_group_id)
444 .map(|r| r.row_count > 0)
445 .unwrap_or(false)
446 }
447
448 pub fn iter(&self) -> impl Iterator<Item = (&usize, &RowSelection)> {
450 self.selection_in_rg
451 .iter()
452 .map(|(row_group_id, x)| (row_group_id, &x.selection))
453 }
454
455 pub fn mem_usage(&self) -> usize {
457 self.selector_len * size_of::<RowSelector>()
458 + self.selection_in_rg.len() * size_of::<RowSelectionWithCount>()
459 }
460
461 pub fn concat(&mut self, other: &Self) {
465 for (rg_id, other_rs) in other.selection_in_rg.iter() {
466 if self.selection_in_rg.contains_key(rg_id) {
467 panic!("row group {} is already in `self`", rg_id);
468 }
469
470 self.selection_in_rg.insert(*rg_id, other_rs.clone());
471 self.row_count += other_rs.row_count;
472 self.selector_len += other_rs.selector_len;
473 }
474 }
475
476 fn fill_missing_row_groups(
479 selection_in_rg: &mut BTreeMap<usize, RowSelectionWithCount>,
480 num_row_groups: usize,
481 ) {
482 for rg_id in 0..num_row_groups {
483 selection_in_rg.entry(rg_id).or_default();
484 }
485 }
486}
487
488fn intersect_row_selections(left: &RowSelection, right: &RowSelection) -> RowSelection {
498 let mut l_iter = left.iter().copied().peekable();
499 let mut r_iter = right.iter().copied().peekable();
500
501 let iter = std::iter::from_fn(move || {
502 loop {
503 let l = l_iter.peek_mut();
504 let r = r_iter.peek_mut();
505
506 match (l, r) {
507 (Some(a), _) if a.row_count == 0 => {
508 l_iter.next().unwrap();
509 }
510 (_, Some(b)) if b.row_count == 0 => {
511 r_iter.next().unwrap();
512 }
513 (Some(l), Some(r)) => {
514 return match (l.skip, r.skip) {
515 (false, false) => {
517 if l.row_count < r.row_count {
518 r.row_count -= l.row_count;
519 l_iter.next()
520 } else {
521 l.row_count -= r.row_count;
522 r_iter.next()
523 }
524 }
525 _ => {
527 if l.row_count < r.row_count {
528 let skip = l.row_count;
529 r.row_count -= l.row_count;
530 l_iter.next();
531 Some(RowSelector::skip(skip))
532 } else {
533 let skip = r.row_count;
534 l.row_count -= skip;
535 r_iter.next();
536 Some(RowSelector::skip(skip))
537 }
538 }
539 };
540 }
541 (None, _) => return None,
542 (_, None) => return None,
543 }
544 }
545 });
546
547 iter.collect()
548}
549
550pub(crate) fn row_selection_from_row_ranges(
563 row_ranges: impl Iterator<Item = Range<usize>>,
564 total_row_count: usize,
565) -> RowSelection {
566 let (selectors, _) = build_selectors_from_row_ranges(row_ranges, total_row_count);
567 RowSelection::from(selectors)
568}
569
570pub(crate) fn row_selection_from_row_ranges_exact(
576 row_ranges: impl Iterator<Item = Range<usize>>,
577 total_row_count: usize,
578) -> RowSelection {
579 let (mut selectors, last_processed_end) =
580 build_selectors_from_row_ranges(row_ranges, total_row_count);
581 if last_processed_end < total_row_count {
582 add_or_merge_selector(&mut selectors, total_row_count - last_processed_end, true);
586 }
587 RowSelection::from(selectors)
588}
589
590fn build_selectors_from_row_ranges(
591 row_ranges: impl Iterator<Item = Range<usize>>,
592 total_row_count: usize,
593) -> (Vec<RowSelector>, usize) {
594 let mut selectors: Vec<RowSelector> = Vec::new();
595 let mut last_processed_end = 0;
596
597 for Range { start, end } in row_ranges {
598 let end = end.min(total_row_count);
599 if start > last_processed_end {
600 add_or_merge_selector(&mut selectors, start - last_processed_end, true);
601 }
602
603 add_or_merge_selector(&mut selectors, end - start, false);
604 last_processed_end = end;
605 }
606
607 (selectors, last_processed_end)
608}
609
610pub(crate) fn row_selection_from_sorted_row_ids(
615 row_ids: impl Iterator<Item = usize>,
616 total_row_count: usize,
617) -> RowSelection {
618 let mut selectors: Vec<RowSelector> = Vec::new();
619 let mut last_processed_end = 0;
620
621 for row_id in row_ids {
622 let start = row_id;
623 let end = start + 1;
624
625 if start > last_processed_end {
626 add_or_merge_selector(&mut selectors, start - last_processed_end, true);
627 }
628
629 add_or_merge_selector(&mut selectors, end - start, false);
630 last_processed_end = end;
631 }
632
633 if last_processed_end < total_row_count {
634 add_or_merge_selector(&mut selectors, total_row_count - last_processed_end, true);
635 }
636
637 RowSelection::from(selectors)
638}
639
640fn add_or_merge_selector(selectors: &mut Vec<RowSelector>, count: usize, is_skip: bool) {
643 if let Some(last) = selectors.last_mut() {
644 if last.skip == is_skip {
646 last.row_count += count;
647 return;
648 }
649 }
650 let new_selector = if is_skip {
652 RowSelector::skip(count)
653 } else {
654 RowSelector::select(count)
655 };
656 selectors.push(new_selector);
657}
658
659fn selector_len(selection: &RowSelection) -> usize {
661 selection.iter().size_hint().0
662}
663
664#[cfg(test)]
665#[allow(clippy::single_range_in_vec_init)]
666mod tests {
667 use super::*;
668
669 #[test]
670 fn test_selector_len() {
671 let selection = RowSelection::from(vec![RowSelector::skip(5), RowSelector::select(5)]);
672 assert_eq!(selector_len(&selection), 2);
673
674 let selection = RowSelection::from(vec![
675 RowSelector::select(5),
676 RowSelector::skip(5),
677 RowSelector::select(5),
678 ]);
679 assert_eq!(selector_len(&selection), 3);
680
681 let selection = RowSelection::from(vec![]);
682 assert_eq!(selector_len(&selection), 0);
683 }
684
685 #[test]
686 fn test_single_contiguous_range() {
687 let selection = row_selection_from_row_ranges(Some(5..10).into_iter(), 10);
688 let expected = RowSelection::from(vec![RowSelector::skip(5), RowSelector::select(5)]);
689 assert_eq!(selection, expected);
690 }
691
692 #[test]
693 fn test_non_contiguous_ranges() {
694 let ranges = [1..3, 5..8];
695 let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10);
696 let expected = RowSelection::from(vec![
697 RowSelector::skip(1),
698 RowSelector::select(2),
699 RowSelector::skip(2),
700 RowSelector::select(3),
701 ]);
702 assert_eq!(selection, expected);
703 }
704
705 #[test]
706 fn test_empty_range() {
707 let ranges = [];
708 let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10);
709 let expected = RowSelection::from(vec![]);
710 assert_eq!(selection, expected);
711 }
712
713 #[test]
714 fn test_adjacent_ranges() {
715 let ranges = [1..2, 2..3];
716 let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10);
717 let expected = RowSelection::from(vec![RowSelector::skip(1), RowSelector::select(2)]);
718 assert_eq!(selection, expected);
719 }
720
721 #[test]
722 fn test_large_gap_between_ranges() {
723 let ranges = [1..2, 100..101];
724 let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10240);
725 let expected = RowSelection::from(vec![
726 RowSelector::skip(1),
727 RowSelector::select(1),
728 RowSelector::skip(98),
729 RowSelector::select(1),
730 ]);
731 assert_eq!(selection, expected);
732 }
733
734 #[test]
735 fn test_range_end_over_total_row_count() {
736 let ranges = Some(1..10);
737 let selection = row_selection_from_row_ranges(ranges.into_iter(), 5);
738 let expected = RowSelection::from(vec![RowSelector::skip(1), RowSelector::select(4)]);
739 assert_eq!(selection, expected);
740 }
741
742 #[test]
743 fn test_exact_single_range_with_trailing_skip() {
744 let selection = row_selection_from_row_ranges_exact(Some(0..3).into_iter(), 6);
745 let expected = RowSelection::from(vec![RowSelector::select(3), RowSelector::skip(3)]);
746 assert_eq!(selection, expected);
747 assert_eq!(selection.row_count(), 3);
748 }
749
750 #[test]
751 fn test_exact_non_contiguous_ranges() {
752 let ranges = [1..3, 5..8];
753 let selection = row_selection_from_row_ranges_exact(ranges.iter().cloned(), 10);
754 let expected = RowSelection::from(vec![
755 RowSelector::skip(1),
756 RowSelector::select(2),
757 RowSelector::skip(2),
758 RowSelector::select(3),
759 RowSelector::skip(2),
760 ]);
761 assert_eq!(selection, expected);
762 assert_eq!(selection.row_count(), 5);
763 }
764
765 #[test]
766 fn test_exact_empty_ranges() {
767 let selection = row_selection_from_row_ranges_exact([].iter().cloned(), 10);
768 let expected = RowSelection::from(vec![RowSelector::skip(10)]);
769 assert_eq!(selection, expected);
770 assert_eq!(selection.row_count(), 0);
771 }
772
773 #[test]
774 fn test_exact_range_covers_all_rows() {
775 let selection = row_selection_from_row_ranges_exact(Some(0..10).into_iter(), 10);
776 let expected = RowSelection::from(vec![RowSelector::select(10)]);
777 assert_eq!(selection, expected);
778 assert_eq!(selection.row_count(), 10);
779 }
780
781 #[test]
782 fn test_exact_compatible_with_and_then() {
783 let outer = RowSelection::from(vec![RowSelector::select(6), RowSelector::skip(4)]);
785 let inner = row_selection_from_row_ranges_exact(Some(0..3).into_iter(), 6);
787 let result = outer.and_then(&inner);
788 let expected = RowSelection::from(vec![RowSelector::select(3), RowSelector::skip(7)]);
789 assert_eq!(result, expected);
790 }
791
792 #[test]
793 fn test_row_ids_to_selection() {
794 let row_ids = [1, 3, 5, 7, 9].into_iter();
795 let selection = row_selection_from_sorted_row_ids(row_ids, 10);
796 let expected = RowSelection::from(vec![
797 RowSelector::skip(1),
798 RowSelector::select(1),
799 RowSelector::skip(1),
800 RowSelector::select(1),
801 RowSelector::skip(1),
802 RowSelector::select(1),
803 RowSelector::skip(1),
804 RowSelector::select(1),
805 RowSelector::skip(1),
806 RowSelector::select(1),
807 ]);
808 assert_eq!(selection, expected);
809 }
810
811 #[test]
812 fn test_row_ids_to_selection_full() {
813 let row_ids = 0..10;
814 let selection = row_selection_from_sorted_row_ids(row_ids, 10);
815 let expected = RowSelection::from(vec![RowSelector::select(10)]);
816 assert_eq!(selection, expected);
817 }
818
819 #[test]
820 fn test_row_ids_to_selection_empty() {
821 let selection = row_selection_from_sorted_row_ids(None.into_iter(), 10);
822 let expected = RowSelection::from(vec![RowSelector::skip(10)]);
823 assert_eq!(selection, expected);
824 }
825
826 #[test]
827 fn test_group_row_ids() {
828 let row_ids = [0, 1, 2, 5, 6, 7, 8, 12].into_iter().collect();
829 let row_group_size = 5;
830 let num_row_groups = 3;
831
832 let row_group_to_row_ids =
833 RowGroupSelection::group_row_ids_by_row_group(row_ids, row_group_size, num_row_groups);
834
835 assert_eq!(
836 row_group_to_row_ids,
837 vec![(0, vec![0, 1, 2]), (1, vec![0, 1, 2, 3]), (2, vec![2])]
838 );
839 }
840
841 #[test]
842 fn test_row_group_selection_new() {
843 let selection = RowGroupSelection::new(100, 250);
845 assert_eq!(selection.row_count(), 250);
846 assert_eq!(selection.row_group_count(), 3);
847
848 let row_selection = selection.get(0).unwrap();
850 assert_eq!(row_selection.row_count(), 100);
851
852 let row_selection = selection.get(1).unwrap();
853 assert_eq!(row_selection.row_count(), 100);
854
855 let row_selection = selection.get(2).unwrap();
856 assert_eq!(row_selection.row_count(), 50);
857
858 let selection = RowGroupSelection::new(100, 0);
860 assert_eq!(selection.row_count(), 0);
861 assert_eq!(selection.row_group_count(), 0);
862 assert!(selection.get(0).is_none());
863
864 let selection = RowGroupSelection::new(100, 50);
866 assert_eq!(selection.row_count(), 50);
867 assert_eq!(selection.row_group_count(), 1);
868
869 let row_selection = selection.get(0).unwrap();
870 assert_eq!(row_selection.row_count(), 50);
871
872 let selection = RowGroupSelection::new(100, 150);
874 assert_eq!(selection.row_count(), 150);
875 assert_eq!(selection.row_group_count(), 2);
876
877 let row_selection = selection.get(0).unwrap();
878 assert_eq!(row_selection.row_count(), 100);
879
880 let row_selection = selection.get(1).unwrap();
881 assert_eq!(row_selection.row_count(), 50);
882
883 let selection = RowGroupSelection::new(100, 101);
885 assert_eq!(selection.row_count(), 101);
886 assert_eq!(selection.row_group_count(), 2);
887
888 let row_selection = selection.get(0).unwrap();
889 assert_eq!(row_selection.row_count(), 100);
890
891 let row_selection = selection.get(1).unwrap();
892 assert_eq!(row_selection.row_count(), 1);
893 }
894
895 #[test]
896 fn test_from_full_row_group_ids_dedup_duplicates() {
897 let selection = RowGroupSelection::from_full_row_group_ids([0, 0, 2, 2], 10, 25);
898 assert_eq!(selection.row_group_count(), 2);
899 assert_eq!(selection.row_count(), 15);
900
901 assert_eq!(selection.get(0).unwrap().row_count(), 10);
902 assert_eq!(selection.get(2).unwrap().row_count(), 5);
903 }
904
905 #[test]
906 fn test_from_row_ids() {
907 let row_group_size = 100;
908 let num_row_groups = 3;
909
910 let row_ids: BTreeSet<u32> = vec![5, 15, 25, 35, 105, 115, 205, 215]
912 .into_iter()
913 .collect();
914 let selection = RowGroupSelection::from_row_ids(row_ids, row_group_size, num_row_groups);
915 assert_eq!(selection.row_count(), 8);
916 assert_eq!(selection.row_group_count(), 3);
917
918 let row_selection = selection.get(0).unwrap();
920 assert_eq!(row_selection.row_count(), 4); let row_selection = selection.get(1).unwrap();
923 assert_eq!(row_selection.row_count(), 2); let row_selection = selection.get(2).unwrap();
926 assert_eq!(row_selection.row_count(), 2); let empty_row_ids: BTreeSet<u32> = BTreeSet::new();
930 let selection =
931 RowGroupSelection::from_row_ids(empty_row_ids, row_group_size, num_row_groups);
932 assert_eq!(selection.row_count(), 0);
933 assert_eq!(selection.row_group_count(), 3);
934
935 let consecutive_row_ids: BTreeSet<u32> = vec![5, 6, 7, 8, 9].into_iter().collect();
937 let selection =
938 RowGroupSelection::from_row_ids(consecutive_row_ids, row_group_size, num_row_groups);
939 assert_eq!(selection.row_count(), 5);
940 assert_eq!(selection.row_group_count(), 3);
941
942 let row_selection = selection.get(0).unwrap();
943 assert_eq!(row_selection.row_count(), 5); let boundary_row_ids: BTreeSet<u32> = vec![0, 99, 100, 199, 200, 249].into_iter().collect();
947 let selection =
948 RowGroupSelection::from_row_ids(boundary_row_ids, row_group_size, num_row_groups);
949 assert_eq!(selection.row_count(), 6);
950 assert_eq!(selection.row_group_count(), 3);
951
952 let row_selection = selection.get(0).unwrap();
953 assert_eq!(row_selection.row_count(), 2); let row_selection = selection.get(1).unwrap();
956 assert_eq!(row_selection.row_count(), 2); let row_selection = selection.get(2).unwrap();
959 assert_eq!(row_selection.row_count(), 2); let single_group_row_ids: BTreeSet<u32> = vec![5, 10, 15].into_iter().collect();
963 let selection = RowGroupSelection::from_row_ids(single_group_row_ids, row_group_size, 1);
964 assert_eq!(selection.row_count(), 3);
965 assert_eq!(selection.row_group_count(), 1);
966
967 let row_selection = selection.get(0).unwrap();
968 assert_eq!(row_selection.row_count(), 3); }
970
971 #[test]
972 fn test_from_row_ranges() {
973 let row_group_size = 100;
974
975 let ranges = vec![
977 (0, vec![5..15, 25..35]), (1, vec![5..15]), (2, vec![0..5, 10..15]), ];
981 let selection = RowGroupSelection::from_row_ranges(ranges, row_group_size);
982 assert_eq!(selection.row_count(), 40);
983 assert_eq!(selection.row_group_count(), 3);
984
985 let row_selection = selection.get(0).unwrap();
987 assert_eq!(row_selection.row_count(), 20); let row_selection = selection.get(1).unwrap();
990 assert_eq!(row_selection.row_count(), 10); let row_selection = selection.get(2).unwrap();
993 assert_eq!(row_selection.row_count(), 10); let empty_ranges: Vec<(usize, Vec<Range<usize>>)> = vec![];
997 let selection = RowGroupSelection::from_row_ranges(empty_ranges, row_group_size);
998 assert_eq!(selection.row_count(), 0);
999 assert_eq!(selection.row_group_count(), 0);
1000 assert!(selection.get(0).is_none());
1001
1002 let adjacent_ranges = vec![
1004 (0, vec![5..15, 15..25]), ];
1006 let selection = RowGroupSelection::from_row_ranges(adjacent_ranges, row_group_size);
1007 assert_eq!(selection.row_count(), 20);
1008 assert_eq!(selection.row_group_count(), 1);
1009
1010 let row_selection = selection.get(0).unwrap();
1011 assert_eq!(row_selection.row_count(), 20); let boundary_ranges = vec![
1015 (0, vec![0..10, 90..100]), (1, vec![0..100]), (2, vec![0..50]), ];
1019 let selection = RowGroupSelection::from_row_ranges(boundary_ranges, row_group_size);
1020 assert_eq!(selection.row_count(), 170);
1021 assert_eq!(selection.row_group_count(), 3);
1022
1023 let row_selection = selection.get(0).unwrap();
1024 assert_eq!(row_selection.row_count(), 20); let row_selection = selection.get(1).unwrap();
1027 assert_eq!(row_selection.row_count(), 100); let row_selection = selection.get(2).unwrap();
1030 assert_eq!(row_selection.row_count(), 50); let single_group_ranges = vec![
1034 (0, vec![0..50]), ];
1036 let selection = RowGroupSelection::from_row_ranges(single_group_ranges, row_group_size);
1037 assert_eq!(selection.row_count(), 50);
1038 assert_eq!(selection.row_group_count(), 1);
1039
1040 let row_selection = selection.get(0).unwrap();
1041 assert_eq!(row_selection.row_count(), 50); }
1043
1044 #[test]
1045 fn test_intersect() {
1046 let row_group_size = 100;
1047
1048 let ranges1 = vec![
1050 (0, vec![5..15, 25..35]), (1, vec![5..15]), ];
1053 let selection1 = RowGroupSelection::from_row_ranges(ranges1, row_group_size);
1054
1055 let ranges2 = vec![
1056 (0, vec![10..20]), (1, vec![10..20]), (2, vec![0..10]), ];
1060 let selection2 = RowGroupSelection::from_row_ranges(ranges2, row_group_size);
1061
1062 let intersection = selection1.intersect(&selection2);
1063 assert_eq!(intersection.row_count(), 10);
1064 assert_eq!(intersection.row_group_count(), 2);
1065
1066 let row_selection = intersection.get(0).unwrap();
1067 assert_eq!(row_selection.row_count(), 5); let row_selection = intersection.get(1).unwrap();
1070 assert_eq!(row_selection.row_count(), 5); let empty_ranges: Vec<(usize, Vec<Range<usize>>)> = vec![];
1074 let empty_selection = RowGroupSelection::from_row_ranges(empty_ranges, row_group_size);
1075 let intersection = selection1.intersect(&empty_selection);
1076 assert_eq!(intersection.row_count(), 0);
1077 assert_eq!(intersection.row_group_count(), 0);
1078 assert!(intersection.get(0).is_none());
1079
1080 let non_overlapping_ranges = vec![
1082 (3, vec![0..10]), ];
1084 let non_overlapping_selection =
1085 RowGroupSelection::from_row_ranges(non_overlapping_ranges, row_group_size);
1086 let intersection = selection1.intersect(&non_overlapping_selection);
1087 assert_eq!(intersection.row_count(), 0);
1088 assert_eq!(intersection.row_group_count(), 0);
1089 assert!(intersection.get(0).is_none());
1090
1091 let full_overlap_ranges1 = vec![
1093 (0, vec![0..50]), ];
1095 let full_overlap_ranges2 = vec![
1096 (0, vec![0..50]), ];
1098 let selection1 = RowGroupSelection::from_row_ranges(full_overlap_ranges1, row_group_size);
1099 let selection2 = RowGroupSelection::from_row_ranges(full_overlap_ranges2, row_group_size);
1100 let intersection = selection1.intersect(&selection2);
1101 assert_eq!(intersection.row_count(), 50);
1102 assert_eq!(intersection.row_group_count(), 1);
1103
1104 let row_selection = intersection.get(0).unwrap();
1105 assert_eq!(row_selection.row_count(), 50); let boundary_ranges1 = vec![
1109 (0, vec![0..10, 90..100]), (1, vec![0..100]), ];
1112 let boundary_ranges2 = vec![
1113 (0, vec![5..15, 95..100]), (1, vec![50..100]), ];
1116 let selection1 = RowGroupSelection::from_row_ranges(boundary_ranges1, row_group_size);
1117 let selection2 = RowGroupSelection::from_row_ranges(boundary_ranges2, row_group_size);
1118 let intersection = selection1.intersect(&selection2);
1119 assert_eq!(intersection.row_count(), 60);
1120 assert_eq!(intersection.row_group_count(), 2);
1121
1122 let row_selection = intersection.get(0).unwrap();
1123 assert_eq!(row_selection.row_count(), 10); let row_selection = intersection.get(1).unwrap();
1126 assert_eq!(row_selection.row_count(), 50); let complex_ranges1 = vec![
1130 (0, vec![5..15, 25..35, 45..55]), (1, vec![10..20, 30..40]), ];
1133 let complex_ranges2 = vec![
1134 (0, vec![10..20, 30..40, 50..60]), (1, vec![15..25, 35..45]), ];
1137 let selection1 = RowGroupSelection::from_row_ranges(complex_ranges1, row_group_size);
1138 let selection2 = RowGroupSelection::from_row_ranges(complex_ranges2, row_group_size);
1139 let intersection = selection1.intersect(&selection2);
1140 assert_eq!(intersection.row_count(), 25);
1141 assert_eq!(intersection.row_group_count(), 2);
1142
1143 let row_selection = intersection.get(0).unwrap();
1144 assert_eq!(row_selection.row_count(), 15); let row_selection = intersection.get(1).unwrap();
1147 assert_eq!(row_selection.row_count(), 10); let last_rg_ranges1 = vec![
1151 (2, vec![0..25, 30..40]), ];
1153 let last_rg_ranges2 = vec![
1154 (2, vec![20..30, 35..45]), ];
1156 let selection1 = RowGroupSelection::from_row_ranges(last_rg_ranges1, row_group_size);
1157 let selection2 = RowGroupSelection::from_row_ranges(last_rg_ranges2, row_group_size);
1158 let intersection = selection1.intersect(&selection2);
1159 assert_eq!(intersection.row_count(), 10);
1160 assert_eq!(intersection.row_group_count(), 1);
1161
1162 let row_selection = intersection.get(2).unwrap();
1163 assert_eq!(row_selection.row_count(), 10); let empty_ranges = vec![
1167 (0, vec![]), (1, vec![5..15]), ];
1170 let selection1 = RowGroupSelection::from_row_ranges(empty_ranges, row_group_size);
1171 let ranges2 = vec![
1172 (0, vec![5..15, 25..35]), (1, vec![5..15]), ];
1175 let selection2 = RowGroupSelection::from_row_ranges(ranges2, row_group_size);
1176 let intersection = selection1.intersect(&selection2);
1177 assert_eq!(intersection.row_count(), 10);
1178 assert_eq!(intersection.row_group_count(), 1);
1179
1180 let row_selection = intersection.get(1).unwrap();
1181 assert_eq!(row_selection.row_count(), 10); }
1183
1184 #[test]
1185 fn test_pop_first() {
1186 let row_group_size = 100;
1187 let ranges = vec![
1188 (0, vec![5..15]), (1, vec![5..15]), (2, vec![0..5]), ];
1192 let mut selection = RowGroupSelection::from_row_ranges(ranges, row_group_size);
1193
1194 let (rg_id, row_selection) = selection.pop_first().unwrap();
1196 assert_eq!(rg_id, 0);
1197 assert_eq!(row_selection.row_count(), 10); assert_eq!(selection.row_count(), 15);
1199 assert_eq!(selection.row_group_count(), 2);
1200
1201 let row_selection = selection.get(1).unwrap();
1203 assert_eq!(row_selection.row_count(), 10); let row_selection = selection.get(2).unwrap();
1206 assert_eq!(row_selection.row_count(), 5); let (rg_id, row_selection) = selection.pop_first().unwrap();
1210 assert_eq!(rg_id, 1);
1211 assert_eq!(row_selection.row_count(), 10); assert_eq!(selection.row_count(), 5);
1213 assert_eq!(selection.row_group_count(), 1);
1214
1215 let row_selection = selection.get(2).unwrap();
1217 assert_eq!(row_selection.row_count(), 5); let (rg_id, row_selection) = selection.pop_first().unwrap();
1221 assert_eq!(rg_id, 2);
1222 assert_eq!(row_selection.row_count(), 5); assert_eq!(selection.row_count(), 0);
1224 assert_eq!(selection.row_group_count(), 0);
1225 assert!(selection.is_empty());
1226
1227 let mut empty_selection = RowGroupSelection::from_row_ranges(vec![], row_group_size);
1229 assert!(empty_selection.pop_first().is_none());
1230 assert_eq!(empty_selection.row_count(), 0);
1231 assert_eq!(empty_selection.row_group_count(), 0);
1232 assert!(empty_selection.is_empty());
1233 }
1234
1235 #[test]
1236 fn test_remove_row_group() {
1237 let row_group_size = 100;
1238 let ranges = vec![
1239 (0, vec![5..15]), (1, vec![5..15]), (2, vec![0..5]), ];
1243 let mut selection = RowGroupSelection::from_row_ranges(ranges, row_group_size);
1244
1245 selection.remove_row_group(1);
1247 assert_eq!(selection.row_count(), 15);
1248 assert_eq!(selection.row_group_count(), 2);
1249 assert!(!selection.contains_row_group(1));
1250
1251 let row_selection = selection.get(0).unwrap();
1253 assert_eq!(row_selection.row_count(), 10); let row_selection = selection.get(2).unwrap();
1256 assert_eq!(row_selection.row_count(), 5); selection.remove_row_group(5);
1260 assert_eq!(selection.row_count(), 15);
1261 assert_eq!(selection.row_group_count(), 2);
1262
1263 selection.remove_row_group(0);
1265 assert_eq!(selection.row_count(), 5);
1266 assert_eq!(selection.row_group_count(), 1);
1267
1268 let row_selection = selection.get(2).unwrap();
1269 assert_eq!(row_selection.row_count(), 5); selection.remove_row_group(2);
1272 assert_eq!(selection.row_count(), 0);
1273 assert_eq!(selection.row_group_count(), 0);
1274 assert!(selection.is_empty());
1275
1276 let mut empty_selection = RowGroupSelection::from_row_ranges(vec![], row_group_size);
1278 empty_selection.remove_row_group(0);
1279 assert_eq!(empty_selection.row_count(), 0);
1280 assert_eq!(empty_selection.row_group_count(), 0);
1281 assert!(empty_selection.is_empty());
1282 }
1283
1284 #[test]
1285 fn test_contains_row_group() {
1286 let row_group_size = 100;
1287 let ranges = vec![
1288 (0, vec![5..15]), (1, vec![5..15]), ];
1291 let selection = RowGroupSelection::from_row_ranges(ranges, row_group_size);
1292
1293 assert!(selection.contains_row_group(0));
1294 assert!(selection.contains_row_group(1));
1295 assert!(!selection.contains_row_group(2));
1296
1297 let empty_selection = RowGroupSelection::from_row_ranges(vec![], row_group_size);
1299 assert!(!empty_selection.contains_row_group(0));
1300 }
1301
1302 #[test]
1303 fn test_concat() {
1304 let row_group_size = 100;
1305 let ranges1 = vec![
1306 (0, vec![5..15]), (1, vec![5..15]), ];
1309
1310 let ranges2 = vec![
1311 (2, vec![5..15]), (3, vec![5..15]), ];
1314
1315 let mut selection1 = RowGroupSelection::from_row_ranges(ranges1, row_group_size);
1316 let selection2 = RowGroupSelection::from_row_ranges(ranges2, row_group_size);
1317
1318 selection1.concat(&selection2);
1319 assert_eq!(selection1.row_count(), 40);
1320 assert_eq!(selection1.row_group_count(), 4);
1321 }
1322}