Skip to main content

mito2/sst/parquet/
row_selection.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{BTreeMap, BTreeSet};
16use std::ops::Range;
17
18use index::inverted_index::search::index_apply::ApplyOutput;
19use itertools::Itertools;
20use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
21
22/// A selection of row groups.
23#[derive(Debug, Clone, Default)]
24pub struct RowGroupSelection {
25    /// Row group id to row selection.
26    selection_in_rg: BTreeMap<usize, RowSelectionWithCount>,
27    /// Total number of rows in the selection.
28    row_count: usize,
29    /// Total length of the selectors.
30    selector_len: usize,
31}
32
33/// A row selection with its count.
34#[derive(Debug, Clone, Default)]
35struct RowSelectionWithCount {
36    /// Row selection.
37    selection: RowSelection,
38    /// Number of rows in the selection.
39    row_count: usize,
40    /// Length of the selectors.
41    selector_len: usize,
42}
43
44impl RowGroupSelection {
45    /// Creates a new `RowGroupSelection` with all row groups selected.
46    ///
47    /// # Arguments
48    /// * `row_group_size` - The number of rows in each row group (except possibly the last one)
49    /// * `total_row_count` - Total number of rows
50    pub fn new(row_group_size: usize, total_row_count: usize) -> Self {
51        let mut selection_in_rg = BTreeMap::new();
52
53        let row_group_count = total_row_count.div_ceil(row_group_size);
54        for rg_id in 0..row_group_count {
55            // The last row group may have fewer rows than `row_group_size`
56            let row_group_size = if rg_id == row_group_count - 1 {
57                total_row_count - (row_group_count - 1) * row_group_size
58            } else {
59                row_group_size
60            };
61
62            let selection = RowSelection::from(vec![RowSelector::select(row_group_size)]);
63            selection_in_rg.insert(
64                rg_id,
65                RowSelectionWithCount {
66                    selection,
67                    row_count: row_group_size,
68                    selector_len: 1,
69                },
70            );
71        }
72
73        Self {
74            selection_in_rg,
75            row_count: total_row_count,
76            selector_len: row_group_count,
77        }
78    }
79
80    /// Creates a new `RowGroupSelection` that selects all rows in the specified row groups.
81    ///
82    /// This is useful for fast construction after coarse pruning (e.g. min-max pruning),
83    /// avoiding building and then removing a full selection of all row groups.
84    pub fn from_full_row_group_ids<I>(
85        row_group_ids: I,
86        row_group_size: usize,
87        total_row_count: usize,
88    ) -> Self
89    where
90        I: IntoIterator<Item = usize>,
91    {
92        if row_group_size == 0 || total_row_count == 0 {
93            return Self::default();
94        }
95
96        let row_group_count = total_row_count.div_ceil(row_group_size);
97        if row_group_count == 0 {
98            return Self::default();
99        }
100
101        let last_row_group_size = total_row_count - (row_group_count - 1) * row_group_size;
102
103        let mut selection_in_rg = BTreeMap::new();
104        let mut row_count = 0usize;
105        let mut selector_len = 0usize;
106
107        for rg_id in row_group_ids {
108            if rg_id >= row_group_count {
109                continue;
110            }
111
112            let rg_row_count = if rg_id == row_group_count - 1 {
113                last_row_group_size
114            } else {
115                row_group_size
116            };
117
118            let selection = RowSelection::from(vec![RowSelector::select(rg_row_count)]);
119            if selection_in_rg
120                .insert(
121                    rg_id,
122                    RowSelectionWithCount {
123                        selection,
124                        row_count: rg_row_count,
125                        selector_len: 1,
126                    },
127                )
128                .is_none()
129            {
130                row_count += rg_row_count;
131                selector_len += 1;
132            }
133        }
134
135        Self {
136            selection_in_rg,
137            row_count,
138            selector_len,
139        }
140    }
141
142    /// Returns the row selection for a given row group.
143    ///
144    /// `None` indicates not selected.
145    pub fn get(&self, rg_id: usize) -> Option<&RowSelection> {
146        self.selection_in_rg.get(&rg_id).map(|x| &x.selection)
147    }
148
149    /// Creates a new `RowGroupSelection` from the output of inverted index application.
150    ///
151    /// # Arguments
152    /// * `row_group_size` - The number of rows in each row group (except possibly the last one)
153    /// * `apply_output` - The output from applying the inverted index
154    ///
155    /// # Assumptions
156    /// * All row groups (except possibly the last one) have the same number of rows
157    /// * The last row group may have fewer rows than `row_group_size`
158    pub fn from_inverted_index_apply_output(
159        row_group_size: usize,
160        num_row_groups: usize,
161        apply_output: ApplyOutput,
162    ) -> Self {
163        // Step 1: Convert segment IDs to row ranges within row groups
164        // For each segment ID, calculate its corresponding row range in the row group
165        let segment_row_count = apply_output.segment_row_count;
166        let row_group_ranges = apply_output.matched_segment_ids.iter_ones().map(|seg_id| {
167            // Calculate the global row ID where this segment starts
168            let begin_row_id = seg_id * segment_row_count;
169            // Determine which row group this segment belongs to
170            let row_group_id = begin_row_id / row_group_size;
171            // Calculate the offset within the row group
172            let rg_begin_row_id = begin_row_id % row_group_size;
173            // Ensure the end row ID doesn't exceed the row group size
174            let rg_end_row_id = (rg_begin_row_id + segment_row_count).min(row_group_size);
175
176            (row_group_id, rg_begin_row_id..rg_end_row_id)
177        });
178
179        // Step 2: Group ranges by row group ID and create row selections
180        let mut total_row_count = 0;
181        let mut total_selector_len = 0;
182        let mut selection_in_rg = row_group_ranges
183            .chunk_by(|(row_group_id, _)| *row_group_id)
184            .into_iter()
185            .map(|(row_group_id, group)| {
186                // Extract just the ranges from the group
187                let ranges = group.map(|(_, ranges)| ranges);
188                // Create row selection from the ranges
189                // Note: We use `row_group_size` here, which is safe because:
190                // 1. For non-last row groups, it's the actual size
191                // 2. For the last row group, any ranges beyond the actual size will be clipped
192                //    by the min() operation above
193                let selection = row_selection_from_row_ranges(ranges, row_group_size);
194                let row_count = selection.row_count();
195                let selector_len = selector_len(&selection);
196                total_row_count += row_count;
197                total_selector_len += selector_len;
198                (
199                    row_group_id,
200                    RowSelectionWithCount {
201                        selection,
202                        row_count,
203                        selector_len,
204                    },
205                )
206            })
207            .collect::<BTreeMap<_, _>>();
208
209        Self::fill_missing_row_groups(&mut selection_in_rg, num_row_groups);
210
211        Self {
212            selection_in_rg,
213            row_count: total_row_count,
214            selector_len: total_selector_len,
215        }
216    }
217
218    /// Creates a new `RowGroupSelection` from a set of row IDs.
219    ///
220    /// # Arguments
221    /// * `row_ids` - Set of row IDs to select
222    /// * `row_group_size` - The number of rows in each row group (except possibly the last one)
223    /// * `num_row_groups` - Total number of row groups
224    ///
225    /// # Assumptions
226    /// * All row groups (except possibly the last one) have the same number of rows
227    /// * The last row group may have fewer rows than `row_group_size`
228    /// * All row IDs must within the range of [0, num_row_groups * row_group_size)
229    pub fn from_row_ids(
230        row_ids: BTreeSet<u32>,
231        row_group_size: usize,
232        num_row_groups: usize,
233    ) -> Self {
234        // Step 1: Group row IDs by their row group
235        let row_group_to_row_ids =
236            Self::group_row_ids_by_row_group(row_ids, row_group_size, num_row_groups);
237
238        // Step 2: Create row selections for each row group
239        let mut total_row_count = 0;
240        let mut total_selector_len = 0;
241        let mut selection_in_rg = row_group_to_row_ids
242            .into_iter()
243            .map(|(row_group_id, row_ids)| {
244                let selection =
245                    row_selection_from_sorted_row_ids(row_ids.into_iter(), row_group_size);
246                let row_count = selection.row_count();
247                let selector_len = selector_len(&selection);
248                total_row_count += row_count;
249                total_selector_len += selector_len;
250                (
251                    row_group_id,
252                    RowSelectionWithCount {
253                        selection,
254                        row_count,
255                        selector_len,
256                    },
257                )
258            })
259            .collect::<BTreeMap<_, _>>();
260
261        Self::fill_missing_row_groups(&mut selection_in_rg, num_row_groups);
262
263        Self {
264            selection_in_rg,
265            row_count: total_row_count,
266            selector_len: total_selector_len,
267        }
268    }
269
270    /// Creates a new `RowGroupSelection` from a set of row ranges.
271    ///
272    /// # Arguments
273    /// * `row_ranges` - A vector of (row_group_id, row_ranges) pairs
274    /// * `row_group_size` - The number of rows in each row group (except possibly the last one)
275    ///
276    /// # Assumptions
277    /// * All row groups (except possibly the last one) have the same number of rows
278    /// * The last row group may have fewer rows than `row_group_size`
279    /// * All ranges in `row_ranges` must be within the bounds of their respective row groups
280    ///   (i.e., for row group i, all ranges must be within [0, row_group_size) or [0, remaining_rows) for the last row group)
281    /// * Ranges within the same row group must not overlap. Overlapping ranges will result in undefined behavior.
282    pub fn from_row_ranges(
283        row_ranges: Vec<(usize, Vec<Range<usize>>)>,
284        row_group_size: usize,
285    ) -> Self {
286        let mut total_row_count = 0;
287        let mut total_selector_len = 0;
288        let selection_in_rg = row_ranges
289            .into_iter()
290            .map(|(row_group_id, ranges)| {
291                let selection = row_selection_from_row_ranges(ranges.into_iter(), row_group_size);
292                let row_count = selection.row_count();
293                let selector_len = selector_len(&selection);
294                total_row_count += row_count;
295                total_selector_len += selector_len;
296                (
297                    row_group_id,
298                    RowSelectionWithCount {
299                        selection,
300                        row_count,
301                        selector_len,
302                    },
303                )
304            })
305            .collect();
306
307        Self {
308            selection_in_rg,
309            row_count: total_row_count,
310            selector_len: total_selector_len,
311        }
312    }
313
314    /// Groups row IDs by their row group.
315    ///
316    /// # Arguments
317    /// * `row_ids` - Set of row IDs to group
318    /// * `row_group_size` - Size of each row group
319    /// * `num_row_groups` - Total number of row groups
320    ///
321    /// # Returns
322    /// A vector of (row_group_id, row_ids) pairs, where row_ids are the IDs within that row group.
323    fn group_row_ids_by_row_group(
324        row_ids: BTreeSet<u32>,
325        row_group_size: usize,
326        num_row_groups: usize,
327    ) -> Vec<(usize, Vec<usize>)> {
328        let est_rows_per_group = row_ids.len() / num_row_groups;
329        let mut row_group_to_row_ids: Vec<(usize, Vec<usize>)> = Vec::with_capacity(num_row_groups);
330
331        for row_id in row_ids {
332            let row_group_id = row_id as usize / row_group_size;
333            let row_id_in_group = row_id as usize % row_group_size;
334
335            if let Some((rg_id, row_ids)) = row_group_to_row_ids.last_mut()
336                && *rg_id == row_group_id
337            {
338                row_ids.push(row_id_in_group);
339            } else {
340                let mut row_ids = Vec::with_capacity(est_rows_per_group);
341                row_ids.push(row_id_in_group);
342                row_group_to_row_ids.push((row_group_id, row_ids));
343            }
344        }
345
346        row_group_to_row_ids
347    }
348
349    /// Intersects two `RowGroupSelection`s.
350    pub fn intersect(&self, other: &Self) -> Self {
351        let mut res = BTreeMap::new();
352        let mut total_row_count = 0;
353        let mut total_selector_len = 0;
354
355        for (rg_id, x) in other.selection_in_rg.iter() {
356            let Some(y) = self.selection_in_rg.get(rg_id) else {
357                continue;
358            };
359            let selection = intersect_row_selections(&x.selection, &y.selection);
360            let row_count = selection.row_count();
361            let selector_len = selector_len(&selection);
362            if row_count > 0 {
363                total_row_count += row_count;
364                total_selector_len += selector_len;
365                res.insert(
366                    *rg_id,
367                    RowSelectionWithCount {
368                        selection,
369                        row_count,
370                        selector_len,
371                    },
372                );
373            }
374        }
375
376        Self {
377            selection_in_rg: res,
378            row_count: total_row_count,
379            selector_len: total_selector_len,
380        }
381    }
382
383    /// Returns the number of row groups in the selection.
384    pub fn row_group_count(&self) -> usize {
385        self.selection_in_rg.len()
386    }
387
388    /// Returns the number of rows in the selection.
389    pub fn row_count(&self) -> usize {
390        self.row_count
391    }
392
393    /// Returns the first row group in the selection.
394    ///
395    /// Skip the row group if the row count is 0.
396    pub fn pop_first(&mut self) -> Option<(usize, RowSelection)> {
397        while let Some((
398            row_group_id,
399            RowSelectionWithCount {
400                selection,
401                row_count,
402                selector_len,
403            },
404        )) = self.selection_in_rg.pop_first()
405        {
406            if row_count > 0 {
407                self.row_count -= row_count;
408                self.selector_len -= selector_len;
409                return Some((row_group_id, selection));
410            }
411        }
412
413        None
414    }
415
416    /// Removes a row group from the selection.
417    pub fn remove_row_group(&mut self, row_group_id: usize) {
418        let Some(RowSelectionWithCount {
419            row_count,
420            selector_len,
421            ..
422        }) = self.selection_in_rg.remove(&row_group_id)
423        else {
424            return;
425        };
426        self.row_count -= row_count;
427        self.selector_len -= selector_len;
428    }
429
430    /// Returns true if the selection is empty.
431    pub fn is_empty(&self) -> bool {
432        self.selection_in_rg.is_empty()
433    }
434
435    /// Returns true if the selection contains a row group with the given ID.
436    pub fn contains_row_group(&self, row_group_id: usize) -> bool {
437        self.selection_in_rg.contains_key(&row_group_id)
438    }
439
440    /// Returns true if the selection contains a row group with the given ID and the row selection is not empty.
441    pub fn contains_non_empty_row_group(&self, row_group_id: usize) -> bool {
442        self.selection_in_rg
443            .get(&row_group_id)
444            .map(|r| r.row_count > 0)
445            .unwrap_or(false)
446    }
447
448    /// Returns an iterator over the row groups in the selection.
449    pub fn iter(&self) -> impl Iterator<Item = (&usize, &RowSelection)> {
450        self.selection_in_rg
451            .iter()
452            .map(|(row_group_id, x)| (row_group_id, &x.selection))
453    }
454
455    /// Returns the memory usage of the selection.
456    pub fn mem_usage(&self) -> usize {
457        self.selector_len * size_of::<RowSelector>()
458            + self.selection_in_rg.len() * size_of::<RowSelectionWithCount>()
459    }
460
461    /// Concatenates `other` into `self`. `other` must not contain row groups that `self` contains.
462    ///
463    /// Panics if `self` contains row groups that `other` contains.
464    pub fn concat(&mut self, other: &Self) {
465        for (rg_id, other_rs) in other.selection_in_rg.iter() {
466            if self.selection_in_rg.contains_key(rg_id) {
467                panic!("row group {} is already in `self`", rg_id);
468            }
469
470            self.selection_in_rg.insert(*rg_id, other_rs.clone());
471            self.row_count += other_rs.row_count;
472            self.selector_len += other_rs.selector_len;
473        }
474    }
475
476    /// Fills the missing row groups with empty selections.
477    /// This is to indicate that the row groups are searched even if no rows are found.
478    fn fill_missing_row_groups(
479        selection_in_rg: &mut BTreeMap<usize, RowSelectionWithCount>,
480        num_row_groups: usize,
481    ) {
482        for rg_id in 0..num_row_groups {
483            selection_in_rg.entry(rg_id).or_default();
484        }
485    }
486}
487
488/// Ported from `parquet` but trailing rows are removed.
489///
490/// Combine two lists of `RowSelection` return the intersection of them
491/// For example:
492/// self:      NNYYYYNNYYNYN
493/// other:     NYNNNNNNY
494///
495/// returned:  NNNNNNNNY     (modified)
496///            NNNNNNNNYYNYN (original)
497fn intersect_row_selections(left: &RowSelection, right: &RowSelection) -> RowSelection {
498    let mut l_iter = left.iter().copied().peekable();
499    let mut r_iter = right.iter().copied().peekable();
500
501    let iter = std::iter::from_fn(move || {
502        loop {
503            let l = l_iter.peek_mut();
504            let r = r_iter.peek_mut();
505
506            match (l, r) {
507                (Some(a), _) if a.row_count == 0 => {
508                    l_iter.next().unwrap();
509                }
510                (_, Some(b)) if b.row_count == 0 => {
511                    r_iter.next().unwrap();
512                }
513                (Some(l), Some(r)) => {
514                    return match (l.skip, r.skip) {
515                        // Keep both ranges
516                        (false, false) => {
517                            if l.row_count < r.row_count {
518                                r.row_count -= l.row_count;
519                                l_iter.next()
520                            } else {
521                                l.row_count -= r.row_count;
522                                r_iter.next()
523                            }
524                        }
525                        // skip at least one
526                        _ => {
527                            if l.row_count < r.row_count {
528                                let skip = l.row_count;
529                                r.row_count -= l.row_count;
530                                l_iter.next();
531                                Some(RowSelector::skip(skip))
532                            } else {
533                                let skip = r.row_count;
534                                l.row_count -= skip;
535                                r_iter.next();
536                                Some(RowSelector::skip(skip))
537                            }
538                        }
539                    };
540                }
541                (None, _) => return None,
542                (_, None) => return None,
543            }
544        }
545    });
546
547    iter.collect()
548}
549
550/// Converts an iterator of row ranges into a `RowSelection` by creating a sequence of `RowSelector`s.
551///
552/// This function processes each range in the input and either creates a new selector or merges
553/// with the existing one, depending on whether the current range is contiguous with the preceding one
554/// or if there's a gap that requires skipping rows. It handles both "select" and "skip" actions,
555/// optimizing the list of selectors by merging contiguous actions of the same type.
556///
557/// The returned selection intentionally stops at the end of the last matched range and may omit a
558/// trailing `skip` that would extend it to `total_row_count`. That is fine when the selection is
559/// used directly by the parquet reader, which simply stops once the selectors are exhausted.
560///
561/// Note: overlapping ranges are not supported and will result in an incorrect selection.
562pub(crate) fn row_selection_from_row_ranges(
563    row_ranges: impl Iterator<Item = Range<usize>>,
564    total_row_count: usize,
565) -> RowSelection {
566    let (selectors, _) = build_selectors_from_row_ranges(row_ranges, total_row_count);
567    RowSelection::from(selectors)
568}
569
570/// Like [`row_selection_from_row_ranges`] but guarantees the resulting selection
571/// covers exactly `total_row_count` rows by appending a trailing skip if needed.
572///
573/// Required when the result is used as the inner operand of [`RowSelection::and_then`], because
574/// `and_then` expects the inner selection to account for every row selected by the outer one.
575pub(crate) fn row_selection_from_row_ranges_exact(
576    row_ranges: impl Iterator<Item = Range<usize>>,
577    total_row_count: usize,
578) -> RowSelection {
579    let (mut selectors, last_processed_end) =
580        build_selectors_from_row_ranges(row_ranges, total_row_count);
581    if last_processed_end < total_row_count {
582        // Preserve the full logical length of the selection even when the final rows are all
583        // filtered out. Without this trailing skip, `and_then` sees an undersized inner
584        // selection and panics.
585        add_or_merge_selector(&mut selectors, total_row_count - last_processed_end, true);
586    }
587    RowSelection::from(selectors)
588}
589
590fn build_selectors_from_row_ranges(
591    row_ranges: impl Iterator<Item = Range<usize>>,
592    total_row_count: usize,
593) -> (Vec<RowSelector>, usize) {
594    let mut selectors: Vec<RowSelector> = Vec::new();
595    let mut last_processed_end = 0;
596
597    for Range { start, end } in row_ranges {
598        let end = end.min(total_row_count);
599        if start > last_processed_end {
600            add_or_merge_selector(&mut selectors, start - last_processed_end, true);
601        }
602
603        add_or_merge_selector(&mut selectors, end - start, false);
604        last_processed_end = end;
605    }
606
607    (selectors, last_processed_end)
608}
609
610/// Converts an iterator of sorted row IDs into a `RowSelection`.
611///
612/// Note: the input iterator must be sorted in ascending order and
613///       contain unique row IDs in the range [0, total_row_count).
614pub(crate) fn row_selection_from_sorted_row_ids(
615    row_ids: impl Iterator<Item = usize>,
616    total_row_count: usize,
617) -> RowSelection {
618    let mut selectors: Vec<RowSelector> = Vec::new();
619    let mut last_processed_end = 0;
620
621    for row_id in row_ids {
622        let start = row_id;
623        let end = start + 1;
624
625        if start > last_processed_end {
626            add_or_merge_selector(&mut selectors, start - last_processed_end, true);
627        }
628
629        add_or_merge_selector(&mut selectors, end - start, false);
630        last_processed_end = end;
631    }
632
633    if last_processed_end < total_row_count {
634        add_or_merge_selector(&mut selectors, total_row_count - last_processed_end, true);
635    }
636
637    RowSelection::from(selectors)
638}
639
640/// Helper function to either add a new `RowSelector` to `selectors` or merge it with the last one
641/// if they are of the same type (both skip or both select).
642fn add_or_merge_selector(selectors: &mut Vec<RowSelector>, count: usize, is_skip: bool) {
643    if let Some(last) = selectors.last_mut() {
644        // Merge with last if both actions are same
645        if last.skip == is_skip {
646            last.row_count += count;
647            return;
648        }
649    }
650    // Add new selector otherwise
651    let new_selector = if is_skip {
652        RowSelector::skip(count)
653    } else {
654        RowSelector::select(count)
655    };
656    selectors.push(new_selector);
657}
658
659/// Returns the length of the selectors in the selection.
660fn selector_len(selection: &RowSelection) -> usize {
661    selection.iter().size_hint().0
662}
663
664#[cfg(test)]
665#[allow(clippy::single_range_in_vec_init)]
666mod tests {
667    use super::*;
668
669    #[test]
670    fn test_selector_len() {
671        let selection = RowSelection::from(vec![RowSelector::skip(5), RowSelector::select(5)]);
672        assert_eq!(selector_len(&selection), 2);
673
674        let selection = RowSelection::from(vec![
675            RowSelector::select(5),
676            RowSelector::skip(5),
677            RowSelector::select(5),
678        ]);
679        assert_eq!(selector_len(&selection), 3);
680
681        let selection = RowSelection::from(vec![]);
682        assert_eq!(selector_len(&selection), 0);
683    }
684
685    #[test]
686    fn test_single_contiguous_range() {
687        let selection = row_selection_from_row_ranges(Some(5..10).into_iter(), 10);
688        let expected = RowSelection::from(vec![RowSelector::skip(5), RowSelector::select(5)]);
689        assert_eq!(selection, expected);
690    }
691
692    #[test]
693    fn test_non_contiguous_ranges() {
694        let ranges = [1..3, 5..8];
695        let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10);
696        let expected = RowSelection::from(vec![
697            RowSelector::skip(1),
698            RowSelector::select(2),
699            RowSelector::skip(2),
700            RowSelector::select(3),
701        ]);
702        assert_eq!(selection, expected);
703    }
704
705    #[test]
706    fn test_empty_range() {
707        let ranges = [];
708        let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10);
709        let expected = RowSelection::from(vec![]);
710        assert_eq!(selection, expected);
711    }
712
713    #[test]
714    fn test_adjacent_ranges() {
715        let ranges = [1..2, 2..3];
716        let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10);
717        let expected = RowSelection::from(vec![RowSelector::skip(1), RowSelector::select(2)]);
718        assert_eq!(selection, expected);
719    }
720
721    #[test]
722    fn test_large_gap_between_ranges() {
723        let ranges = [1..2, 100..101];
724        let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10240);
725        let expected = RowSelection::from(vec![
726            RowSelector::skip(1),
727            RowSelector::select(1),
728            RowSelector::skip(98),
729            RowSelector::select(1),
730        ]);
731        assert_eq!(selection, expected);
732    }
733
734    #[test]
735    fn test_range_end_over_total_row_count() {
736        let ranges = Some(1..10);
737        let selection = row_selection_from_row_ranges(ranges.into_iter(), 5);
738        let expected = RowSelection::from(vec![RowSelector::skip(1), RowSelector::select(4)]);
739        assert_eq!(selection, expected);
740    }
741
742    #[test]
743    fn test_exact_single_range_with_trailing_skip() {
744        let selection = row_selection_from_row_ranges_exact(Some(0..3).into_iter(), 6);
745        let expected = RowSelection::from(vec![RowSelector::select(3), RowSelector::skip(3)]);
746        assert_eq!(selection, expected);
747        assert_eq!(selection.row_count(), 3);
748    }
749
750    #[test]
751    fn test_exact_non_contiguous_ranges() {
752        let ranges = [1..3, 5..8];
753        let selection = row_selection_from_row_ranges_exact(ranges.iter().cloned(), 10);
754        let expected = RowSelection::from(vec![
755            RowSelector::skip(1),
756            RowSelector::select(2),
757            RowSelector::skip(2),
758            RowSelector::select(3),
759            RowSelector::skip(2),
760        ]);
761        assert_eq!(selection, expected);
762        assert_eq!(selection.row_count(), 5);
763    }
764
765    #[test]
766    fn test_exact_empty_ranges() {
767        let selection = row_selection_from_row_ranges_exact([].iter().cloned(), 10);
768        let expected = RowSelection::from(vec![RowSelector::skip(10)]);
769        assert_eq!(selection, expected);
770        assert_eq!(selection.row_count(), 0);
771    }
772
773    #[test]
774    fn test_exact_range_covers_all_rows() {
775        let selection = row_selection_from_row_ranges_exact(Some(0..10).into_iter(), 10);
776        let expected = RowSelection::from(vec![RowSelector::select(10)]);
777        assert_eq!(selection, expected);
778        assert_eq!(selection.row_count(), 10);
779    }
780
781    #[test]
782    fn test_exact_compatible_with_and_then() {
783        // Outer selects rows 0..6 out of 10.
784        let outer = RowSelection::from(vec![RowSelector::select(6), RowSelector::skip(4)]);
785        // Inner: within those 6 rows, select only rows 0..3.
786        let inner = row_selection_from_row_ranges_exact(Some(0..3).into_iter(), 6);
787        let result = outer.and_then(&inner);
788        let expected = RowSelection::from(vec![RowSelector::select(3), RowSelector::skip(7)]);
789        assert_eq!(result, expected);
790    }
791
792    #[test]
793    fn test_row_ids_to_selection() {
794        let row_ids = [1, 3, 5, 7, 9].into_iter();
795        let selection = row_selection_from_sorted_row_ids(row_ids, 10);
796        let expected = RowSelection::from(vec![
797            RowSelector::skip(1),
798            RowSelector::select(1),
799            RowSelector::skip(1),
800            RowSelector::select(1),
801            RowSelector::skip(1),
802            RowSelector::select(1),
803            RowSelector::skip(1),
804            RowSelector::select(1),
805            RowSelector::skip(1),
806            RowSelector::select(1),
807        ]);
808        assert_eq!(selection, expected);
809    }
810
811    #[test]
812    fn test_row_ids_to_selection_full() {
813        let row_ids = 0..10;
814        let selection = row_selection_from_sorted_row_ids(row_ids, 10);
815        let expected = RowSelection::from(vec![RowSelector::select(10)]);
816        assert_eq!(selection, expected);
817    }
818
819    #[test]
820    fn test_row_ids_to_selection_empty() {
821        let selection = row_selection_from_sorted_row_ids(None.into_iter(), 10);
822        let expected = RowSelection::from(vec![RowSelector::skip(10)]);
823        assert_eq!(selection, expected);
824    }
825
826    #[test]
827    fn test_group_row_ids() {
828        let row_ids = [0, 1, 2, 5, 6, 7, 8, 12].into_iter().collect();
829        let row_group_size = 5;
830        let num_row_groups = 3;
831
832        let row_group_to_row_ids =
833            RowGroupSelection::group_row_ids_by_row_group(row_ids, row_group_size, num_row_groups);
834
835        assert_eq!(
836            row_group_to_row_ids,
837            vec![(0, vec![0, 1, 2]), (1, vec![0, 1, 2, 3]), (2, vec![2])]
838        );
839    }
840
841    #[test]
842    fn test_row_group_selection_new() {
843        // Test with regular case
844        let selection = RowGroupSelection::new(100, 250);
845        assert_eq!(selection.row_count(), 250);
846        assert_eq!(selection.row_group_count(), 3);
847
848        // Check content of each row group
849        let row_selection = selection.get(0).unwrap();
850        assert_eq!(row_selection.row_count(), 100);
851
852        let row_selection = selection.get(1).unwrap();
853        assert_eq!(row_selection.row_count(), 100);
854
855        let row_selection = selection.get(2).unwrap();
856        assert_eq!(row_selection.row_count(), 50);
857
858        // Test with empty selection
859        let selection = RowGroupSelection::new(100, 0);
860        assert_eq!(selection.row_count(), 0);
861        assert_eq!(selection.row_group_count(), 0);
862        assert!(selection.get(0).is_none());
863
864        // Test with single row group
865        let selection = RowGroupSelection::new(100, 50);
866        assert_eq!(selection.row_count(), 50);
867        assert_eq!(selection.row_group_count(), 1);
868
869        let row_selection = selection.get(0).unwrap();
870        assert_eq!(row_selection.row_count(), 50);
871
872        // Test with row count that doesn't divide evenly
873        let selection = RowGroupSelection::new(100, 150);
874        assert_eq!(selection.row_count(), 150);
875        assert_eq!(selection.row_group_count(), 2);
876
877        let row_selection = selection.get(0).unwrap();
878        assert_eq!(row_selection.row_count(), 100);
879
880        let row_selection = selection.get(1).unwrap();
881        assert_eq!(row_selection.row_count(), 50);
882
883        // Test with row count that's just over a multiple of row_group_size
884        let selection = RowGroupSelection::new(100, 101);
885        assert_eq!(selection.row_count(), 101);
886        assert_eq!(selection.row_group_count(), 2);
887
888        let row_selection = selection.get(0).unwrap();
889        assert_eq!(row_selection.row_count(), 100);
890
891        let row_selection = selection.get(1).unwrap();
892        assert_eq!(row_selection.row_count(), 1);
893    }
894
895    #[test]
896    fn test_from_full_row_group_ids_dedup_duplicates() {
897        let selection = RowGroupSelection::from_full_row_group_ids([0, 0, 2, 2], 10, 25);
898        assert_eq!(selection.row_group_count(), 2);
899        assert_eq!(selection.row_count(), 15);
900
901        assert_eq!(selection.get(0).unwrap().row_count(), 10);
902        assert_eq!(selection.get(2).unwrap().row_count(), 5);
903    }
904
905    #[test]
906    fn test_from_row_ids() {
907        let row_group_size = 100;
908        let num_row_groups = 3;
909
910        // Test with regular case
911        let row_ids: BTreeSet<u32> = vec![5, 15, 25, 35, 105, 115, 205, 215]
912            .into_iter()
913            .collect();
914        let selection = RowGroupSelection::from_row_ids(row_ids, row_group_size, num_row_groups);
915        assert_eq!(selection.row_count(), 8);
916        assert_eq!(selection.row_group_count(), 3);
917
918        // Check content of each row group
919        let row_selection = selection.get(0).unwrap();
920        assert_eq!(row_selection.row_count(), 4); // 5, 15, 25, 35
921
922        let row_selection = selection.get(1).unwrap();
923        assert_eq!(row_selection.row_count(), 2); // 105, 115
924
925        let row_selection = selection.get(2).unwrap();
926        assert_eq!(row_selection.row_count(), 2); // 205, 215
927
928        // Test with empty row IDs
929        let empty_row_ids: BTreeSet<u32> = BTreeSet::new();
930        let selection =
931            RowGroupSelection::from_row_ids(empty_row_ids, row_group_size, num_row_groups);
932        assert_eq!(selection.row_count(), 0);
933        assert_eq!(selection.row_group_count(), 3);
934
935        // Test with consecutive row IDs
936        let consecutive_row_ids: BTreeSet<u32> = vec![5, 6, 7, 8, 9].into_iter().collect();
937        let selection =
938            RowGroupSelection::from_row_ids(consecutive_row_ids, row_group_size, num_row_groups);
939        assert_eq!(selection.row_count(), 5);
940        assert_eq!(selection.row_group_count(), 3);
941
942        let row_selection = selection.get(0).unwrap();
943        assert_eq!(row_selection.row_count(), 5); // 5, 6, 7, 8, 9
944
945        // Test with row IDs at row group boundaries
946        let boundary_row_ids: BTreeSet<u32> = vec![0, 99, 100, 199, 200, 249].into_iter().collect();
947        let selection =
948            RowGroupSelection::from_row_ids(boundary_row_ids, row_group_size, num_row_groups);
949        assert_eq!(selection.row_count(), 6);
950        assert_eq!(selection.row_group_count(), 3);
951
952        let row_selection = selection.get(0).unwrap();
953        assert_eq!(row_selection.row_count(), 2); // 0, 99
954
955        let row_selection = selection.get(1).unwrap();
956        assert_eq!(row_selection.row_count(), 2); // 100, 199
957
958        let row_selection = selection.get(2).unwrap();
959        assert_eq!(row_selection.row_count(), 2); // 200, 249
960
961        // Test with single row group
962        let single_group_row_ids: BTreeSet<u32> = vec![5, 10, 15].into_iter().collect();
963        let selection = RowGroupSelection::from_row_ids(single_group_row_ids, row_group_size, 1);
964        assert_eq!(selection.row_count(), 3);
965        assert_eq!(selection.row_group_count(), 1);
966
967        let row_selection = selection.get(0).unwrap();
968        assert_eq!(row_selection.row_count(), 3); // 5, 10, 15
969    }
970
971    #[test]
972    fn test_from_row_ranges() {
973        let row_group_size = 100;
974
975        // Test with regular case
976        let ranges = vec![
977            (0, vec![5..15, 25..35]), // Within [0, 100)
978            (1, vec![5..15]),         // Within [0, 100)
979            (2, vec![0..5, 10..15]),  // Within [0, 50) for last row group
980        ];
981        let selection = RowGroupSelection::from_row_ranges(ranges, row_group_size);
982        assert_eq!(selection.row_count(), 40);
983        assert_eq!(selection.row_group_count(), 3);
984
985        // Check content of each row group
986        let row_selection = selection.get(0).unwrap();
987        assert_eq!(row_selection.row_count(), 20); // 5..15 (10) + 25..35 (10)
988
989        let row_selection = selection.get(1).unwrap();
990        assert_eq!(row_selection.row_count(), 10); // 5..15 (10)
991
992        let row_selection = selection.get(2).unwrap();
993        assert_eq!(row_selection.row_count(), 10); // 0..5 (5) + 10..15 (5)
994
995        // Test with empty ranges
996        let empty_ranges: Vec<(usize, Vec<Range<usize>>)> = vec![];
997        let selection = RowGroupSelection::from_row_ranges(empty_ranges, row_group_size);
998        assert_eq!(selection.row_count(), 0);
999        assert_eq!(selection.row_group_count(), 0);
1000        assert!(selection.get(0).is_none());
1001
1002        // Test with adjacent ranges within same row group
1003        let adjacent_ranges = vec![
1004            (0, vec![5..15, 15..25]), // Adjacent ranges within [0, 100)
1005        ];
1006        let selection = RowGroupSelection::from_row_ranges(adjacent_ranges, row_group_size);
1007        assert_eq!(selection.row_count(), 20);
1008        assert_eq!(selection.row_group_count(), 1);
1009
1010        let row_selection = selection.get(0).unwrap();
1011        assert_eq!(row_selection.row_count(), 20); // 5..15 (10) + 15..25 (10)
1012
1013        // Test with ranges at row group boundaries
1014        let boundary_ranges = vec![
1015            (0, vec![0..10, 90..100]), // Ranges at start and end of first row group
1016            (1, vec![0..100]),         // Full range of second row group
1017            (2, vec![0..50]),          // Full range of last row group
1018        ];
1019        let selection = RowGroupSelection::from_row_ranges(boundary_ranges, row_group_size);
1020        assert_eq!(selection.row_count(), 170);
1021        assert_eq!(selection.row_group_count(), 3);
1022
1023        let row_selection = selection.get(0).unwrap();
1024        assert_eq!(row_selection.row_count(), 20); // 0..10 (10) + 90..100 (10)
1025
1026        let row_selection = selection.get(1).unwrap();
1027        assert_eq!(row_selection.row_count(), 100); // 0..100 (100)
1028
1029        let row_selection = selection.get(2).unwrap();
1030        assert_eq!(row_selection.row_count(), 50); // 0..50 (50)
1031
1032        // Test with single row group
1033        let single_group_ranges = vec![
1034            (0, vec![0..50]), // Half of first row group
1035        ];
1036        let selection = RowGroupSelection::from_row_ranges(single_group_ranges, row_group_size);
1037        assert_eq!(selection.row_count(), 50);
1038        assert_eq!(selection.row_group_count(), 1);
1039
1040        let row_selection = selection.get(0).unwrap();
1041        assert_eq!(row_selection.row_count(), 50); // 0..50 (50)
1042    }
1043
1044    #[test]
1045    fn test_intersect() {
1046        let row_group_size = 100;
1047
1048        // Test case 1: Regular intersection with partial overlap
1049        let ranges1 = vec![
1050            (0, vec![5..15, 25..35]), // Within [0, 100)
1051            (1, vec![5..15]),         // Within [0, 100)
1052        ];
1053        let selection1 = RowGroupSelection::from_row_ranges(ranges1, row_group_size);
1054
1055        let ranges2 = vec![
1056            (0, vec![10..20]), // Within [0, 100)
1057            (1, vec![10..20]), // Within [0, 100)
1058            (2, vec![0..10]),  // Within [0, 50) for last row group
1059        ];
1060        let selection2 = RowGroupSelection::from_row_ranges(ranges2, row_group_size);
1061
1062        let intersection = selection1.intersect(&selection2);
1063        assert_eq!(intersection.row_count(), 10);
1064        assert_eq!(intersection.row_group_count(), 2);
1065
1066        let row_selection = intersection.get(0).unwrap();
1067        assert_eq!(row_selection.row_count(), 5); // 10..15 (5)
1068
1069        let row_selection = intersection.get(1).unwrap();
1070        assert_eq!(row_selection.row_count(), 5); // 10..15 (5)
1071
1072        // Test case 2: Empty intersection with empty selection
1073        let empty_ranges: Vec<(usize, Vec<Range<usize>>)> = vec![];
1074        let empty_selection = RowGroupSelection::from_row_ranges(empty_ranges, row_group_size);
1075        let intersection = selection1.intersect(&empty_selection);
1076        assert_eq!(intersection.row_count(), 0);
1077        assert_eq!(intersection.row_group_count(), 0);
1078        assert!(intersection.get(0).is_none());
1079
1080        // Test case 3: No overlapping row groups
1081        let non_overlapping_ranges = vec![
1082            (3, vec![0..10]), // Within [0, 50) for last row group
1083        ];
1084        let non_overlapping_selection =
1085            RowGroupSelection::from_row_ranges(non_overlapping_ranges, row_group_size);
1086        let intersection = selection1.intersect(&non_overlapping_selection);
1087        assert_eq!(intersection.row_count(), 0);
1088        assert_eq!(intersection.row_group_count(), 0);
1089        assert!(intersection.get(0).is_none());
1090
1091        // Test case 4: Full overlap within same row group
1092        let full_overlap_ranges1 = vec![
1093            (0, vec![0..50]), // Within [0, 100)
1094        ];
1095        let full_overlap_ranges2 = vec![
1096            (0, vec![0..50]), // Within [0, 100)
1097        ];
1098        let selection1 = RowGroupSelection::from_row_ranges(full_overlap_ranges1, row_group_size);
1099        let selection2 = RowGroupSelection::from_row_ranges(full_overlap_ranges2, row_group_size);
1100        let intersection = selection1.intersect(&selection2);
1101        assert_eq!(intersection.row_count(), 50);
1102        assert_eq!(intersection.row_group_count(), 1);
1103
1104        let row_selection = intersection.get(0).unwrap();
1105        assert_eq!(row_selection.row_count(), 50); // 0..50 (50)
1106
1107        // Test case 5: Partial overlap at row group boundaries
1108        let boundary_ranges1 = vec![
1109            (0, vec![0..10, 90..100]), // Within [0, 100)
1110            (1, vec![0..100]),         // Within [0, 100)
1111        ];
1112        let boundary_ranges2 = vec![
1113            (0, vec![5..15, 95..100]), // Within [0, 100)
1114            (1, vec![50..100]),        // Within [0, 100)
1115        ];
1116        let selection1 = RowGroupSelection::from_row_ranges(boundary_ranges1, row_group_size);
1117        let selection2 = RowGroupSelection::from_row_ranges(boundary_ranges2, row_group_size);
1118        let intersection = selection1.intersect(&selection2);
1119        assert_eq!(intersection.row_count(), 60);
1120        assert_eq!(intersection.row_group_count(), 2);
1121
1122        let row_selection = intersection.get(0).unwrap();
1123        assert_eq!(row_selection.row_count(), 10); // 5..10 (5) + 95..100 (5)
1124
1125        let row_selection = intersection.get(1).unwrap();
1126        assert_eq!(row_selection.row_count(), 50); // 50..100 (50)
1127
1128        // Test case 6: Multiple ranges with complex overlap
1129        let complex_ranges1 = vec![
1130            (0, vec![5..15, 25..35, 45..55]), // Within [0, 100)
1131            (1, vec![10..20, 30..40]),        // Within [0, 100)
1132        ];
1133        let complex_ranges2 = vec![
1134            (0, vec![10..20, 30..40, 50..60]), // Within [0, 100)
1135            (1, vec![15..25, 35..45]),         // Within [0, 100)
1136        ];
1137        let selection1 = RowGroupSelection::from_row_ranges(complex_ranges1, row_group_size);
1138        let selection2 = RowGroupSelection::from_row_ranges(complex_ranges2, row_group_size);
1139        let intersection = selection1.intersect(&selection2);
1140        assert_eq!(intersection.row_count(), 25);
1141        assert_eq!(intersection.row_group_count(), 2);
1142
1143        let row_selection = intersection.get(0).unwrap();
1144        assert_eq!(row_selection.row_count(), 15); // 10..15 (5) + 30..35 (5) + 50..55 (5)
1145
1146        let row_selection = intersection.get(1).unwrap();
1147        assert_eq!(row_selection.row_count(), 10); // 15..20 (5) + 35..40 (5)
1148
1149        // Test case 7: Intersection with last row group (smaller size)
1150        let last_rg_ranges1 = vec![
1151            (2, vec![0..25, 30..40]), // Within [0, 50) for last row group
1152        ];
1153        let last_rg_ranges2 = vec![
1154            (2, vec![20..30, 35..45]), // Within [0, 50) for last row group
1155        ];
1156        let selection1 = RowGroupSelection::from_row_ranges(last_rg_ranges1, row_group_size);
1157        let selection2 = RowGroupSelection::from_row_ranges(last_rg_ranges2, row_group_size);
1158        let intersection = selection1.intersect(&selection2);
1159        assert_eq!(intersection.row_count(), 10);
1160        assert_eq!(intersection.row_group_count(), 1);
1161
1162        let row_selection = intersection.get(2).unwrap();
1163        assert_eq!(row_selection.row_count(), 10); // 20..25 (5) + 35..40 (5)
1164
1165        // Test case 8: Intersection with empty ranges in one selection
1166        let empty_ranges = vec![
1167            (0, vec![]),      // Empty ranges
1168            (1, vec![5..15]), // Within [0, 100)
1169        ];
1170        let selection1 = RowGroupSelection::from_row_ranges(empty_ranges, row_group_size);
1171        let ranges2 = vec![
1172            (0, vec![5..15, 25..35]), // Within [0, 100)
1173            (1, vec![5..15]),         // Within [0, 100)
1174        ];
1175        let selection2 = RowGroupSelection::from_row_ranges(ranges2, row_group_size);
1176        let intersection = selection1.intersect(&selection2);
1177        assert_eq!(intersection.row_count(), 10);
1178        assert_eq!(intersection.row_group_count(), 1);
1179
1180        let row_selection = intersection.get(1).unwrap();
1181        assert_eq!(row_selection.row_count(), 10); // 5..15 (10)
1182    }
1183
1184    #[test]
1185    fn test_pop_first() {
1186        let row_group_size = 100;
1187        let ranges = vec![
1188            (0, vec![5..15]), // Within [0, 100)
1189            (1, vec![5..15]), // Within [0, 100)
1190            (2, vec![0..5]),  // Within [0, 50) for last row group
1191        ];
1192        let mut selection = RowGroupSelection::from_row_ranges(ranges, row_group_size);
1193
1194        // Test popping first row group
1195        let (rg_id, row_selection) = selection.pop_first().unwrap();
1196        assert_eq!(rg_id, 0);
1197        assert_eq!(row_selection.row_count(), 10); // 5..15 (10)
1198        assert_eq!(selection.row_count(), 15);
1199        assert_eq!(selection.row_group_count(), 2);
1200
1201        // Verify remaining row groups' content
1202        let row_selection = selection.get(1).unwrap();
1203        assert_eq!(row_selection.row_count(), 10); // 5..15 (10)
1204
1205        let row_selection = selection.get(2).unwrap();
1206        assert_eq!(row_selection.row_count(), 5); // 0..5 (5)
1207
1208        // Test popping second row group
1209        let (rg_id, row_selection) = selection.pop_first().unwrap();
1210        assert_eq!(rg_id, 1);
1211        assert_eq!(row_selection.row_count(), 10); // 5..15 (10)
1212        assert_eq!(selection.row_count(), 5);
1213        assert_eq!(selection.row_group_count(), 1);
1214
1215        // Verify remaining row group's content
1216        let row_selection = selection.get(2).unwrap();
1217        assert_eq!(row_selection.row_count(), 5); // 0..5 (5)
1218
1219        // Test popping last row group
1220        let (rg_id, row_selection) = selection.pop_first().unwrap();
1221        assert_eq!(rg_id, 2);
1222        assert_eq!(row_selection.row_count(), 5); // 0..5 (5)
1223        assert_eq!(selection.row_count(), 0);
1224        assert_eq!(selection.row_group_count(), 0);
1225        assert!(selection.is_empty());
1226
1227        // Test popping from empty selection
1228        let mut empty_selection = RowGroupSelection::from_row_ranges(vec![], row_group_size);
1229        assert!(empty_selection.pop_first().is_none());
1230        assert_eq!(empty_selection.row_count(), 0);
1231        assert_eq!(empty_selection.row_group_count(), 0);
1232        assert!(empty_selection.is_empty());
1233    }
1234
1235    #[test]
1236    fn test_remove_row_group() {
1237        let row_group_size = 100;
1238        let ranges = vec![
1239            (0, vec![5..15]), // Within [0, 100)
1240            (1, vec![5..15]), // Within [0, 100)
1241            (2, vec![0..5]),  // Within [0, 50) for last row group
1242        ];
1243        let mut selection = RowGroupSelection::from_row_ranges(ranges, row_group_size);
1244
1245        // Test removing existing row group
1246        selection.remove_row_group(1);
1247        assert_eq!(selection.row_count(), 15);
1248        assert_eq!(selection.row_group_count(), 2);
1249        assert!(!selection.contains_row_group(1));
1250
1251        // Verify remaining row groups' content
1252        let row_selection = selection.get(0).unwrap();
1253        assert_eq!(row_selection.row_count(), 10); // 5..15 (10)
1254
1255        let row_selection = selection.get(2).unwrap();
1256        assert_eq!(row_selection.row_count(), 5); // 0..5 (5)
1257
1258        // Test removing non-existent row group
1259        selection.remove_row_group(5);
1260        assert_eq!(selection.row_count(), 15);
1261        assert_eq!(selection.row_group_count(), 2);
1262
1263        // Test removing all row groups
1264        selection.remove_row_group(0);
1265        assert_eq!(selection.row_count(), 5);
1266        assert_eq!(selection.row_group_count(), 1);
1267
1268        let row_selection = selection.get(2).unwrap();
1269        assert_eq!(row_selection.row_count(), 5); // 0..5 (5)
1270
1271        selection.remove_row_group(2);
1272        assert_eq!(selection.row_count(), 0);
1273        assert_eq!(selection.row_group_count(), 0);
1274        assert!(selection.is_empty());
1275
1276        // Test removing from empty selection
1277        let mut empty_selection = RowGroupSelection::from_row_ranges(vec![], row_group_size);
1278        empty_selection.remove_row_group(0);
1279        assert_eq!(empty_selection.row_count(), 0);
1280        assert_eq!(empty_selection.row_group_count(), 0);
1281        assert!(empty_selection.is_empty());
1282    }
1283
1284    #[test]
1285    fn test_contains_row_group() {
1286        let row_group_size = 100;
1287        let ranges = vec![
1288            (0, vec![5..15]), // Within [0, 100)
1289            (1, vec![5..15]), // Within [0, 100)
1290        ];
1291        let selection = RowGroupSelection::from_row_ranges(ranges, row_group_size);
1292
1293        assert!(selection.contains_row_group(0));
1294        assert!(selection.contains_row_group(1));
1295        assert!(!selection.contains_row_group(2));
1296
1297        // Test empty selection
1298        let empty_selection = RowGroupSelection::from_row_ranges(vec![], row_group_size);
1299        assert!(!empty_selection.contains_row_group(0));
1300    }
1301
1302    #[test]
1303    fn test_concat() {
1304        let row_group_size = 100;
1305        let ranges1 = vec![
1306            (0, vec![5..15]), // Within [0, 100)
1307            (1, vec![5..15]), // Within [0, 100)
1308        ];
1309
1310        let ranges2 = vec![
1311            (2, vec![5..15]), // Within [0, 100)
1312            (3, vec![5..15]), // Within [0, 100)
1313        ];
1314
1315        let mut selection1 = RowGroupSelection::from_row_ranges(ranges1, row_group_size);
1316        let selection2 = RowGroupSelection::from_row_ranges(ranges2, row_group_size);
1317
1318        selection1.concat(&selection2);
1319        assert_eq!(selection1.row_count(), 40);
1320        assert_eq!(selection1.row_group_count(), 4);
1321    }
1322}