/// Strategy for removing duplicate rows from sorted record batches.
///
/// Input batches are fed in one at a time through [`push_batch`](Self::push_batch);
/// after the last one, [`finish`](Self::finish) returns any remaining batch.
pub trait RecordBatchDedupStrategy: Send {
    /// Pushes `batch` into the dedup strategy.
    ///
    /// Returns a batch if the strategy can ensure there are no duplicates based on
    /// the input batch.
    ///
    /// NOTE(review): `None` presumably means no output is available yet, and
    /// `metrics` is presumably updated as rows are processed — confirm against the
    /// implementations.
    fn push_batch(
        &mut self,
        batch: RecordBatch,
        metrics: &mut DedupMetrics,
    ) -> Result<Option<RecordBatch>>;

    /// Finishes the deduplication process and returns any remaining batch.
    ///
    /// Callers must ensure that [`push_batch`](Self::push_batch) has been called
    /// for all batches before calling this method.
    fn finish(&mut self, metrics: &mut DedupMetrics) -> Result<Option<RecordBatch>>;
}
Expand description
Strategy to remove duplicate rows from sorted record batches.
Required Methods§
Source · fn push_batch(
&mut self,
batch: RecordBatch,
metrics: &mut DedupMetrics,
) -> Result<Option<RecordBatch>>
fn push_batch( &mut self, batch: RecordBatch, metrics: &mut DedupMetrics, ) -> Result<Option<RecordBatch>>
Pushes a batch to the dedup strategy. Returns a batch if the strategy ensures there are no duplicates based on the input batch.
Source · fn finish(&mut self, metrics: &mut DedupMetrics) -> Result<Option<RecordBatch>>
fn finish(&mut self, metrics: &mut DedupMetrics) -> Result<Option<RecordBatch>>
Finishes the deduplication process and returns any remaining batch.
Users must ensure that `push_batch` is called for all batches before calling this method.