mito2/sst/index/fulltext_index/
creator.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::sync::Arc;
17use std::sync::atomic::AtomicUsize;
18
19use api::v1::SemanticType;
20use common_telemetry::warn;
21use datatypes::arrow::array::{Array, LargeStringArray, StringArray};
22use datatypes::arrow::datatypes::DataType;
23use datatypes::arrow::record_batch::RecordBatch;
24use datatypes::schema::{FulltextAnalyzer, FulltextBackend};
25use index::fulltext_index::create::{
26    BloomFilterFulltextIndexCreator, FulltextIndexCreator, TantivyFulltextIndexCreator,
27};
28use index::fulltext_index::{Analyzer, Config};
29use index::target::IndexTarget;
30use puffin::blob_metadata::CompressionCodec;
31use puffin::puffin_manager::PutOptions;
32use snafu::{ResultExt, ensure};
33use store_api::metadata::RegionMetadataRef;
34use store_api::storage::{ColumnId, ConcreteDataType, FileId, RegionId};
35
36use crate::error::{
37    CastVectorSnafu, ComputeArrowSnafu, CreateFulltextCreatorSnafu, DataTypeMismatchSnafu,
38    FulltextFinishSnafu, FulltextPushTextSnafu, IndexOptionsSnafu, OperateAbortedIndexSnafu,
39    Result,
40};
41use crate::read::Batch;
42use crate::sst::index::TYPE_FULLTEXT_INDEX;
43use crate::sst::index::fulltext_index::{INDEX_BLOB_TYPE_BLOOM, INDEX_BLOB_TYPE_TANTIVY};
44use crate::sst::index::intermediate::{
45    IntermediateLocation, IntermediateManager, TempFileProvider,
46};
47use crate::sst::index::puffin_manager::SstPuffinWriter;
48use crate::sst::index::statistics::{ByteCount, RowCount, Statistics};
49
50/// `FulltextIndexer` is responsible for creating fulltext indexes for SST files.
51pub struct FulltextIndexer {
52    /// Creators for each column.
53    creators: HashMap<ColumnId, SingleCreator>,
54    /// Whether the index creation was aborted.
55    aborted: bool,
56    /// Statistics of index creation.
57    stats: Statistics,
58}
59
60impl FulltextIndexer {
61    /// Creates a new `FulltextIndexer`.
62    pub async fn new(
63        region_id: &RegionId,
64        sst_file_id: &FileId,
65        intermediate_manager: &IntermediateManager,
66        metadata: &RegionMetadataRef,
67        compress: bool,
68        mem_limit: usize,
69    ) -> Result<Option<Self>> {
70        let mut creators = HashMap::new();
71
72        for column in &metadata.column_metadatas {
73            // Tag columns don't support fulltext index now.
74            // If we need to support fulltext index for tag columns, we also need to parse
75            // the codec and handle sparse encoding for flat format specially.
76            if column.semantic_type == SemanticType::Tag {
77                common_telemetry::debug!(
78                    "Skip creating fulltext index for tag column {}",
79                    column.column_schema.name
80                );
81                continue;
82            }
83
84            let options = column
85                .column_schema
86                .fulltext_options()
87                .context(IndexOptionsSnafu {
88                    column_name: &column.column_schema.name,
89                })?;
90
91            // Relax the type constraint here as many types can be casted to string.
92
93            let options = match options {
94                Some(options) if options.enable => options,
95                _ => continue,
96            };
97
98            let column_id = column.column_id;
99            let intm_path = intermediate_manager.fulltext_path(region_id, sst_file_id, column_id);
100
101            let config = Config {
102                analyzer: match options.analyzer {
103                    FulltextAnalyzer::English => Analyzer::English,
104                    FulltextAnalyzer::Chinese => Analyzer::Chinese,
105                },
106                case_sensitive: options.case_sensitive,
107            };
108
109            let inner = match options.backend {
110                FulltextBackend::Tantivy => {
111                    let creator = TantivyFulltextIndexCreator::new(&intm_path, config, mem_limit)
112                        .await
113                        .context(CreateFulltextCreatorSnafu)?;
114                    AltFulltextCreator::Tantivy(creator)
115                }
116                FulltextBackend::Bloom => {
117                    let temp_file_provider = Arc::new(TempFileProvider::new(
118                        IntermediateLocation::new(&metadata.region_id, sst_file_id),
119                        intermediate_manager.clone(),
120                    ));
121                    let global_memory_usage = Arc::new(AtomicUsize::new(0));
122                    let creator = BloomFilterFulltextIndexCreator::new(
123                        config,
124                        options.granularity as _,
125                        options.false_positive_rate(),
126                        temp_file_provider,
127                        global_memory_usage,
128                        Some(mem_limit),
129                    );
130                    AltFulltextCreator::Bloom(creator)
131                }
132            };
133
134            creators.insert(
135                column_id,
136                SingleCreator {
137                    column_id,
138                    column_name: column.column_schema.name.clone(),
139                    inner,
140                    compress,
141                },
142            );
143        }
144
145        Ok((!creators.is_empty()).then(move || Self {
146            creators,
147            aborted: false,
148            stats: Statistics::new(TYPE_FULLTEXT_INDEX),
149        }))
150    }
151
152    /// Updates the index with the given batch.
153    pub async fn update(&mut self, batch: &mut Batch) -> Result<()> {
154        ensure!(!self.aborted, OperateAbortedIndexSnafu);
155
156        if let Err(update_err) = self.do_update(batch).await {
157            if let Err(err) = self.do_abort().await {
158                if cfg!(any(test, feature = "test")) {
159                    panic!("Failed to abort index creator, err: {err}");
160                } else {
161                    warn!(err; "Failed to abort index creator");
162                }
163            }
164            return Err(update_err);
165        }
166
167        Ok(())
168    }
169
170    /// Updates the fulltext index with the given flat format RecordBatch.
171    pub async fn update_flat(&mut self, batch: &RecordBatch) -> Result<()> {
172        ensure!(!self.aborted, OperateAbortedIndexSnafu);
173
174        if batch.num_rows() == 0 {
175            return Ok(());
176        }
177
178        if let Err(update_err) = self.do_update_flat(batch).await {
179            if let Err(err) = self.do_abort().await {
180                if cfg!(any(test, feature = "test")) {
181                    panic!("Failed to abort index creator, err: {err}");
182                } else {
183                    warn!(err; "Failed to abort index creator");
184                }
185            }
186            return Err(update_err);
187        }
188
189        Ok(())
190    }
191
192    /// Finalizes the index creation.
193    pub async fn finish(
194        &mut self,
195        puffin_writer: &mut SstPuffinWriter,
196    ) -> Result<(RowCount, ByteCount)> {
197        ensure!(!self.aborted, OperateAbortedIndexSnafu);
198
199        match self.do_finish(puffin_writer).await {
200            Ok(()) => Ok((self.stats.row_count(), self.stats.byte_count())),
201            Err(finish_err) => {
202                if let Err(err) = self.do_abort().await {
203                    if cfg!(any(test, feature = "test")) {
204                        panic!("Failed to abort index creator, err: {err}");
205                    } else {
206                        warn!(err; "Failed to abort index creator");
207                    }
208                }
209                Err(finish_err)
210            }
211        }
212    }
213
214    /// Aborts the index creation.
215    pub async fn abort(&mut self) -> Result<()> {
216        if self.aborted {
217            return Ok(());
218        }
219
220        self.do_abort().await
221    }
222
223    /// Returns the memory usage of the index creator.
224    pub fn memory_usage(&self) -> usize {
225        self.creators.values().map(|c| c.inner.memory_usage()).sum()
226    }
227
228    /// Returns IDs of columns that the creator is responsible for.
229    pub fn column_ids(&self) -> impl Iterator<Item = ColumnId> + '_ {
230        self.creators.keys().copied()
231    }
232}
233
234impl FulltextIndexer {
235    async fn do_update(&mut self, batch: &mut Batch) -> Result<()> {
236        let mut guard = self.stats.record_update();
237        guard.inc_row_count(batch.num_rows());
238
239        for creator in self.creators.values_mut() {
240            creator.update(batch).await?;
241        }
242
243        Ok(())
244    }
245
246    async fn do_update_flat(&mut self, batch: &RecordBatch) -> Result<()> {
247        let mut guard = self.stats.record_update();
248        guard.inc_row_count(batch.num_rows());
249
250        for creator in self.creators.values_mut() {
251            creator.update_flat(batch).await?;
252        }
253
254        Ok(())
255    }
256
257    async fn do_finish(&mut self, puffin_writer: &mut SstPuffinWriter) -> Result<()> {
258        let mut guard = self.stats.record_finish();
259
260        let mut written_bytes = 0;
261        for creator in self.creators.values_mut() {
262            written_bytes += creator.finish(puffin_writer).await?;
263        }
264
265        guard.inc_byte_count(written_bytes);
266        Ok(())
267    }
268
269    async fn do_abort(&mut self) -> Result<()> {
270        let _guard = self.stats.record_cleanup();
271
272        self.aborted = true;
273
274        for (_, mut creator) in self.creators.drain() {
275            creator.abort().await?;
276        }
277
278        Ok(())
279    }
280}
281
282/// `SingleCreator` is a creator for a single column.
283struct SingleCreator {
284    /// Column ID.
285    column_id: ColumnId,
286    /// Column name.
287    column_name: String,
288    /// Inner creator.
289    inner: AltFulltextCreator,
290    /// Whether the index should be compressed.
291    compress: bool,
292}
293
294impl SingleCreator {
295    async fn update(&mut self, batch: &mut Batch) -> Result<()> {
296        let text_column = batch
297            .fields()
298            .iter()
299            .find(|c| c.column_id == self.column_id);
300        match text_column {
301            Some(column) => {
302                let data = column
303                    .data
304                    .cast(&ConcreteDataType::string_datatype())
305                    .context(CastVectorSnafu {
306                        from: column.data.data_type(),
307                        to: ConcreteDataType::string_datatype(),
308                    })?;
309
310                for i in 0..batch.num_rows() {
311                    let data = data.get_ref(i);
312                    let text = data
313                        .try_into_string()
314                        .context(DataTypeMismatchSnafu)?
315                        .unwrap_or_default();
316                    self.inner.push_text(text).await?;
317                }
318            }
319            _ => {
320                // If the column is not found in the batch, push empty text.
321                // Ensure that the number of texts pushed is the same as the number of rows in the SST,
322                // so that the texts are aligned with the row ids.
323                for _ in 0..batch.num_rows() {
324                    self.inner.push_text("").await?;
325                }
326            }
327        }
328
329        Ok(())
330    }
331
332    async fn update_flat(&mut self, batch: &RecordBatch) -> Result<()> {
333        // Find the column in the RecordBatch by name
334        if let Some(column_array) = batch.column_by_name(&self.column_name) {
335            // Convert Arrow array to string array.
336            // TODO(yingwen): Use Utf8View later if possible.
337            match column_array.data_type() {
338                DataType::Utf8 => {
339                    let string_array = column_array.as_any().downcast_ref::<StringArray>().unwrap();
340                    for text_opt in string_array.iter() {
341                        let text = text_opt.unwrap_or_default();
342                        self.inner.push_text(text).await?;
343                    }
344                }
345                DataType::LargeUtf8 => {
346                    let large_string_array = column_array
347                        .as_any()
348                        .downcast_ref::<LargeStringArray>()
349                        .unwrap();
350                    for text_opt in large_string_array.iter() {
351                        let text = text_opt.unwrap_or_default();
352                        self.inner.push_text(text).await?;
353                    }
354                }
355                _ => {
356                    // For other types, cast to Utf8 as before
357                    let array = datatypes::arrow::compute::cast(column_array, &DataType::Utf8)
358                        .context(ComputeArrowSnafu)?;
359                    let string_array = array.as_any().downcast_ref::<StringArray>().unwrap();
360                    for text_opt in string_array.iter() {
361                        let text = text_opt.unwrap_or_default();
362                        self.inner.push_text(text).await?;
363                    }
364                }
365            }
366        } else {
367            // If the column is not found in the batch, push empty text.
368            // Ensure that the number of texts pushed is the same as the number of rows in the SST,
369            // so that the texts are aligned with the row ids.
370            for _ in 0..batch.num_rows() {
371                self.inner.push_text("").await?;
372            }
373        }
374
375        Ok(())
376    }
377
378    async fn finish(&mut self, puffin_writer: &mut SstPuffinWriter) -> Result<ByteCount> {
379        let options = PutOptions {
380            compression: self.compress.then_some(CompressionCodec::Zstd),
381        };
382        self.inner
383            .finish(puffin_writer, &self.column_id, options)
384            .await
385    }
386
387    async fn abort(&mut self) -> Result<()> {
388        self.inner.abort(&self.column_id).await;
389        Ok(())
390    }
391}
392
393#[allow(dead_code, clippy::large_enum_variant)]
394/// `AltFulltextCreator` is an alternative fulltext index creator that can be either Tantivy or BloomFilter.
395enum AltFulltextCreator {
396    Tantivy(TantivyFulltextIndexCreator),
397    Bloom(BloomFilterFulltextIndexCreator),
398}
399
400impl AltFulltextCreator {
401    async fn push_text(&mut self, text: &str) -> Result<()> {
402        match self {
403            Self::Tantivy(creator) => creator.push_text(text).await.context(FulltextPushTextSnafu),
404            Self::Bloom(creator) => creator.push_text(text).await.context(FulltextPushTextSnafu),
405        }
406    }
407
408    fn memory_usage(&self) -> usize {
409        match self {
410            Self::Tantivy(creator) => creator.memory_usage(),
411            Self::Bloom(creator) => creator.memory_usage(),
412        }
413    }
414
415    async fn finish(
416        &mut self,
417        puffin_writer: &mut SstPuffinWriter,
418        column_id: &ColumnId,
419        put_options: PutOptions,
420    ) -> Result<ByteCount> {
421        match self {
422            Self::Tantivy(creator) => {
423                let blob_key = format!(
424                    "{INDEX_BLOB_TYPE_TANTIVY}-{}",
425                    IndexTarget::ColumnId(*column_id)
426                );
427                creator
428                    .finish(puffin_writer, &blob_key, put_options)
429                    .await
430                    .context(FulltextFinishSnafu)
431            }
432            Self::Bloom(creator) => {
433                let blob_key = format!(
434                    "{INDEX_BLOB_TYPE_BLOOM}-{}",
435                    IndexTarget::ColumnId(*column_id)
436                );
437                creator
438                    .finish(puffin_writer, &blob_key, put_options)
439                    .await
440                    .context(FulltextFinishSnafu)
441            }
442        }
443    }
444
445    async fn abort(&mut self, column_id: &ColumnId) {
446        match self {
447            Self::Tantivy(creator) => {
448                if let Err(err) = creator.abort().await {
449                    warn!(err; "Failed to abort the fulltext index creator in the Tantivy flavor, col_id: {:?}", column_id);
450                }
451            }
452            Self::Bloom(creator) => {
453                if let Err(err) = creator.abort().await {
454                    warn!(err; "Failed to abort the fulltext index creator in the Bloom Filter flavor, col_id: {:?}", column_id);
455                }
456            }
457        }
458    }
459}
460
461#[cfg(test)]
462mod tests {
463    use std::collections::{BTreeMap, BTreeSet};
464    use std::sync::Arc;
465
466    use api::v1::SemanticType;
467    use common_base::BitVec;
468    use datatypes::data_type::DataType;
469    use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextOptions};
470    use datatypes::vectors::{UInt8Vector, UInt64Vector};
471    use futures::FutureExt;
472    use futures::future::BoxFuture;
473    use index::fulltext_index::search::RowId;
474    use object_store::ObjectStore;
475    use object_store::services::Memory;
476    use puffin::puffin_manager::{PuffinManager, PuffinWriter};
477    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder, RegionMetadataRef};
478    use store_api::region_request::PathType;
479    use store_api::storage::{ConcreteDataType, FileId, RegionId};
480
481    use super::*;
482    use crate::access_layer::RegionFilePathFactory;
483    use crate::read::{Batch, BatchColumn};
484    use crate::sst::file::RegionFileId;
485    use crate::sst::index::fulltext_index::applier::FulltextIndexApplier;
486    use crate::sst::index::fulltext_index::applier::builder::{
487        FulltextQuery, FulltextRequest, FulltextTerm,
488    };
489    use crate::sst::index::puffin_manager::PuffinManagerFactory;
490
491    fn mock_object_store() -> ObjectStore {
492        ObjectStore::new(Memory::default()).unwrap().finish()
493    }
494
495    async fn new_intm_mgr(path: impl AsRef<str>) -> IntermediateManager {
496        IntermediateManager::init_fs(path).await.unwrap()
497    }
498
499    fn mock_region_metadata(backend: FulltextBackend) -> RegionMetadataRef {
500        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 2));
501        builder
502            .push_column_metadata(ColumnMetadata {
503                column_schema: ColumnSchema::new(
504                    "text_english_case_sensitive",
505                    ConcreteDataType::string_datatype(),
506                    true,
507                )
508                .with_fulltext_options(FulltextOptions::new_unchecked(
509                    true,
510                    FulltextAnalyzer::English,
511                    true,
512                    backend.clone(),
513                    1,
514                    0.01,
515                ))
516                .unwrap(),
517                semantic_type: SemanticType::Field,
518                column_id: 1,
519            })
520            .push_column_metadata(ColumnMetadata {
521                column_schema: ColumnSchema::new(
522                    "text_english_case_insensitive",
523                    ConcreteDataType::string_datatype(),
524                    true,
525                )
526                .with_fulltext_options(FulltextOptions::new_unchecked(
527                    true,
528                    FulltextAnalyzer::English,
529                    false,
530                    backend.clone(),
531                    1,
532                    0.01,
533                ))
534                .unwrap(),
535                semantic_type: SemanticType::Field,
536                column_id: 2,
537            })
538            .push_column_metadata(ColumnMetadata {
539                column_schema: ColumnSchema::new(
540                    "text_chinese",
541                    ConcreteDataType::string_datatype(),
542                    true,
543                )
544                .with_fulltext_options(FulltextOptions::new_unchecked(
545                    true,
546                    FulltextAnalyzer::Chinese,
547                    false,
548                    backend.clone(),
549                    1,
550                    0.01,
551                ))
552                .unwrap(),
553                semantic_type: SemanticType::Field,
554                column_id: 3,
555            })
556            .push_column_metadata(ColumnMetadata {
557                column_schema: ColumnSchema::new(
558                    "ts",
559                    ConcreteDataType::timestamp_millisecond_datatype(),
560                    false,
561                ),
562                semantic_type: SemanticType::Timestamp,
563                column_id: 4,
564            });
565
566        Arc::new(builder.build().unwrap())
567    }
568
569    fn new_batch(
570        rows: &[(
571            Option<&str>, // text_english_case_sensitive
572            Option<&str>, // text_english_case_insensitive
573            Option<&str>, // text_chinese
574        )],
575    ) -> Batch {
576        let mut vec_english_sensitive =
577            ConcreteDataType::string_datatype().create_mutable_vector(0);
578        let mut vec_english_insensitive =
579            ConcreteDataType::string_datatype().create_mutable_vector(0);
580        let mut vec_chinese = ConcreteDataType::string_datatype().create_mutable_vector(0);
581
582        for (text_english_case_sensitive, text_english_case_insensitive, text_chinese) in rows {
583            match text_english_case_sensitive {
584                Some(s) => vec_english_sensitive.push_value_ref(&(*s).into()),
585                None => vec_english_sensitive.push_null(),
586            }
587            match text_english_case_insensitive {
588                Some(s) => vec_english_insensitive.push_value_ref(&(*s).into()),
589                None => vec_english_insensitive.push_null(),
590            }
591            match text_chinese {
592                Some(s) => vec_chinese.push_value_ref(&(*s).into()),
593                None => vec_chinese.push_null(),
594            }
595        }
596
597        let num_rows = vec_english_sensitive.len();
598        Batch::new(
599            vec![],
600            Arc::new(UInt64Vector::from_iter_values(
601                (0..num_rows).map(|n| n as u64),
602            )),
603            Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n(
604                0, num_rows,
605            ))),
606            Arc::new(UInt8Vector::from_iter_values(std::iter::repeat_n(
607                1, num_rows,
608            ))),
609            vec![
610                BatchColumn {
611                    column_id: 1,
612                    data: vec_english_sensitive.to_vector(),
613                },
614                BatchColumn {
615                    column_id: 2,
616                    data: vec_english_insensitive.to_vector(),
617                },
618                BatchColumn {
619                    column_id: 3,
620                    data: vec_chinese.to_vector(),
621                },
622            ],
623        )
624        .unwrap()
625    }
626
627    /// Applier factory that can handle both queries and terms.
628    ///
629    /// It builds a fulltext index with the given data rows, and returns a function
630    /// that can handle both queries and terms in a single request.
631    ///
632    /// The function takes two parameters:
633    /// - `queries`: A list of (ColumnId, query_string) pairs for fulltext queries
634    /// - `terms`: A list of (ColumnId, [(bool, String)]) for fulltext terms, where bool indicates if term is lowercased
635    async fn build_fulltext_applier_factory(
636        prefix: &str,
637        backend: FulltextBackend,
638        rows: &[(
639            Option<&str>, // text_english_case_sensitive
640            Option<&str>, // text_english_case_insensitive
641            Option<&str>, // text_chinese
642        )],
643    ) -> impl Fn(
644        Vec<(ColumnId, &str)>,
645        Vec<(ColumnId, Vec<(bool, &str)>)>,
646        Option<BitVec>,
647    ) -> BoxFuture<'static, Option<BTreeSet<RowId>>> {
648        let (d, factory) = PuffinManagerFactory::new_for_test_async(prefix).await;
649        let table_dir = "table0".to_string();
650        let sst_file_id = FileId::random();
651        let object_store = mock_object_store();
652        let region_metadata = mock_region_metadata(backend.clone());
653        let intm_mgr = new_intm_mgr(d.path().to_string_lossy()).await;
654
655        let mut indexer = FulltextIndexer::new(
656            &region_metadata.region_id,
657            &sst_file_id,
658            &intm_mgr,
659            &region_metadata,
660            true,
661            1024,
662        )
663        .await
664        .unwrap()
665        .unwrap();
666
667        let mut batch = new_batch(rows);
668        indexer.update(&mut batch).await.unwrap();
669
670        let puffin_manager = factory.build(
671            object_store.clone(),
672            RegionFilePathFactory::new(table_dir.clone(), PathType::Bare),
673        );
674        let region_file_id = RegionFileId::new(region_metadata.region_id, sst_file_id);
675        let mut writer = puffin_manager.writer(&region_file_id).await.unwrap();
676        let _ = indexer.finish(&mut writer).await.unwrap();
677        writer.finish().await.unwrap();
678
679        move |queries: Vec<(ColumnId, &str)>,
680              terms_requests: Vec<(ColumnId, Vec<(bool, &str)>)>,
681              coarse_mask: Option<BitVec>| {
682            let _d = &d;
683            let table_dir = table_dir.clone();
684            let object_store = object_store.clone();
685            let factory = factory.clone();
686
687            let mut requests: BTreeMap<ColumnId, FulltextRequest> = BTreeMap::new();
688
689            // Add queries
690            for (column_id, query) in queries {
691                requests
692                    .entry(column_id)
693                    .or_default()
694                    .queries
695                    .push(FulltextQuery(query.to_string()));
696            }
697
698            // Add terms
699            for (column_id, terms) in terms_requests {
700                let fulltext_terms = terms
701                    .into_iter()
702                    .map(|(col_lowered, term)| FulltextTerm {
703                        col_lowered,
704                        term: term.to_string(),
705                    })
706                    .collect::<Vec<_>>();
707
708                requests
709                    .entry(column_id)
710                    .or_default()
711                    .terms
712                    .extend(fulltext_terms);
713            }
714
715            let applier = FulltextIndexApplier::new(
716                table_dir,
717                PathType::Bare,
718                object_store,
719                requests,
720                factory,
721            );
722
723            let backend = backend.clone();
724            async move {
725                match backend {
726                    FulltextBackend::Tantivy => {
727                        applier.apply_fine(region_file_id, None).await.unwrap()
728                    }
729                    FulltextBackend::Bloom => {
730                        let coarse_mask = coarse_mask.unwrap_or_default();
731                        let row_groups = (0..coarse_mask.len()).map(|i| (1, coarse_mask[i]));
732                        // row group id == row id
733                        let resp = applier
734                            .apply_coarse(region_file_id, None, row_groups)
735                            .await
736                            .unwrap();
737                        resp.map(|r| {
738                            r.into_iter()
739                                .filter(|(_, ranges)| !ranges.is_empty())
740                                .map(|(row_group_id, _)| row_group_id as RowId)
741                                .collect()
742                        })
743                    }
744                }
745            }
746            .boxed()
747        }
748    }
749
750    fn rows(row_ids: impl IntoIterator<Item = RowId>) -> BTreeSet<RowId> {
751        row_ids.into_iter().collect()
752    }
753
754    #[tokio::test]
755    async fn test_fulltext_index_basic_case_sensitive_tantivy() {
756        let applier_factory = build_fulltext_applier_factory(
757            "test_fulltext_index_basic_case_sensitive_tantivy_",
758            FulltextBackend::Tantivy,
759            &[
760                (Some("hello"), None, None),
761                (Some("world"), None, None),
762                (None, None, None),
763                (Some("Hello, World"), None, None),
764            ],
765        )
766        .await;
767
768        let row_ids = applier_factory(vec![(1, "hello")], vec![], None).await;
769        assert_eq!(row_ids, Some(rows([0])));
770
771        let row_ids = applier_factory(vec![(1, "world")], vec![], None).await;
772        assert_eq!(row_ids, Some(rows([1])));
773
774        let row_ids = applier_factory(vec![(1, "Hello")], vec![], None).await;
775        assert_eq!(row_ids, Some(rows([3])));
776
777        let row_ids = applier_factory(vec![(1, "World")], vec![], None).await;
778        assert_eq!(row_ids, Some(rows([3])));
779
780        let row_ids = applier_factory(vec![], vec![(1, vec![(false, "hello")])], None).await;
781        assert_eq!(row_ids, Some(rows([0])));
782
783        let row_ids = applier_factory(vec![], vec![(1, vec![(true, "hello")])], None).await;
784        assert_eq!(row_ids, None);
785
786        let row_ids = applier_factory(vec![], vec![(1, vec![(false, "world")])], None).await;
787        assert_eq!(row_ids, Some(rows([1])));
788
789        let row_ids = applier_factory(vec![], vec![(1, vec![(true, "world")])], None).await;
790        assert_eq!(row_ids, None);
791
792        let row_ids = applier_factory(vec![], vec![(1, vec![(false, "Hello")])], None).await;
793        assert_eq!(row_ids, Some(rows([3])));
794
795        let row_ids = applier_factory(vec![], vec![(1, vec![(true, "Hello")])], None).await;
796        assert_eq!(row_ids, None);
797
798        let row_ids = applier_factory(vec![], vec![(1, vec![(false, "Hello, World")])], None).await;
799        assert_eq!(row_ids, Some(rows([3])));
800
801        let row_ids = applier_factory(vec![], vec![(1, vec![(true, "Hello, World")])], None).await;
802        assert_eq!(row_ids, None);
803    }
804
805    #[tokio::test]
806    async fn test_fulltext_index_basic_case_sensitive_bloom() {
807        let applier_factory = build_fulltext_applier_factory(
808            "test_fulltext_index_basic_case_sensitive_bloom_",
809            FulltextBackend::Bloom,
810            &[
811                (Some("hello"), None, None),
812                (Some("world"), None, None),
813                (None, None, None),
814                (Some("Hello, World"), None, None),
815            ],
816        )
817        .await;
818
819        let row_ids = applier_factory(
820            vec![],
821            vec![(1, vec![(false, "hello")])],
822            Some(BitVec::from_slice(&[0b1111])),
823        )
824        .await;
825        assert_eq!(row_ids, Some(rows([0])));
826
827        let row_ids = applier_factory(
828            vec![],
829            vec![(1, vec![(false, "hello")])],
830            Some(BitVec::from_slice(&[0b1110])), // row 0 is filtered out
831        )
832        .await;
833        assert_eq!(row_ids, Some(rows([])));
834
835        let row_ids = applier_factory(
836            vec![],
837            vec![(1, vec![(true, "hello")])],
838            Some(BitVec::from_slice(&[0b1111])),
839        )
840        .await;
841        assert_eq!(row_ids, None);
842
843        let row_ids = applier_factory(
844            vec![],
845            vec![(1, vec![(false, "world")])],
846            Some(BitVec::from_slice(&[0b1111])),
847        )
848        .await;
849        assert_eq!(row_ids, Some(rows([1])));
850
851        let row_ids = applier_factory(
852            vec![],
853            vec![(1, vec![(false, "world")])],
854            Some(BitVec::from_slice(&[0b1101])), // row 1 is filtered out
855        )
856        .await;
857        assert_eq!(row_ids, Some(rows([])));
858
859        let row_ids = applier_factory(
860            vec![],
861            vec![(1, vec![(true, "world")])],
862            Some(BitVec::from_slice(&[0b1111])),
863        )
864        .await;
865        assert_eq!(row_ids, None);
866
867        let row_ids = applier_factory(
868            vec![],
869            vec![(1, vec![(false, "Hello")])],
870            Some(BitVec::from_slice(&[0b1111])),
871        )
872        .await;
873        assert_eq!(row_ids, Some(rows([3])));
874
875        let row_ids = applier_factory(
876            vec![],
877            vec![(1, vec![(false, "Hello")])],
878            Some(BitVec::from_slice(&[0b0111])), // row 3 is filtered out
879        )
880        .await;
881        assert_eq!(row_ids, Some(rows([])));
882
883        let row_ids = applier_factory(
884            vec![],
885            vec![(1, vec![(true, "Hello")])],
886            Some(BitVec::from_slice(&[0b1111])),
887        )
888        .await;
889        assert_eq!(row_ids, None);
890
891        let row_ids = applier_factory(
892            vec![],
893            vec![(1, vec![(false, "Hello, World")])],
894            Some(BitVec::from_slice(&[0b1111])),
895        )
896        .await;
897        assert_eq!(row_ids, Some(rows([3])));
898
899        let row_ids = applier_factory(
900            vec![],
901            vec![(1, vec![(false, "Hello, World")])],
902            Some(BitVec::from_slice(&[0b0111])), // row 3 is filtered out
903        )
904        .await;
905        assert_eq!(row_ids, Some(rows([])));
906
907        let row_ids = applier_factory(
908            vec![],
909            vec![(1, vec![(true, "Hello, World")])],
910            Some(BitVec::from_slice(&[0b1111])),
911        )
912        .await;
913        assert_eq!(row_ids, None);
914    }
915
916    #[tokio::test]
917    async fn test_fulltext_index_basic_case_insensitive_tantivy() {
918        let applier_factory = build_fulltext_applier_factory(
919            "test_fulltext_index_basic_case_insensitive_tantivy_",
920            FulltextBackend::Tantivy,
921            &[
922                (None, Some("hello"), None),
923                (None, None, None),
924                (None, Some("world"), None),
925                (None, Some("Hello, World"), None),
926            ],
927        )
928        .await;
929
930        let row_ids = applier_factory(vec![(2, "hello")], vec![], None).await;
931        assert_eq!(row_ids, Some(rows([0, 3])));
932
933        let row_ids = applier_factory(vec![(2, "world")], vec![], None).await;
934        assert_eq!(row_ids, Some(rows([2, 3])));
935
936        let row_ids = applier_factory(vec![(2, "Hello")], vec![], None).await;
937        assert_eq!(row_ids, Some(rows([0, 3])));
938
939        let row_ids = applier_factory(vec![(2, "World")], vec![], None).await;
940        assert_eq!(row_ids, Some(rows([2, 3])));
941
942        let row_ids = applier_factory(vec![], vec![(2, vec![(false, "hello")])], None).await;
943        assert_eq!(row_ids, Some(rows([0, 3])));
944
945        let row_ids = applier_factory(vec![], vec![(2, vec![(true, "hello")])], None).await;
946        assert_eq!(row_ids, Some(rows([0, 3])));
947
948        let row_ids = applier_factory(vec![], vec![(2, vec![(false, "world")])], None).await;
949        assert_eq!(row_ids, Some(rows([2, 3])));
950
951        let row_ids = applier_factory(vec![], vec![(2, vec![(true, "world")])], None).await;
952        assert_eq!(row_ids, Some(rows([2, 3])));
953
954        let row_ids = applier_factory(vec![], vec![(2, vec![(false, "Hello")])], None).await;
955        assert_eq!(row_ids, Some(rows([0, 3])));
956
957        let row_ids = applier_factory(vec![], vec![(2, vec![(true, "Hello")])], None).await;
958        assert_eq!(row_ids, Some(rows([0, 3])));
959
960        let row_ids = applier_factory(vec![], vec![(2, vec![(false, "World")])], None).await;
961        assert_eq!(row_ids, Some(rows([2, 3])));
962
963        let row_ids = applier_factory(vec![], vec![(2, vec![(true, "World")])], None).await;
964        assert_eq!(row_ids, Some(rows([2, 3])));
965    }
966
967    #[tokio::test]
968    async fn test_fulltext_index_basic_case_insensitive_bloom() {
969        let applier_factory = build_fulltext_applier_factory(
970            "test_fulltext_index_basic_case_insensitive_bloom_",
971            FulltextBackend::Bloom,
972            &[
973                (None, Some("hello"), None),
974                (None, None, None),
975                (None, Some("world"), None),
976                (None, Some("Hello, World"), None),
977            ],
978        )
979        .await;
980
981        let row_ids = applier_factory(
982            vec![],
983            vec![(2, vec![(false, "hello")])],
984            Some(BitVec::from_slice(&[0b1111])),
985        )
986        .await;
987        assert_eq!(row_ids, Some(rows([0, 3])));
988
989        let row_ids = applier_factory(
990            vec![],
991            vec![(2, vec![(false, "hello")])],
992            Some(BitVec::from_slice(&[0b1110])), // row 0 is filtered out
993        )
994        .await;
995        assert_eq!(row_ids, Some(rows([3])));
996
997        let row_ids = applier_factory(
998            vec![],
999            vec![(2, vec![(true, "hello")])],
1000            Some(BitVec::from_slice(&[0b1111])),
1001        )
1002        .await;
1003        assert_eq!(row_ids, Some(rows([0, 3])));
1004
1005        let row_ids = applier_factory(
1006            vec![],
1007            vec![(2, vec![(true, "hello")])],
1008            Some(BitVec::from_slice(&[0b1110])), // row 0 is filtered out
1009        )
1010        .await;
1011        assert_eq!(row_ids, Some(rows([3])));
1012
1013        let row_ids = applier_factory(
1014            vec![],
1015            vec![(2, vec![(false, "world")])],
1016            Some(BitVec::from_slice(&[0b1111])),
1017        )
1018        .await;
1019        assert_eq!(row_ids, Some(rows([2, 3])));
1020
1021        let row_ids = applier_factory(
1022            vec![],
1023            vec![(2, vec![(false, "world")])],
1024            Some(BitVec::from_slice(&[0b1011])), // row 2 is filtered out
1025        )
1026        .await;
1027        assert_eq!(row_ids, Some(rows([3])));
1028
1029        let row_ids = applier_factory(
1030            vec![],
1031            vec![(2, vec![(true, "world")])],
1032            Some(BitVec::from_slice(&[0b1111])),
1033        )
1034        .await;
1035        assert_eq!(row_ids, Some(rows([2, 3])));
1036
1037        let row_ids = applier_factory(
1038            vec![],
1039            vec![(2, vec![(true, "world")])],
1040            Some(BitVec::from_slice(&[0b1011])), // row 2 is filtered out
1041        )
1042        .await;
1043        assert_eq!(row_ids, Some(rows([3])));
1044
1045        let row_ids = applier_factory(
1046            vec![],
1047            vec![(2, vec![(false, "Hello")])],
1048            Some(BitVec::from_slice(&[0b1111])),
1049        )
1050        .await;
1051        assert_eq!(row_ids, Some(rows([0, 3])));
1052
1053        let row_ids = applier_factory(
1054            vec![],
1055            vec![(2, vec![(false, "Hello")])],
1056            Some(BitVec::from_slice(&[0b0111])), // row 3 is filtered out
1057        )
1058        .await;
1059        assert_eq!(row_ids, Some(rows([0])));
1060
1061        let row_ids = applier_factory(
1062            vec![],
1063            vec![(2, vec![(true, "Hello")])],
1064            Some(BitVec::from_slice(&[0b1111])),
1065        )
1066        .await;
1067        assert_eq!(row_ids, Some(rows([0, 3])));
1068
1069        let row_ids = applier_factory(
1070            vec![],
1071            vec![(2, vec![(true, "Hello")])],
1072            Some(BitVec::from_slice(&[0b1110])), // row 0 is filtered out
1073        )
1074        .await;
1075        assert_eq!(row_ids, Some(rows([3])));
1076
1077        let row_ids = applier_factory(
1078            vec![],
1079            vec![(2, vec![(false, "World")])],
1080            Some(BitVec::from_slice(&[0b1111])),
1081        )
1082        .await;
1083        assert_eq!(row_ids, Some(rows([2, 3])));
1084
1085        let row_ids = applier_factory(
1086            vec![],
1087            vec![(2, vec![(false, "World")])],
1088            Some(BitVec::from_slice(&[0b0111])), // row 3 is filtered out
1089        )
1090        .await;
1091        assert_eq!(row_ids, Some(rows([2])));
1092
1093        let row_ids = applier_factory(
1094            vec![],
1095            vec![(2, vec![(true, "World")])],
1096            Some(BitVec::from_slice(&[0b1111])),
1097        )
1098        .await;
1099        assert_eq!(row_ids, Some(rows([2, 3])));
1100
1101        let row_ids = applier_factory(
1102            vec![],
1103            vec![(2, vec![(true, "World")])],
1104            Some(BitVec::from_slice(&[0b1011])), // row 2 is filtered out
1105        )
1106        .await;
1107        assert_eq!(row_ids, Some(rows([3])));
1108    }
1109
1110    #[tokio::test]
1111    async fn test_fulltext_index_basic_chinese_tantivy() {
1112        let applier_factory = build_fulltext_applier_factory(
1113            "test_fulltext_index_basic_chinese_tantivy_",
1114            FulltextBackend::Tantivy,
1115            &[
1116                (None, None, Some("你好")),
1117                (None, None, None),
1118                (None, None, Some("世界")),
1119                (None, None, Some("你好,世界")),
1120            ],
1121        )
1122        .await;
1123
1124        let row_ids = applier_factory(vec![(3, "你好")], vec![], None).await;
1125        assert_eq!(row_ids, Some(rows([0, 3])));
1126
1127        let row_ids = applier_factory(vec![(3, "世界")], vec![], None).await;
1128        assert_eq!(row_ids, Some(rows([2, 3])));
1129
1130        let row_ids = applier_factory(vec![], vec![(3, vec![(false, "你好")])], None).await;
1131        assert_eq!(row_ids, Some(rows([0, 3])));
1132
1133        let row_ids = applier_factory(vec![], vec![(3, vec![(false, "世界")])], None).await;
1134        assert_eq!(row_ids, Some(rows([2, 3])));
1135    }
1136
1137    #[tokio::test]
1138    async fn test_fulltext_index_basic_chinese_bloom() {
1139        let applier_factory = build_fulltext_applier_factory(
1140            "test_fulltext_index_basic_chinese_bloom_",
1141            FulltextBackend::Bloom,
1142            &[
1143                (None, None, Some("你好")),
1144                (None, None, None),
1145                (None, None, Some("世界")),
1146                (None, None, Some("你好,世界")),
1147            ],
1148        )
1149        .await;
1150
1151        let row_ids = applier_factory(
1152            vec![],
1153            vec![(3, vec![(false, "你好")])],
1154            Some(BitVec::from_slice(&[0b1111])),
1155        )
1156        .await;
1157        assert_eq!(row_ids, Some(rows([0, 3])));
1158
1159        let row_ids = applier_factory(
1160            vec![],
1161            vec![(3, vec![(false, "你好")])],
1162            Some(BitVec::from_slice(&[0b1110])), // row 0 is filtered out
1163        )
1164        .await;
1165        assert_eq!(row_ids, Some(rows([3])));
1166
1167        let row_ids = applier_factory(
1168            vec![],
1169            vec![(3, vec![(false, "世界")])],
1170            Some(BitVec::from_slice(&[0b1111])),
1171        )
1172        .await;
1173        assert_eq!(row_ids, Some(rows([2, 3])));
1174
1175        let row_ids = applier_factory(
1176            vec![],
1177            vec![(3, vec![(false, "世界")])],
1178            Some(BitVec::from_slice(&[0b1011])), // row 2 is filtered out
1179        )
1180        .await;
1181        assert_eq!(row_ids, Some(rows([3])));
1182    }
1183
1184    #[tokio::test]
1185    async fn test_fulltext_index_multi_terms_case_sensitive_tantivy() {
1186        let applier_factory = build_fulltext_applier_factory(
1187            "test_fulltext_index_multi_terms_case_sensitive_tantivy_",
1188            FulltextBackend::Tantivy,
1189            &[
1190                (Some("Hello"), None, None),
1191                (Some("World"), None, None),
1192                (None, None, None),
1193                (Some("Hello, World"), None, None),
1194            ],
1195        )
1196        .await;
1197
1198        let row_ids = applier_factory(
1199            vec![],
1200            vec![(1, vec![(false, "hello"), (false, "world")])],
1201            None,
1202        )
1203        .await;
1204        assert_eq!(row_ids, Some(rows([])));
1205
1206        let row_ids = applier_factory(
1207            vec![],
1208            vec![(1, vec![(false, "Hello"), (false, "World")])],
1209            None,
1210        )
1211        .await;
1212        assert_eq!(row_ids, Some(rows([3])));
1213
1214        let row_ids = applier_factory(
1215            vec![],
1216            vec![(1, vec![(true, "Hello"), (false, "World")])],
1217            None,
1218        )
1219        .await;
1220        assert_eq!(row_ids, Some(rows([1, 3])));
1221
1222        let row_ids = applier_factory(
1223            vec![],
1224            vec![(1, vec![(false, "Hello"), (true, "World")])],
1225            None,
1226        )
1227        .await;
1228        assert_eq!(row_ids, Some(rows([0, 3])));
1229
1230        let row_ids = applier_factory(
1231            vec![],
1232            vec![(1, vec![(true, "Hello"), (true, "World")])],
1233            None,
1234        )
1235        .await;
1236        assert_eq!(row_ids, None);
1237    }
1238
1239    #[tokio::test]
1240    async fn test_fulltext_index_multi_terms_case_sensitive_bloom() {
1241        let applier_factory = build_fulltext_applier_factory(
1242            "test_fulltext_index_multi_terms_case_sensitive_bloom_",
1243            FulltextBackend::Bloom,
1244            &[
1245                (Some("Hello"), None, None),
1246                (Some("World"), None, None),
1247                (None, None, None),
1248                (Some("Hello, World"), None, None),
1249            ],
1250        )
1251        .await;
1252
1253        let row_ids = applier_factory(
1254            vec![],
1255            vec![(1, vec![(false, "hello"), (false, "world")])],
1256            Some(BitVec::from_slice(&[0b1111])),
1257        )
1258        .await;
1259        assert_eq!(row_ids, Some(rows([])));
1260
1261        let row_ids = applier_factory(
1262            vec![],
1263            vec![(1, vec![(false, "Hello"), (false, "World")])],
1264            Some(BitVec::from_slice(&[0b1111])),
1265        )
1266        .await;
1267        assert_eq!(row_ids, Some(rows([3])));
1268
1269        let row_ids = applier_factory(
1270            vec![],
1271            vec![(1, vec![(true, "Hello"), (false, "World")])],
1272            Some(BitVec::from_slice(&[0b1111])),
1273        )
1274        .await;
1275        assert_eq!(row_ids, Some(rows([1, 3])));
1276
1277        let row_ids = applier_factory(
1278            vec![],
1279            vec![(1, vec![(false, "Hello"), (true, "World")])],
1280            Some(BitVec::from_slice(&[0b1111])),
1281        )
1282        .await;
1283        assert_eq!(row_ids, Some(rows([0, 3])));
1284
1285        let row_ids = applier_factory(
1286            vec![],
1287            vec![(1, vec![(true, "Hello"), (true, "World")])],
1288            Some(BitVec::from_slice(&[0b1111])),
1289        )
1290        .await;
1291        assert_eq!(row_ids, None);
1292    }
1293
1294    #[tokio::test]
1295    async fn test_fulltext_index_multi_terms_case_insensitive_tantivy() {
1296        let applier_factory = build_fulltext_applier_factory(
1297            "test_fulltext_index_multi_terms_case_insensitive_tantivy_",
1298            FulltextBackend::Tantivy,
1299            &[
1300                (None, Some("hello"), None),
1301                (None, None, None),
1302                (None, Some("world"), None),
1303                (None, Some("Hello, World"), None),
1304            ],
1305        )
1306        .await;
1307
1308        let row_ids = applier_factory(
1309            vec![],
1310            vec![(2, vec![(false, "hello"), (false, "world")])],
1311            None,
1312        )
1313        .await;
1314        assert_eq!(row_ids, Some(rows([3])));
1315
1316        let row_ids = applier_factory(
1317            vec![],
1318            vec![(2, vec![(true, "hello"), (false, "world")])],
1319            None,
1320        )
1321        .await;
1322        assert_eq!(row_ids, Some(rows([3])));
1323
1324        let row_ids = applier_factory(
1325            vec![],
1326            vec![(2, vec![(false, "hello"), (true, "world")])],
1327            None,
1328        )
1329        .await;
1330        assert_eq!(row_ids, Some(rows([3])));
1331
1332        let row_ids = applier_factory(
1333            vec![],
1334            vec![(2, vec![(true, "hello"), (true, "world")])],
1335            None,
1336        )
1337        .await;
1338        assert_eq!(row_ids, Some(rows([3])));
1339    }
1340
1341    #[tokio::test]
1342    async fn test_fulltext_index_multi_terms_case_insensitive_bloom() {
1343        let applier_factory = build_fulltext_applier_factory(
1344            "test_fulltext_index_multi_terms_case_insensitive_bloom_",
1345            FulltextBackend::Bloom,
1346            &[
1347                (None, Some("hello"), None),
1348                (None, None, None),
1349                (None, Some("world"), None),
1350                (None, Some("Hello, World"), None),
1351            ],
1352        )
1353        .await;
1354
1355        let row_ids = applier_factory(
1356            vec![],
1357            vec![(2, vec![(false, "hello"), (false, "world")])],
1358            Some(BitVec::from_slice(&[0b1111])),
1359        )
1360        .await;
1361        assert_eq!(row_ids, Some(rows([3])));
1362
1363        let row_ids = applier_factory(
1364            vec![],
1365            vec![(2, vec![(true, "hello"), (false, "world")])],
1366            Some(BitVec::from_slice(&[0b1111])),
1367        )
1368        .await;
1369        assert_eq!(row_ids, Some(rows([3])));
1370
1371        let row_ids = applier_factory(
1372            vec![],
1373            vec![(2, vec![(false, "hello"), (true, "world")])],
1374            Some(BitVec::from_slice(&[0b1111])),
1375        )
1376        .await;
1377        assert_eq!(row_ids, Some(rows([3])));
1378
1379        let row_ids = applier_factory(
1380            vec![],
1381            vec![(2, vec![(true, "hello"), (true, "world")])],
1382            Some(BitVec::from_slice(&[0b1111])),
1383        )
1384        .await;
1385        assert_eq!(row_ids, Some(rows([3])));
1386    }
1387
1388    #[tokio::test]
1389    async fn test_fulltext_index_multi_columns_tantivy() {
1390        let applier_factory = build_fulltext_applier_factory(
1391            "test_fulltext_index_multi_columns_tantivy_",
1392            FulltextBackend::Tantivy,
1393            &[
1394                (Some("Hello"), None, Some("你好")),
1395                (Some("World"), Some("world"), None),
1396                (None, Some("World"), Some("世界")),
1397                (
1398                    Some("Hello, World"),
1399                    Some("Hello, World"),
1400                    Some("你好,世界"),
1401                ),
1402            ],
1403        )
1404        .await;
1405
1406        let row_ids = applier_factory(
1407            vec![(1, "Hello"), (3, "你好")],
1408            vec![(2, vec![(false, "world")])],
1409            None,
1410        )
1411        .await;
1412        assert_eq!(row_ids, Some(rows([3])));
1413
1414        let row_ids =
1415            applier_factory(vec![(2, "World")], vec![(1, vec![(false, "World")])], None).await;
1416        assert_eq!(row_ids, Some(rows([1, 3])));
1417    }
1418
1419    #[tokio::test]
1420    async fn test_fulltext_index_multi_columns_bloom() {
1421        let applier_factory = build_fulltext_applier_factory(
1422            "test_fulltext_index_multi_columns_bloom_",
1423            FulltextBackend::Bloom,
1424            &[
1425                (Some("Hello"), None, Some("你好")),
1426                (Some("World"), Some("world"), None),
1427                (None, Some("World"), Some("世界")),
1428                (
1429                    Some("Hello, World"),
1430                    Some("Hello, World"),
1431                    Some("你好,世界"),
1432                ),
1433            ],
1434        )
1435        .await;
1436
1437        let row_ids = applier_factory(
1438            vec![],
1439            vec![
1440                (1, vec![(false, "Hello")]),
1441                (2, vec![(false, "world")]),
1442                (3, vec![(false, "你好")]),
1443            ],
1444            Some(BitVec::from_slice(&[0b1111])),
1445        )
1446        .await;
1447        assert_eq!(row_ids, Some(rows([3])));
1448
1449        let row_ids = applier_factory(
1450            vec![],
1451            vec![(1, vec![(false, "World")]), (2, vec![(false, "World")])],
1452            Some(BitVec::from_slice(&[0b1111])),
1453        )
1454        .await;
1455        assert_eq!(row_ids, Some(rows([1, 3])));
1456    }
1457}