index/fulltext_index/search/
tantivy.rs1use std::collections::{BTreeSet, HashMap};
16use std::path::Path;
17use std::time::Instant;
18
19use async_trait::async_trait;
20use common_telemetry::debug;
21use snafu::{OptionExt, ResultExt};
22use tantivy::collector::DocSetCollector;
23use tantivy::query::QueryParser;
24use tantivy::schema::{Field, Value};
25use tantivy::{Index, IndexReader, ReloadPolicy, TantivyDocument};
26
27use crate::fulltext_index::create::{ROWID_FIELD_NAME, TEXT_FIELD_NAME};
28use crate::fulltext_index::error::{
29 Result, TantivyDocNotFoundSnafu, TantivyParserSnafu, TantivySnafu,
30};
31use crate::fulltext_index::search::{FulltextIndexSearcher, RowId};
32use crate::fulltext_index::Config;
33
34pub struct TantivyFulltextIndexSearcher {
36 index: Index,
38 reader: IndexReader,
40 default_field: Field,
42}
43
44impl TantivyFulltextIndexSearcher {
45 pub fn new(path: impl AsRef<Path>, config: Config) -> Result<Self> {
47 let now = Instant::now();
48
49 let mut index = Index::open_in_dir(path.as_ref()).context(TantivySnafu)?;
50 index.set_tokenizers(config.build_tantivy_tokenizer());
51 let reader = index
52 .reader_builder()
53 .reload_policy(ReloadPolicy::Manual)
54 .num_warming_threads(0)
55 .try_into()
56 .context(TantivySnafu)?;
57 let default_field = index
58 .schema()
59 .get_field(TEXT_FIELD_NAME)
60 .context(TantivySnafu)?;
61
62 debug!(
63 "Opened tantivy index on {:?} in {:?}",
64 path.as_ref(),
65 now.elapsed()
66 );
67
68 Ok(Self {
69 index,
70 reader,
71 default_field,
72 })
73 }
74}
75
76#[async_trait]
77impl FulltextIndexSearcher for TantivyFulltextIndexSearcher {
78 async fn search(&self, query: &str) -> Result<BTreeSet<RowId>> {
79 let searcher = self.reader.searcher();
80 let query_parser = QueryParser::for_index(&self.index, vec![self.default_field]);
81 let query = query_parser
82 .parse_query(query)
83 .context(TantivyParserSnafu)?;
84 let doc_addrs = searcher
85 .search(&query, &DocSetCollector)
86 .context(TantivySnafu)?;
87
88 let seg_metas = self
89 .index
90 .searchable_segment_metas()
91 .context(TantivySnafu)?;
92
93 if seg_metas.len() == 1 {
96 return Ok(doc_addrs.into_iter().map(|d| d.doc_id).collect());
97 }
98
99 let rowid_field = searcher
101 .schema()
102 .get_field(ROWID_FIELD_NAME)
103 .context(TantivySnafu)?;
104 let mut seg_offsets = HashMap::with_capacity(seg_metas.len());
105 let mut res = BTreeSet::new();
106 for doc_addr in doc_addrs {
107 let offset = if let Some(offset) = seg_offsets.get(&doc_addr.segment_ord) {
108 *offset
109 } else {
110 let doc: TantivyDocument = searcher.doc(doc_addr).context(TantivySnafu)?;
113 let rowid = doc
114 .get_first(rowid_field)
115 .and_then(|v| v.as_u64())
116 .context(TantivyDocNotFoundSnafu { doc_addr })?;
117
118 let offset = rowid as u32 - doc_addr.doc_id;
119 seg_offsets.insert(doc_addr.segment_ord, offset);
120 offset
121 };
122
123 res.insert(doc_addr.doc_id + offset);
124 }
125
126 Ok(res)
127 }
128}