1use puffin::blob_metadata::BlobMetadata;
16use serde::{Deserialize, Serialize};
17use snafu::ResultExt;
18use tantivy::tokenizer::{LowerCaser, SimpleTokenizer, TextAnalyzer, TokenizerManager};
19use tantivy_jieba::JiebaTokenizer;
20pub mod create;
21pub mod error;
22pub mod search;
23pub mod tokenizer;
24
/// Property key under which the serialized [`Config`] is stored in
/// puffin blob metadata (see [`Config::from_blob_metadata`]).
pub const KEY_FULLTEXT_CONFIG: &str = "fulltext_config";
26
27use crate::fulltext_index::error::{DeserializeFromJsonSnafu, Result};
28
29#[cfg(test)]
30mod tests;
31
/// Configuration controlling how a full-text index tokenizes and matches text.
///
/// Serialized to JSON and persisted in blob metadata under
/// [`KEY_FULLTEXT_CONFIG`]; defaults are used when the property is absent.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Config {
    /// Analyzer used to tokenize the text (defaults to [`Analyzer::English`]).
    pub analyzer: Analyzer,

    /// Whether tokens keep their original case. When `false` (the default),
    /// a lowercasing filter is applied during tokenization.
    pub case_sensitive: bool,
}
41
/// Tokenizer selection for the full-text index.
///
/// Variant names are part of the serialized config format — renaming them
/// would break configs already persisted in blob metadata.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum Analyzer {
    /// Whitespace/punctuation tokenization via tantivy's `SimpleTokenizer`.
    #[default]
    English,

    /// Chinese word segmentation via the Jieba tokenizer.
    Chinese,
}
50
51impl Config {
52 fn build_tantivy_tokenizer(&self) -> TokenizerManager {
53 let mut builder = match self.analyzer {
54 Analyzer::English => TextAnalyzer::builder(SimpleTokenizer::default()).dynamic(),
55 Analyzer::Chinese => TextAnalyzer::builder(JiebaTokenizer {}).dynamic(),
56 };
57
58 if !self.case_sensitive {
59 builder = builder.filter_dynamic(LowerCaser);
60 }
61
62 let tokenizer = builder.build();
63 let tokenizer_manager = TokenizerManager::new();
64 tokenizer_manager.register("default", tokenizer);
65 tokenizer_manager
66 }
67
68 pub fn from_blob_metadata(metadata: &BlobMetadata) -> Result<Self> {
70 if let Some(config) = metadata.properties.get(KEY_FULLTEXT_CONFIG) {
71 let config = serde_json::from_str(config).context(DeserializeFromJsonSnafu)?;
72 return Ok(config);
73 }
74
75 Ok(Self::default())
76 }
77}