Skip to main content

mito2/
config.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Configurations.
16
17use std::cmp;
18use std::path::Path;
19use std::time::Duration;
20
21use common_base::memory_limit::MemoryLimit;
22use common_base::readable_size::ReadableSize;
23use common_memory_manager::OnExhaustedPolicy;
24use common_stat::{get_total_cpu_cores, get_total_memory_readable};
25use common_telemetry::warn;
26use serde::{Deserialize, Serialize};
27use serde_with::serde_as;
28
29use crate::cache::file_cache::DEFAULT_INDEX_CACHE_PERCENT;
30use crate::error::Result;
31use crate::gc::GcConfig;
32use crate::memtable::MemtableConfig;
33use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
34
/// Lower bound (5MB) enforced on the SST and index write buffer sizes when
/// the configs are sanitized.
const MULTIPART_UPLOAD_MINIMUM_SIZE: ReadableSize = ReadableSize::mb(5);
/// Default maximum number of SST files to scan concurrently.
pub(crate) const DEFAULT_MAX_CONCURRENT_SCAN_FILES: usize = 384;

/// Use `1/GLOBAL_WRITE_BUFFER_SIZE_FACTOR` of OS memory as global write buffer size in default mode
const GLOBAL_WRITE_BUFFER_SIZE_FACTOR: u64 = 8;
/// Use `1/SST_META_CACHE_SIZE_FACTOR` of OS memory size as SST meta cache size in default mode
const SST_META_CACHE_SIZE_FACTOR: u64 = 32;
/// Use `1/MEM_CACHE_SIZE_FACTOR` of OS memory size as mem cache size in default mode
/// (vector, selector result, range result and index content/result caches).
const MEM_CACHE_SIZE_FACTOR: u64 = 16;
/// Use `1/PAGE_CACHE_SIZE_FACTOR` of OS memory size as page cache size in default mode
const PAGE_CACHE_SIZE_FACTOR: u64 = 8;
/// Use `1/INDEX_CREATE_MEM_THRESHOLD_FACTOR` of OS memory size as mem threshold for creating index
const INDEX_CREATE_MEM_THRESHOLD_FACTOR: u64 = 16;

/// Fetch option timeout (3 seconds).
pub(crate) const FETCH_OPTION_TIMEOUT: Duration = Duration::from_secs(3);
52
/// Configuration for [MitoEngine](crate::engine::MitoEngine).
///
/// Before using the config, make sure to call [`MitoConfig::sanitize()`] so that
/// invalid or unset values are replaced with usable ones.
/// Every field is optional when deserializing: missing fields fall back to the
/// value produced by [`MitoConfig::default()`].
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct MitoConfig {
    // Worker configs:
    /// Number of region workers (default: 1/2 of cpu cores).
    /// Set it to 0 to use the default value.
    pub num_workers: usize,
    /// Request channel size of each worker (default 128).
    /// A value of 0 is sanitized to 1.
    pub worker_channel_size: usize,
    /// Max batch size for a worker to handle requests (default 64).
    pub worker_request_batch_size: usize,

    // Manifest configs:
    /// Number of meta action updated to trigger a new checkpoint
    /// for the manifest (default 10).
    pub manifest_checkpoint_distance: u64,
    /// Number of removed files to keep in the manifest's `removed_files` field before
    /// they are also removed from `removed_files` (default 256). Mostly for debugging purposes.
    /// If set to 0, only `keep_removed_file_ttl` is used to decide when to remove files
    /// from the `removed_files` field.
    pub experimental_manifest_keep_removed_file_count: usize,
    /// How long to keep removed files in the `removed_files` field of the manifest
    /// after they are removed from the manifest (default 1 hour).
    /// Files will only be removed from the `removed_files` field
    /// if both `keep_removed_file_count` and `keep_removed_file_ttl` are reached.
    #[serde(with = "humantime_serde")]
    pub experimental_manifest_keep_removed_file_ttl: Duration,
    /// Whether to compress manifest and checkpoint file by gzip (default false).
    pub compress_manifest: bool,

    // Background job configs:
    /// Max number of running background index build jobs (default: 1/8 of cpu cores).
    pub max_background_index_builds: usize,
    /// Max number of running background flush jobs (default: 1/2 of cpu cores).
    pub max_background_flushes: usize,
    /// Max number of running background compaction jobs (default: 1/4 of cpu cores).
    pub max_background_compactions: usize,
    /// Max number of running background purge jobs (default: number of cpu cores).
    pub max_background_purges: usize,
    /// Memory budget for compaction tasks (default: unlimited).
    /// Supports absolute size (e.g., "2GiB", "512MB") or percentage of system memory (e.g., "50%").
    /// Setting it to 0 or "unlimited" disables the limit.
    pub experimental_compaction_memory_limit: MemoryLimit,
    /// Behavior when compaction cannot acquire memory from the budget.
    pub experimental_compaction_on_exhausted: OnExhaustedPolicy,

    // Flush configs:
    /// Interval to auto flush a region if it has not flushed yet (default 30 min).
    #[serde(with = "humantime_serde")]
    pub auto_flush_interval: Duration,
    /// Global write buffer size threshold to trigger flush.
    pub global_write_buffer_size: ReadableSize,
    /// Global write buffer size threshold to reject write requests.
    /// Sanitized to twice `global_write_buffer_size` when it is not greater than it.
    pub global_write_buffer_reject_size: ReadableSize,

    // Cache configs:
    /// Cache size for SST metadata. Setting it to 0 to disable the cache.
    pub sst_meta_cache_size: ReadableSize,
    /// Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
    pub vector_cache_size: ReadableSize,
    /// Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
    pub page_cache_size: ReadableSize,
    /// Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
    pub selector_result_cache_size: ReadableSize,
    /// Cache size for flat range scan results. Setting it to 0 to disable the cache.
    pub range_result_cache_size: ReadableSize,
    /// Whether to enable the write cache (default false).
    pub enable_write_cache: bool,
    /// File system path for write cache dir's root, defaults to `{data_home}`.
    /// An empty or blank value is replaced by `data_home` during sanitizing.
    pub write_cache_path: String,
    /// Capacity for write cache (default 5GB).
    pub write_cache_size: ReadableSize,
    /// TTL for write cache (default: none).
    #[serde(with = "humantime_serde")]
    pub write_cache_ttl: Option<Duration>,
    /// Preload index (puffin) files into cache on region open (default: true).
    pub preload_index_cache: bool,
    /// Percentage of write cache capacity allocated for index (puffin) files (default: 20).
    /// The remaining capacity is used for data (parquet) files.
    /// Must be between 0 and 100 (exclusive); other values are reset to the default
    /// during sanitizing.
    pub index_cache_percent: u8,
    /// Enable background downloading of files to the local cache when accessed during queries (default: true).
    /// When enabled, files will be asynchronously downloaded to improve performance for subsequent reads.
    pub enable_refill_cache_on_read: bool,
    /// Capacity for manifest cache (default: 256MB).
    pub manifest_cache_size: ReadableSize,

    // Other configs:
    /// Buffer size for SST writing. Values below 5MB are raised to 5MB during sanitizing.
    pub sst_write_buffer_size: ReadableSize,
    /// Maximum number of SST files to scan concurrently (default 384).
    pub max_concurrent_scan_files: usize,
    /// Whether to allow stale entries read during replay (default false).
    pub allow_stale_entries: bool,
    /// Memory limit for table scans across all queries. Setting it to 0 disables the limit.
    /// Supports absolute size (e.g., "2GB") or percentage (e.g., "50%").
    pub scan_memory_limit: MemoryLimit,
    /// Behavior when scan memory tracking cannot acquire memory from the budget.
    /// `wait` means `wait(10s)`, not unlimited waiting.
    /// Defaults to [`OnExhaustedPolicy::Fail`], which intentionally differs from
    /// [`OnExhaustedPolicy::default()`].
    pub scan_memory_on_exhausted: OnExhaustedPolicy,

    /// Index configs.
    pub index: IndexConfig,
    /// Inverted index configs.
    pub inverted_index: InvertedIndexConfig,
    /// Full-text index configs.
    pub fulltext_index: FulltextIndexConfig,
    /// Bloom filter index configs.
    pub bloom_filter_index: BloomFilterConfig,
    /// Vector index configs (HNSW).
    #[cfg(feature = "vector_index")]
    pub vector_index: VectorIndexConfig,

    /// Memtable config
    pub memtable: MemtableConfig,

    /// Minimum time interval between two compactions.
    /// To align with the old behavior, the default value is 0 (no restrictions).
    #[serde(with = "humantime_serde")]
    pub min_compaction_interval: Duration,

    /// Whether to enable flat format as the default SST format (default true).
    /// When enabled, forces using BulkMemtable and BulkMemtableBuilder.
    pub default_flat_format: bool,

    /// GC configs (see [`GcConfig`]).
    pub gc: GcConfig,
}
184
185impl Default for MitoConfig {
186    fn default() -> Self {
187        let mut mito_config = MitoConfig {
188            num_workers: divide_num_cpus(2),
189            worker_channel_size: 128,
190            worker_request_batch_size: 64,
191            manifest_checkpoint_distance: 10,
192            experimental_manifest_keep_removed_file_count: 256,
193            experimental_manifest_keep_removed_file_ttl: Duration::from_secs(60 * 60),
194            compress_manifest: false,
195            max_background_index_builds: divide_num_cpus(8),
196            max_background_flushes: divide_num_cpus(2),
197            max_background_compactions: divide_num_cpus(4),
198            max_background_purges: get_total_cpu_cores(),
199            experimental_compaction_memory_limit: MemoryLimit::Unlimited,
200            experimental_compaction_on_exhausted: OnExhaustedPolicy::default(),
201            auto_flush_interval: Duration::from_secs(30 * 60),
202            global_write_buffer_size: ReadableSize::gb(1),
203            global_write_buffer_reject_size: ReadableSize::gb(2),
204            sst_meta_cache_size: ReadableSize::mb(128),
205            vector_cache_size: ReadableSize::mb(512),
206            page_cache_size: ReadableSize::mb(512),
207            selector_result_cache_size: ReadableSize::mb(512),
208            range_result_cache_size: ReadableSize::mb(512),
209            enable_write_cache: false,
210            write_cache_path: String::new(),
211            write_cache_size: ReadableSize::gb(5),
212            write_cache_ttl: None,
213            preload_index_cache: true,
214            index_cache_percent: DEFAULT_INDEX_CACHE_PERCENT,
215            enable_refill_cache_on_read: true,
216            manifest_cache_size: ReadableSize::mb(256),
217            sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
218            max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
219            allow_stale_entries: false,
220            scan_memory_limit: MemoryLimit::default(),
221            scan_memory_on_exhausted: OnExhaustedPolicy::Fail,
222            index: IndexConfig::default(),
223            inverted_index: InvertedIndexConfig::default(),
224            fulltext_index: FulltextIndexConfig::default(),
225            bloom_filter_index: BloomFilterConfig::default(),
226            #[cfg(feature = "vector_index")]
227            vector_index: VectorIndexConfig::default(),
228            memtable: MemtableConfig::default(),
229            min_compaction_interval: Duration::from_secs(0),
230            default_flat_format: true,
231            gc: GcConfig::default(),
232        };
233
234        // Adjust buffer and cache size according to system memory if we can.
235        if let Some(sys_memory) = get_total_memory_readable() {
236            mito_config.adjust_buffer_and_cache_size(sys_memory);
237        }
238
239        mito_config
240    }
241}
242
243impl MitoConfig {
244    /// Sanitize incorrect configurations.
245    ///
246    /// Returns an error if there is a configuration that unable to sanitize.
247    pub fn sanitize(&mut self, data_home: &str) -> Result<()> {
248        // Use default value if `num_workers` is 0.
249        if self.num_workers == 0 {
250            self.num_workers = divide_num_cpus(2);
251        }
252
253        // Sanitize channel size.
254        if self.worker_channel_size == 0 {
255            warn!("Sanitize channel size 0 to 1");
256            self.worker_channel_size = 1;
257        }
258
259        if self.max_background_flushes == 0 {
260            warn!(
261                "Sanitize max background flushes 0 to {}",
262                divide_num_cpus(2)
263            );
264            self.max_background_flushes = divide_num_cpus(2);
265        }
266        if self.max_background_compactions == 0 {
267            warn!(
268                "Sanitize max background compactions 0 to {}",
269                divide_num_cpus(4)
270            );
271            self.max_background_compactions = divide_num_cpus(4);
272        }
273        if self.max_background_purges == 0 {
274            let cpu_cores = get_total_cpu_cores();
275            warn!("Sanitize max background purges 0 to {}", cpu_cores);
276            self.max_background_purges = cpu_cores;
277        }
278
279        if self.global_write_buffer_reject_size <= self.global_write_buffer_size {
280            self.global_write_buffer_reject_size = self.global_write_buffer_size * 2;
281            warn!(
282                "Sanitize global write buffer reject size to {}",
283                self.global_write_buffer_reject_size
284            );
285        }
286
287        if self.sst_write_buffer_size < MULTIPART_UPLOAD_MINIMUM_SIZE {
288            self.sst_write_buffer_size = MULTIPART_UPLOAD_MINIMUM_SIZE;
289            warn!(
290                "Sanitize sst write buffer size to {}",
291                self.sst_write_buffer_size
292            );
293        }
294
295        // Sets write cache path if it is empty.
296        if self.write_cache_path.trim().is_empty() {
297            self.write_cache_path = data_home.to_string();
298        }
299
300        // Validate index_cache_percent is within valid range (0, 100)
301        if self.index_cache_percent == 0 || self.index_cache_percent >= 100 {
302            warn!(
303                "Invalid index_cache_percent {}, resetting to default {}",
304                self.index_cache_percent, DEFAULT_INDEX_CACHE_PERCENT
305            );
306            self.index_cache_percent = DEFAULT_INDEX_CACHE_PERCENT;
307        }
308
309        self.index.sanitize(data_home, &self.inverted_index)?;
310
311        Ok(())
312    }
313
314    fn adjust_buffer_and_cache_size(&mut self, sys_memory: ReadableSize) {
315        // shouldn't be greater than 1G in default mode.
316        let global_write_buffer_size = cmp::min(
317            sys_memory / GLOBAL_WRITE_BUFFER_SIZE_FACTOR,
318            ReadableSize::gb(1),
319        );
320        // Use 2x of global write buffer size as global write buffer reject size.
321        let global_write_buffer_reject_size = global_write_buffer_size * 2;
322        // shouldn't be greater than 128MB in default mode.
323        let sst_meta_cache_size = cmp::min(
324            sys_memory / SST_META_CACHE_SIZE_FACTOR,
325            ReadableSize::mb(128),
326        );
327        // shouldn't be greater than 512MB in default mode.
328        let mem_cache_size = cmp::min(sys_memory / MEM_CACHE_SIZE_FACTOR, ReadableSize::mb(512));
329        let page_cache_size = sys_memory / PAGE_CACHE_SIZE_FACTOR;
330
331        self.global_write_buffer_size = global_write_buffer_size;
332        self.global_write_buffer_reject_size = global_write_buffer_reject_size;
333        self.sst_meta_cache_size = sst_meta_cache_size;
334        self.vector_cache_size = mem_cache_size;
335        self.page_cache_size = page_cache_size;
336        self.selector_result_cache_size = mem_cache_size;
337        self.range_result_cache_size = mem_cache_size;
338
339        self.index.adjust_buffer_and_cache_size(sys_memory);
340    }
341
342    /// Enable write cache.
343    #[cfg(test)]
344    pub fn enable_write_cache(
345        mut self,
346        path: String,
347        size: ReadableSize,
348        ttl: Option<Duration>,
349    ) -> Self {
350        self.enable_write_cache = true;
351        self.write_cache_path = path;
352        self.write_cache_size = size;
353        self.write_cache_ttl = ttl;
354        self
355    }
356}
357
/// Index build mode.
///
/// (De)serialized in `snake_case`, i.e. `sync` / `async`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum IndexBuildMode {
    /// Build index synchronously (the default).
    #[default]
    Sync,
    /// Build index asynchronously.
    Async,
}
368
/// Common index configs: auxiliary directory, staging area, build mode,
/// write buffer and cache sizes.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct IndexConfig {
    /// Auxiliary directory path for the index in filesystem, used to
    /// store intermediate files for creating the index and staging files
    /// for searching the index, defaults to `{data_home}/index_intermediate`.
    ///
    /// This path contains two subdirectories:
    /// - `__intm`: for storing intermediate files used during creating index.
    /// - `staging`: for storing staging files used during searching index.
    ///
    /// The default name for this directory is `index_intermediate` for backward compatibility.
    pub aux_path: String,

    /// The max capacity of the staging directory (default 2GB).
    pub staging_size: ReadableSize,
    /// The TTL of the staging directory.
    /// Defaults to 7 days.
    /// Setting it to "0s" to disable TTL.
    #[serde(with = "humantime_serde")]
    pub staging_ttl: Option<Duration>,

    /// Index Build Mode
    pub build_mode: IndexBuildMode,

    /// Write buffer size for creating the index (default 8MB).
    /// Values below 5MB are raised to 5MB during sanitizing.
    pub write_buffer_size: ReadableSize,

    /// Cache size for metadata of puffin files. Setting it to 0 to disable the cache.
    pub metadata_cache_size: ReadableSize,
    /// Cache size for inverted index content. Setting it to 0 to disable the cache.
    pub content_cache_size: ReadableSize,
    /// Page size for inverted index content (default 64KB).
    pub content_cache_page_size: ReadableSize,
    /// Cache size for index result. Setting it to 0 to disable the cache.
    pub result_cache_size: ReadableSize,
}
407
408impl Default for IndexConfig {
409    fn default() -> Self {
410        Self {
411            aux_path: String::new(),
412            staging_size: ReadableSize::gb(2),
413            staging_ttl: Some(Duration::from_secs(7 * 24 * 60 * 60)),
414            build_mode: IndexBuildMode::default(),
415            write_buffer_size: ReadableSize::mb(8),
416            metadata_cache_size: ReadableSize::mb(64),
417            content_cache_size: ReadableSize::mb(128),
418            content_cache_page_size: ReadableSize::kb(64),
419            result_cache_size: ReadableSize::mb(128),
420        }
421    }
422}
423
424impl IndexConfig {
425    pub fn sanitize(
426        &mut self,
427        data_home: &str,
428        inverted_index: &InvertedIndexConfig,
429    ) -> Result<()> {
430        #[allow(deprecated)]
431        if self.aux_path.is_empty() && !inverted_index.intermediate_path.is_empty() {
432            self.aux_path.clone_from(&inverted_index.intermediate_path);
433            warn!(
434                "`inverted_index.intermediate_path` is deprecated, use
435                 `index.aux_path` instead. Set `index.aux_path` to {}",
436                &inverted_index.intermediate_path
437            )
438        }
439        if self.aux_path.is_empty() {
440            let path = Path::new(data_home).join("index_intermediate");
441            self.aux_path = path.as_os_str().to_string_lossy().to_string();
442        }
443
444        if self.write_buffer_size < MULTIPART_UPLOAD_MINIMUM_SIZE {
445            self.write_buffer_size = MULTIPART_UPLOAD_MINIMUM_SIZE;
446            warn!(
447                "Sanitize index write buffer size to {}",
448                self.write_buffer_size
449            );
450        }
451
452        if self.staging_ttl.map(|ttl| ttl.is_zero()).unwrap_or(false) {
453            self.staging_ttl = None;
454        }
455
456        Ok(())
457    }
458
459    pub fn adjust_buffer_and_cache_size(&mut self, sys_memory: ReadableSize) {
460        let cache_size = cmp::min(sys_memory / MEM_CACHE_SIZE_FACTOR, ReadableSize::mb(128));
461        self.result_cache_size = cmp::min(self.result_cache_size, cache_size);
462        self.content_cache_size = cmp::min(self.content_cache_size, cache_size);
463
464        let metadata_cache_size = cmp::min(
465            sys_memory / SST_META_CACHE_SIZE_FACTOR,
466            ReadableSize::mb(64),
467        );
468        self.metadata_cache_size = cmp::min(self.metadata_cache_size, metadata_cache_size);
469    }
470}
471
/// Operational mode for certain actions.
///
/// (De)serialized in `snake_case`, i.e. `auto` / `disable`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum Mode {
    /// The action is performed automatically based on internal criteria (the default).
    #[default]
    Auto,
    /// The action is explicitly disabled.
    Disable,
}
482
483impl Mode {
484    /// Whether the action is disabled.
485    pub fn disabled(&self) -> bool {
486        matches!(self, Mode::Disable)
487    }
488
489    /// Whether the action is automatic.
490    pub fn auto(&self) -> bool {
491        matches!(self, Mode::Auto)
492    }
493}
494
/// Memory threshold for performing certain actions.
///
/// `Auto` and `Unlimited` are (de)serialized as the `snake_case` strings
/// `auto` / `unlimited`; any other value is handled by the untagged `Size` variant.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MemoryThreshold {
    /// Automatically determine the threshold based on internal criteria (the default).
    #[default]
    Auto,
    /// Unlimited memory.
    Unlimited,
    /// Fixed memory threshold.
    #[serde(untagged)]
    Size(ReadableSize),
}
508
/// Configuration options for the inverted index.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct InvertedIndexConfig {
    /// Whether to create the index on flush: automatically or never.
    pub create_on_flush: Mode,
    /// Whether to create the index on compaction: automatically or never.
    pub create_on_compaction: Mode,
    /// Whether to apply the index on query: automatically or never.
    pub apply_on_query: Mode,

    /// Memory threshold for performing an external sort during index creation.
    pub mem_threshold_on_create: MemoryThreshold,

    /// Deprecated location of the index auxiliary directory.
    /// Still accepted on input (it is used as a fallback for an empty
    /// `index.aux_path`) but never serialized.
    #[deprecated = "use [IndexConfig::aux_path] instead"]
    #[serde(skip_serializing)]
    pub intermediate_path: String,

    /// Deprecated write buffer size. Still accepted on input but never serialized.
    #[deprecated = "use [IndexConfig::write_buffer_size] instead"]
    #[serde(skip_serializing)]
    pub write_buffer_size: ReadableSize,
}
532
533impl Default for InvertedIndexConfig {
534    #[allow(deprecated)]
535    fn default() -> Self {
536        Self {
537            create_on_flush: Mode::Auto,
538            create_on_compaction: Mode::Auto,
539            apply_on_query: Mode::Auto,
540            mem_threshold_on_create: MemoryThreshold::Auto,
541            write_buffer_size: ReadableSize::mb(8),
542            intermediate_path: String::new(),
543        }
544    }
545}
546
547impl InvertedIndexConfig {
548    pub fn mem_threshold_on_create(&self) -> Option<usize> {
549        match self.mem_threshold_on_create {
550            MemoryThreshold::Auto => {
551                if let Some(sys_memory) = get_total_memory_readable() {
552                    Some((sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as usize)
553                } else {
554                    Some(ReadableSize::mb(64).as_bytes() as usize)
555                }
556            }
557            MemoryThreshold::Unlimited => None,
558            MemoryThreshold::Size(size) => Some(size.as_bytes() as usize),
559        }
560    }
561}
562
/// Configuration options for the full-text index.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct FulltextIndexConfig {
    /// Whether to create the index on flush: automatically or never.
    pub create_on_flush: Mode,
    /// Whether to create the index on compaction: automatically or never.
    pub create_on_compaction: Mode,
    /// Whether to apply the index on query: automatically or never.
    pub apply_on_query: Mode,
    /// Memory threshold for creating the index.
    pub mem_threshold_on_create: MemoryThreshold,
    /// Whether to compress the index data (default true).
    pub compress: bool,
}
579
580impl Default for FulltextIndexConfig {
581    fn default() -> Self {
582        Self {
583            create_on_flush: Mode::Auto,
584            create_on_compaction: Mode::Auto,
585            apply_on_query: Mode::Auto,
586            mem_threshold_on_create: MemoryThreshold::Auto,
587            compress: true,
588        }
589    }
590}
591
592impl FulltextIndexConfig {
593    pub fn mem_threshold_on_create(&self) -> usize {
594        match self.mem_threshold_on_create {
595            MemoryThreshold::Auto => {
596                if let Some(sys_memory) = get_total_memory_readable() {
597                    (sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as _
598                } else {
599                    ReadableSize::mb(64).as_bytes() as _
600                }
601            }
602            MemoryThreshold::Unlimited => usize::MAX,
603            MemoryThreshold::Size(size) => size.as_bytes() as _,
604        }
605    }
606}
607
/// Configuration options for the bloom filter index.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct BloomFilterConfig {
    /// Whether to create the index on flush: automatically or never.
    pub create_on_flush: Mode,
    /// Whether to create the index on compaction: automatically or never.
    pub create_on_compaction: Mode,
    /// Whether to apply the index on query: automatically or never.
    pub apply_on_query: Mode,
    /// Memory threshold for creating the index.
    pub mem_threshold_on_create: MemoryThreshold,
}
622
623impl Default for BloomFilterConfig {
624    fn default() -> Self {
625        Self {
626            create_on_flush: Mode::Auto,
627            create_on_compaction: Mode::Auto,
628            apply_on_query: Mode::Auto,
629            mem_threshold_on_create: MemoryThreshold::Auto,
630        }
631    }
632}
633
634impl BloomFilterConfig {
635    pub fn mem_threshold_on_create(&self) -> Option<usize> {
636        match self.mem_threshold_on_create {
637            MemoryThreshold::Auto => {
638                if let Some(sys_memory) = get_total_memory_readable() {
639                    Some((sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as usize)
640                } else {
641                    Some(ReadableSize::mb(64).as_bytes() as usize)
642                }
643            }
644            MemoryThreshold::Unlimited => None,
645            MemoryThreshold::Size(size) => Some(size.as_bytes() as usize),
646        }
647    }
648}
649
/// Configuration options for the vector index (HNSW).
///
/// Only compiled when the `vector_index` feature is enabled.
#[cfg(feature = "vector_index")]
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct VectorIndexConfig {
    /// Whether to create the index on flush: automatically or never.
    pub create_on_flush: Mode,
    /// Whether to create the index on compaction: automatically or never.
    pub create_on_compaction: Mode,
    /// Whether to apply the index on query: automatically or never.
    pub apply_on_query: Mode,
    /// Memory threshold for creating the index.
    pub mem_threshold_on_create: MemoryThreshold,
}
665
#[cfg(feature = "vector_index")]
impl Default for VectorIndexConfig {
    /// Every action and the memory threshold default to automatic.
    fn default() -> Self {
        let create_on_flush = Mode::Auto;
        let create_on_compaction = Mode::Auto;
        let apply_on_query = Mode::Auto;
        let mem_threshold_on_create = MemoryThreshold::Auto;

        VectorIndexConfig {
            create_on_flush,
            create_on_compaction,
            apply_on_query,
            mem_threshold_on_create,
        }
    }
}
677
#[cfg(feature = "vector_index")]
impl VectorIndexConfig {
    /// Resolves the configured memory threshold into a byte count.
    ///
    /// Returns `None` when the threshold is unlimited. In `Auto` mode the
    /// threshold is `1/INDEX_CREATE_MEM_THRESHOLD_FACTOR` of the system memory,
    /// or 64MB when the system memory cannot be determined.
    pub fn mem_threshold_on_create(&self) -> Option<usize> {
        let limit = match self.mem_threshold_on_create {
            MemoryThreshold::Unlimited => return None,
            MemoryThreshold::Size(size) => size,
            MemoryThreshold::Auto => get_total_memory_readable()
                .map(|total| total / INDEX_CREATE_MEM_THRESHOLD_FACTOR)
                .unwrap_or(ReadableSize::mb(64)),
        };

        Some(limit.as_bytes() as usize)
    }
}
694
695/// Divide cpu num by a non-zero `divisor` and returns at least 1.
696fn divide_num_cpus(divisor: usize) -> usize {
697    debug_assert!(divisor > 0);
698    let cores = get_total_cpu_cores();
699    debug_assert!(cores > 0);
700
701    cores.div_ceil(divisor)
702}
703
#[cfg(test)]
mod tests {
    use super::*;

    /// A config containing only a `[memtable]` section must deserialize into
    /// the partition tree memtable variant with the given options.
    #[test]
    fn test_deserialize_config() {
        let toml_text = r#"
[memtable]
type = "partition_tree"
index_max_keys_per_shard = 8192
data_freeze_threshold = 1024
dedup = true
fork_dictionary_bytes = "512MiB"
"#;
        let mito: MitoConfig = toml::from_str(toml_text).unwrap();
        match &mito.memtable {
            MemtableConfig::PartitionTree(tree) => {
                assert_eq!(1024, tree.data_freeze_threshold);
            }
            _ => unreachable!(),
        }
    }
}