metric_engine/engine/
options.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Specific options for the metric engine to create or open a region.
16
17use std::collections::HashMap;
18
19use store_api::metric_engine_consts::{
20    MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING,
21    METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
22    METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT,
23    METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
24    METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT, METRIC_ENGINE_INDEX_TYPE_OPTION,
25};
26use store_api::mito_engine_options::{COMPACTION_TYPE, COMPACTION_TYPE_TWCS, TWCS_TIME_WINDOW};
27
28use crate::error::{Error, ParseRegionOptionsSnafu, Result};
29
/// Empirically chosen segment row count for the inverted index of the metric data region.
///
/// The metric engine's access pattern produces smaller indices than the mito engine does.
/// Using a value below the default segment row count of 1024 trades a somewhat larger index
/// for a better indexing effect. `set_data_region_options` writes this value to the
/// `index.inverted_index.segment_row_count` option.
const SEG_ROW_COUNT_FOR_DATA_REGION: u32 = 256;

/// Compaction time window that `set_data_region_options` applies to a metric engine data
/// region when the options do not already contain a TWCS time window.
const DEFAULT_DATA_REGION_COMPACTION_TIME_WINDOW: &str = "1d";
38
39/// Physical region options.
40#[derive(Debug, Clone, Copy, PartialEq)]
41pub struct PhysicalRegionOptions {
42    pub index: IndexOptions,
43}
44
/// Index configuration for automatically created columns.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub enum IndexOptions {
    /// The default variant; used when the options carry no index type.
    #[default]
    None,
    /// Selected by the index type `inverted`.
    Inverted,
    /// Selected by the index type `skipping`.
    Skipping {
        /// Granularity of the skipping index.
        granularity: u32,
        /// False positive rate of the skipping index. Values parsed from region options
        /// are only accepted within `(0.0, 1.0]`.
        false_positive_rate: f64,
    },
}
56
57/// Sets data region specific options.
58pub fn set_data_region_options(
59    options: &mut HashMap<String, String>,
60    sparse_primary_key_encoding_if_absent: bool,
61) {
62    options.remove(METRIC_ENGINE_INDEX_TYPE_OPTION);
63    options.remove(METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION);
64    options.remove(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION);
65    options.insert(
66        "index.inverted_index.segment_row_count".to_string(),
67        SEG_ROW_COUNT_FOR_DATA_REGION.to_string(),
68    );
69    // Set memtable options for the data region.
70    options.insert("memtable.type".to_string(), "partition_tree".to_string());
71    if sparse_primary_key_encoding_if_absent
72        && !options.contains_key(MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING)
73    {
74        options.insert(
75            MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING.to_string(),
76            "sparse".to_string(),
77        );
78    }
79    if !options.contains_key(TWCS_TIME_WINDOW) {
80        options.insert(
81            COMPACTION_TYPE.to_string(),
82            COMPACTION_TYPE_TWCS.to_string(),
83        );
84        options.insert(
85            TWCS_TIME_WINDOW.to_string(),
86            DEFAULT_DATA_REGION_COMPACTION_TIME_WINDOW.to_string(),
87        );
88    }
89}
90
91impl TryFrom<&HashMap<String, String>> for PhysicalRegionOptions {
92    type Error = Error;
93
94    fn try_from(value: &HashMap<String, String>) -> Result<Self> {
95        let index = match value
96            .get(METRIC_ENGINE_INDEX_TYPE_OPTION)
97            .map(|s| s.to_lowercase())
98        {
99            Some(ref index_type) if index_type == "inverted" => Ok(IndexOptions::Inverted),
100            Some(ref index_type) if index_type == "skipping" => {
101                let granularity = value
102                    .get(METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION)
103                    .map_or(
104                        Ok(METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT),
105                        |g| {
106                            g.parse().map_err(|_| {
107                                ParseRegionOptionsSnafu {
108                                    reason: format!("Invalid granularity: {}", g),
109                                }
110                                .build()
111                            })
112                        },
113                    )?;
114                let false_positive_rate = value
115                    .get(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION)
116                    .map_or(
117                        Ok(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT),
118                        |f| {
119                            f.parse().ok().filter(|f| *f > 0.0 && *f <= 1.0).ok_or(
120                                ParseRegionOptionsSnafu {
121                                    reason: format!("Invalid false positive rate: {}", f),
122                                }
123                                .build(),
124                            )
125                        },
126                    )?;
127                Ok(IndexOptions::Skipping {
128                    granularity,
129                    false_positive_rate,
130                })
131            }
132            Some(index_type) => ParseRegionOptionsSnafu {
133                reason: format!("Invalid index type: {}", index_type),
134            }
135            .fail(),
136            None => Ok(IndexOptions::default()),
137        }?;
138
139        Ok(PhysicalRegionOptions { index })
140    }
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned option map from borrowed key/value pairs.
    fn options_from(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    #[test]
    fn test_set_data_region_options_should_remove_metric_engine_options() {
        let mut options = options_from(&[
            (METRIC_ENGINE_INDEX_TYPE_OPTION, "inverted"),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
                "102400",
            ),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "0.01",
            ),
        ]);

        set_data_region_options(&mut options, false);

        // None of the metric engine index options may survive.
        let removed_keys = [
            METRIC_ENGINE_INDEX_TYPE_OPTION,
            METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
            METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
        ];
        for key in removed_keys {
            assert_eq!(options.get(key), None);
        }
    }

    #[test]
    fn test_deserialize_physical_region_options_from_hashmap() {
        // Inverted index: the granularity option is present but irrelevant.
        let inverted = options_from(&[
            (METRIC_ENGINE_INDEX_TYPE_OPTION, "inverted"),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
                "102400",
            ),
        ]);
        let physical_region_options = PhysicalRegionOptions::try_from(&inverted).unwrap();
        assert_eq!(physical_region_options.index, IndexOptions::Inverted);

        // Skipping index with explicit granularity and false positive rate.
        let skipping = options_from(&[
            (METRIC_ENGINE_INDEX_TYPE_OPTION, "skipping"),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
                "102400",
            ),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "0.01",
            ),
        ]);
        let physical_region_options = PhysicalRegionOptions::try_from(&skipping).unwrap();
        let expected = IndexOptions::Skipping {
            granularity: 102400,
            false_positive_rate: 0.01,
        };
        assert_eq!(physical_region_options.index, expected);
    }

    #[test]
    fn test_set_data_region_options_default_compaction_time_window() {
        // Without a user-supplied time window the defaults are filled in.
        let mut options = HashMap::new();
        set_data_region_options(&mut options, false);

        let expected_type = COMPACTION_TYPE_TWCS.to_string();
        let expected_window = "1d".to_string();
        assert_eq!(options.get(COMPACTION_TYPE), Some(&expected_type));
        assert_eq!(options.get(TWCS_TIME_WINDOW), Some(&expected_window));
    }

    #[test]
    fn test_set_data_region_options_respects_user_compaction_time_window() {
        // A user-supplied time window must not be replaced by the default.
        let mut options = options_from(&[(TWCS_TIME_WINDOW, "2h"), (COMPACTION_TYPE, "twcs")]);
        set_data_region_options(&mut options, false);

        let expected_window = "2h".to_string();
        assert_eq!(options.get(TWCS_TIME_WINDOW), Some(&expected_window));
    }
}