api/v1/
column_def.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16
17use arrow_schema::extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY};
18use datatypes::schema::{
19    COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer,
20    FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY,
21    SkippingIndexOptions, SkippingIndexType, VECTOR_INDEX_KEY,
22};
23use greptime_proto::v1::{
24    Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
25};
26use snafu::ResultExt;
27
28use crate::error::{self, ConvertColumnDefaultConstraintSnafu, Result};
29use crate::helper::ColumnDataTypeWrapper;
30use crate::v1::{ColumnDef, ColumnOptions, SemanticType};
31
/// Key used to store fulltext options in gRPC column options.
const FULLTEXT_GRPC_KEY: &str = "fulltext";
/// Key used to store inverted index options in gRPC column options.
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
/// Key used to store skipping index options in gRPC column options.
const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index";
/// Key used to store vector index options in gRPC column options.
const VECTOR_INDEX_GRPC_KEY: &str = "vector_index";

/// Pairs of `(gRPC column option key, column schema metadata key)`.
///
/// For every pair, [collect_column_options] copies the value stored under the
/// gRPC key into a metadata entry named by the schema key. The extension type
/// keys are used unchanged on both sides.
const COLUMN_OPTION_MAPPINGS: [(&str, &str); 6] = [
    (FULLTEXT_GRPC_KEY, FULLTEXT_KEY),
    (INVERTED_INDEX_GRPC_KEY, INVERTED_INDEX_KEY),
    (SKIPPING_INDEX_GRPC_KEY, SKIPPING_INDEX_KEY),
    (VECTOR_INDEX_GRPC_KEY, VECTOR_INDEX_KEY),
    (EXTENSION_TYPE_NAME_KEY, EXTENSION_TYPE_NAME_KEY),
    (EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_METADATA_KEY),
];
49
50/// Tries to construct a `ColumnSchema` from the given  `ColumnDef`.
51pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
52    let data_type = ColumnDataTypeWrapper::try_new(
53        column_def.data_type,
54        column_def.datatype_extension.clone(),
55    )?;
56
57    let constraint = if column_def.default_constraint.is_empty() {
58        None
59    } else {
60        Some(
61            ColumnDefaultConstraint::try_from(column_def.default_constraint.as_slice()).context(
62                error::ConvertColumnDefaultConstraintSnafu {
63                    column: &column_def.name,
64                },
65            )?,
66        )
67    };
68
69    let mut metadata = HashMap::new();
70    if !column_def.comment.is_empty() {
71        metadata.insert(COMMENT_KEY.to_string(), column_def.comment.clone());
72    }
73    if let Some(options) = column_def.options.as_ref() {
74        if let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY) {
75            metadata.insert(FULLTEXT_KEY.to_string(), fulltext.to_owned());
76        }
77        if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) {
78            metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.to_owned());
79        }
80        if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
81            metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.to_owned());
82        }
83        if let Some(vector_index) = options.options.get(VECTOR_INDEX_GRPC_KEY) {
84            metadata.insert(VECTOR_INDEX_KEY.to_string(), vector_index.to_owned());
85        }
86        if let Some(extension_name) = options.options.get(EXTENSION_TYPE_NAME_KEY) {
87            metadata.insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone());
88        }
89        if let Some(extension_metadata) = options.options.get(EXTENSION_TYPE_METADATA_KEY) {
90            metadata.insert(
91                EXTENSION_TYPE_METADATA_KEY.to_string(),
92                extension_metadata.clone(),
93            );
94        }
95    }
96
97    ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
98        .with_metadata(metadata)
99        .with_time_index(column_def.semantic_type() == SemanticType::Timestamp)
100        .with_default_constraint(constraint)
101        .context(error::InvalidColumnDefaultConstraintSnafu {
102            column: &column_def.name,
103        })
104}
105
106/// Tries to construct a `ColumnDef` from the given `ColumnSchema`.
107///
108/// TODO(weny): Add tests for this function.
109pub fn try_as_column_def(column_schema: &ColumnSchema, is_primary_key: bool) -> Result<ColumnDef> {
110    let column_datatype =
111        ColumnDataTypeWrapper::try_from(column_schema.data_type.clone()).map(|w| w.to_parts())?;
112
113    let semantic_type = if column_schema.is_time_index() {
114        SemanticType::Timestamp
115    } else if is_primary_key {
116        SemanticType::Tag
117    } else {
118        SemanticType::Field
119    } as i32;
120    let comment = column_schema
121        .metadata()
122        .get(COMMENT_KEY)
123        .cloned()
124        .unwrap_or_default();
125
126    let default_constraint = match column_schema.default_constraint() {
127        None => vec![],
128        Some(v) => v
129            .clone()
130            .try_into()
131            .context(ConvertColumnDefaultConstraintSnafu {
132                column: &column_schema.name,
133            })?,
134    };
135    let options = options_from_column_schema(column_schema);
136    Ok(ColumnDef {
137        name: column_schema.name.clone(),
138        data_type: column_datatype.0 as i32,
139        is_nullable: column_schema.is_nullable(),
140        default_constraint,
141        semantic_type,
142        comment,
143        datatype_extension: column_datatype.1,
144        options,
145    })
146}
147
148/// Collect the [ColumnOptions] into the [Metadata] that can be used in, for example, [ColumnSchema].
149pub fn collect_column_options(column_options: Option<&ColumnOptions>) -> Metadata {
150    let Some(ColumnOptions { options }) = column_options else {
151        return Metadata::default();
152    };
153
154    let mut metadata = Metadata::with_capacity(options.len());
155    for (x, y) in COLUMN_OPTION_MAPPINGS {
156        if let Some(v) = options.get(x) {
157            metadata.insert(y.to_string(), v.clone());
158        }
159    }
160    metadata
161}
162
163/// Constructs a `ColumnOptions` from the given `ColumnSchema`.
164pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<ColumnOptions> {
165    let mut options = ColumnOptions::default();
166    if let Some(fulltext) = column_schema.metadata().get(FULLTEXT_KEY) {
167        options
168            .options
169            .insert(FULLTEXT_GRPC_KEY.to_string(), fulltext.to_owned());
170    }
171    if let Some(inverted_index) = column_schema.metadata().get(INVERTED_INDEX_KEY) {
172        options
173            .options
174            .insert(INVERTED_INDEX_GRPC_KEY.to_string(), inverted_index.clone());
175    }
176    if let Some(skipping_index) = column_schema.metadata().get(SKIPPING_INDEX_KEY) {
177        options
178            .options
179            .insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
180    }
181    if let Some(vector_index) = column_schema.metadata().get(VECTOR_INDEX_KEY) {
182        options
183            .options
184            .insert(VECTOR_INDEX_GRPC_KEY.to_string(), vector_index.clone());
185    }
186    if let Some(extension_name) = column_schema.metadata().get(EXTENSION_TYPE_NAME_KEY) {
187        options
188            .options
189            .insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone());
190    }
191    if let Some(extension_metadata) = column_schema.metadata().get(EXTENSION_TYPE_METADATA_KEY) {
192        options.options.insert(
193            EXTENSION_TYPE_METADATA_KEY.to_string(),
194            extension_metadata.clone(),
195        );
196    }
197
198    (!options.options.is_empty()).then_some(options)
199}
200
201/// Checks if the `ColumnOptions` contains fulltext options.
202pub fn contains_fulltext(options: &Option<ColumnOptions>) -> bool {
203    options
204        .as_ref()
205        .is_some_and(|o| o.options.contains_key(FULLTEXT_GRPC_KEY))
206}
207
208/// Checks if the `ColumnOptions` contains skipping index options.
209pub fn contains_skipping(options: &Option<ColumnOptions>) -> bool {
210    options
211        .as_ref()
212        .is_some_and(|o| o.options.contains_key(SKIPPING_INDEX_GRPC_KEY))
213}
214
215/// Tries to construct a `ColumnOptions` from the given `FulltextOptions`.
216pub fn options_from_fulltext(fulltext: &FulltextOptions) -> Result<Option<ColumnOptions>> {
217    let mut options = ColumnOptions::default();
218
219    let v = serde_json::to_string(fulltext).context(error::SerializeJsonSnafu)?;
220    options.options.insert(FULLTEXT_GRPC_KEY.to_string(), v);
221
222    Ok((!options.options.is_empty()).then_some(options))
223}
224
225/// Tries to construct a `ColumnOptions` from the given `SkippingIndexOptions`.
226pub fn options_from_skipping(skipping: &SkippingIndexOptions) -> Result<Option<ColumnOptions>> {
227    let mut options = ColumnOptions::default();
228
229    let v = serde_json::to_string(skipping).context(error::SerializeJsonSnafu)?;
230    options
231        .options
232        .insert(SKIPPING_INDEX_GRPC_KEY.to_string(), v);
233
234    Ok((!options.options.is_empty()).then_some(options))
235}
236
237/// Tries to construct a `ColumnOptions` for inverted index.
238pub fn options_from_inverted() -> ColumnOptions {
239    let mut options = ColumnOptions::default();
240    options
241        .options
242        .insert(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string());
243    options
244}
245
246/// Tries to construct a `FulltextAnalyzer` from the given analyzer.
247pub fn as_fulltext_option_analyzer(analyzer: Analyzer) -> FulltextAnalyzer {
248    match analyzer {
249        Analyzer::English => FulltextAnalyzer::English,
250        Analyzer::Chinese => FulltextAnalyzer::Chinese,
251    }
252}
253
254/// Tries to construct a `FulltextBackend` from the given backend.
255pub fn as_fulltext_option_backend(backend: PbFulltextBackend) -> FulltextBackend {
256    match backend {
257        PbFulltextBackend::Bloom => FulltextBackend::Bloom,
258        PbFulltextBackend::Tantivy => FulltextBackend::Tantivy,
259    }
260}
261
/// Converts the gRPC skipping index type into the matching `SkippingIndexType`.
///
/// The conversion is infallible. The match has no wildcard arm, so adding a
/// variant to the gRPC enum makes this function fail to compile until the new
/// variant is mapped.
pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType {
    match skipping_index_type {
        PbSkippingIndexType::BloomFilter => SkippingIndexType::BloomFilter,
    }
}
268
#[cfg(test)]
mod tests {

    use datatypes::data_type::ConcreteDataType;
    use datatypes::schema::{
        FulltextAnalyzer, FulltextBackend, VectorDistanceMetric, VectorIndexOptions,
    };
    use serde_json::json;

    use super::*;
    use crate::v1::ColumnDataType;

    /// A `ColumnDef` carrying a comment, a default value and index options is
    /// converted into a `ColumnSchema` exposing all of them.
    #[test]
    fn test_try_as_column_schema() {
        let column_def = ColumnDef {
            name: "test".to_string(),
            data_type: ColumnDataType::String as i32,
            is_nullable: true,
            default_constraint: ColumnDefaultConstraint::Value("test_default".into())
                .try_into()
                .unwrap(),
            semantic_type: SemanticType::Field as i32,
            comment: "test_comment".to_string(),
            datatype_extension: None,
            options: Some(ColumnOptions {
                options: HashMap::from([
                    (
                        FULLTEXT_GRPC_KEY.to_string(),
                        "{\"enable\":true}".to_string(),
                    ),
                    (INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string()),
                    (
                        VECTOR_INDEX_GRPC_KEY.to_string(),
                        "{\"engine\":\"usearch\",\"metric\":\"l2sq\",\"connectivity\":16,\"expansion-add\":128,\"expansion-search\":64}".to_string(),
                    ),
                ]),
            }),
        };

        let schema = try_as_column_schema(&column_def).unwrap();
        assert_eq!(schema.name, "test");
        assert_eq!(schema.data_type, ConcreteDataType::string_datatype());
        assert!(!schema.is_time_index());
        assert!(schema.is_nullable());
        assert_eq!(
            schema.default_constraint().unwrap(),
            &ColumnDefaultConstraint::Value("test_default".into())
        );
        assert_eq!(schema.metadata().get(COMMENT_KEY).unwrap(), "test_comment");
        assert_eq!(
            schema.fulltext_options().unwrap().unwrap(),
            FulltextOptions {
                enable: true,
                ..Default::default()
            }
        );
        assert!(schema.is_inverted_indexed());
        let vector_options = schema.vector_index_options().unwrap().unwrap();
        assert_eq!(vector_options.metric, VectorDistanceMetric::L2sq);
    }

    /// Semantic type, comment and the absence of options/default constraint
    /// are reflected in the produced `ColumnDef`.
    #[test]
    fn test_try_as_column_def() {
        let schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true)
            .with_metadata(HashMap::from([(
                COMMENT_KEY.to_string(),
                "test_comment".to_string(),
            )]));

        let column_def = try_as_column_def(&schema, false).unwrap();
        assert_eq!(column_def.name, "test");
        assert_eq!(column_def.data_type, ColumnDataType::String as i32);
        assert!(column_def.is_nullable);
        assert!(column_def.default_constraint.is_empty());
        assert_eq!(column_def.semantic_type, SemanticType::Field as i32);
        assert_eq!(column_def.comment, "test_comment");
        assert!(column_def.options.is_none());

        let column_def = try_as_column_def(&schema, true).unwrap();
        assert_eq!(column_def.semantic_type, SemanticType::Tag as i32);

        // The time index wins over the primary key flag.
        let time_index = schema.clone().with_time_index(true);
        let column_def = try_as_column_def(&time_index, true).unwrap();
        assert_eq!(column_def.semantic_type, SemanticType::Timestamp as i32);
    }

    /// Known gRPC keys are renamed to their schema keys; unknown keys are dropped.
    #[test]
    fn test_collect_column_options() {
        assert!(collect_column_options(None).is_empty());

        let options = ColumnOptions {
            options: HashMap::from([
                (
                    FULLTEXT_GRPC_KEY.to_string(),
                    "{\"enable\":true}".to_string(),
                ),
                (INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string()),
                ("unknown_option".to_string(), "ignored".to_string()),
            ]),
        };
        let metadata = collect_column_options(Some(&options));
        assert_eq!(metadata.len(), 2);
        assert_eq!(metadata.get(FULLTEXT_KEY).unwrap(), "{\"enable\":true}");
        assert_eq!(metadata.get(INVERTED_INDEX_KEY).unwrap(), "true");
        assert!(!metadata.contains_key("unknown_option"));
    }

    #[test]
    fn test_options_from_column_schema() {
        let schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true);
        let options = options_from_column_schema(&schema);
        assert!(options.is_none());

        let mut schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true)
            .with_fulltext_options(FulltextOptions::new_unchecked(
                true,
                FulltextAnalyzer::English,
                false,
                FulltextBackend::Bloom,
                10240,
                0.01,
            ))
            .unwrap();
        schema.set_inverted_index(true);
        let options = options_from_column_schema(&schema).unwrap();
        assert_eq!(
            options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
        );
        assert_eq!(
            options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
            "true"
        );
    }

    #[test]
    fn test_vector_index_options_roundtrip() {
        let schema = ColumnSchema::new("test", ConcreteDataType::vector_datatype(4), true)
            .with_vector_index_options(&VectorIndexOptions::default())
            .unwrap();
        let column_def = try_as_column_def(&schema, false).unwrap();
        let roundtrip = try_as_column_schema(&column_def).unwrap();
        let options = roundtrip.vector_index_options().unwrap().unwrap();
        assert_eq!(options.metric, VectorDistanceMetric::L2sq);

        let options = column_def.options.unwrap();
        let raw = options.options.get(VECTOR_INDEX_GRPC_KEY).unwrap();
        let json_value: serde_json::Value = serde_json::from_str(raw).unwrap();
        let expected = json!({
            "engine": "usearch",
            "metric": "l2sq",
            "connectivity": 16,
            "expansion-add": 128,
            "expansion-search": 64
        });
        assert_eq!(json_value, expected);
    }

    #[test]
    fn test_options_with_fulltext() {
        let fulltext = FulltextOptions::new_unchecked(
            true,
            FulltextAnalyzer::English,
            false,
            FulltextBackend::Bloom,
            10240,
            0.01,
        );
        let options = options_from_fulltext(&fulltext).unwrap().unwrap();
        assert_eq!(
            options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
        );
    }

    /// The inverted index options hold exactly one entry and nothing else.
    #[test]
    fn test_options_from_inverted() {
        let options = options_from_inverted();
        assert_eq!(options.options.len(), 1);
        assert_eq!(
            options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
            "true"
        );
        assert!(!contains_fulltext(&Some(options)));
    }

    #[test]
    fn test_contains_fulltext() {
        let options = ColumnOptions {
            options: HashMap::from([(
                FULLTEXT_GRPC_KEY.to_string(),
                "{\"enable\":true}".to_string(),
            )]),
        };
        assert!(contains_fulltext(&Some(options)));

        let options = ColumnOptions {
            options: HashMap::new(),
        };
        assert!(!contains_fulltext(&Some(options)));

        assert!(!contains_fulltext(&None));
    }

    #[test]
    fn test_contains_skipping() {
        let options = ColumnOptions {
            options: HashMap::from([(
                SKIPPING_INDEX_GRPC_KEY.to_string(),
                "{}".to_string(),
            )]),
        };
        assert!(contains_skipping(&Some(options)));

        // A fulltext entry alone must not be reported as a skipping index.
        let options = ColumnOptions {
            options: HashMap::from([(
                FULLTEXT_GRPC_KEY.to_string(),
                "{\"enable\":true}".to_string(),
            )]),
        };
        assert!(!contains_skipping(&Some(options)));

        let options = ColumnOptions {
            options: HashMap::new(),
        };
        assert!(!contains_skipping(&Some(options)));

        assert!(!contains_skipping(&None));
    }
}