1use std::collections::HashMap;
16
17use arrow_schema::extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY};
18use datatypes::schema::{
19 COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer,
20 FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY,
21 SkippingIndexOptions, SkippingIndexType, VECTOR_INDEX_KEY,
22};
23use greptime_proto::v1::{
24 Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
25};
26use snafu::ResultExt;
27
28use crate::error::{self, ConvertColumnDefaultConstraintSnafu, Result};
29use crate::helper::ColumnDataTypeWrapper;
30use crate::v1::{ColumnDef, ColumnOptions, SemanticType};
31
32const FULLTEXT_GRPC_KEY: &str = "fulltext";
34const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
36const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index";
38const VECTOR_INDEX_GRPC_KEY: &str = "vector_index";
40
41const COLUMN_OPTION_MAPPINGS: [(&str, &str); 6] = [
42 (FULLTEXT_GRPC_KEY, FULLTEXT_KEY),
43 (INVERTED_INDEX_GRPC_KEY, INVERTED_INDEX_KEY),
44 (SKIPPING_INDEX_GRPC_KEY, SKIPPING_INDEX_KEY),
45 (VECTOR_INDEX_GRPC_KEY, VECTOR_INDEX_KEY),
46 (EXTENSION_TYPE_NAME_KEY, EXTENSION_TYPE_NAME_KEY),
47 (EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_METADATA_KEY),
48];
49
50pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
52 let data_type = ColumnDataTypeWrapper::try_new(
53 column_def.data_type,
54 column_def.datatype_extension.clone(),
55 )?;
56
57 let constraint = if column_def.default_constraint.is_empty() {
58 None
59 } else {
60 Some(
61 ColumnDefaultConstraint::try_from(column_def.default_constraint.as_slice()).context(
62 error::ConvertColumnDefaultConstraintSnafu {
63 column: &column_def.name,
64 },
65 )?,
66 )
67 };
68
69 let mut metadata = HashMap::new();
70 if !column_def.comment.is_empty() {
71 metadata.insert(COMMENT_KEY.to_string(), column_def.comment.clone());
72 }
73 if let Some(options) = column_def.options.as_ref() {
74 if let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY) {
75 metadata.insert(FULLTEXT_KEY.to_string(), fulltext.to_owned());
76 }
77 if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) {
78 metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.to_owned());
79 }
80 if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
81 metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.to_owned());
82 }
83 if let Some(vector_index) = options.options.get(VECTOR_INDEX_GRPC_KEY) {
84 metadata.insert(VECTOR_INDEX_KEY.to_string(), vector_index.to_owned());
85 }
86 if let Some(extension_name) = options.options.get(EXTENSION_TYPE_NAME_KEY) {
87 metadata.insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone());
88 }
89 if let Some(extension_metadata) = options.options.get(EXTENSION_TYPE_METADATA_KEY) {
90 metadata.insert(
91 EXTENSION_TYPE_METADATA_KEY.to_string(),
92 extension_metadata.clone(),
93 );
94 }
95 }
96
97 ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
98 .with_metadata(metadata)
99 .with_time_index(column_def.semantic_type() == SemanticType::Timestamp)
100 .with_default_constraint(constraint)
101 .context(error::InvalidColumnDefaultConstraintSnafu {
102 column: &column_def.name,
103 })
104}
105
106pub fn try_as_column_def(column_schema: &ColumnSchema, is_primary_key: bool) -> Result<ColumnDef> {
110 let column_datatype =
111 ColumnDataTypeWrapper::try_from(column_schema.data_type.clone()).map(|w| w.to_parts())?;
112
113 let semantic_type = if column_schema.is_time_index() {
114 SemanticType::Timestamp
115 } else if is_primary_key {
116 SemanticType::Tag
117 } else {
118 SemanticType::Field
119 } as i32;
120 let comment = column_schema
121 .metadata()
122 .get(COMMENT_KEY)
123 .cloned()
124 .unwrap_or_default();
125
126 let default_constraint = match column_schema.default_constraint() {
127 None => vec![],
128 Some(v) => v
129 .clone()
130 .try_into()
131 .context(ConvertColumnDefaultConstraintSnafu {
132 column: &column_schema.name,
133 })?,
134 };
135 let options = options_from_column_schema(column_schema);
136 Ok(ColumnDef {
137 name: column_schema.name.clone(),
138 data_type: column_datatype.0 as i32,
139 is_nullable: column_schema.is_nullable(),
140 default_constraint,
141 semantic_type,
142 comment,
143 datatype_extension: column_datatype.1,
144 options,
145 })
146}
147
148pub fn collect_column_options(column_options: Option<&ColumnOptions>) -> Metadata {
150 let Some(ColumnOptions { options }) = column_options else {
151 return Metadata::default();
152 };
153
154 let mut metadata = Metadata::with_capacity(options.len());
155 for (x, y) in COLUMN_OPTION_MAPPINGS {
156 if let Some(v) = options.get(x) {
157 metadata.insert(y.to_string(), v.clone());
158 }
159 }
160 metadata
161}
162
163pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<ColumnOptions> {
165 let mut options = ColumnOptions::default();
166 if let Some(fulltext) = column_schema.metadata().get(FULLTEXT_KEY) {
167 options
168 .options
169 .insert(FULLTEXT_GRPC_KEY.to_string(), fulltext.to_owned());
170 }
171 if let Some(inverted_index) = column_schema.metadata().get(INVERTED_INDEX_KEY) {
172 options
173 .options
174 .insert(INVERTED_INDEX_GRPC_KEY.to_string(), inverted_index.clone());
175 }
176 if let Some(skipping_index) = column_schema.metadata().get(SKIPPING_INDEX_KEY) {
177 options
178 .options
179 .insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
180 }
181 if let Some(vector_index) = column_schema.metadata().get(VECTOR_INDEX_KEY) {
182 options
183 .options
184 .insert(VECTOR_INDEX_GRPC_KEY.to_string(), vector_index.clone());
185 }
186 if let Some(extension_name) = column_schema.metadata().get(EXTENSION_TYPE_NAME_KEY) {
187 options
188 .options
189 .insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone());
190 }
191 if let Some(extension_metadata) = column_schema.metadata().get(EXTENSION_TYPE_METADATA_KEY) {
192 options.options.insert(
193 EXTENSION_TYPE_METADATA_KEY.to_string(),
194 extension_metadata.clone(),
195 );
196 }
197
198 (!options.options.is_empty()).then_some(options)
199}
200
201pub fn contains_fulltext(options: &Option<ColumnOptions>) -> bool {
203 options
204 .as_ref()
205 .is_some_and(|o| o.options.contains_key(FULLTEXT_GRPC_KEY))
206}
207
208pub fn contains_skipping(options: &Option<ColumnOptions>) -> bool {
210 options
211 .as_ref()
212 .is_some_and(|o| o.options.contains_key(SKIPPING_INDEX_GRPC_KEY))
213}
214
215pub fn options_from_fulltext(fulltext: &FulltextOptions) -> Result<Option<ColumnOptions>> {
217 let mut options = ColumnOptions::default();
218
219 let v = serde_json::to_string(fulltext).context(error::SerializeJsonSnafu)?;
220 options.options.insert(FULLTEXT_GRPC_KEY.to_string(), v);
221
222 Ok((!options.options.is_empty()).then_some(options))
223}
224
225pub fn options_from_skipping(skipping: &SkippingIndexOptions) -> Result<Option<ColumnOptions>> {
227 let mut options = ColumnOptions::default();
228
229 let v = serde_json::to_string(skipping).context(error::SerializeJsonSnafu)?;
230 options
231 .options
232 .insert(SKIPPING_INDEX_GRPC_KEY.to_string(), v);
233
234 Ok((!options.options.is_empty()).then_some(options))
235}
236
237pub fn options_from_inverted() -> ColumnOptions {
239 let mut options = ColumnOptions::default();
240 options
241 .options
242 .insert(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string());
243 options
244}
245
246pub fn as_fulltext_option_analyzer(analyzer: Analyzer) -> FulltextAnalyzer {
248 match analyzer {
249 Analyzer::English => FulltextAnalyzer::English,
250 Analyzer::Chinese => FulltextAnalyzer::Chinese,
251 }
252}
253
254pub fn as_fulltext_option_backend(backend: PbFulltextBackend) -> FulltextBackend {
256 match backend {
257 PbFulltextBackend::Bloom => FulltextBackend::Bloom,
258 PbFulltextBackend::Tantivy => FulltextBackend::Tantivy,
259 }
260}
261
262pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType {
264 match skipping_index_type {
265 PbSkippingIndexType::BloomFilter => SkippingIndexType::BloomFilter,
266 }
267}
268
#[cfg(test)]
mod tests {

    use datatypes::data_type::ConcreteDataType;
    use datatypes::schema::{
        FulltextAnalyzer, FulltextBackend, VectorDistanceMetric, VectorIndexOptions,
    };
    use serde_json::json;

    use super::*;
    use crate::v1::ColumnDataType;

    /// Fulltext option JSON that only sets the `enable` flag.
    const ENABLED_FULLTEXT_JSON: &str = r#"{"enable":true}"#;

    /// Vector index option JSON used as gRPC input.
    const VECTOR_INDEX_JSON: &str = r#"{"engine":"usearch","metric":"l2sq","connectivity":16,"expansion-add":128,"expansion-search":64}"#;

    /// JSON expected for the options returned by `bloom_fulltext_options`.
    const BLOOM_FULLTEXT_JSON: &str = r#"{"enable":true,"analyzer":"English","case-sensitive":false,"backend":"bloom","granularity":10240,"false-positive-rate-in-10000":100}"#;

    /// Fulltext options shared by the serialization tests.
    fn bloom_fulltext_options() -> FulltextOptions {
        FulltextOptions::new_unchecked(
            true,
            FulltextAnalyzer::English,
            false,
            FulltextBackend::Bloom,
            10240,
            0.01,
        )
    }

    #[test]
    fn test_try_as_column_schema() {
        let grpc_options = HashMap::from([
            (
                FULLTEXT_GRPC_KEY.to_string(),
                ENABLED_FULLTEXT_JSON.to_string(),
            ),
            (INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string()),
            (
                VECTOR_INDEX_GRPC_KEY.to_string(),
                VECTOR_INDEX_JSON.to_string(),
            ),
        ]);
        let column_def = ColumnDef {
            name: "test".to_string(),
            data_type: ColumnDataType::String as i32,
            is_nullable: true,
            default_constraint: ColumnDefaultConstraint::Value("test_default".into())
                .try_into()
                .unwrap(),
            semantic_type: SemanticType::Field as i32,
            comment: "test_comment".to_string(),
            datatype_extension: None,
            options: Some(ColumnOptions {
                options: grpc_options,
            }),
        };

        let column_schema = try_as_column_schema(&column_def).unwrap();

        assert_eq!(column_schema.name, "test");
        assert_eq!(
            column_schema.data_type,
            ConcreteDataType::string_datatype()
        );
        assert!(!column_schema.is_time_index());
        assert!(column_schema.is_nullable());
        assert_eq!(
            column_schema.default_constraint().unwrap(),
            &ColumnDefaultConstraint::Value("test_default".into())
        );
        assert_eq!(
            column_schema.metadata().get(COMMENT_KEY).unwrap(),
            "test_comment"
        );
        assert_eq!(
            column_schema.fulltext_options().unwrap().unwrap(),
            FulltextOptions {
                enable: true,
                ..Default::default()
            }
        );
        assert!(column_schema.is_inverted_indexed());
        let vector_index = column_schema.vector_index_options().unwrap().unwrap();
        assert_eq!(vector_index.metric, VectorDistanceMetric::L2sq);
    }

    #[test]
    fn test_options_from_column_schema() {
        // A schema without any index metadata yields no options at all.
        let plain_schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true);
        assert!(options_from_column_schema(&plain_schema).is_none());

        let mut indexed_schema =
            ColumnSchema::new("test", ConcreteDataType::string_datatype(), true)
                .with_fulltext_options(bloom_fulltext_options())
                .unwrap();
        indexed_schema.set_inverted_index(true);

        let column_options = options_from_column_schema(&indexed_schema).unwrap();
        assert_eq!(
            column_options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
            BLOOM_FULLTEXT_JSON
        );
        assert_eq!(
            column_options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
            "true"
        );
    }

    #[test]
    fn test_vector_index_options_roundtrip() {
        let original = ColumnSchema::new("test", ConcreteDataType::vector_datatype(4), true)
            .with_vector_index_options(&VectorIndexOptions::default())
            .unwrap();

        // Schema -> gRPC definition -> schema keeps the vector index options.
        let column_def = try_as_column_def(&original, false).unwrap();
        let restored = try_as_column_schema(&column_def).unwrap();
        let restored_options = restored.vector_index_options().unwrap().unwrap();
        assert_eq!(restored_options.metric, VectorDistanceMetric::L2sq);

        // The gRPC definition carries the options as JSON under the gRPC key.
        let column_options = column_def.options.unwrap();
        let raw_json = column_options.options.get(VECTOR_INDEX_GRPC_KEY).unwrap();
        let actual: serde_json::Value = serde_json::from_str(raw_json).unwrap();
        let expected = json!({
            "engine": "usearch",
            "metric": "l2sq",
            "connectivity": 16,
            "expansion-add": 128,
            "expansion-search": 64
        });
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_options_with_fulltext() {
        let column_options = options_from_fulltext(&bloom_fulltext_options())
            .unwrap()
            .unwrap();
        assert_eq!(
            column_options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
            BLOOM_FULLTEXT_JSON
        );
    }

    #[test]
    fn test_contains_fulltext() {
        let with_fulltext = ColumnOptions {
            options: HashMap::from([(
                FULLTEXT_GRPC_KEY.to_string(),
                ENABLED_FULLTEXT_JSON.to_string(),
            )]),
        };
        assert!(contains_fulltext(&Some(with_fulltext)));

        let without_entries = ColumnOptions {
            options: HashMap::new(),
        };
        assert!(!contains_fulltext(&Some(without_entries)));

        assert!(!contains_fulltext(&None));
    }
}