datatypes/schema/
column_schema.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::fmt;
17use std::str::FromStr;
18
19use arrow::datatypes::Field;
20use arrow_schema::extension::{
21    EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType,
22};
23use serde::{Deserialize, Serialize};
24use snafu::{ResultExt, ensure};
25use sqlparser_derive::{Visit, VisitMut};
26
27use crate::data_type::{ConcreteDataType, DataType};
28use crate::error::{
29    self, ArrowMetadataSnafu, Error, InvalidFulltextOptionSnafu, ParseExtendedTypeSnafu, Result,
30};
31use crate::schema::TYPE_KEY;
32use crate::schema::constraint::ColumnDefaultConstraint;
33use crate::value::Value;
34use crate::vectors::VectorRef;
35
/// String key/value pairs attached to a [`ColumnSchema`] as its metadata.
pub type Metadata = HashMap<String, String>;
37
/// Key used to store whether the column is time index in arrow field's metadata.
pub const TIME_INDEX_KEY: &str = "greptime:time_index";
/// Key used to store the column comment in the column's metadata.
pub const COMMENT_KEY: &str = "greptime:storage:comment";
/// Key used to store default constraint in arrow field's metadata.
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
/// Key used to store fulltext options in arrow field's metadata.
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
/// Key used to store whether the column has inverted index in arrow field's metadata.
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
/// Key used to store skip options in arrow field's metadata.
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
/// Key used to store vector index options in arrow field's metadata.
pub const VECTOR_INDEX_KEY: &str = "greptime:vector_index";

/// Keys used in fulltext options
// These are the option names looked up when building `FulltextOptions`
// from a string map.
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend";
pub const COLUMN_FULLTEXT_OPT_KEY_GRANULARITY: &str = "granularity";
pub const COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";

/// Keys used in SKIPPING index options
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";

/// Default index granularity, used when fulltext options do not specify one.
pub const DEFAULT_GRANULARITY: u32 = 10240;

/// Default false positive rate (1%), used when index options do not specify one.
pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01;
68
/// Schema of a column, used as an immutable struct.
///
/// Index options, the comment and extension type information are kept as
/// string entries in `metadata`.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
    /// Name of the column.
    pub name: String,
    /// Data type of the column.
    pub data_type: ConcreteDataType,
    /// Whether the column accepts null values.
    is_nullable: bool,
    /// Whether the column is the time index.
    is_time_index: bool,
    /// Optional default constraint of the column.
    default_constraint: Option<ColumnDefaultConstraint>,
    /// Additional key/value metadata of the column.
    metadata: Metadata,
}
79
80impl fmt::Debug for ColumnSchema {
81    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82        write!(
83            f,
84            "{} {} {}",
85            self.name,
86            self.data_type,
87            if self.is_nullable { "null" } else { "not null" },
88        )?;
89
90        if self.is_time_index {
91            write!(f, " time_index")?;
92        }
93
94        // Add default constraint if present
95        if let Some(default_constraint) = &self.default_constraint {
96            write!(f, " default={:?}", default_constraint)?;
97        }
98
99        // Add metadata if present
100        if !self.metadata.is_empty() {
101            write!(f, " metadata={:?}", self.metadata)?;
102        }
103
104        Ok(())
105    }
106}
107
108impl ColumnSchema {
109    pub fn new<T: Into<String>>(
110        name: T,
111        data_type: ConcreteDataType,
112        is_nullable: bool,
113    ) -> ColumnSchema {
114        ColumnSchema {
115            name: name.into(),
116            data_type,
117            is_nullable,
118            is_time_index: false,
119            default_constraint: None,
120            metadata: Metadata::new(),
121        }
122    }
123
124    #[inline]
125    pub fn is_time_index(&self) -> bool {
126        self.is_time_index
127    }
128
129    #[inline]
130    pub fn is_nullable(&self) -> bool {
131        self.is_nullable
132    }
133
134    #[inline]
135    pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
136        self.default_constraint.as_ref()
137    }
138
139    /// Check if the default constraint is a impure function.
140    pub fn is_default_impure(&self) -> bool {
141        self.default_constraint
142            .as_ref()
143            .map(|c| c.is_function())
144            .unwrap_or(false)
145    }
146
147    #[inline]
148    pub fn metadata(&self) -> &Metadata {
149        &self.metadata
150    }
151
152    #[inline]
153    pub fn mut_metadata(&mut self) -> &mut Metadata {
154        &mut self.metadata
155    }
156
157    /// Retrieve the column comment
158    pub fn column_comment(&self) -> Option<&String> {
159        self.metadata.get(COMMENT_KEY)
160    }
161
162    pub fn with_time_index(mut self, is_time_index: bool) -> Self {
163        self.is_time_index = is_time_index;
164        if is_time_index {
165            let _ = self
166                .metadata
167                .insert(TIME_INDEX_KEY.to_string(), "true".to_string());
168        } else {
169            let _ = self.metadata.remove(TIME_INDEX_KEY);
170        }
171        self
172    }
173
174    /// Set the inverted index for the column.
175    /// Similar to [with_inverted_index] but don't take the ownership.
176    ///
177    /// [with_inverted_index]: Self::with_inverted_index
178    pub fn set_inverted_index(&mut self, value: bool) {
179        match value {
180            true => {
181                self.metadata
182                    .insert(INVERTED_INDEX_KEY.to_string(), value.to_string());
183            }
184            false => {
185                self.metadata.remove(INVERTED_INDEX_KEY);
186            }
187        }
188    }
189
190    /// Set the inverted index for the column.
191    /// Similar to [set_inverted_index] but take the ownership and return a owned value.
192    ///
193    /// [set_inverted_index]: Self::set_inverted_index
194    pub fn with_inverted_index(mut self, value: bool) -> Self {
195        self.set_inverted_index(value);
196        self
197    }
198
199    pub fn is_inverted_indexed(&self) -> bool {
200        self.metadata
201            .get(INVERTED_INDEX_KEY)
202            .map(|v| v.eq_ignore_ascii_case("true"))
203            .unwrap_or(false)
204    }
205
206    pub fn is_fulltext_indexed(&self) -> bool {
207        self.fulltext_options()
208            .unwrap_or_default()
209            .map(|option| option.enable)
210            .unwrap_or_default()
211    }
212
213    pub fn is_skipping_indexed(&self) -> bool {
214        self.skipping_index_options().unwrap_or_default().is_some()
215    }
216
217    pub fn has_inverted_index_key(&self) -> bool {
218        self.metadata.contains_key(INVERTED_INDEX_KEY)
219    }
220
221    /// Checks if this column has a vector index.
222    pub fn is_vector_indexed(&self) -> bool {
223        match self.vector_index_options() {
224            Ok(opts) => opts.is_some(),
225            Err(e) => {
226                common_telemetry::warn!(
227                    "Failed to deserialize vector_index_options for column '{}': {}",
228                    self.name,
229                    e
230                );
231                false
232            }
233        }
234    }
235
236    /// Gets the vector index options.
237    pub fn vector_index_options(&self) -> Result<Option<VectorIndexOptions>> {
238        match self.metadata.get(VECTOR_INDEX_KEY) {
239            None => Ok(None),
240            Some(json) => {
241                let options =
242                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
243                Ok(Some(options))
244            }
245        }
246    }
247
248    /// Sets the vector index options.
249    pub fn set_vector_index_options(&mut self, options: &VectorIndexOptions) -> Result<()> {
250        self.metadata.insert(
251            VECTOR_INDEX_KEY.to_string(),
252            serde_json::to_string(options).context(error::SerializeSnafu)?,
253        );
254        Ok(())
255    }
256
257    /// Removes the vector index options.
258    pub fn unset_vector_index_options(&mut self) {
259        self.metadata.remove(VECTOR_INDEX_KEY);
260    }
261
262    /// Sets vector index options and returns self for chaining.
263    pub fn with_vector_index_options(mut self, options: &VectorIndexOptions) -> Result<Self> {
264        self.set_vector_index_options(options)?;
265        Ok(self)
266    }
267
268    /// Set default constraint.
269    ///
270    /// If a default constraint exists for the column, this method will
271    /// validate it against the column's data type and nullability.
272    pub fn with_default_constraint(
273        mut self,
274        default_constraint: Option<ColumnDefaultConstraint>,
275    ) -> Result<Self> {
276        if let Some(constraint) = &default_constraint {
277            constraint.validate(&self.data_type, self.is_nullable)?;
278        }
279
280        self.default_constraint = default_constraint;
281        Ok(self)
282    }
283
284    /// Set the nullablity to `true` of the column.
285    /// Similar to [set_nullable] but take the ownership and return a owned value.
286    ///
287    /// [set_nullable]: Self::set_nullable
288    pub fn with_nullable_set(mut self) -> Self {
289        self.is_nullable = true;
290        self
291    }
292
293    /// Set the nullability to `true` of the column.
294    /// Similar to [with_nullable_set] but don't take the ownership
295    ///
296    /// [with_nullable_set]: Self::with_nullable_set
297    pub fn set_nullable(&mut self) {
298        self.is_nullable = true;
299    }
300
301    /// Set the `is_time_index` to `true` of the column.
302    /// Similar to [with_time_index] but don't take the ownership.
303    ///
304    /// [with_time_index]: Self::with_time_index
305    pub fn set_time_index(&mut self) {
306        self.is_time_index = true;
307    }
308
309    /// Creates a new [`ColumnSchema`] with given metadata.
310    pub fn with_metadata(mut self, metadata: Metadata) -> Self {
311        self.metadata = metadata;
312        self
313    }
314
315    /// Creates a vector with default value for this column.
316    ///
317    /// If the column is `NOT NULL` but doesn't has `DEFAULT` value supplied, returns `Ok(None)`.
318    pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
319        match &self.default_constraint {
320            Some(c) => c
321                .create_default_vector(&self.data_type, self.is_nullable, num_rows)
322                .map(Some),
323            None => {
324                if self.is_nullable {
325                    // No default constraint, use null as default value.
326                    // TODO(yingwen): Use NullVector once it supports setting logical type.
327                    ColumnDefaultConstraint::null_value()
328                        .create_default_vector(&self.data_type, self.is_nullable, num_rows)
329                        .map(Some)
330                } else {
331                    Ok(None)
332                }
333            }
334        }
335    }
336
337    /// Creates a vector for padding.
338    ///
339    /// This method always returns a vector since it uses [DataType::default_value]
340    /// to fill the vector. Callers should only use the created vector for padding
341    /// and never read its content.
342    pub fn create_default_vector_for_padding(&self, num_rows: usize) -> VectorRef {
343        let padding_value = if self.is_nullable {
344            Value::Null
345        } else {
346            // If the column is not null, use the data type's default value as it is
347            // more efficient to acquire.
348            self.data_type.default_value()
349        };
350        let value_ref = padding_value.as_value_ref();
351        let mut mutable_vector = self.data_type.create_mutable_vector(num_rows);
352        for _ in 0..num_rows {
353            mutable_vector.push_value_ref(&value_ref);
354        }
355        mutable_vector.to_vector()
356    }
357
358    /// Creates a default value for this column.
359    ///
360    /// If the column is `NOT NULL` but doesn't has `DEFAULT` value supplied, returns `Ok(None)`.
361    pub fn create_default(&self) -> Result<Option<Value>> {
362        match &self.default_constraint {
363            Some(c) => c
364                .create_default(&self.data_type, self.is_nullable)
365                .map(Some),
366            None => {
367                if self.is_nullable {
368                    // No default constraint, use null as default value.
369                    ColumnDefaultConstraint::null_value()
370                        .create_default(&self.data_type, self.is_nullable)
371                        .map(Some)
372                } else {
373                    Ok(None)
374                }
375            }
376        }
377    }
378
379    /// Creates an impure default value for this column, only if it have a impure default constraint.
380    /// Otherwise, returns `Ok(None)`.
381    pub fn create_impure_default(&self) -> Result<Option<Value>> {
382        match &self.default_constraint {
383            Some(c) => c.create_impure_default(&self.data_type),
384            None => Ok(None),
385        }
386    }
387
388    /// Retrieves the fulltext options for the column.
389    pub fn fulltext_options(&self) -> Result<Option<FulltextOptions>> {
390        match self.metadata.get(FULLTEXT_KEY) {
391            None => Ok(None),
392            Some(json) => {
393                let options =
394                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
395                Ok(Some(options))
396            }
397        }
398    }
399
400    pub fn with_fulltext_options(mut self, options: FulltextOptions) -> Result<Self> {
401        self.metadata.insert(
402            FULLTEXT_KEY.to_string(),
403            serde_json::to_string(&options).context(error::SerializeSnafu)?,
404        );
405        Ok(self)
406    }
407
408    pub fn set_fulltext_options(&mut self, options: &FulltextOptions) -> Result<()> {
409        self.metadata.insert(
410            FULLTEXT_KEY.to_string(),
411            serde_json::to_string(options).context(error::SerializeSnafu)?,
412        );
413        Ok(())
414    }
415
416    /// Retrieves the skipping index options for the column.
417    pub fn skipping_index_options(&self) -> Result<Option<SkippingIndexOptions>> {
418        match self.metadata.get(SKIPPING_INDEX_KEY) {
419            None => Ok(None),
420            Some(json) => {
421                let options =
422                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
423                Ok(Some(options))
424            }
425        }
426    }
427
428    pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result<Self> {
429        self.metadata.insert(
430            SKIPPING_INDEX_KEY.to_string(),
431            serde_json::to_string(&options).context(error::SerializeSnafu)?,
432        );
433        Ok(self)
434    }
435
436    pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> {
437        self.metadata.insert(
438            SKIPPING_INDEX_KEY.to_string(),
439            serde_json::to_string(options).context(error::SerializeSnafu)?,
440        );
441        Ok(())
442    }
443
444    pub fn unset_skipping_options(&mut self) -> Result<()> {
445        self.metadata.remove(SKIPPING_INDEX_KEY);
446        Ok(())
447    }
448
449    pub fn extension_type<E>(&self) -> Result<Option<E>>
450    where
451        E: ExtensionType,
452    {
453        let extension_type_name = self.metadata.get(EXTENSION_TYPE_NAME_KEY);
454
455        if extension_type_name.map(|s| s.as_str()) == Some(E::NAME) {
456            let extension_metadata = self.metadata.get(EXTENSION_TYPE_METADATA_KEY);
457            let extension_metadata =
458                E::deserialize_metadata(extension_metadata.map(|s| s.as_str()))
459                    .context(ArrowMetadataSnafu)?;
460
461            let extension = E::try_new(&self.data_type.as_arrow_type(), extension_metadata)
462                .context(ArrowMetadataSnafu)?;
463            Ok(Some(extension))
464        } else {
465            Ok(None)
466        }
467    }
468
469    pub fn with_extension_type<E>(&mut self, extension_type: &E) -> Result<()>
470    where
471        E: ExtensionType,
472    {
473        self.metadata
474            .insert(EXTENSION_TYPE_NAME_KEY.to_string(), E::NAME.to_string());
475
476        if let Some(extension_metadata) = extension_type.serialize_metadata() {
477            self.metadata
478                .insert(EXTENSION_TYPE_METADATA_KEY.to_string(), extension_metadata);
479        }
480
481        Ok(())
482    }
483
484    pub fn is_indexed(&self) -> bool {
485        self.is_inverted_indexed() || self.is_fulltext_indexed() || self.is_skipping_indexed()
486    }
487}
488
/// Column extended type set in column schema's metadata.
///
/// Its textual form, as produced by `Display` and accepted by `FromStr`, is
/// `Json` or `Vector(<dim>)`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnExtType {
    /// Json type.
    Json,

    /// Vector type with dimension.
    Vector(u32),
}
498
499impl fmt::Display for ColumnExtType {
500    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
501        match self {
502            ColumnExtType::Json => write!(f, "Json"),
503            ColumnExtType::Vector(dim) => write!(f, "Vector({})", dim),
504        }
505    }
506}
507
508impl FromStr for ColumnExtType {
509    type Err = String;
510
511    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
512        match s {
513            "Json" => Ok(ColumnExtType::Json),
514            _ if s.starts_with("Vector(") && s.ends_with(')') => s[7..s.len() - 1]
515                .parse::<u32>()
516                .map(ColumnExtType::Vector)
517                .map_err(|_| "Invalid dimension for Vector".to_string()),
518            _ => Err("Unknown variant".to_string()),
519        }
520    }
521}
522
523impl TryFrom<&Field> for ColumnSchema {
524    type Error = Error;
525
526    fn try_from(field: &Field) -> Result<ColumnSchema> {
527        let mut data_type = ConcreteDataType::try_from(field.data_type())?;
528        // Override the data type if it is specified in the metadata.
529        if let Some(s) = field.metadata().get(TYPE_KEY) {
530            let extype = ColumnExtType::from_str(s)
531                .map_err(|_| ParseExtendedTypeSnafu { value: s }.build())?;
532            match extype {
533                ColumnExtType::Json => {
534                    data_type = ConcreteDataType::json_datatype();
535                }
536                ColumnExtType::Vector(dim) => {
537                    data_type = ConcreteDataType::vector_datatype(dim);
538                }
539            }
540        }
541        let mut metadata = field.metadata().clone();
542        let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
543            Some(json) => {
544                Some(serde_json::from_str(&json).context(error::DeserializeSnafu { json })?)
545            }
546            None => None,
547        };
548        let mut is_time_index = metadata.contains_key(TIME_INDEX_KEY);
549        if is_time_index && !data_type.is_timestamp() {
550            // If the column is time index but the data type is not timestamp, it is invalid.
551            // We set the time index to false and remove the metadata.
552            // This is possible if we cast the time index column to another type. DataFusion will
553            // keep the metadata:
554            // https://github.com/apache/datafusion/pull/12951
555            is_time_index = false;
556            metadata.remove(TIME_INDEX_KEY);
557            common_telemetry::debug!(
558                "Column {} is not timestamp ({:?}) but has time index metadata",
559                data_type,
560                field.name(),
561            );
562        }
563
564        Ok(ColumnSchema {
565            name: field.name().clone(),
566            data_type,
567            is_nullable: field.is_nullable(),
568            is_time_index,
569            default_constraint,
570            metadata,
571        })
572    }
573}
574
575impl TryFrom<&ColumnSchema> for Field {
576    type Error = Error;
577
578    fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
579        let mut metadata = column_schema.metadata.clone();
580        if let Some(value) = &column_schema.default_constraint {
581            // Adds an additional metadata to store the default constraint.
582            let old = metadata.insert(
583                DEFAULT_CONSTRAINT_KEY.to_string(),
584                serde_json::to_string(&value).context(error::SerializeSnafu)?,
585            );
586
587            ensure!(
588                old.is_none(),
589                error::DuplicateMetaSnafu {
590                    key: DEFAULT_CONSTRAINT_KEY,
591                }
592            );
593        }
594
595        Ok(Field::new(
596            &column_schema.name,
597            column_schema.data_type.as_arrow_type(),
598            column_schema.is_nullable(),
599        )
600        .with_metadata(metadata))
601    }
602}
603
/// Fulltext options for a column.
///
/// Serialized with kebab-case field names. Every field except `enable` falls
/// back to a default when missing from the serialized form.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct FulltextOptions {
    /// Whether the fulltext index is enabled.
    pub enable: bool,
    /// The fulltext analyzer to use.
    #[serde(default)]
    pub analyzer: FulltextAnalyzer,
    /// Whether the fulltext index is case-sensitive.
    #[serde(default)]
    pub case_sensitive: bool,
    /// The fulltext backend to use.
    #[serde(default)]
    pub backend: FulltextBackend,
    /// The granularity of the fulltext index (for bloom backend only)
    #[serde(default = "fulltext_options_default_granularity")]
    pub granularity: u32,
    /// The false positive rate of the fulltext index (for bloom backend only),
    /// stored in ten-thousandths (e.g., 100 = 1%).
    #[serde(default = "index_options_default_false_positive_rate_in_10000")]
    pub false_positive_rate_in_10000: u32,
}
626
/// Serde default for `FulltextOptions::granularity`: returns
/// [`DEFAULT_GRANULARITY`].
fn fulltext_options_default_granularity() -> u32 {
    DEFAULT_GRANULARITY
}
630
631fn index_options_default_false_positive_rate_in_10000() -> u32 {
632    (DEFAULT_FALSE_POSITIVE_RATE * 10000.0) as u32
633}
634
635impl FulltextOptions {
636    /// Creates a new fulltext options.
637    pub fn new(
638        enable: bool,
639        analyzer: FulltextAnalyzer,
640        case_sensitive: bool,
641        backend: FulltextBackend,
642        granularity: u32,
643        false_positive_rate: f64,
644    ) -> Result<Self> {
645        ensure!(
646            0.0 < false_positive_rate && false_positive_rate <= 1.0,
647            error::InvalidFulltextOptionSnafu {
648                msg: format!(
649                    "Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"
650                ),
651            }
652        );
653        ensure!(
654            granularity > 0,
655            error::InvalidFulltextOptionSnafu {
656                msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
657            }
658        );
659        Ok(Self::new_unchecked(
660            enable,
661            analyzer,
662            case_sensitive,
663            backend,
664            granularity,
665            false_positive_rate,
666        ))
667    }
668
669    /// Creates a new fulltext options without checking `false_positive_rate` and `granularity`.
670    pub fn new_unchecked(
671        enable: bool,
672        analyzer: FulltextAnalyzer,
673        case_sensitive: bool,
674        backend: FulltextBackend,
675        granularity: u32,
676        false_positive_rate: f64,
677    ) -> Self {
678        Self {
679            enable,
680            analyzer,
681            case_sensitive,
682            backend,
683            granularity,
684            false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
685        }
686    }
687
688    /// Gets the false positive rate.
689    pub fn false_positive_rate(&self) -> f64 {
690        self.false_positive_rate_in_10000 as f64 / 10000.0
691    }
692}
693
694impl Default for FulltextOptions {
695    fn default() -> Self {
696        Self::new_unchecked(
697            false,
698            FulltextAnalyzer::default(),
699            false,
700            FulltextBackend::default(),
701            DEFAULT_GRANULARITY,
702            DEFAULT_FALSE_POSITIVE_RATE,
703        )
704    }
705}
706
707impl fmt::Display for FulltextOptions {
708    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
709        write!(f, "enable={}", self.enable)?;
710        if self.enable {
711            write!(f, ", analyzer={}", self.analyzer)?;
712            write!(f, ", case_sensitive={}", self.case_sensitive)?;
713            write!(f, ", backend={}", self.backend)?;
714            if self.backend == FulltextBackend::Bloom {
715                write!(f, ", granularity={}", self.granularity)?;
716                write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
717            }
718        }
719        Ok(())
720    }
721}
722
/// The backend of the fulltext index.
///
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub enum FulltextBackend {
    /// Bloom backend; this is the default.
    #[default]
    Bloom,
    /// Tantivy backend.
    Tantivy,
}
731
732impl fmt::Display for FulltextBackend {
733    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
734        match self {
735            FulltextBackend::Tantivy => write!(f, "tantivy"),
736            FulltextBackend::Bloom => write!(f, "bloom"),
737        }
738    }
739}
740
741impl TryFrom<HashMap<String, String>> for FulltextOptions {
742    type Error = Error;
743
744    fn try_from(options: HashMap<String, String>) -> Result<Self> {
745        let mut fulltext_options = FulltextOptions {
746            enable: true,
747            ..Default::default()
748        };
749
750        if let Some(enable) = options.get(COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE) {
751            match enable.to_ascii_lowercase().as_str() {
752                "true" => fulltext_options.enable = true,
753                "false" => fulltext_options.enable = false,
754                _ => {
755                    return InvalidFulltextOptionSnafu {
756                        msg: format!("{enable}, expected: 'true' | 'false'"),
757                    }
758                    .fail();
759                }
760            }
761        };
762
763        if let Some(analyzer) = options.get(COLUMN_FULLTEXT_OPT_KEY_ANALYZER) {
764            match analyzer.to_ascii_lowercase().as_str() {
765                "english" => fulltext_options.analyzer = FulltextAnalyzer::English,
766                "chinese" => fulltext_options.analyzer = FulltextAnalyzer::Chinese,
767                _ => {
768                    return InvalidFulltextOptionSnafu {
769                        msg: format!("{analyzer}, expected: 'English' | 'Chinese'"),
770                    }
771                    .fail();
772                }
773            }
774        };
775
776        if let Some(case_sensitive) = options.get(COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE) {
777            match case_sensitive.to_ascii_lowercase().as_str() {
778                "true" => fulltext_options.case_sensitive = true,
779                "false" => fulltext_options.case_sensitive = false,
780                _ => {
781                    return InvalidFulltextOptionSnafu {
782                        msg: format!("{case_sensitive}, expected: 'true' | 'false'"),
783                    }
784                    .fail();
785                }
786            }
787        }
788
789        if let Some(backend) = options.get(COLUMN_FULLTEXT_OPT_KEY_BACKEND) {
790            match backend.to_ascii_lowercase().as_str() {
791                "bloom" => fulltext_options.backend = FulltextBackend::Bloom,
792                "tantivy" => fulltext_options.backend = FulltextBackend::Tantivy,
793                _ => {
794                    return InvalidFulltextOptionSnafu {
795                        msg: format!("{backend}, expected: 'bloom' | 'tantivy'"),
796                    }
797                    .fail();
798                }
799            }
800        }
801
802        if fulltext_options.backend == FulltextBackend::Bloom {
803            // Parse granularity with default value 10240
804            let granularity = match options.get(COLUMN_FULLTEXT_OPT_KEY_GRANULARITY) {
805                Some(value) => value
806                    .parse::<u32>()
807                    .ok()
808                    .filter(|&v| v > 0)
809                    .ok_or_else(|| {
810                        error::InvalidFulltextOptionSnafu {
811                            msg: format!(
812                                "Invalid granularity: {value}, expected: positive integer"
813                            ),
814                        }
815                        .build()
816                    })?,
817                None => DEFAULT_GRANULARITY,
818            };
819            fulltext_options.granularity = granularity;
820
821            // Parse false positive rate with default value 0.01
822            let false_positive_rate = match options.get(COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE)
823            {
824                Some(value) => value
825                    .parse::<f64>()
826                    .ok()
827                    .filter(|&v| v > 0.0 && v <= 1.0)
828                    .ok_or_else(|| {
829                        error::InvalidFulltextOptionSnafu {
830                            msg: format!(
831                                "Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
832                            ),
833                        }
834                        .build()
835                    })?,
836                None => DEFAULT_FALSE_POSITIVE_RATE,
837            };
838            fulltext_options.false_positive_rate_in_10000 = (false_positive_rate * 10000.0) as u32;
839        }
840
841        Ok(fulltext_options)
842    }
843}
844
/// Fulltext analyzer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
pub enum FulltextAnalyzer {
    /// English analyzer; this is the default.
    #[default]
    English,
    /// Chinese analyzer.
    Chinese,
}
852
853impl fmt::Display for FulltextAnalyzer {
854    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
855        match self {
856            FulltextAnalyzer::English => write!(f, "English"),
857            FulltextAnalyzer::Chinese => write!(f, "Chinese"),
858        }
859    }
860}
861
/// Skipping options for a column.
///
/// Serialized with kebab-case field names. `granularity` has no serde
/// default; the other fields fall back to a default when missing.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct SkippingIndexOptions {
    /// The granularity of the skip index.
    pub granularity: u32,
    /// The false positive rate of the skip index (in ten-thousandths, e.g., 100 = 1%).
    #[serde(default = "index_options_default_false_positive_rate_in_10000")]
    pub false_positive_rate_in_10000: u32,
    /// The type of the skip index.
    #[serde(default)]
    pub index_type: SkippingIndexType,
}
875
876impl SkippingIndexOptions {
877    /// Creates a new skipping index options without checking `false_positive_rate` and `granularity`.
878    pub fn new_unchecked(
879        granularity: u32,
880        false_positive_rate: f64,
881        index_type: SkippingIndexType,
882    ) -> Self {
883        Self {
884            granularity,
885            false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
886            index_type,
887        }
888    }
889
890    /// Creates a new skipping index options.
891    pub fn new(
892        granularity: u32,
893        false_positive_rate: f64,
894        index_type: SkippingIndexType,
895    ) -> Result<Self> {
896        ensure!(
897            0.0 < false_positive_rate && false_positive_rate <= 1.0,
898            error::InvalidSkippingIndexOptionSnafu {
899                msg: format!(
900                    "Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"
901                ),
902            }
903        );
904        ensure!(
905            granularity > 0,
906            error::InvalidSkippingIndexOptionSnafu {
907                msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
908            }
909        );
910        Ok(Self::new_unchecked(
911            granularity,
912            false_positive_rate,
913            index_type,
914        ))
915    }
916
917    /// Gets the false positive rate.
918    pub fn false_positive_rate(&self) -> f64 {
919        self.false_positive_rate_in_10000 as f64 / 10000.0
920    }
921}
922
923impl Default for SkippingIndexOptions {
924    fn default() -> Self {
925        Self::new_unchecked(
926            DEFAULT_GRANULARITY,
927            DEFAULT_FALSE_POSITIVE_RATE,
928            SkippingIndexType::default(),
929        )
930    }
931}
932
933impl fmt::Display for SkippingIndexOptions {
934    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
935        write!(f, "granularity={}", self.granularity)?;
936        write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
937        write!(f, ", index_type={}", self.index_type)?;
938        Ok(())
939    }
940}
941
942/// Skip index types.
943#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
944pub enum SkippingIndexType {
945    #[default]
946    BloomFilter,
947}
948
949impl fmt::Display for SkippingIndexType {
950    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
951        match self {
952            SkippingIndexType::BloomFilter => write!(f, "BLOOM"),
953        }
954    }
955}
956
957impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
958    type Error = Error;
959
960    fn try_from(options: HashMap<String, String>) -> Result<Self> {
961        // Parse granularity with default value 1
962        let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
963            Some(value) => value
964                .parse::<u32>()
965                .ok()
966                .filter(|&v| v > 0)
967                .ok_or_else(|| {
968                    error::InvalidSkippingIndexOptionSnafu {
969                        msg: format!("Invalid granularity: {value}, expected: positive integer"),
970                    }
971                    .build()
972                })?,
973            None => DEFAULT_GRANULARITY,
974        };
975
976        // Parse false positive rate with default value 100
977        let false_positive_rate =
978            match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE) {
979                Some(value) => value
980                    .parse::<f64>()
981                    .ok()
982                    .filter(|&v| v > 0.0 && v <= 1.0)
983                    .ok_or_else(|| {
984                        error::InvalidSkippingIndexOptionSnafu {
985                            msg: format!(
986                                "Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
987                            ),
988                        }
989                        .build()
990                    })?,
991                None => DEFAULT_FALSE_POSITIVE_RATE,
992            };
993
994        // Parse index type with default value BloomFilter
995        let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
996            Some(typ) => match typ.to_ascii_uppercase().as_str() {
997                "BLOOM" => SkippingIndexType::BloomFilter,
998                _ => {
999                    return error::InvalidSkippingIndexOptionSnafu {
1000                        msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"),
1001                    }
1002                    .fail();
1003                }
1004            },
1005            None => SkippingIndexType::default(),
1006        };
1007
1008        Ok(SkippingIndexOptions::new_unchecked(
1009            granularity,
1010            false_positive_rate,
1011            index_type,
1012        ))
1013    }
1014}
1015
1016/// Distance metric for vector similarity search.
1017#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
1018#[serde(rename_all = "lowercase")]
1019pub enum VectorDistanceMetric {
1020    /// Squared Euclidean distance (L2^2).
1021    #[default]
1022    L2sq,
1023    /// Cosine distance (1 - cosine similarity).
1024    Cosine,
1025    /// Inner product (negative, for maximum inner product search).
1026    #[serde(alias = "ip")]
1027    InnerProduct,
1028}
1029
1030impl fmt::Display for VectorDistanceMetric {
1031    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1032        match self {
1033            VectorDistanceMetric::L2sq => write!(f, "l2sq"),
1034            VectorDistanceMetric::Cosine => write!(f, "cosine"),
1035            VectorDistanceMetric::InnerProduct => write!(f, "ip"),
1036        }
1037    }
1038}
1039
1040impl std::str::FromStr for VectorDistanceMetric {
1041    type Err = String;
1042
1043    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
1044        match s.to_lowercase().as_str() {
1045            "l2sq" | "l2" | "euclidean" => Ok(VectorDistanceMetric::L2sq),
1046            "cosine" | "cos" => Ok(VectorDistanceMetric::Cosine),
1047            "inner_product" | "ip" | "dot" => Ok(VectorDistanceMetric::InnerProduct),
1048            _ => Err(format!(
1049                "Unknown distance metric: {}. Expected: l2sq, cosine, or ip",
1050                s
1051            )),
1052        }
1053    }
1054}
1055
1056impl VectorDistanceMetric {
1057    /// Returns the metric as u8 for blob serialization.
1058    pub fn as_u8(&self) -> u8 {
1059        match self {
1060            Self::L2sq => 0,
1061            Self::Cosine => 1,
1062            Self::InnerProduct => 2,
1063        }
1064    }
1065
1066    /// Parses metric from u8 (used when reading blob).
1067    pub fn try_from_u8(v: u8) -> Option<Self> {
1068        match v {
1069            0 => Some(Self::L2sq),
1070            1 => Some(Self::Cosine),
1071            2 => Some(Self::InnerProduct),
1072            _ => None,
1073        }
1074    }
1075}
1076
/// Default HNSW connectivity parameter.
const DEFAULT_VECTOR_INDEX_CONNECTIVITY: u32 = 16;

/// Returns [`DEFAULT_VECTOR_INDEX_CONNECTIVITY`]; a function form of the constant for
/// use as a serde `default = "..."` provider.
fn default_vector_index_connectivity() -> u32 {
    DEFAULT_VECTOR_INDEX_CONNECTIVITY
}

/// Default expansion factor during index construction.
const DEFAULT_VECTOR_INDEX_EXPANSION_ADD: u32 = 128;

/// Returns [`DEFAULT_VECTOR_INDEX_EXPANSION_ADD`]; a function form of the constant for
/// use as a serde `default = "..."` provider.
fn default_vector_index_expansion_add() -> u32 {
    DEFAULT_VECTOR_INDEX_EXPANSION_ADD
}

/// Default expansion factor during search.
const DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH: u32 = 64;

/// Returns [`DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH`]; a function form of the constant
/// for use as a serde `default = "..."` provider.
fn default_vector_index_expansion_search() -> u32 {
    DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH
}
1095
1096/// Supported vector index engine types.
1097#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, Visit, VisitMut)]
1098#[serde(rename_all = "lowercase")]
1099pub enum VectorIndexEngineType {
1100    /// USearch HNSW implementation.
1101    #[default]
1102    Usearch,
1103    // Future: Vsag,
1104}
1105
1106impl VectorIndexEngineType {
1107    /// Returns the engine type as u8 for blob serialization.
1108    pub fn as_u8(&self) -> u8 {
1109        match self {
1110            Self::Usearch => 0,
1111        }
1112    }
1113
1114    /// Parses engine type from u8 (used when reading blob).
1115    pub fn try_from_u8(v: u8) -> Option<Self> {
1116        match v {
1117            0 => Some(Self::Usearch),
1118            _ => None,
1119        }
1120    }
1121}
1122
1123impl fmt::Display for VectorIndexEngineType {
1124    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1125        match self {
1126            Self::Usearch => write!(f, "usearch"),
1127        }
1128    }
1129}
1130
1131impl std::str::FromStr for VectorIndexEngineType {
1132    type Err = String;
1133
1134    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
1135        match s.to_lowercase().as_str() {
1136            "usearch" => Ok(Self::Usearch),
1137            _ => Err(format!(
1138                "Unknown vector index engine: {}. Expected: usearch",
1139                s
1140            )),
1141        }
1142    }
1143}
1144
1145/// Options for vector index (HNSW).
1146#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
1147#[serde(rename_all = "kebab-case")]
1148pub struct VectorIndexOptions {
1149    /// Vector index engine type (default: usearch).
1150    #[serde(default)]
1151    pub engine: VectorIndexEngineType,
1152    /// Distance metric for similarity search.
1153    #[serde(default)]
1154    pub metric: VectorDistanceMetric,
1155    /// HNSW connectivity parameter (M in the paper).
1156    /// Higher values improve recall but increase memory usage.
1157    #[serde(default = "default_vector_index_connectivity")]
1158    pub connectivity: u32,
1159    /// Expansion factor during index construction (ef_construction).
1160    /// Higher values improve index quality but slow down construction.
1161    #[serde(default = "default_vector_index_expansion_add")]
1162    pub expansion_add: u32,
1163    /// Expansion factor during search (ef_search).
1164    /// Higher values improve recall but slow down search.
1165    #[serde(default = "default_vector_index_expansion_search")]
1166    pub expansion_search: u32,
1167}
1168
1169impl Default for VectorIndexOptions {
1170    fn default() -> Self {
1171        Self {
1172            engine: VectorIndexEngineType::default(),
1173            metric: VectorDistanceMetric::default(),
1174            connectivity: DEFAULT_VECTOR_INDEX_CONNECTIVITY,
1175            expansion_add: DEFAULT_VECTOR_INDEX_EXPANSION_ADD,
1176            expansion_search: DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH,
1177        }
1178    }
1179}
1180
1181impl fmt::Display for VectorIndexOptions {
1182    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1183        write!(
1184            f,
1185            "engine={}, metric={}, connectivity={}, expansion_add={}, expansion_search={}",
1186            self.engine, self.metric, self.connectivity, self.expansion_add, self.expansion_search
1187        )
1188    }
1189}
1190
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};

    use super::*;
    use crate::value::Value;
    use crate::vectors::Int32Vector;

    /// Builds the int32 column named `test` that most cases below start from.
    fn int32_column(nullable: bool) -> ColumnSchema {
        ColumnSchema::new("test", ConcreteDataType::int32_datatype(), nullable)
    }

    /// Builds an arrow field named `test` whose metadata holds exactly one entry.
    fn field_with_metadata(
        data_type: ArrowDataType,
        nullable: bool,
        key: &str,
        value: String,
    ) -> Field {
        Field::new("test", data_type, nullable)
            .with_metadata(Metadata::from([(key.to_string(), value)]))
    }

    #[test]
    fn test_column_schema() {
        let schema = int32_column(true);

        // Schema -> arrow field.
        let field = Field::try_from(&schema).unwrap();
        assert_eq!("test", field.name());
        assert_eq!(ArrowDataType::Int32, *field.data_type());
        assert!(field.is_nullable());

        // Arrow field -> schema must give back the original.
        let round_tripped = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(schema, round_tripped);
    }

    #[test]
    fn test_column_schema_with_default_constraint() {
        let schema = int32_column(true)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::from(99))))
            .unwrap();
        // The constraint is not visible in the schema's own metadata ...
        assert!(!schema.metadata().contains_key(DEFAULT_CONSTRAINT_KEY));

        // ... but it is written to the arrow field's metadata as JSON.
        let field = Field::try_from(&schema).unwrap();
        assert_eq!("test", field.name());
        assert_eq!(ArrowDataType::Int32, *field.data_type());
        assert!(field.is_nullable());
        assert_eq!(
            "{\"Value\":{\"Int32\":99}}",
            field.metadata().get(DEFAULT_CONSTRAINT_KEY).unwrap()
        );

        let round_tripped = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(schema, round_tripped);
    }

    #[test]
    fn test_column_schema_with_metadata() {
        let user_metadata = Metadata::from([
            ("k1".to_string(), "v1".to_string()),
            (COMMENT_KEY.to_string(), "test comment".to_string()),
        ]);
        let schema = int32_column(true)
            .with_metadata(user_metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert_eq!("v1", schema.metadata().get("k1").unwrap());
        assert_eq!("test comment", schema.column_comment().unwrap());
        assert!(!schema.metadata().contains_key(DEFAULT_CONSTRAINT_KEY));

        // The field carries both the user entry and the default constraint.
        let field = Field::try_from(&schema).unwrap();
        assert_eq!("v1", field.metadata().get("k1").unwrap());
        assert!(field.metadata().contains_key(DEFAULT_CONSTRAINT_KEY));

        let round_tripped = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(schema, round_tripped);
    }

    #[test]
    fn test_column_schema_with_duplicate_metadata() {
        // User metadata already occupies the key reserved for the default constraint.
        let clashing = Metadata::from([(DEFAULT_CONSTRAINT_KEY.to_string(), "v1".to_string())]);
        let schema = int32_column(true)
            .with_metadata(clashing)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert!(Field::try_from(&schema).is_err());
    }

    #[test]
    fn test_column_schema_invalid_default_constraint() {
        // A non-nullable column cannot default to null.
        let result = int32_column(false)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()));
        assert!(result.is_err());
    }

    #[test]
    fn test_column_default_constraint_try_into_from() {
        let constraint = ColumnDefaultConstraint::Value(Value::from(42i64));

        let encoded: Vec<u8> = constraint.clone().try_into().unwrap();
        let decoded = ColumnDefaultConstraint::try_from(encoded.as_slice()).unwrap();

        assert_eq!(constraint, decoded);
    }

    #[test]
    fn test_column_schema_create_default_null() {
        // Implicit default null.
        let implicit_null = int32_column(true);
        // Explicit default null.
        let explicit_null = int32_column(true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();

        for schema in [implicit_null, explicit_null] {
            let vector = schema.create_default_vector(5).unwrap().unwrap();
            assert_eq!(5, vector.len());
            assert!(vector.only_null());
        }
    }

    #[test]
    fn test_column_schema_no_default() {
        let schema = int32_column(false);
        assert!(schema.create_default_vector(5).unwrap().is_none());
    }

    #[test]
    fn test_create_default_vector_for_padding() {
        // Nullable column: padded with nulls.
        let padding = int32_column(true).create_default_vector_for_padding(4);
        assert!(padding.only_null());
        assert_eq!(4, padding.len());

        // Non-nullable column: padded with zeros.
        let padding = int32_column(false).create_default_vector_for_padding(4);
        assert_eq!(4, padding.len());
        let zeros: VectorRef = Arc::new(Int32Vector::from_slice([0, 0, 0, 0]));
        assert_eq!(zeros, padding);
    }

    #[test]
    fn test_column_schema_single_create_default_null() {
        // Implicit default null.
        let implicit_null = int32_column(true);
        // Explicit default null.
        let explicit_null = int32_column(true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();

        for schema in [implicit_null, explicit_null] {
            let value = schema.create_default().unwrap().unwrap();
            assert!(value.is_null());
        }
    }

    #[test]
    fn test_column_schema_single_create_default_not_null() {
        let schema = int32_column(true)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::Int32(6))))
            .unwrap();
        let value = schema.create_default().unwrap().unwrap();
        assert_eq!(value, Value::Int32(6));
    }

    #[test]
    fn test_column_schema_single_no_default() {
        let schema = int32_column(false);
        assert!(schema.create_default().unwrap().is_none());
    }

    #[test]
    fn test_debug_for_column_schema() {
        let nullable_int8 =
            ColumnSchema::new("test_column_1", ConcreteDataType::int8_datatype(), true);
        assert_eq!(format!("{:?}", nullable_int8), "test_column_1 Int8 null");

        let required_int32 =
            ColumnSchema::new("test_column_2", ConcreteDataType::int32_datatype(), false);
        assert_eq!(
            format!("{:?}", required_int32),
            "test_column_2 Int32 not null"
        );
    }

    #[test]
    fn test_from_field_to_column_schema() {
        // Plain field without any metadata.
        let field = Field::new("test", ArrowDataType::Int32, true);
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
        assert!(column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert!(column_schema.metadata.is_empty());

        // Binary fields whose type metadata selects an extended type (json, then vector).
        let extended_types = [
            ConcreteDataType::json_datatype(),
            ConcreteDataType::vector_datatype(3),
        ];
        for expected_type in extended_types {
            let field = field_with_metadata(
                ArrowDataType::Binary,
                true,
                TYPE_KEY,
                expected_type.name(),
            );
            let column_schema = ColumnSchema::try_from(&field).unwrap();
            assert_eq!("test", column_schema.name);
            assert_eq!(expected_type, column_schema.data_type);
            assert!(column_schema.is_nullable);
            assert!(!column_schema.is_time_index);
            assert!(column_schema.default_constraint.is_none());
            assert_eq!(
                column_schema.metadata.get(TYPE_KEY).unwrap(),
                &expected_type.name()
            );
        }
    }

    #[test]
    fn test_column_schema_fix_time_index() {
        // A timestamp field flagged as time index keeps the flag and its metadata entry.
        let field = field_with_metadata(
            ArrowDataType::Timestamp(TimeUnit::Second, None),
            false,
            TIME_INDEX_KEY,
            "true".to_string(),
        );
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(
            ConcreteDataType::timestamp_second_datatype(),
            column_schema.data_type
        );
        assert!(!column_schema.is_nullable);
        assert!(column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert_eq!(1, column_schema.metadata().len());

        // The same flag on an int32 field is dropped together with the metadata entry.
        let field = field_with_metadata(
            ArrowDataType::Int32,
            false,
            TIME_INDEX_KEY,
            "true".to_string(),
        );
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
        assert!(!column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert!(column_schema.metadata.is_empty());
    }

    #[test]
    fn test_skipping_index_options_deserialization() {
        let json = "{\"granularity\":1024,\"false-positive-rate-in-10000\":10,\"index-type\":\"BloomFilter\"}";
        let parsed = serde_json::from_str::<SkippingIndexOptions>(json).unwrap();
        assert_eq!(1024, parsed.granularity);
        assert_eq!(SkippingIndexType::BloomFilter, parsed.index_type);
        assert_eq!(0.001, parsed.false_positive_rate());

        // Serializing again reproduces the input exactly.
        let reserialized = serde_json::to_string(&parsed).unwrap();
        assert_eq!(reserialized, json);
    }

    #[test]
    fn test_skipping_index_options_deserialization_v0_14_to_v0_15() {
        // Payload without the false positive rate key.
        let legacy_json = "{\"granularity\":10240,\"index-type\":\"BloomFilter\"}";
        let parsed = serde_json::from_str::<SkippingIndexOptions>(legacy_json).unwrap();
        assert_eq!(10240, parsed.granularity);
        assert_eq!(SkippingIndexType::BloomFilter, parsed.index_type);
        assert_eq!(DEFAULT_FALSE_POSITIVE_RATE, parsed.false_positive_rate());

        // The missing key is filled in on output.
        let reserialized = serde_json::to_string(&parsed).unwrap();
        assert_eq!(
            reserialized,
            "{\"granularity\":10240,\"false-positive-rate-in-10000\":100,\"index-type\":\"BloomFilter\"}"
        );
    }

    #[test]
    fn test_fulltext_options_deserialization() {
        let json = "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":1024,\"false-positive-rate-in-10000\":10}";
        let parsed = serde_json::from_str::<FulltextOptions>(json).unwrap();
        assert!(!parsed.case_sensitive);
        assert!(parsed.enable);
        assert_eq!(FulltextBackend::Bloom, parsed.backend);
        assert_eq!(FulltextAnalyzer::default(), parsed.analyzer);
        assert_eq!(1024, parsed.granularity);
        assert_eq!(0.001, parsed.false_positive_rate());

        // Serializing again reproduces the input exactly.
        let reserialized = serde_json::to_string(&parsed).unwrap();
        assert_eq!(reserialized, json);
    }

    #[test]
    fn test_fulltext_options_deserialization_v0_14_to_v0_15() {
        // 0.14 to 0.15
        let legacy_json = "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}";
        let parsed = serde_json::from_str::<FulltextOptions>(legacy_json).unwrap();
        assert!(!parsed.case_sensitive);
        assert!(parsed.enable);
        assert_eq!(FulltextBackend::Bloom, parsed.backend);
        assert_eq!(FulltextAnalyzer::default(), parsed.analyzer);
        assert_eq!(DEFAULT_GRANULARITY, parsed.granularity);
        assert_eq!(DEFAULT_FALSE_POSITIVE_RATE, parsed.false_positive_rate());

        // The missing keys are filled in on output.
        let reserialized = serde_json::to_string(&parsed).unwrap();
        assert_eq!(
            reserialized,
            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
        );
    }
}