datatypes/schema/
column_schema.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::fmt;
17use std::str::FromStr;
18
19use arrow::datatypes::Field;
20use arrow_schema::extension::{
21    EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType,
22};
23use serde::{Deserialize, Serialize};
24use snafu::{ResultExt, ensure};
25use sqlparser_derive::{Visit, VisitMut};
26
27use crate::data_type::{ConcreteDataType, DataType};
28use crate::error::{
29    self, ArrowMetadataSnafu, Error, InvalidFulltextOptionSnafu, ParseExtendedTypeSnafu, Result,
30};
31use crate::schema::TYPE_KEY;
32use crate::schema::constraint::ColumnDefaultConstraint;
33use crate::value::Value;
34use crate::vectors::VectorRef;
35
/// String key/value metadata attached to a column schema.
pub type Metadata = HashMap<String, String>;

/// Key used to store whether the column is time index in arrow field's metadata.
pub const TIME_INDEX_KEY: &str = "greptime:time_index";
/// Key used to store the column comment in the column's metadata.
pub const COMMENT_KEY: &str = "greptime:storage:comment";
/// Key used to store default constraint in arrow field's metadata.
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
/// Key used to store fulltext options in arrow field's metadata.
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
/// Key used to store whether the column has inverted index in arrow field's metadata.
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
/// Key used to store skip options in arrow field's metadata.
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
/// Key used to store vector index options in arrow field's metadata.
pub const VECTOR_INDEX_KEY: &str = "greptime:vector_index";

/// Keys used in fulltext options
///
/// Fulltext option key: whether the index is enabled (`true` / `false`).
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
/// Fulltext option key: the analyzer (`English` / `Chinese`, case-insensitive).
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
/// Fulltext option key: whether matching is case-sensitive (`true` / `false`).
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
/// Fulltext option key: the backend (`bloom` / `tantivy`, case-insensitive).
pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend";
/// Fulltext option key: the granularity, only read for the bloom backend.
pub const COLUMN_FULLTEXT_OPT_KEY_GRANULARITY: &str = "granularity";
/// Fulltext option key: the false positive rate, only read for the bloom backend.
pub const COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";

/// Keys used in SKIPPING index options
///
/// Skipping index option key for the granularity.
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
/// Skipping index option key for the false positive rate.
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
/// Skipping index option key for the index type.
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";

/// Keys used in VECTOR index options
///
/// Vector index option key for the engine.
pub const COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE: &str = "engine";
/// Vector index option key for the metric.
pub const COLUMN_VECTOR_INDEX_OPT_KEY_METRIC: &str = "metric";
/// Vector index option key for the connectivity.
pub const COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY: &str = "connectivity";
/// Vector index option key for the expansion used when adding.
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD: &str = "expansion_add";
/// Vector index option key for the expansion used when searching.
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH: &str = "expansion_search";

/// Default index granularity, applied to fulltext options that do not specify one.
pub const DEFAULT_GRANULARITY: u32 = 10240;

/// Default false positive rate (1%) for index options that do not specify one.
pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01;
75
/// Schema of a column, used as an immutable struct.
///
/// Besides the dedicated fields, index options, the column comment and the
/// time index marker are kept as string entries in `metadata`.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
    /// Name of the column.
    pub name: String,
    /// Logical data type of the column.
    pub data_type: ConcreteDataType,
    /// Whether the column accepts null values.
    is_nullable: bool,
    /// Whether the column is the time index.
    is_time_index: bool,
    /// Optional default constraint; validated against the data type and
    /// nullability when set through `with_default_constraint`.
    default_constraint: Option<ColumnDefaultConstraint>,
    /// Free-form key/value metadata (index options, comment, extension type, ...).
    metadata: Metadata,
}
86
87impl fmt::Debug for ColumnSchema {
88    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
89        write!(
90            f,
91            "{} {} {}",
92            self.name,
93            self.data_type,
94            if self.is_nullable { "null" } else { "not null" },
95        )?;
96
97        if self.is_time_index {
98            write!(f, " time_index")?;
99        }
100
101        // Add default constraint if present
102        if let Some(default_constraint) = &self.default_constraint {
103            write!(f, " default={:?}", default_constraint)?;
104        }
105
106        // Add metadata if present
107        if !self.metadata.is_empty() {
108            write!(f, " metadata={:?}", self.metadata)?;
109        }
110
111        Ok(())
112    }
113}
114
115impl ColumnSchema {
116    pub fn new<T: Into<String>>(
117        name: T,
118        data_type: ConcreteDataType,
119        is_nullable: bool,
120    ) -> ColumnSchema {
121        ColumnSchema {
122            name: name.into(),
123            data_type,
124            is_nullable,
125            is_time_index: false,
126            default_constraint: None,
127            metadata: Metadata::new(),
128        }
129    }
130
131    #[inline]
132    pub fn is_time_index(&self) -> bool {
133        self.is_time_index
134    }
135
136    #[inline]
137    pub fn is_nullable(&self) -> bool {
138        self.is_nullable
139    }
140
141    #[inline]
142    pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
143        self.default_constraint.as_ref()
144    }
145
146    /// Check if the default constraint is a impure function.
147    pub fn is_default_impure(&self) -> bool {
148        self.default_constraint
149            .as_ref()
150            .map(|c| c.is_function())
151            .unwrap_or(false)
152    }
153
154    #[inline]
155    pub fn metadata(&self) -> &Metadata {
156        &self.metadata
157    }
158
159    #[inline]
160    pub fn mut_metadata(&mut self) -> &mut Metadata {
161        &mut self.metadata
162    }
163
164    /// Retrieve the column comment
165    pub fn column_comment(&self) -> Option<&String> {
166        self.metadata.get(COMMENT_KEY)
167    }
168
169    pub fn with_time_index(mut self, is_time_index: bool) -> Self {
170        self.is_time_index = is_time_index;
171        if is_time_index {
172            let _ = self
173                .metadata
174                .insert(TIME_INDEX_KEY.to_string(), "true".to_string());
175        } else {
176            let _ = self.metadata.remove(TIME_INDEX_KEY);
177        }
178        self
179    }
180
181    /// Set the inverted index for the column.
182    /// Similar to [with_inverted_index] but don't take the ownership.
183    ///
184    /// [with_inverted_index]: Self::with_inverted_index
185    pub fn set_inverted_index(&mut self, value: bool) {
186        match value {
187            true => {
188                self.metadata
189                    .insert(INVERTED_INDEX_KEY.to_string(), value.to_string());
190            }
191            false => {
192                self.metadata.remove(INVERTED_INDEX_KEY);
193            }
194        }
195    }
196
197    /// Set the inverted index for the column.
198    /// Similar to [set_inverted_index] but take the ownership and return a owned value.
199    ///
200    /// [set_inverted_index]: Self::set_inverted_index
201    pub fn with_inverted_index(mut self, value: bool) -> Self {
202        self.set_inverted_index(value);
203        self
204    }
205
206    pub fn is_inverted_indexed(&self) -> bool {
207        self.metadata
208            .get(INVERTED_INDEX_KEY)
209            .map(|v| v.eq_ignore_ascii_case("true"))
210            .unwrap_or(false)
211    }
212
213    pub fn is_fulltext_indexed(&self) -> bool {
214        self.fulltext_options()
215            .unwrap_or_default()
216            .map(|option| option.enable)
217            .unwrap_or_default()
218    }
219
220    pub fn is_skipping_indexed(&self) -> bool {
221        self.skipping_index_options().unwrap_or_default().is_some()
222    }
223
224    pub fn has_inverted_index_key(&self) -> bool {
225        self.metadata.contains_key(INVERTED_INDEX_KEY)
226    }
227
228    /// Checks if this column has a vector index.
229    pub fn is_vector_indexed(&self) -> bool {
230        match self.vector_index_options() {
231            Ok(opts) => opts.is_some(),
232            Err(e) => {
233                common_telemetry::warn!(
234                    "Failed to deserialize vector_index_options for column '{}': {}",
235                    self.name,
236                    e
237                );
238                false
239            }
240        }
241    }
242
243    /// Gets the vector index options.
244    pub fn vector_index_options(&self) -> Result<Option<VectorIndexOptions>> {
245        match self.metadata.get(VECTOR_INDEX_KEY) {
246            None => Ok(None),
247            Some(json) => {
248                let options =
249                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
250                Ok(Some(options))
251            }
252        }
253    }
254
255    /// Sets the vector index options.
256    pub fn set_vector_index_options(&mut self, options: &VectorIndexOptions) -> Result<()> {
257        self.metadata.insert(
258            VECTOR_INDEX_KEY.to_string(),
259            serde_json::to_string(options).context(error::SerializeSnafu)?,
260        );
261        Ok(())
262    }
263
264    /// Removes the vector index options.
265    pub fn unset_vector_index_options(&mut self) {
266        self.metadata.remove(VECTOR_INDEX_KEY);
267    }
268
269    /// Sets vector index options and returns self for chaining.
270    pub fn with_vector_index_options(mut self, options: &VectorIndexOptions) -> Result<Self> {
271        self.set_vector_index_options(options)?;
272        Ok(self)
273    }
274
275    /// Set default constraint.
276    ///
277    /// If a default constraint exists for the column, this method will
278    /// validate it against the column's data type and nullability.
279    pub fn with_default_constraint(
280        mut self,
281        default_constraint: Option<ColumnDefaultConstraint>,
282    ) -> Result<Self> {
283        if let Some(constraint) = &default_constraint {
284            constraint.validate(&self.data_type, self.is_nullable)?;
285        }
286
287        self.default_constraint = default_constraint;
288        Ok(self)
289    }
290
291    /// Set the nullablity to `true` of the column.
292    /// Similar to [set_nullable] but take the ownership and return a owned value.
293    ///
294    /// [set_nullable]: Self::set_nullable
295    pub fn with_nullable_set(mut self) -> Self {
296        self.is_nullable = true;
297        self
298    }
299
300    /// Set the nullability to `true` of the column.
301    /// Similar to [with_nullable_set] but don't take the ownership
302    ///
303    /// [with_nullable_set]: Self::with_nullable_set
304    pub fn set_nullable(&mut self) {
305        self.is_nullable = true;
306    }
307
308    /// Set the `is_time_index` to `true` of the column.
309    /// Similar to [with_time_index] but don't take the ownership.
310    ///
311    /// [with_time_index]: Self::with_time_index
312    pub fn set_time_index(&mut self) {
313        self.is_time_index = true;
314    }
315
316    /// Creates a new [`ColumnSchema`] with given metadata.
317    pub fn with_metadata(mut self, metadata: Metadata) -> Self {
318        self.metadata = metadata;
319        self
320    }
321
322    /// Creates a vector with default value for this column.
323    ///
324    /// If the column is `NOT NULL` but doesn't has `DEFAULT` value supplied, returns `Ok(None)`.
325    pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
326        match &self.default_constraint {
327            Some(c) => c
328                .create_default_vector(&self.data_type, self.is_nullable, num_rows)
329                .map(Some),
330            None => {
331                if self.is_nullable {
332                    // No default constraint, use null as default value.
333                    // TODO(yingwen): Use NullVector once it supports setting logical type.
334                    ColumnDefaultConstraint::null_value()
335                        .create_default_vector(&self.data_type, self.is_nullable, num_rows)
336                        .map(Some)
337                } else {
338                    Ok(None)
339                }
340            }
341        }
342    }
343
344    /// Creates a vector for padding.
345    ///
346    /// This method always returns a vector since it uses [DataType::default_value]
347    /// to fill the vector. Callers should only use the created vector for padding
348    /// and never read its content.
349    pub fn create_default_vector_for_padding(&self, num_rows: usize) -> VectorRef {
350        let padding_value = if self.is_nullable {
351            Value::Null
352        } else {
353            // If the column is not null, use the data type's default value as it is
354            // more efficient to acquire.
355            self.data_type.default_value()
356        };
357        let value_ref = padding_value.as_value_ref();
358        let mut mutable_vector = self.data_type.create_mutable_vector(num_rows);
359        for _ in 0..num_rows {
360            mutable_vector.push_value_ref(&value_ref);
361        }
362        mutable_vector.to_vector()
363    }
364
365    /// Creates a default value for this column.
366    ///
367    /// If the column is `NOT NULL` but doesn't has `DEFAULT` value supplied, returns `Ok(None)`.
368    pub fn create_default(&self) -> Result<Option<Value>> {
369        match &self.default_constraint {
370            Some(c) => c
371                .create_default(&self.data_type, self.is_nullable)
372                .map(Some),
373            None => {
374                if self.is_nullable {
375                    // No default constraint, use null as default value.
376                    ColumnDefaultConstraint::null_value()
377                        .create_default(&self.data_type, self.is_nullable)
378                        .map(Some)
379                } else {
380                    Ok(None)
381                }
382            }
383        }
384    }
385
386    /// Creates an impure default value for this column, only if it have a impure default constraint.
387    /// Otherwise, returns `Ok(None)`.
388    pub fn create_impure_default(&self) -> Result<Option<Value>> {
389        match &self.default_constraint {
390            Some(c) => c.create_impure_default(&self.data_type),
391            None => Ok(None),
392        }
393    }
394
395    /// Retrieves the fulltext options for the column.
396    pub fn fulltext_options(&self) -> Result<Option<FulltextOptions>> {
397        match self.metadata.get(FULLTEXT_KEY) {
398            None => Ok(None),
399            Some(json) => {
400                let options =
401                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
402                Ok(Some(options))
403            }
404        }
405    }
406
407    pub fn with_fulltext_options(mut self, options: FulltextOptions) -> Result<Self> {
408        self.metadata.insert(
409            FULLTEXT_KEY.to_string(),
410            serde_json::to_string(&options).context(error::SerializeSnafu)?,
411        );
412        Ok(self)
413    }
414
415    pub fn set_fulltext_options(&mut self, options: &FulltextOptions) -> Result<()> {
416        self.metadata.insert(
417            FULLTEXT_KEY.to_string(),
418            serde_json::to_string(options).context(error::SerializeSnafu)?,
419        );
420        Ok(())
421    }
422
423    /// Retrieves the skipping index options for the column.
424    pub fn skipping_index_options(&self) -> Result<Option<SkippingIndexOptions>> {
425        match self.metadata.get(SKIPPING_INDEX_KEY) {
426            None => Ok(None),
427            Some(json) => {
428                let options =
429                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
430                Ok(Some(options))
431            }
432        }
433    }
434
435    pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result<Self> {
436        self.metadata.insert(
437            SKIPPING_INDEX_KEY.to_string(),
438            serde_json::to_string(&options).context(error::SerializeSnafu)?,
439        );
440        Ok(self)
441    }
442
443    pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> {
444        self.metadata.insert(
445            SKIPPING_INDEX_KEY.to_string(),
446            serde_json::to_string(options).context(error::SerializeSnafu)?,
447        );
448        Ok(())
449    }
450
451    pub fn unset_skipping_options(&mut self) -> Result<()> {
452        self.metadata.remove(SKIPPING_INDEX_KEY);
453        Ok(())
454    }
455
456    pub fn extension_type<E>(&self) -> Result<Option<E>>
457    where
458        E: ExtensionType,
459    {
460        let extension_type_name = self.metadata.get(EXTENSION_TYPE_NAME_KEY);
461
462        if extension_type_name.map(|s| s.as_str()) == Some(E::NAME) {
463            let extension_metadata = self.metadata.get(EXTENSION_TYPE_METADATA_KEY);
464            let extension_metadata =
465                E::deserialize_metadata(extension_metadata.map(|s| s.as_str()))
466                    .context(ArrowMetadataSnafu)?;
467
468            let extension = E::try_new(&self.data_type.as_arrow_type(), extension_metadata)
469                .context(ArrowMetadataSnafu)?;
470            Ok(Some(extension))
471        } else {
472            Ok(None)
473        }
474    }
475
476    pub fn with_extension_type<E>(&mut self, extension_type: &E) -> Result<()>
477    where
478        E: ExtensionType,
479    {
480        self.metadata
481            .insert(EXTENSION_TYPE_NAME_KEY.to_string(), E::NAME.to_string());
482
483        if let Some(extension_metadata) = extension_type.serialize_metadata() {
484            self.metadata
485                .insert(EXTENSION_TYPE_METADATA_KEY.to_string(), extension_metadata);
486        }
487
488        Ok(())
489    }
490
491    pub fn is_indexed(&self) -> bool {
492        self.is_inverted_indexed() || self.is_fulltext_indexed() || self.is_skipping_indexed()
493    }
494}
495
/// Column extended type set in column schema's metadata.
///
/// The textual form (`Json`, `Vector(<dim>)`) produced by `Display` is the
/// one accepted by `FromStr`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnExtType {
    /// Json type.
    Json,

    /// Vector type with dimension.
    Vector(u32),
}

impl fmt::Display for ColumnExtType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Json => f.write_str("Json"),
            Self::Vector(dim) => write!(f, "Vector({dim})"),
        }
    }
}

impl FromStr for ColumnExtType {
    type Err = String;

    /// Parses `Json` or `Vector(<dim>)`, where `<dim>` must be a valid `u32`.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        if s == "Json" {
            return Ok(Self::Json);
        }

        // Anything that is not wrapped as `Vector(...)` is an unknown variant.
        let Some(dim) = s
            .strip_prefix("Vector(")
            .and_then(|rest| rest.strip_suffix(')'))
        else {
            return Err("Unknown variant".to_string());
        };

        dim.parse::<u32>()
            .map(Self::Vector)
            .map_err(|_| "Invalid dimension for Vector".to_string())
    }
}
529
530impl TryFrom<&Field> for ColumnSchema {
531    type Error = Error;
532
533    fn try_from(field: &Field) -> Result<ColumnSchema> {
534        let mut data_type = ConcreteDataType::try_from(field.data_type())?;
535        // Override the data type if it is specified in the metadata.
536        if let Some(s) = field.metadata().get(TYPE_KEY) {
537            let extype = ColumnExtType::from_str(s)
538                .map_err(|_| ParseExtendedTypeSnafu { value: s }.build())?;
539            match extype {
540                ColumnExtType::Json => {
541                    data_type = ConcreteDataType::json_datatype();
542                }
543                ColumnExtType::Vector(dim) => {
544                    data_type = ConcreteDataType::vector_datatype(dim);
545                }
546            }
547        }
548        let mut metadata = field.metadata().clone();
549        let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
550            Some(json) => {
551                Some(serde_json::from_str(&json).context(error::DeserializeSnafu { json })?)
552            }
553            None => None,
554        };
555        let mut is_time_index = metadata.contains_key(TIME_INDEX_KEY);
556        if is_time_index && !data_type.is_timestamp() {
557            // If the column is time index but the data type is not timestamp, it is invalid.
558            // We set the time index to false and remove the metadata.
559            // This is possible if we cast the time index column to another type. DataFusion will
560            // keep the metadata:
561            // https://github.com/apache/datafusion/pull/12951
562            is_time_index = false;
563            metadata.remove(TIME_INDEX_KEY);
564            common_telemetry::debug!(
565                "Column {} is not timestamp ({:?}) but has time index metadata",
566                data_type,
567                field.name(),
568            );
569        }
570
571        Ok(ColumnSchema {
572            name: field.name().clone(),
573            data_type,
574            is_nullable: field.is_nullable(),
575            is_time_index,
576            default_constraint,
577            metadata,
578        })
579    }
580}
581
582impl TryFrom<&ColumnSchema> for Field {
583    type Error = Error;
584
585    fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
586        let mut metadata = column_schema.metadata.clone();
587        if let Some(value) = &column_schema.default_constraint {
588            // Adds an additional metadata to store the default constraint.
589            let old = metadata.insert(
590                DEFAULT_CONSTRAINT_KEY.to_string(),
591                serde_json::to_string(&value).context(error::SerializeSnafu)?,
592            );
593
594            ensure!(
595                old.is_none(),
596                error::DuplicateMetaSnafu {
597                    key: DEFAULT_CONSTRAINT_KEY,
598                }
599            );
600        }
601
602        Ok(Field::new(
603            &column_schema.name,
604            column_schema.data_type.as_arrow_type(),
605            column_schema.is_nullable(),
606        )
607        .with_metadata(metadata))
608    }
609}
610
/// Fulltext options for a column.
///
/// Serialized with kebab-case field names. Every field except `enable` falls
/// back to a default when missing from the serialized form.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct FulltextOptions {
    /// Whether the fulltext index is enabled.
    pub enable: bool,
    /// The fulltext analyzer to use.
    #[serde(default)]
    pub analyzer: FulltextAnalyzer,
    /// Whether the fulltext index is case-sensitive.
    #[serde(default)]
    pub case_sensitive: bool,
    /// The fulltext backend to use.
    #[serde(default)]
    pub backend: FulltextBackend,
    /// The granularity of the fulltext index (for bloom backend only)
    #[serde(default = "fulltext_options_default_granularity")]
    pub granularity: u32,
    /// The false positive rate of the fulltext index (for bloom backend only),
    /// stored as the rate multiplied by 10000 (e.g. 100 = 1%).
    #[serde(default = "index_options_default_false_positive_rate_in_10000")]
    pub false_positive_rate_in_10000: u32,
}
633
/// Serde default for `FulltextOptions::granularity`: [`DEFAULT_GRANULARITY`].
fn fulltext_options_default_granularity() -> u32 {
    DEFAULT_GRANULARITY
}
637
/// Serde default for the `false_positive_rate_in_10000` fields:
/// [`DEFAULT_FALSE_POSITIVE_RATE`] expressed as a count per 10000
/// (100 for the current default of 0.01).
fn index_options_default_false_positive_rate_in_10000() -> u32 {
    (DEFAULT_FALSE_POSITIVE_RATE * 10000.0) as u32
}
641
642impl FulltextOptions {
643    /// Creates a new fulltext options.
644    pub fn new(
645        enable: bool,
646        analyzer: FulltextAnalyzer,
647        case_sensitive: bool,
648        backend: FulltextBackend,
649        granularity: u32,
650        false_positive_rate: f64,
651    ) -> Result<Self> {
652        ensure!(
653            0.0 < false_positive_rate && false_positive_rate <= 1.0,
654            error::InvalidFulltextOptionSnafu {
655                msg: format!(
656                    "Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"
657                ),
658            }
659        );
660        ensure!(
661            granularity > 0,
662            error::InvalidFulltextOptionSnafu {
663                msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
664            }
665        );
666        Ok(Self::new_unchecked(
667            enable,
668            analyzer,
669            case_sensitive,
670            backend,
671            granularity,
672            false_positive_rate,
673        ))
674    }
675
676    /// Creates a new fulltext options without checking `false_positive_rate` and `granularity`.
677    pub fn new_unchecked(
678        enable: bool,
679        analyzer: FulltextAnalyzer,
680        case_sensitive: bool,
681        backend: FulltextBackend,
682        granularity: u32,
683        false_positive_rate: f64,
684    ) -> Self {
685        Self {
686            enable,
687            analyzer,
688            case_sensitive,
689            backend,
690            granularity,
691            false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
692        }
693    }
694
695    /// Gets the false positive rate.
696    pub fn false_positive_rate(&self) -> f64 {
697        self.false_positive_rate_in_10000 as f64 / 10000.0
698    }
699}
700
701impl Default for FulltextOptions {
702    fn default() -> Self {
703        Self::new_unchecked(
704            false,
705            FulltextAnalyzer::default(),
706            false,
707            FulltextBackend::default(),
708            DEFAULT_GRANULARITY,
709            DEFAULT_FALSE_POSITIVE_RATE,
710        )
711    }
712}
713
714impl fmt::Display for FulltextOptions {
715    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
716        write!(f, "enable={}", self.enable)?;
717        if self.enable {
718            write!(f, ", analyzer={}", self.analyzer)?;
719            write!(f, ", case_sensitive={}", self.case_sensitive)?;
720            write!(f, ", backend={}", self.backend)?;
721            if self.backend == FulltextBackend::Bloom {
722                write!(f, ", granularity={}", self.granularity)?;
723                write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
724            }
725        }
726        Ok(())
727    }
728}
729
/// The backend of the fulltext index.
///
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub enum FulltextBackend {
    /// Bloom backend (the default); the only backend for which granularity
    /// and false positive rate apply.
    #[default]
    Bloom,
    /// Tantivy backend.
    Tantivy,
}
738
739impl fmt::Display for FulltextBackend {
740    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
741        match self {
742            FulltextBackend::Tantivy => write!(f, "tantivy"),
743            FulltextBackend::Bloom => write!(f, "bloom"),
744        }
745    }
746}
747
748impl TryFrom<HashMap<String, String>> for FulltextOptions {
749    type Error = Error;
750
751    fn try_from(options: HashMap<String, String>) -> Result<Self> {
752        let mut fulltext_options = FulltextOptions {
753            enable: true,
754            ..Default::default()
755        };
756
757        if let Some(enable) = options.get(COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE) {
758            match enable.to_ascii_lowercase().as_str() {
759                "true" => fulltext_options.enable = true,
760                "false" => fulltext_options.enable = false,
761                _ => {
762                    return InvalidFulltextOptionSnafu {
763                        msg: format!("{enable}, expected: 'true' | 'false'"),
764                    }
765                    .fail();
766                }
767            }
768        };
769
770        if let Some(analyzer) = options.get(COLUMN_FULLTEXT_OPT_KEY_ANALYZER) {
771            match analyzer.to_ascii_lowercase().as_str() {
772                "english" => fulltext_options.analyzer = FulltextAnalyzer::English,
773                "chinese" => fulltext_options.analyzer = FulltextAnalyzer::Chinese,
774                _ => {
775                    return InvalidFulltextOptionSnafu {
776                        msg: format!("{analyzer}, expected: 'English' | 'Chinese'"),
777                    }
778                    .fail();
779                }
780            }
781        };
782
783        if let Some(case_sensitive) = options.get(COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE) {
784            match case_sensitive.to_ascii_lowercase().as_str() {
785                "true" => fulltext_options.case_sensitive = true,
786                "false" => fulltext_options.case_sensitive = false,
787                _ => {
788                    return InvalidFulltextOptionSnafu {
789                        msg: format!("{case_sensitive}, expected: 'true' | 'false'"),
790                    }
791                    .fail();
792                }
793            }
794        }
795
796        if let Some(backend) = options.get(COLUMN_FULLTEXT_OPT_KEY_BACKEND) {
797            match backend.to_ascii_lowercase().as_str() {
798                "bloom" => fulltext_options.backend = FulltextBackend::Bloom,
799                "tantivy" => fulltext_options.backend = FulltextBackend::Tantivy,
800                _ => {
801                    return InvalidFulltextOptionSnafu {
802                        msg: format!("{backend}, expected: 'bloom' | 'tantivy'"),
803                    }
804                    .fail();
805                }
806            }
807        }
808
809        if fulltext_options.backend == FulltextBackend::Bloom {
810            // Parse granularity with default value 10240
811            let granularity = match options.get(COLUMN_FULLTEXT_OPT_KEY_GRANULARITY) {
812                Some(value) => value
813                    .parse::<u32>()
814                    .ok()
815                    .filter(|&v| v > 0)
816                    .ok_or_else(|| {
817                        error::InvalidFulltextOptionSnafu {
818                            msg: format!(
819                                "Invalid granularity: {value}, expected: positive integer"
820                            ),
821                        }
822                        .build()
823                    })?,
824                None => DEFAULT_GRANULARITY,
825            };
826            fulltext_options.granularity = granularity;
827
828            // Parse false positive rate with default value 0.01
829            let false_positive_rate = match options.get(COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE)
830            {
831                Some(value) => value
832                    .parse::<f64>()
833                    .ok()
834                    .filter(|&v| v > 0.0 && v <= 1.0)
835                    .ok_or_else(|| {
836                        error::InvalidFulltextOptionSnafu {
837                            msg: format!(
838                                "Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
839                            ),
840                        }
841                        .build()
842                    })?,
843                None => DEFAULT_FALSE_POSITIVE_RATE,
844            };
845            fulltext_options.false_positive_rate_in_10000 = (false_positive_rate * 10000.0) as u32;
846        }
847
848        Ok(fulltext_options)
849    }
850}
851
/// Fulltext analyzer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
pub enum FulltextAnalyzer {
    /// English analyzer (the default).
    #[default]
    English,
    /// Chinese analyzer.
    Chinese,
}
859
860impl fmt::Display for FulltextAnalyzer {
861    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
862        match self {
863            FulltextAnalyzer::English => write!(f, "English"),
864            FulltextAnalyzer::Chinese => write!(f, "Chinese"),
865        }
866    }
867}
868
/// Skipping options for a column.
///
/// Serialized with kebab-case field names (e.g. `false-positive-rate-in-10000`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct SkippingIndexOptions {
    /// The granularity of the skip index.
    pub granularity: u32,
    /// The false positive rate of the skip index (in ten-thousandths, e.g., 100 = 1%).
    ///
    /// When the field is missing from the serialized form, serde fills it in by calling
    /// `index_options_default_false_positive_rate_in_10000`.
    #[serde(default = "index_options_default_false_positive_rate_in_10000")]
    pub false_positive_rate_in_10000: u32,
    /// The type of the skip index.
    ///
    /// Falls back to `SkippingIndexType::default()` when missing from the serialized form.
    #[serde(default)]
    pub index_type: SkippingIndexType,
}
882
883impl SkippingIndexOptions {
884    /// Creates a new skipping index options without checking `false_positive_rate` and `granularity`.
885    pub fn new_unchecked(
886        granularity: u32,
887        false_positive_rate: f64,
888        index_type: SkippingIndexType,
889    ) -> Self {
890        Self {
891            granularity,
892            false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
893            index_type,
894        }
895    }
896
897    /// Creates a new skipping index options.
898    pub fn new(
899        granularity: u32,
900        false_positive_rate: f64,
901        index_type: SkippingIndexType,
902    ) -> Result<Self> {
903        ensure!(
904            0.0 < false_positive_rate && false_positive_rate <= 1.0,
905            error::InvalidSkippingIndexOptionSnafu {
906                msg: format!(
907                    "Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"
908                ),
909            }
910        );
911        ensure!(
912            granularity > 0,
913            error::InvalidSkippingIndexOptionSnafu {
914                msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
915            }
916        );
917        Ok(Self::new_unchecked(
918            granularity,
919            false_positive_rate,
920            index_type,
921        ))
922    }
923
924    /// Gets the false positive rate.
925    pub fn false_positive_rate(&self) -> f64 {
926        self.false_positive_rate_in_10000 as f64 / 10000.0
927    }
928}
929
930impl Default for SkippingIndexOptions {
931    fn default() -> Self {
932        Self::new_unchecked(
933            DEFAULT_GRANULARITY,
934            DEFAULT_FALSE_POSITIVE_RATE,
935            SkippingIndexType::default(),
936        )
937    }
938}
939
940impl fmt::Display for SkippingIndexOptions {
941    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
942        write!(f, "granularity={}", self.granularity)?;
943        write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
944        write!(f, ", index_type={}", self.index_type)?;
945        Ok(())
946    }
947}
948
/// Skip index types.
///
/// `BloomFilter` is the default and currently the only variant.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
pub enum SkippingIndexType {
    /// Bloom filter based skip index.
    #[default]
    BloomFilter,
}
955
956impl fmt::Display for SkippingIndexType {
957    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
958        match self {
959            SkippingIndexType::BloomFilter => write!(f, "BLOOM"),
960        }
961    }
962}
963
964impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
965    type Error = Error;
966
967    fn try_from(options: HashMap<String, String>) -> Result<Self> {
968        // Parse granularity with default value 1
969        let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
970            Some(value) => value
971                .parse::<u32>()
972                .ok()
973                .filter(|&v| v > 0)
974                .ok_or_else(|| {
975                    error::InvalidSkippingIndexOptionSnafu {
976                        msg: format!("Invalid granularity: {value}, expected: positive integer"),
977                    }
978                    .build()
979                })?,
980            None => DEFAULT_GRANULARITY,
981        };
982
983        // Parse false positive rate with default value 100
984        let false_positive_rate =
985            match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE) {
986                Some(value) => value
987                    .parse::<f64>()
988                    .ok()
989                    .filter(|&v| v > 0.0 && v <= 1.0)
990                    .ok_or_else(|| {
991                        error::InvalidSkippingIndexOptionSnafu {
992                            msg: format!(
993                                "Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
994                            ),
995                        }
996                        .build()
997                    })?,
998                None => DEFAULT_FALSE_POSITIVE_RATE,
999            };
1000
1001        // Parse index type with default value BloomFilter
1002        let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
1003            Some(typ) => match typ.to_ascii_uppercase().as_str() {
1004                "BLOOM" => SkippingIndexType::BloomFilter,
1005                _ => {
1006                    return error::InvalidSkippingIndexOptionSnafu {
1007                        msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"),
1008                    }
1009                    .fail();
1010                }
1011            },
1012            None => SkippingIndexType::default(),
1013        };
1014
1015        Ok(SkippingIndexOptions::new_unchecked(
1016            granularity,
1017            false_positive_rate,
1018            index_type,
1019        ))
1020    }
1021}
1022
/// Distance metric for vector similarity search.
///
/// Serialized with lowercase variant names; `InnerProduct` additionally accepts the alias
/// `ip` when deserializing. `L2sq` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "lowercase")]
pub enum VectorDistanceMetric {
    /// Squared Euclidean distance (L2^2).
    #[default]
    L2sq,
    /// Cosine distance (1 - cosine similarity).
    Cosine,
    /// Inner product (negative, for maximum inner product search).
    #[serde(alias = "ip")]
    InnerProduct,
}
1036
1037impl fmt::Display for VectorDistanceMetric {
1038    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1039        match self {
1040            VectorDistanceMetric::L2sq => write!(f, "l2sq"),
1041            VectorDistanceMetric::Cosine => write!(f, "cosine"),
1042            VectorDistanceMetric::InnerProduct => write!(f, "ip"),
1043        }
1044    }
1045}
1046
1047impl std::str::FromStr for VectorDistanceMetric {
1048    type Err = String;
1049
1050    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
1051        match s.to_lowercase().as_str() {
1052            "l2sq" | "l2" | "euclidean" => Ok(VectorDistanceMetric::L2sq),
1053            "cosine" | "cos" => Ok(VectorDistanceMetric::Cosine),
1054            "inner_product" | "ip" | "dot" => Ok(VectorDistanceMetric::InnerProduct),
1055            _ => Err(format!(
1056                "Unknown distance metric: {}. Expected: l2sq, cosine, or ip",
1057                s
1058            )),
1059        }
1060    }
1061}
1062
1063impl VectorDistanceMetric {
1064    /// Returns the metric as u8 for blob serialization.
1065    pub fn as_u8(&self) -> u8 {
1066        match self {
1067            Self::L2sq => 0,
1068            Self::Cosine => 1,
1069            Self::InnerProduct => 2,
1070        }
1071    }
1072
1073    /// Parses metric from u8 (used when reading blob).
1074    pub fn try_from_u8(v: u8) -> Option<Self> {
1075        match v {
1076            0 => Some(Self::L2sq),
1077            1 => Some(Self::Cosine),
1078            2 => Some(Self::InnerProduct),
1079            _ => None,
1080        }
1081    }
1082}
1083
/// Default HNSW connectivity parameter.
const DEFAULT_VECTOR_INDEX_CONNECTIVITY: u32 = 16;
/// Default expansion factor during index construction.
const DEFAULT_VECTOR_INDEX_EXPANSION_ADD: u32 = 128;
/// Default expansion factor during search.
const DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH: u32 = 64;

/// Returns `DEFAULT_VECTOR_INDEX_CONNECTIVITY`.
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// `VectorIndexOptions::connectivity`, which needs a function path rather than a constant.
fn default_vector_index_connectivity() -> u32 {
    DEFAULT_VECTOR_INDEX_CONNECTIVITY
}

/// Returns `DEFAULT_VECTOR_INDEX_EXPANSION_ADD`.
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// `VectorIndexOptions::expansion_add`.
fn default_vector_index_expansion_add() -> u32 {
    DEFAULT_VECTOR_INDEX_EXPANSION_ADD
}

/// Returns `DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH`.
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// `VectorIndexOptions::expansion_search`.
fn default_vector_index_expansion_search() -> u32 {
    DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH
}
1102
/// Supported vector index engine types.
///
/// Serialized with lowercase variant names. `Usearch` is the default and currently the
/// only variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "lowercase")]
pub enum VectorIndexEngineType {
    /// USearch HNSW implementation.
    #[default]
    Usearch,
    // Future: Vsag,
}
1112
1113impl VectorIndexEngineType {
1114    /// Returns the engine type as u8 for blob serialization.
1115    pub fn as_u8(&self) -> u8 {
1116        match self {
1117            Self::Usearch => 0,
1118        }
1119    }
1120
1121    /// Parses engine type from u8 (used when reading blob).
1122    pub fn try_from_u8(v: u8) -> Option<Self> {
1123        match v {
1124            0 => Some(Self::Usearch),
1125            _ => None,
1126        }
1127    }
1128}
1129
1130impl fmt::Display for VectorIndexEngineType {
1131    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1132        match self {
1133            Self::Usearch => write!(f, "usearch"),
1134        }
1135    }
1136}
1137
1138impl std::str::FromStr for VectorIndexEngineType {
1139    type Err = String;
1140
1141    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
1142        match s.to_lowercase().as_str() {
1143            "usearch" => Ok(Self::Usearch),
1144            _ => Err(format!(
1145                "Unknown vector index engine: {}. Expected: usearch",
1146                s
1147            )),
1148        }
1149    }
1150}
1151
/// Options for vector index (HNSW).
///
/// Serialized with kebab-case field names. Every field is optional in the serialized
/// form: each one carries a `#[serde(default …)]` attribute naming its fallback.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct VectorIndexOptions {
    /// Vector index engine type (default: usearch).
    #[serde(default)]
    pub engine: VectorIndexEngineType,
    /// Distance metric for similarity search.
    ///
    /// Falls back to `VectorDistanceMetric::default()` when missing.
    #[serde(default)]
    pub metric: VectorDistanceMetric,
    /// HNSW connectivity parameter (M in the paper).
    /// Higher values improve recall but increase memory usage.
    ///
    /// Falls back to `default_vector_index_connectivity()` when missing.
    #[serde(default = "default_vector_index_connectivity")]
    pub connectivity: u32,
    /// Expansion factor during index construction (ef_construction).
    /// Higher values improve index quality but slow down construction.
    ///
    /// Falls back to `default_vector_index_expansion_add()` when missing.
    #[serde(default = "default_vector_index_expansion_add")]
    pub expansion_add: u32,
    /// Expansion factor during search (ef_search).
    /// Higher values improve recall but slow down search.
    ///
    /// Falls back to `default_vector_index_expansion_search()` when missing.
    #[serde(default = "default_vector_index_expansion_search")]
    pub expansion_search: u32,
}
1175
1176impl Default for VectorIndexOptions {
1177    fn default() -> Self {
1178        Self {
1179            engine: VectorIndexEngineType::default(),
1180            metric: VectorDistanceMetric::default(),
1181            connectivity: DEFAULT_VECTOR_INDEX_CONNECTIVITY,
1182            expansion_add: DEFAULT_VECTOR_INDEX_EXPANSION_ADD,
1183            expansion_search: DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH,
1184        }
1185    }
1186}
1187
1188impl fmt::Display for VectorIndexOptions {
1189    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1190        write!(
1191            f,
1192            "engine={}, metric={}, connectivity={}, expansion_add={}, expansion_search={}",
1193            self.engine, self.metric, self.connectivity, self.expansion_add, self.expansion_search
1194        )
1195    }
1196}
1197
// Unit tests for column schema conversion, default value creation and the (de)serialization
// of index option structs.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};

    use super::*;
    use crate::value::Value;
    use crate::vectors::Int32Vector;

    // A nullable Int32 column converts to an arrow `Field` and back without loss.
    #[test]
    fn test_column_schema() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("test", field.name());
        assert_eq!(ArrowDataType::Int32, *field.data_type());
        assert!(field.is_nullable());

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    // The default constraint is not kept in the column's own metadata, but it is written
    // to the arrow field metadata as JSON and survives the round trip.
    #[test]
    fn test_column_schema_with_default_constraint() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::from(99))))
            .unwrap();
        assert!(
            column_schema
                .metadata()
                .get(DEFAULT_CONSTRAINT_KEY)
                .is_none()
        );

        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("test", field.name());
        assert_eq!(ArrowDataType::Int32, *field.data_type());
        assert!(field.is_nullable());
        assert_eq!(
            "{\"Value\":{\"Int32\":99}}",
            field.metadata().get(DEFAULT_CONSTRAINT_KEY).unwrap()
        );

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    // User metadata and the column comment are preserved alongside a default constraint.
    #[test]
    fn test_column_schema_with_metadata() {
        let metadata = Metadata::from([
            ("k1".to_string(), "v1".to_string()),
            (COMMENT_KEY.to_string(), "test comment".to_string()),
        ]);
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
        assert_eq!("test comment", column_schema.column_comment().unwrap());
        assert!(
            column_schema
                .metadata()
                .get(DEFAULT_CONSTRAINT_KEY)
                .is_none()
        );

        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("v1", field.metadata().get("k1").unwrap());
        // Only checks that the key is present in the field metadata; the value is not inspected.
        let _ = field.metadata().get(DEFAULT_CONSTRAINT_KEY).unwrap();

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    // Converting to a `Field` fails when user metadata already uses the default-constraint
    // key and a default constraint is also set.
    #[test]
    fn test_column_schema_with_duplicate_metadata() {
        let metadata = Metadata::from([(DEFAULT_CONSTRAINT_KEY.to_string(), "v1".to_string())]);
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert!(Field::try_from(&column_schema).is_err());
    }

    // A null default on a non-nullable column is rejected.
    #[test]
    fn test_column_schema_invalid_default_constraint() {
        assert!(
            ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false)
                .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
                .is_err()
        );
    }

    // A default constraint round-trips through its byte encoding.
    #[test]
    fn test_column_default_constraint_try_into_from() {
        let default_constraint = ColumnDefaultConstraint::Value(Value::from(42i64));

        let bytes: Vec<u8> = default_constraint.clone().try_into().unwrap();
        let from_value = ColumnDefaultConstraint::try_from(&bytes[..]).unwrap();

        assert_eq!(default_constraint, from_value);
    }

    // A nullable column yields an all-null default vector, with or without an explicit
    // null default.
    #[test]
    fn test_column_schema_create_default_null() {
        // Implicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let v = column_schema.create_default_vector(5).unwrap().unwrap();
        assert_eq!(5, v.len());
        assert!(v.only_null());

        // Explicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        let v = column_schema.create_default_vector(5).unwrap().unwrap();
        assert_eq!(5, v.len());
        assert!(v.only_null());
    }

    // A non-nullable column without a default constraint has no default vector.
    #[test]
    fn test_column_schema_no_default() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
        assert!(column_schema.create_default_vector(5).unwrap().is_none());
    }

    // Padding vectors are all-null for nullable columns and zero-filled for a non-nullable
    // Int32 column.
    #[test]
    fn test_create_default_vector_for_padding() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let vector = column_schema.create_default_vector_for_padding(4);
        assert!(vector.only_null());
        assert_eq!(4, vector.len());

        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
        let vector = column_schema.create_default_vector_for_padding(4);
        assert_eq!(4, vector.len());
        let expect: VectorRef = Arc::new(Int32Vector::from_slice([0, 0, 0, 0]));
        assert_eq!(expect, vector);
    }

    // Single-value counterpart of `test_column_schema_create_default_null`.
    #[test]
    fn test_column_schema_single_create_default_null() {
        // Implicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let v = column_schema.create_default().unwrap().unwrap();
        assert!(v.is_null());

        // Explicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        let v = column_schema.create_default().unwrap().unwrap();
        assert!(v.is_null());
    }

    // An explicit value default is returned as the single default value.
    #[test]
    fn test_column_schema_single_create_default_not_null() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::Int32(6))))
            .unwrap();
        let v = column_schema.create_default().unwrap().unwrap();
        assert_eq!(v, Value::Int32(6));
    }

    // A non-nullable column without a default constraint has no single default value.
    #[test]
    fn test_column_schema_single_no_default() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
        assert!(column_schema.create_default().unwrap().is_none());
    }

    // The `Debug` output is `<name> <type> null` or `<name> <type> not null`.
    #[test]
    fn test_debug_for_column_schema() {
        let column_schema_int8 =
            ColumnSchema::new("test_column_1", ConcreteDataType::int8_datatype(), true);

        let column_schema_int32 =
            ColumnSchema::new("test_column_2", ConcreteDataType::int32_datatype(), false);

        let formatted_int8 = format!("{:?}", column_schema_int8);
        let formatted_int32 = format!("{:?}", column_schema_int32);
        assert_eq!(formatted_int8, "test_column_1 Int8 null");
        assert_eq!(formatted_int32, "test_column_2 Int32 not null");
    }

    // Arrow fields convert to column schemas; a Binary field whose metadata carries
    // `TYPE_KEY` is mapped to the named extended type (json, vector).
    #[test]
    fn test_from_field_to_column_schema() {
        let field = Field::new("test", ArrowDataType::Int32, true);
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
        assert!(column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert!(column_schema.metadata.is_empty());

        let field = Field::new("test", ArrowDataType::Binary, true);
        let field = field.with_metadata(Metadata::from([(
            TYPE_KEY.to_string(),
            ConcreteDataType::json_datatype().name(),
        )]));
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(ConcreteDataType::json_datatype(), column_schema.data_type);
        assert!(column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert_eq!(
            column_schema.metadata.get(TYPE_KEY).unwrap(),
            &ConcreteDataType::json_datatype().name()
        );

        let field = Field::new("test", ArrowDataType::Binary, true);
        let field = field.with_metadata(Metadata::from([(
            TYPE_KEY.to_string(),
            ConcreteDataType::vector_datatype(3).name(),
        )]));
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(
            ConcreteDataType::vector_datatype(3),
            column_schema.data_type
        );
        assert!(column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert_eq!(
            column_schema.metadata.get(TYPE_KEY).unwrap(),
            &ConcreteDataType::vector_datatype(3).name()
        );
    }

    // The time-index flag in field metadata is honored for a timestamp field, and is
    // dropped (together with its metadata entry) for an Int32 field.
    #[test]
    fn test_column_schema_fix_time_index() {
        let field = Field::new(
            "test",
            ArrowDataType::Timestamp(TimeUnit::Second, None),
            false,
        );
        let field = field.with_metadata(Metadata::from([(
            TIME_INDEX_KEY.to_string(),
            "true".to_string(),
        )]));
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(
            ConcreteDataType::timestamp_second_datatype(),
            column_schema.data_type
        );
        assert!(!column_schema.is_nullable);
        assert!(column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert_eq!(1, column_schema.metadata().len());

        let field = Field::new("test", ArrowDataType::Int32, false);
        let field = field.with_metadata(Metadata::from([(
            TIME_INDEX_KEY.to_string(),
            "true".to_string(),
        )]));
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
        assert!(!column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert!(column_schema.metadata.is_empty());
    }

    // Skipping index options round-trip through JSON with kebab-case keys.
    #[test]
    fn test_skipping_index_options_deserialization() {
        let original_options = "{\"granularity\":1024,\"false-positive-rate-in-10000\":10,\"index-type\":\"BloomFilter\"}";
        let options = serde_json::from_str::<SkippingIndexOptions>(original_options).unwrap();
        assert_eq!(1024, options.granularity);
        assert_eq!(SkippingIndexType::BloomFilter, options.index_type);
        assert_eq!(0.001, options.false_positive_rate());

        let options_str = serde_json::to_string(&options).unwrap();
        assert_eq!(options_str, original_options);
    }

    // JSON without the false positive rate field still deserializes; the missing field
    // takes the default rate and is written out on re-serialization.
    #[test]
    fn test_skipping_index_options_deserialization_v0_14_to_v0_15() {
        let options = "{\"granularity\":10240,\"index-type\":\"BloomFilter\"}";
        let options = serde_json::from_str::<SkippingIndexOptions>(options).unwrap();
        assert_eq!(10240, options.granularity);
        assert_eq!(SkippingIndexType::BloomFilter, options.index_type);
        assert_eq!(DEFAULT_FALSE_POSITIVE_RATE, options.false_positive_rate());

        let options_str = serde_json::to_string(&options).unwrap();
        assert_eq!(
            options_str,
            "{\"granularity\":10240,\"false-positive-rate-in-10000\":100,\"index-type\":\"BloomFilter\"}"
        );
    }

    // Fulltext options round-trip through JSON with kebab-case keys.
    #[test]
    fn test_fulltext_options_deserialization() {
        let original_options = "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":1024,\"false-positive-rate-in-10000\":10}";
        let options = serde_json::from_str::<FulltextOptions>(original_options).unwrap();
        assert!(!options.case_sensitive);
        assert!(options.enable);
        assert_eq!(FulltextBackend::Bloom, options.backend);
        assert_eq!(FulltextAnalyzer::default(), options.analyzer);
        assert_eq!(1024, options.granularity);
        assert_eq!(0.001, options.false_positive_rate());

        let options_str = serde_json::to_string(&options).unwrap();
        assert_eq!(options_str, original_options);
    }

    // JSON without granularity and false positive rate still deserializes; both fields
    // take their defaults and are written out on re-serialization.
    #[test]
    fn test_fulltext_options_deserialization_v0_14_to_v0_15() {
        // 0.14 to 0.15
        let options = "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}";
        let options = serde_json::from_str::<FulltextOptions>(options).unwrap();
        assert!(!options.case_sensitive);
        assert!(options.enable);
        assert_eq!(FulltextBackend::Bloom, options.backend);
        assert_eq!(FulltextAnalyzer::default(), options.analyzer);
        assert_eq!(DEFAULT_GRANULARITY, options.granularity);
        assert_eq!(DEFAULT_FALSE_POSITIVE_RATE, options.false_positive_rate());

        let options_str = serde_json::to_string(&options).unwrap();
        assert_eq!(
            options_str,
            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
        );
    }
}