Skip to main content

datatypes/schema/
column_schema.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::str::FromStr;
17use std::{fmt, mem};
18
19use arrow::datatypes::Field;
20use arrow_schema::extension::{
21    EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType,
22};
23use serde::{Deserialize, Serialize};
24use snafu::{ResultExt, ensure};
25use sqlparser_derive::{Visit, VisitMut};
26
27use crate::data_type::{ConcreteDataType, DataType};
28use crate::error::{
29    self, ArrowMetadataSnafu, Error, InvalidFulltextOptionSnafu, ParseExtendedTypeSnafu, Result,
30};
31use crate::schema::TYPE_KEY;
32use crate::schema::constraint::ColumnDefaultConstraint;
33use crate::value::Value;
34use crate::vectors::VectorRef;
35
/// Key/value metadata attached to a column. It is copied into the arrow field's
/// metadata when a [`ColumnSchema`] is converted to an arrow `Field`.
pub type Metadata = HashMap<String, String>;

/// Key used to store whether the column is time index in arrow field's metadata.
pub const TIME_INDEX_KEY: &str = "greptime:time_index";
/// Key used to store the column comment in the column's metadata.
pub const COMMENT_KEY: &str = "greptime:storage:comment";
/// Key used to store default constraint in arrow field's metadata.
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
/// Key used to store fulltext options in arrow field's metadata.
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
/// Key used to store whether the column has inverted index in arrow field's metadata.
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
/// Key used to store skip options in arrow field's metadata.
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
/// Key used to store vector index options in arrow field's metadata.
pub const VECTOR_INDEX_KEY: &str = "greptime:vector_index";

/// Keys used in fulltext options
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend";
pub const COLUMN_FULLTEXT_OPT_KEY_GRANULARITY: &str = "granularity";
pub const COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";

/// Keys used in SKIPPING index options
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";

/// Keys used in VECTOR index options
pub const COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE: &str = "engine";
pub const COLUMN_VECTOR_INDEX_OPT_KEY_METRIC: &str = "metric";
pub const COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY: &str = "connectivity";
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD: &str = "expansion_add";
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH: &str = "expansion_search";

/// Default index granularity, used when the option is not specified.
pub const DEFAULT_GRANULARITY: u32 = 10240;

/// Default false positive rate (as a fraction, i.e. 0.01 = 1%), used when the
/// option is not specified.
pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01;
75
/// Schema of a column, used as an immutable struct.
///
/// Everything except `name` and `data_type` is private and is read or changed
/// through the accessor / builder methods of this type.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
    /// Name of the column.
    pub name: String,
    /// Logical data type of the column.
    pub data_type: ConcreteDataType,
    /// Whether the column accepts null values.
    is_nullable: bool,
    /// Whether the column is the time index.
    is_time_index: bool,
    /// Optional default constraint used to produce default values.
    default_constraint: Option<ColumnDefaultConstraint>,
    /// Extra key/value metadata (index options, comment, extension type, ...).
    metadata: Metadata,
}
86
87impl fmt::Debug for ColumnSchema {
88    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
89        write!(
90            f,
91            "{} {} {}",
92            self.name,
93            self.data_type,
94            if self.is_nullable { "null" } else { "not null" },
95        )?;
96
97        if self.is_time_index {
98            write!(f, " time_index")?;
99        }
100
101        // Add default constraint if present
102        if let Some(default_constraint) = &self.default_constraint {
103            write!(f, " default={:?}", default_constraint)?;
104        }
105
106        // Add metadata if present
107        if !self.metadata.is_empty() {
108            write!(f, " metadata={:?}", self.metadata)?;
109        }
110
111        Ok(())
112    }
113}
114
115impl ColumnSchema {
116    pub fn new<T: Into<String>>(
117        name: T,
118        data_type: ConcreteDataType,
119        is_nullable: bool,
120    ) -> ColumnSchema {
121        ColumnSchema {
122            name: name.into(),
123            data_type,
124            is_nullable,
125            is_time_index: false,
126            default_constraint: None,
127            metadata: Metadata::new(),
128        }
129    }
130
    /// Returns whether this column is marked as the time index.
    #[inline]
    pub fn is_time_index(&self) -> bool {
        self.is_time_index
    }
135
    /// Returns whether this column accepts null values.
    #[inline]
    pub fn is_nullable(&self) -> bool {
        self.is_nullable
    }
140
    /// Returns the default constraint of this column, if one is set.
    #[inline]
    pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
        self.default_constraint.as_ref()
    }
145
146    /// Check if the default constraint is a impure function.
147    pub fn is_default_impure(&self) -> bool {
148        self.default_constraint
149            .as_ref()
150            .map(|c| c.is_function())
151            .unwrap_or(false)
152    }
153
    /// Returns a shared reference to the column's metadata map.
    #[inline]
    pub fn metadata(&self) -> &Metadata {
        &self.metadata
    }
158
    /// Returns a mutable reference to the column's metadata map.
    ///
    /// Only the map is exposed; the separate `is_time_index` field is not
    /// touched by changes made through this reference.
    #[inline]
    pub fn mut_metadata(&mut self) -> &mut Metadata {
        &mut self.metadata
    }
163
    /// Retrieves the column comment, i.e. the metadata value stored under
    /// [`COMMENT_KEY`], or `None` if no comment is set.
    pub fn column_comment(&self) -> Option<&String> {
        self.metadata.get(COMMENT_KEY)
    }
168
169    pub fn with_time_index(mut self, is_time_index: bool) -> Self {
170        self.is_time_index = is_time_index;
171        if is_time_index {
172            let _ = self
173                .metadata
174                .insert(TIME_INDEX_KEY.to_string(), "true".to_string());
175        } else {
176            let _ = self.metadata.remove(TIME_INDEX_KEY);
177        }
178        self
179    }
180
181    /// Returns the estimated memory footprint of this schema.
182    pub fn estimated_size(&self) -> usize {
183        mem::size_of_val(self) - mem::size_of_val(&self.data_type)
184            + self.data_type.as_arrow_type().size()
185            + self.name.capacity()
186            + self
187                .default_constraint
188                .as_ref()
189                .map(column_default_constraint_size)
190                .unwrap_or_default()
191            + metadata_size(&self.metadata)
192    }
193
194    /// Set the inverted index for the column.
195    /// Similar to [with_inverted_index] but don't take the ownership.
196    ///
197    /// [with_inverted_index]: Self::with_inverted_index
198    pub fn set_inverted_index(&mut self, value: bool) {
199        match value {
200            true => {
201                self.metadata
202                    .insert(INVERTED_INDEX_KEY.to_string(), value.to_string());
203            }
204            false => {
205                self.metadata.remove(INVERTED_INDEX_KEY);
206            }
207        }
208    }
209
    /// Sets or clears the inverted index flag of the column.
    /// Similar to [set_inverted_index] but takes the ownership and returns an owned value,
    /// which makes it usable in builder-style chains.
    ///
    /// [set_inverted_index]: Self::set_inverted_index
    pub fn with_inverted_index(mut self, value: bool) -> Self {
        self.set_inverted_index(value);
        self
    }
218
219    pub fn is_inverted_indexed(&self) -> bool {
220        self.metadata
221            .get(INVERTED_INDEX_KEY)
222            .map(|v| v.eq_ignore_ascii_case("true"))
223            .unwrap_or(false)
224    }
225
226    pub fn is_fulltext_indexed(&self) -> bool {
227        self.fulltext_options()
228            .unwrap_or_default()
229            .map(|option| option.enable)
230            .unwrap_or_default()
231    }
232
233    pub fn is_skipping_indexed(&self) -> bool {
234        self.skipping_index_options().unwrap_or_default().is_some()
235    }
236
    /// Returns `true` if the metadata contains [`INVERTED_INDEX_KEY`],
    /// regardless of the value stored under it.
    pub fn has_inverted_index_key(&self) -> bool {
        self.metadata.contains_key(INVERTED_INDEX_KEY)
    }
240
241    /// Checks if this column has a vector index.
242    pub fn is_vector_indexed(&self) -> bool {
243        match self.vector_index_options() {
244            Ok(opts) => opts.is_some(),
245            Err(e) => {
246                common_telemetry::warn!(
247                    "Failed to deserialize vector_index_options for column '{}': {}",
248                    self.name,
249                    e
250                );
251                false
252            }
253        }
254    }
255
256    /// Gets the vector index options.
257    pub fn vector_index_options(&self) -> Result<Option<VectorIndexOptions>> {
258        match self.metadata.get(VECTOR_INDEX_KEY) {
259            None => Ok(None),
260            Some(json) => {
261                let options =
262                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
263                Ok(Some(options))
264            }
265        }
266    }
267
268    /// Sets the vector index options.
269    pub fn set_vector_index_options(&mut self, options: &VectorIndexOptions) -> Result<()> {
270        self.metadata.insert(
271            VECTOR_INDEX_KEY.to_string(),
272            serde_json::to_string(options).context(error::SerializeSnafu)?,
273        );
274        Ok(())
275    }
276
    /// Removes the vector index options by deleting the [`VECTOR_INDEX_KEY`]
    /// metadata entry. Does nothing if the entry is absent.
    pub fn unset_vector_index_options(&mut self) {
        self.metadata.remove(VECTOR_INDEX_KEY);
    }
281
    /// Sets vector index options and returns self for chaining.
    ///
    /// Owned counterpart of [`Self::set_vector_index_options`]; fails with the
    /// same error if the options cannot be serialized.
    pub fn with_vector_index_options(mut self, options: &VectorIndexOptions) -> Result<Self> {
        self.set_vector_index_options(options)?;
        Ok(self)
    }
287
288    /// Set default constraint.
289    ///
290    /// If a default constraint exists for the column, this method will
291    /// validate it against the column's data type and nullability.
292    pub fn with_default_constraint(
293        mut self,
294        default_constraint: Option<ColumnDefaultConstraint>,
295    ) -> Result<Self> {
296        if let Some(constraint) = &default_constraint {
297            constraint.validate(&self.data_type, self.is_nullable)?;
298        }
299
300        self.default_constraint = default_constraint;
301        Ok(self)
302    }
303
    /// Sets the nullability of the column to `true`.
    /// Similar to [set_nullable] but takes the ownership and returns an owned value.
    ///
    /// [set_nullable]: Self::set_nullable
    pub fn with_nullable_set(mut self) -> Self {
        self.is_nullable = true;
        self
    }
312
    /// Sets the nullability of the column to `true`.
    /// Similar to [with_nullable_set] but doesn't take the ownership.
    ///
    /// [with_nullable_set]: Self::with_nullable_set
    pub fn set_nullable(&mut self) {
        self.is_nullable = true;
    }
320
321    /// Set the `is_time_index` to `true` of the column.
322    /// Similar to [with_time_index] but don't take the ownership.
323    ///
324    /// [with_time_index]: Self::with_time_index
325    pub fn set_time_index(&mut self) {
326        self.is_time_index = true;
327    }
328
    /// Creates a new [`ColumnSchema`] with given metadata.
    ///
    /// The existing metadata map is replaced as a whole; no other field
    /// (including `is_time_index`) is changed by this call.
    pub fn with_metadata(mut self, metadata: Metadata) -> Self {
        self.metadata = metadata;
        self
    }
334
335    /// Creates a vector with default value for this column.
336    ///
337    /// If the column is `NOT NULL` but doesn't has `DEFAULT` value supplied, returns `Ok(None)`.
338    pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
339        match &self.default_constraint {
340            Some(c) => c
341                .create_default_vector(&self.data_type, self.is_nullable, num_rows)
342                .map(Some),
343            None => {
344                if self.is_nullable {
345                    // No default constraint, use null as default value.
346                    // TODO(yingwen): Use NullVector once it supports setting logical type.
347                    ColumnDefaultConstraint::null_value()
348                        .create_default_vector(&self.data_type, self.is_nullable, num_rows)
349                        .map(Some)
350                } else {
351                    Ok(None)
352                }
353            }
354        }
355    }
356
357    /// Creates a vector for padding.
358    ///
359    /// This method always returns a vector since it uses [DataType::default_value]
360    /// to fill the vector. Callers should only use the created vector for padding
361    /// and never read its content.
362    pub fn create_default_vector_for_padding(&self, num_rows: usize) -> VectorRef {
363        let padding_value = if self.is_nullable {
364            Value::Null
365        } else {
366            // If the column is not null, use the data type's default value as it is
367            // more efficient to acquire.
368            self.data_type.default_value()
369        };
370        let value_ref = padding_value.as_value_ref();
371        let mut mutable_vector = self.data_type.create_mutable_vector(num_rows);
372        for _ in 0..num_rows {
373            mutable_vector.push_value_ref(&value_ref);
374        }
375        mutable_vector.to_vector()
376    }
377
378    /// Creates a default value for this column.
379    ///
380    /// If the column is `NOT NULL` but doesn't has `DEFAULT` value supplied, returns `Ok(None)`.
381    pub fn create_default(&self) -> Result<Option<Value>> {
382        match &self.default_constraint {
383            Some(c) => c
384                .create_default(&self.data_type, self.is_nullable)
385                .map(Some),
386            None => {
387                if self.is_nullable {
388                    // No default constraint, use null as default value.
389                    ColumnDefaultConstraint::null_value()
390                        .create_default(&self.data_type, self.is_nullable)
391                        .map(Some)
392                } else {
393                    Ok(None)
394                }
395            }
396        }
397    }
398
399    /// Creates an impure default value for this column, only if it have a impure default constraint.
400    /// Otherwise, returns `Ok(None)`.
401    pub fn create_impure_default(&self) -> Result<Option<Value>> {
402        match &self.default_constraint {
403            Some(c) => c.create_impure_default(&self.data_type),
404            None => Ok(None),
405        }
406    }
407
408    /// Retrieves the fulltext options for the column.
409    pub fn fulltext_options(&self) -> Result<Option<FulltextOptions>> {
410        match self.metadata.get(FULLTEXT_KEY) {
411            None => Ok(None),
412            Some(json) => {
413                let options =
414                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
415                Ok(Some(options))
416            }
417        }
418    }
419
420    pub fn with_fulltext_options(mut self, options: FulltextOptions) -> Result<Self> {
421        self.metadata.insert(
422            FULLTEXT_KEY.to_string(),
423            serde_json::to_string(&options).context(error::SerializeSnafu)?,
424        );
425        Ok(self)
426    }
427
428    pub fn set_fulltext_options(&mut self, options: &FulltextOptions) -> Result<()> {
429        self.metadata.insert(
430            FULLTEXT_KEY.to_string(),
431            serde_json::to_string(options).context(error::SerializeSnafu)?,
432        );
433        Ok(())
434    }
435
436    /// Retrieves the skipping index options for the column.
437    pub fn skipping_index_options(&self) -> Result<Option<SkippingIndexOptions>> {
438        match self.metadata.get(SKIPPING_INDEX_KEY) {
439            None => Ok(None),
440            Some(json) => {
441                let options =
442                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
443                Ok(Some(options))
444            }
445        }
446    }
447
448    pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result<Self> {
449        self.metadata.insert(
450            SKIPPING_INDEX_KEY.to_string(),
451            serde_json::to_string(&options).context(error::SerializeSnafu)?,
452        );
453        Ok(self)
454    }
455
456    pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> {
457        self.metadata.insert(
458            SKIPPING_INDEX_KEY.to_string(),
459            serde_json::to_string(options).context(error::SerializeSnafu)?,
460        );
461        Ok(())
462    }
463
    /// Removes the skipping index options by deleting the
    /// [`SKIPPING_INDEX_KEY`] metadata entry.
    ///
    /// The current implementation never fails and always returns `Ok(())`.
    pub fn unset_skipping_options(&mut self) -> Result<()> {
        self.metadata.remove(SKIPPING_INDEX_KEY);
        Ok(())
    }
468
469    pub fn extension_type<E>(&self) -> Result<Option<E>>
470    where
471        E: ExtensionType,
472    {
473        let extension_type_name = self.metadata.get(EXTENSION_TYPE_NAME_KEY);
474
475        if extension_type_name.map(|s| s.as_str()) == Some(E::NAME) {
476            let extension_metadata = self.metadata.get(EXTENSION_TYPE_METADATA_KEY);
477            let extension_metadata =
478                E::deserialize_metadata(extension_metadata.map(|s| s.as_str()))
479                    .context(ArrowMetadataSnafu)?;
480
481            let extension = E::try_new(&self.data_type.as_arrow_type(), extension_metadata)
482                .context(ArrowMetadataSnafu)?;
483            Ok(Some(extension))
484        } else {
485            Ok(None)
486        }
487    }
488
489    pub fn with_extension_type<E>(&mut self, extension_type: &E) -> Result<()>
490    where
491        E: ExtensionType,
492    {
493        self.metadata
494            .insert(EXTENSION_TYPE_NAME_KEY.to_string(), E::NAME.to_string());
495
496        if let Some(extension_metadata) = extension_type.serialize_metadata() {
497            self.metadata
498                .insert(EXTENSION_TYPE_METADATA_KEY.to_string(), extension_metadata);
499        }
500
501        Ok(())
502    }
503
504    pub fn is_indexed(&self) -> bool {
505        self.is_inverted_indexed() || self.is_fulltext_indexed() || self.is_skipping_indexed()
506    }
507}
508
509fn metadata_size(metadata: &Metadata) -> usize {
510    mem::size_of::<(String, String)>() * metadata.capacity()
511        + metadata
512            .iter()
513            .map(|(key, value)| key.capacity() + value.capacity())
514            .sum::<usize>()
515}
516
517fn column_default_constraint_size(default_constraint: &ColumnDefaultConstraint) -> usize {
518    match default_constraint {
519        ColumnDefaultConstraint::Function(expr) => expr.capacity(),
520        ColumnDefaultConstraint::Value(value) => value.as_value_ref().data_size(),
521    }
522}
523
/// Column extended type set in column schema's metadata.
///
/// The textual form produced by `Display` (`Json`, `Vector(<dim>)`) is the same
/// form accepted by `FromStr`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnExtType {
    /// Json type.
    Json,

    /// Vector type with dimension.
    Vector(u32),
}

impl fmt::Display for ColumnExtType {
    /// Writes `Json` or `Vector(<dim>)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Json => f.write_str("Json"),
            Self::Vector(dim) => write!(f, "Vector({})", dim),
        }
    }
}

impl FromStr for ColumnExtType {
    type Err = String;

    /// Parses `Json` or `Vector(<dim>)`, where `<dim>` must parse as a `u32`.
    ///
    /// Any other input is rejected with a descriptive error string.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        if s == "Json" {
            return Ok(Self::Json);
        }

        // Peel off the `Vector(` .. `)` wrapper; what remains is the dimension.
        let dimension = s
            .strip_prefix("Vector(")
            .and_then(|rest| rest.strip_suffix(')'));

        match dimension {
            Some(digits) => digits
                .parse::<u32>()
                .map(Self::Vector)
                .map_err(|_| "Invalid dimension for Vector".to_string()),
            None => Err("Unknown variant".to_string()),
        }
    }
}
557
558impl TryFrom<&Field> for ColumnSchema {
559    type Error = Error;
560
561    fn try_from(field: &Field) -> Result<ColumnSchema> {
562        let mut data_type = ConcreteDataType::try_from(field.data_type())?;
563        // Override the data type if it is specified in the metadata.
564        if let Some(s) = field.metadata().get(TYPE_KEY) {
565            let extype = ColumnExtType::from_str(s)
566                .map_err(|_| ParseExtendedTypeSnafu { value: s }.build())?;
567            match extype {
568                ColumnExtType::Json => {
569                    data_type = ConcreteDataType::json_datatype();
570                }
571                ColumnExtType::Vector(dim) => {
572                    data_type = ConcreteDataType::vector_datatype(dim);
573                }
574            }
575        }
576        let mut metadata = field.metadata().clone();
577        let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
578            Some(json) => {
579                Some(serde_json::from_str(&json).context(error::DeserializeSnafu { json })?)
580            }
581            None => None,
582        };
583        let mut is_time_index = metadata.contains_key(TIME_INDEX_KEY);
584        if is_time_index && !data_type.is_timestamp() {
585            // If the column is time index but the data type is not timestamp, it is invalid.
586            // We set the time index to false and remove the metadata.
587            // This is possible if we cast the time index column to another type. DataFusion will
588            // keep the metadata:
589            // https://github.com/apache/datafusion/pull/12951
590            is_time_index = false;
591            metadata.remove(TIME_INDEX_KEY);
592            common_telemetry::debug!(
593                "Column {} is not timestamp ({:?}) but has time index metadata",
594                data_type,
595                field.name(),
596            );
597        }
598
599        Ok(ColumnSchema {
600            name: field.name().clone(),
601            data_type,
602            is_nullable: field.is_nullable(),
603            is_time_index,
604            default_constraint,
605            metadata,
606        })
607    }
608}
609
610impl TryFrom<&ColumnSchema> for Field {
611    type Error = Error;
612
613    fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
614        let mut metadata = column_schema.metadata.clone();
615        if let Some(value) = &column_schema.default_constraint {
616            // Adds an additional metadata to store the default constraint.
617            let old = metadata.insert(
618                DEFAULT_CONSTRAINT_KEY.to_string(),
619                serde_json::to_string(&value).context(error::SerializeSnafu)?,
620            );
621
622            ensure!(
623                old.is_none(),
624                error::DuplicateMetaSnafu {
625                    key: DEFAULT_CONSTRAINT_KEY,
626                }
627            );
628        }
629
630        Ok(Field::new(
631            &column_schema.name,
632            column_schema.data_type.as_arrow_type(),
633            column_schema.is_nullable(),
634        )
635        .with_metadata(metadata))
636    }
637}
638
/// Fulltext options for a column.
///
/// Field names are serialized in kebab-case (see the `rename_all` attribute).
/// Every field except `enable` falls back to a default when it is missing from
/// the serialized form.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct FulltextOptions {
    /// Whether the fulltext index is enabled.
    pub enable: bool,
    /// The fulltext analyzer to use.
    #[serde(default)]
    pub analyzer: FulltextAnalyzer,
    /// Whether the fulltext index is case-sensitive.
    #[serde(default)]
    pub case_sensitive: bool,
    /// The fulltext backend to use.
    #[serde(default)]
    pub backend: FulltextBackend,
    /// The granularity of the fulltext index (for bloom backend only)
    #[serde(default = "fulltext_options_default_granularity")]
    pub granularity: u32,
    /// The false positive rate of the fulltext index (for bloom backend only),
    /// stored as an integer number of ten-thousandths (e.g. 100 = 1%).
    #[serde(default = "index_options_default_false_positive_rate_in_10000")]
    pub false_positive_rate_in_10000: u32,
}
661
/// Serde default for `FulltextOptions::granularity`: [`DEFAULT_GRANULARITY`].
fn fulltext_options_default_granularity() -> u32 {
    DEFAULT_GRANULARITY
}
665
/// Serde default for the `false_positive_rate_in_10000` fields:
/// [`DEFAULT_FALSE_POSITIVE_RATE`] expressed in ten-thousandths.
fn index_options_default_false_positive_rate_in_10000() -> u32 {
    (DEFAULT_FALSE_POSITIVE_RATE * 10000.0) as u32
}
669
670impl FulltextOptions {
671    /// Creates a new fulltext options.
672    pub fn new(
673        enable: bool,
674        analyzer: FulltextAnalyzer,
675        case_sensitive: bool,
676        backend: FulltextBackend,
677        granularity: u32,
678        false_positive_rate: f64,
679    ) -> Result<Self> {
680        ensure!(
681            0.0 < false_positive_rate && false_positive_rate <= 1.0,
682            error::InvalidFulltextOptionSnafu {
683                msg: format!(
684                    "Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"
685                ),
686            }
687        );
688        ensure!(
689            granularity > 0,
690            error::InvalidFulltextOptionSnafu {
691                msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
692            }
693        );
694        Ok(Self::new_unchecked(
695            enable,
696            analyzer,
697            case_sensitive,
698            backend,
699            granularity,
700            false_positive_rate,
701        ))
702    }
703
704    /// Creates a new fulltext options without checking `false_positive_rate` and `granularity`.
705    pub fn new_unchecked(
706        enable: bool,
707        analyzer: FulltextAnalyzer,
708        case_sensitive: bool,
709        backend: FulltextBackend,
710        granularity: u32,
711        false_positive_rate: f64,
712    ) -> Self {
713        Self {
714            enable,
715            analyzer,
716            case_sensitive,
717            backend,
718            granularity,
719            false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
720        }
721    }
722
723    /// Gets the false positive rate.
724    pub fn false_positive_rate(&self) -> f64 {
725        self.false_positive_rate_in_10000 as f64 / 10000.0
726    }
727}
728
729impl Default for FulltextOptions {
730    fn default() -> Self {
731        Self::new_unchecked(
732            false,
733            FulltextAnalyzer::default(),
734            false,
735            FulltextBackend::default(),
736            DEFAULT_GRANULARITY,
737            DEFAULT_FALSE_POSITIVE_RATE,
738        )
739    }
740}
741
742impl fmt::Display for FulltextOptions {
743    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
744        write!(f, "enable={}", self.enable)?;
745        if self.enable {
746            write!(f, ", analyzer={}", self.analyzer)?;
747            write!(f, ", case_sensitive={}", self.case_sensitive)?;
748            write!(f, ", backend={}", self.backend)?;
749            if self.backend == FulltextBackend::Bloom {
750                write!(f, ", granularity={}", self.granularity)?;
751                write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
752            }
753        }
754        Ok(())
755    }
756}
757
/// The backend of the fulltext index.
///
/// Variant names are serialized in kebab-case (see the `rename_all` attribute).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub enum FulltextBackend {
    /// Bloom backend; this is the default.
    #[default]
    Bloom,
    /// Tantivy backend.
    Tantivy,
}
766
767impl fmt::Display for FulltextBackend {
768    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
769        match self {
770            FulltextBackend::Tantivy => write!(f, "tantivy"),
771            FulltextBackend::Bloom => write!(f, "bloom"),
772        }
773    }
774}
775
776impl TryFrom<HashMap<String, String>> for FulltextOptions {
777    type Error = Error;
778
779    fn try_from(options: HashMap<String, String>) -> Result<Self> {
780        let mut fulltext_options = FulltextOptions {
781            enable: true,
782            ..Default::default()
783        };
784
785        if let Some(enable) = options.get(COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE) {
786            match enable.to_ascii_lowercase().as_str() {
787                "true" => fulltext_options.enable = true,
788                "false" => fulltext_options.enable = false,
789                _ => {
790                    return InvalidFulltextOptionSnafu {
791                        msg: format!("{enable}, expected: 'true' | 'false'"),
792                    }
793                    .fail();
794                }
795            }
796        };
797
798        if let Some(analyzer) = options.get(COLUMN_FULLTEXT_OPT_KEY_ANALYZER) {
799            match analyzer.to_ascii_lowercase().as_str() {
800                "english" => fulltext_options.analyzer = FulltextAnalyzer::English,
801                "chinese" => fulltext_options.analyzer = FulltextAnalyzer::Chinese,
802                _ => {
803                    return InvalidFulltextOptionSnafu {
804                        msg: format!("{analyzer}, expected: 'English' | 'Chinese'"),
805                    }
806                    .fail();
807                }
808            }
809        };
810
811        if let Some(case_sensitive) = options.get(COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE) {
812            match case_sensitive.to_ascii_lowercase().as_str() {
813                "true" => fulltext_options.case_sensitive = true,
814                "false" => fulltext_options.case_sensitive = false,
815                _ => {
816                    return InvalidFulltextOptionSnafu {
817                        msg: format!("{case_sensitive}, expected: 'true' | 'false'"),
818                    }
819                    .fail();
820                }
821            }
822        }
823
824        if let Some(backend) = options.get(COLUMN_FULLTEXT_OPT_KEY_BACKEND) {
825            match backend.to_ascii_lowercase().as_str() {
826                "bloom" => fulltext_options.backend = FulltextBackend::Bloom,
827                "tantivy" => fulltext_options.backend = FulltextBackend::Tantivy,
828                _ => {
829                    return InvalidFulltextOptionSnafu {
830                        msg: format!("{backend}, expected: 'bloom' | 'tantivy'"),
831                    }
832                    .fail();
833                }
834            }
835        }
836
837        if fulltext_options.backend == FulltextBackend::Bloom {
838            // Parse granularity with default value 10240
839            let granularity = match options.get(COLUMN_FULLTEXT_OPT_KEY_GRANULARITY) {
840                Some(value) => value
841                    .parse::<u32>()
842                    .ok()
843                    .filter(|&v| v > 0)
844                    .ok_or_else(|| {
845                        error::InvalidFulltextOptionSnafu {
846                            msg: format!(
847                                "Invalid granularity: {value}, expected: positive integer"
848                            ),
849                        }
850                        .build()
851                    })?,
852                None => DEFAULT_GRANULARITY,
853            };
854            fulltext_options.granularity = granularity;
855
856            // Parse false positive rate with default value 0.01
857            let false_positive_rate = match options.get(COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE)
858            {
859                Some(value) => value
860                    .parse::<f64>()
861                    .ok()
862                    .filter(|&v| v > 0.0 && v <= 1.0)
863                    .ok_or_else(|| {
864                        error::InvalidFulltextOptionSnafu {
865                            msg: format!(
866                                "Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
867                            ),
868                        }
869                        .build()
870                    })?,
871                None => DEFAULT_FALSE_POSITIVE_RATE,
872            };
873            fulltext_options.false_positive_rate_in_10000 = (false_positive_rate * 10000.0) as u32;
874        }
875
876        Ok(fulltext_options)
877    }
878}
879
/// Fulltext analyzer.
///
/// `English` is the variant produced by `Default`. Serde uses the variant
/// names as written here (e.g. `"English"`), since no rename attribute is set.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
pub enum FulltextAnalyzer {
    /// The default analyzer.
    #[default]
    English,
    /// NOTE(review): presumably selects Chinese-aware analysis — confirm against the index backend.
    Chinese,
}
887
888impl fmt::Display for FulltextAnalyzer {
889    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
890        match self {
891            FulltextAnalyzer::English => write!(f, "English"),
892            FulltextAnalyzer::Chinese => write!(f, "Chinese"),
893        }
894    }
895}
896
/// Skipping index options for a column.
///
/// Serde uses kebab-case keys for the fields: `granularity`,
/// `false-positive-rate-in-10000` and `index-type`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct SkippingIndexOptions {
    /// The granularity of the skip index.
    pub granularity: u32,
    /// The false positive rate of the skip index (in ten-thousandths, e.g., 100 = 1%).
    ///
    /// When the key is missing during deserialization, serde fills it in by calling
    /// `index_options_default_false_positive_rate_in_10000`.
    #[serde(default = "index_options_default_false_positive_rate_in_10000")]
    pub false_positive_rate_in_10000: u32,
    /// The type of the skip index.
    ///
    /// Falls back to `SkippingIndexType::default()` when the key is missing during
    /// deserialization.
    #[serde(default)]
    pub index_type: SkippingIndexType,
}
910
911impl SkippingIndexOptions {
912    /// Creates a new skipping index options without checking `false_positive_rate` and `granularity`.
913    pub fn new_unchecked(
914        granularity: u32,
915        false_positive_rate: f64,
916        index_type: SkippingIndexType,
917    ) -> Self {
918        Self {
919            granularity,
920            false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
921            index_type,
922        }
923    }
924
925    /// Creates a new skipping index options.
926    pub fn new(
927        granularity: u32,
928        false_positive_rate: f64,
929        index_type: SkippingIndexType,
930    ) -> Result<Self> {
931        ensure!(
932            0.0 < false_positive_rate && false_positive_rate <= 1.0,
933            error::InvalidSkippingIndexOptionSnafu {
934                msg: format!(
935                    "Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"
936                ),
937            }
938        );
939        ensure!(
940            granularity > 0,
941            error::InvalidSkippingIndexOptionSnafu {
942                msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
943            }
944        );
945        Ok(Self::new_unchecked(
946            granularity,
947            false_positive_rate,
948            index_type,
949        ))
950    }
951
952    /// Gets the false positive rate.
953    pub fn false_positive_rate(&self) -> f64 {
954        self.false_positive_rate_in_10000 as f64 / 10000.0
955    }
956}
957
958impl Default for SkippingIndexOptions {
959    fn default() -> Self {
960        Self::new_unchecked(
961            DEFAULT_GRANULARITY,
962            DEFAULT_FALSE_POSITIVE_RATE,
963            SkippingIndexType::default(),
964        )
965    }
966}
967
968impl fmt::Display for SkippingIndexOptions {
969    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
970        write!(f, "granularity={}", self.granularity)?;
971        write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
972        write!(f, ", index_type={}", self.index_type)?;
973        Ok(())
974    }
975}
976
/// Skip index types.
///
/// Serde uses the variant name as written (`"BloomFilter"`); the `Display`
/// implementation prints `BLOOM` instead.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
pub enum SkippingIndexType {
    /// The only variant at present, and the `Default` one.
    #[default]
    BloomFilter,
}
983
984impl fmt::Display for SkippingIndexType {
985    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
986        match self {
987            SkippingIndexType::BloomFilter => write!(f, "BLOOM"),
988        }
989    }
990}
991
992impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
993    type Error = Error;
994
995    fn try_from(options: HashMap<String, String>) -> Result<Self> {
996        // Parse granularity with default value 1
997        let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
998            Some(value) => value
999                .parse::<u32>()
1000                .ok()
1001                .filter(|&v| v > 0)
1002                .ok_or_else(|| {
1003                    error::InvalidSkippingIndexOptionSnafu {
1004                        msg: format!("Invalid granularity: {value}, expected: positive integer"),
1005                    }
1006                    .build()
1007                })?,
1008            None => DEFAULT_GRANULARITY,
1009        };
1010
1011        // Parse false positive rate with default value 100
1012        let false_positive_rate =
1013            match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE) {
1014                Some(value) => value
1015                    .parse::<f64>()
1016                    .ok()
1017                    .filter(|&v| v > 0.0 && v <= 1.0)
1018                    .ok_or_else(|| {
1019                        error::InvalidSkippingIndexOptionSnafu {
1020                            msg: format!(
1021                                "Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
1022                            ),
1023                        }
1024                        .build()
1025                    })?,
1026                None => DEFAULT_FALSE_POSITIVE_RATE,
1027            };
1028
1029        // Parse index type with default value BloomFilter
1030        let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
1031            Some(typ) => match typ.to_ascii_uppercase().as_str() {
1032                "BLOOM" => SkippingIndexType::BloomFilter,
1033                _ => {
1034                    return error::InvalidSkippingIndexOptionSnafu {
1035                        msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"),
1036                    }
1037                    .fail();
1038                }
1039            },
1040            None => SkippingIndexType::default(),
1041        };
1042
1043        Ok(SkippingIndexOptions::new_unchecked(
1044            granularity,
1045            false_positive_rate,
1046            index_type,
1047        ))
1048    }
1049}
1050
/// Distance metric for vector similarity search.
///
/// With `rename_all = "lowercase"` serde uses the names `l2sq`, `cosine` and
/// `innerproduct`; `ip` is additionally accepted when deserializing `InnerProduct`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "lowercase")]
pub enum VectorDistanceMetric {
    /// Squared Euclidean distance (L2^2).
    ///
    /// This is the `Default` variant.
    #[default]
    L2sq,
    /// Cosine distance (1 - cosine similarity).
    Cosine,
    /// Inner product (negative, for maximum inner product search).
    #[serde(alias = "ip")]
    InnerProduct,
}
1064
1065impl fmt::Display for VectorDistanceMetric {
1066    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1067        match self {
1068            VectorDistanceMetric::L2sq => write!(f, "l2sq"),
1069            VectorDistanceMetric::Cosine => write!(f, "cosine"),
1070            VectorDistanceMetric::InnerProduct => write!(f, "ip"),
1071        }
1072    }
1073}
1074
1075impl std::str::FromStr for VectorDistanceMetric {
1076    type Err = String;
1077
1078    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
1079        match s.to_lowercase().as_str() {
1080            "l2sq" | "l2" | "euclidean" => Ok(VectorDistanceMetric::L2sq),
1081            "cosine" | "cos" => Ok(VectorDistanceMetric::Cosine),
1082            "inner_product" | "ip" | "dot" => Ok(VectorDistanceMetric::InnerProduct),
1083            _ => Err(format!(
1084                "Unknown distance metric: {}. Expected: l2sq, cosine, or ip",
1085                s
1086            )),
1087        }
1088    }
1089}
1090
1091impl VectorDistanceMetric {
1092    /// Returns the metric as u8 for blob serialization.
1093    pub fn as_u8(&self) -> u8 {
1094        match self {
1095            Self::L2sq => 0,
1096            Self::Cosine => 1,
1097            Self::InnerProduct => 2,
1098        }
1099    }
1100
1101    /// Parses metric from u8 (used when reading blob).
1102    pub fn try_from_u8(v: u8) -> Option<Self> {
1103        match v {
1104            0 => Some(Self::L2sq),
1105            1 => Some(Self::Cosine),
1106            2 => Some(Self::InnerProduct),
1107            _ => None,
1108        }
1109    }
1110}
1111
/// Default HNSW connectivity parameter.
///
/// Used by `VectorIndexOptions::default()` and returned by
/// `default_vector_index_connectivity`.
const DEFAULT_VECTOR_INDEX_CONNECTIVITY: u32 = 16;
/// Default expansion factor during index construction.
///
/// Used by `VectorIndexOptions::default()` and returned by
/// `default_vector_index_expansion_add`.
const DEFAULT_VECTOR_INDEX_EXPANSION_ADD: u32 = 128;
/// Default expansion factor during search.
///
/// Used by `VectorIndexOptions::default()` and returned by
/// `default_vector_index_expansion_search`.
const DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH: u32 = 64;
1118
/// Returns `DEFAULT_VECTOR_INDEX_CONNECTIVITY`.
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// `VectorIndexOptions::connectivity`, which needs a function path rather than a constant.
fn default_vector_index_connectivity() -> u32 {
    DEFAULT_VECTOR_INDEX_CONNECTIVITY
}
1122
/// Returns `DEFAULT_VECTOR_INDEX_EXPANSION_ADD`.
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// `VectorIndexOptions::expansion_add`.
fn default_vector_index_expansion_add() -> u32 {
    DEFAULT_VECTOR_INDEX_EXPANSION_ADD
}
1126
/// Returns `DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH`.
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// `VectorIndexOptions::expansion_search`.
fn default_vector_index_expansion_search() -> u32 {
    DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH
}
1130
/// Supported vector index engine types.
///
/// With `rename_all = "lowercase"` serde uses the name `usearch` for the only variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "lowercase")]
pub enum VectorIndexEngineType {
    /// USearch HNSW implementation.
    ///
    /// This is the `Default` variant.
    #[default]
    Usearch,
    // Future: Vsag,
}
1140
1141impl VectorIndexEngineType {
1142    /// Returns the engine type as u8 for blob serialization.
1143    pub fn as_u8(&self) -> u8 {
1144        match self {
1145            Self::Usearch => 0,
1146        }
1147    }
1148
1149    /// Parses engine type from u8 (used when reading blob).
1150    pub fn try_from_u8(v: u8) -> Option<Self> {
1151        match v {
1152            0 => Some(Self::Usearch),
1153            _ => None,
1154        }
1155    }
1156}
1157
1158impl fmt::Display for VectorIndexEngineType {
1159    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1160        match self {
1161            Self::Usearch => write!(f, "usearch"),
1162        }
1163    }
1164}
1165
1166impl std::str::FromStr for VectorIndexEngineType {
1167    type Err = String;
1168
1169    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
1170        match s.to_lowercase().as_str() {
1171            "usearch" => Ok(Self::Usearch),
1172            _ => Err(format!(
1173                "Unknown vector index engine: {}. Expected: usearch",
1174                s
1175            )),
1176        }
1177    }
1178}
1179
/// Options for vector index (HNSW).
///
/// Serde uses kebab-case keys (`engine`, `metric`, `connectivity`, `expansion-add`,
/// `expansion-search`). Every field carries a serde default, so any key may be omitted
/// when deserializing.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct VectorIndexOptions {
    /// Vector index engine type (default: usearch).
    #[serde(default)]
    pub engine: VectorIndexEngineType,
    /// Distance metric for similarity search.
    /// Falls back to `VectorDistanceMetric::default()` when omitted.
    #[serde(default)]
    pub metric: VectorDistanceMetric,
    /// HNSW connectivity parameter (M in the paper).
    /// Higher values improve recall but increase memory usage.
    /// Falls back to `default_vector_index_connectivity()` when omitted.
    #[serde(default = "default_vector_index_connectivity")]
    pub connectivity: u32,
    /// Expansion factor during index construction (ef_construction).
    /// Higher values improve index quality but slow down construction.
    /// Falls back to `default_vector_index_expansion_add()` when omitted.
    #[serde(default = "default_vector_index_expansion_add")]
    pub expansion_add: u32,
    /// Expansion factor during search (ef_search).
    /// Higher values improve recall but slow down search.
    /// Falls back to `default_vector_index_expansion_search()` when omitted.
    #[serde(default = "default_vector_index_expansion_search")]
    pub expansion_search: u32,
}
1203
1204impl Default for VectorIndexOptions {
1205    fn default() -> Self {
1206        Self {
1207            engine: VectorIndexEngineType::default(),
1208            metric: VectorDistanceMetric::default(),
1209            connectivity: DEFAULT_VECTOR_INDEX_CONNECTIVITY,
1210            expansion_add: DEFAULT_VECTOR_INDEX_EXPANSION_ADD,
1211            expansion_search: DEFAULT_VECTOR_INDEX_EXPANSION_SEARCH,
1212        }
1213    }
1214}
1215
1216impl fmt::Display for VectorIndexOptions {
1217    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1218        write!(
1219            f,
1220            "engine={}, metric={}, connectivity={}, expansion_add={}, expansion_search={}",
1221            self.engine, self.metric, self.connectivity, self.expansion_add, self.expansion_search
1222        )
1223    }
1224}
1225
// Unit tests for `ColumnSchema` conversions, default-value handling and the serde
// layout of the index option structs.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};

    use super::*;
    use crate::value::Value;
    use crate::vectors::Int32Vector;

    // A plain nullable Int32 column converts to an arrow `Field` and back unchanged.
    #[test]
    fn test_column_schema() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("test", field.name());
        assert_eq!(ArrowDataType::Int32, *field.data_type());
        assert!(field.is_nullable());

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    // The default constraint is absent from the schema's own metadata but shows up as
    // JSON in the arrow field metadata, and the conversion round-trips.
    #[test]
    fn test_column_schema_with_default_constraint() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::from(99))))
            .unwrap();
        assert!(
            column_schema
                .metadata()
                .get(DEFAULT_CONSTRAINT_KEY)
                .is_none()
        );

        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("test", field.name());
        assert_eq!(ArrowDataType::Int32, *field.data_type());
        assert!(field.is_nullable());
        assert_eq!(
            "{\"Value\":{\"Int32\":99}}",
            field.metadata().get(DEFAULT_CONSTRAINT_KEY).unwrap()
        );

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    // User metadata and the comment key survive the round trip alongside a default
    // constraint.
    #[test]
    fn test_column_schema_with_metadata() {
        let metadata = Metadata::from([
            ("k1".to_string(), "v1".to_string()),
            (COMMENT_KEY.to_string(), "test comment".to_string()),
        ]);
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
        assert_eq!("test comment", column_schema.column_comment().unwrap());
        assert!(
            column_schema
                .metadata()
                .get(DEFAULT_CONSTRAINT_KEY)
                .is_none()
        );

        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("v1", field.metadata().get("k1").unwrap());
        // Only the presence of the key matters here; `unwrap` panics if it is missing.
        let _ = field.metadata().get(DEFAULT_CONSTRAINT_KEY).unwrap();

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    // User metadata that already uses the default-constraint key makes the conversion
    // to an arrow `Field` fail.
    #[test]
    fn test_column_schema_with_duplicate_metadata() {
        let metadata = Metadata::from([(DEFAULT_CONSTRAINT_KEY.to_string(), "v1".to_string())]);
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert!(Field::try_from(&column_schema).is_err());
    }

    // A null default on a non-nullable column is rejected.
    #[test]
    fn test_column_schema_invalid_default_constraint() {
        assert!(
            ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false)
                .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
                .is_err()
        );
    }

    // A default constraint round-trips through its byte representation.
    #[test]
    fn test_column_default_constraint_try_into_from() {
        let default_constraint = ColumnDefaultConstraint::Value(Value::from(42i64));

        let bytes: Vec<u8> = default_constraint.clone().try_into().unwrap();
        let from_value = ColumnDefaultConstraint::try_from(&bytes[..]).unwrap();

        assert_eq!(default_constraint, from_value);
    }

    // Nullable columns produce an all-null default vector, with or without an explicit
    // null constraint.
    #[test]
    fn test_column_schema_create_default_null() {
        // Implicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let v = column_schema.create_default_vector(5).unwrap().unwrap();
        assert_eq!(5, v.len());
        assert!(v.only_null());

        // Explicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        let v = column_schema.create_default_vector(5).unwrap().unwrap();
        assert_eq!(5, v.len());
        assert!(v.only_null());
    }

    // A non-nullable column without a constraint has no default vector.
    #[test]
    fn test_column_schema_no_default() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
        assert!(column_schema.create_default_vector(5).unwrap().is_none());
    }

    // Padding vectors are all-null for nullable columns and zero-filled for a
    // non-nullable Int32 column.
    #[test]
    fn test_create_default_vector_for_padding() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let vector = column_schema.create_default_vector_for_padding(4);
        assert!(vector.only_null());
        assert_eq!(4, vector.len());

        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
        let vector = column_schema.create_default_vector_for_padding(4);
        assert_eq!(4, vector.len());
        let expect: VectorRef = Arc::new(Int32Vector::from_slice([0, 0, 0, 0]));
        assert_eq!(expect, vector);
    }

    // Single-value counterpart of `test_column_schema_create_default_null`.
    #[test]
    fn test_column_schema_single_create_default_null() {
        // Implicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let v = column_schema.create_default().unwrap().unwrap();
        assert!(v.is_null());

        // Explicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        let v = column_schema.create_default().unwrap().unwrap();
        assert!(v.is_null());
    }

    // An explicit value constraint is returned as the single default value.
    #[test]
    fn test_column_schema_single_create_default_not_null() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::Int32(6))))
            .unwrap();
        let v = column_schema.create_default().unwrap().unwrap();
        assert_eq!(v, Value::Int32(6));
    }

    // A non-nullable column without a constraint has no single default value.
    #[test]
    fn test_column_schema_single_no_default() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
        assert!(column_schema.create_default().unwrap().is_none());
    }

    // `Debug` output is `<name> <type> <null|not null>`.
    #[test]
    fn test_debug_for_column_schema() {
        let column_schema_int8 =
            ColumnSchema::new("test_column_1", ConcreteDataType::int8_datatype(), true);

        let column_schema_int32 =
            ColumnSchema::new("test_column_2", ConcreteDataType::int32_datatype(), false);

        let formatted_int8 = format!("{:?}", column_schema_int8);
        let formatted_int32 = format!("{:?}", column_schema_int32);
        assert_eq!(formatted_int8, "test_column_1 Int8 null");
        assert_eq!(formatted_int32, "test_column_2 Int32 not null");
    }

    // Arrow fields convert to column schemas; a `TYPE_KEY` metadata entry on a Binary
    // field selects the JSON or vector data type.
    #[test]
    fn test_from_field_to_column_schema() {
        let field = Field::new("test", ArrowDataType::Int32, true);
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
        assert!(column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert!(column_schema.metadata.is_empty());

        // Binary field tagged as JSON.
        let field = Field::new("test", ArrowDataType::Binary, true);
        let field = field.with_metadata(Metadata::from([(
            TYPE_KEY.to_string(),
            ConcreteDataType::json_datatype().name(),
        )]));
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(ConcreteDataType::json_datatype(), column_schema.data_type);
        assert!(column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert_eq!(
            column_schema.metadata.get(TYPE_KEY).unwrap(),
            &ConcreteDataType::json_datatype().name()
        );

        // Binary field tagged as a 3-dimensional vector.
        let field = Field::new("test", ArrowDataType::Binary, true);
        let field = field.with_metadata(Metadata::from([(
            TYPE_KEY.to_string(),
            ConcreteDataType::vector_datatype(3).name(),
        )]));
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(
            ConcreteDataType::vector_datatype(3),
            column_schema.data_type
        );
        assert!(column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert_eq!(
            column_schema.metadata.get(TYPE_KEY).unwrap(),
            &ConcreteDataType::vector_datatype(3).name()
        );
    }

    // The time-index metadata key is honored on a timestamp field; on an Int32 field the
    // resulting schema is not a time index and its metadata is empty.
    #[test]
    fn test_column_schema_fix_time_index() {
        let field = Field::new(
            "test",
            ArrowDataType::Timestamp(TimeUnit::Second, None),
            false,
        );
        let field = field.with_metadata(Metadata::from([(
            TIME_INDEX_KEY.to_string(),
            "true".to_string(),
        )]));
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(
            ConcreteDataType::timestamp_second_datatype(),
            column_schema.data_type
        );
        assert!(!column_schema.is_nullable);
        assert!(column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert_eq!(1, column_schema.metadata().len());

        let field = Field::new("test", ArrowDataType::Int32, false);
        let field = field.with_metadata(Metadata::from([(
            TIME_INDEX_KEY.to_string(),
            "true".to_string(),
        )]));
        let column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!("test", column_schema.name);
        assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
        assert!(!column_schema.is_nullable);
        assert!(!column_schema.is_time_index);
        assert!(column_schema.default_constraint.is_none());
        assert!(column_schema.metadata.is_empty());
    }

    // The JSON form of `SkippingIndexOptions` round-trips byte-for-byte.
    #[test]
    fn test_skipping_index_options_deserialization() {
        let original_options = "{\"granularity\":1024,\"false-positive-rate-in-10000\":10,\"index-type\":\"BloomFilter\"}";
        let options = serde_json::from_str::<SkippingIndexOptions>(original_options).unwrap();
        assert_eq!(1024, options.granularity);
        assert_eq!(SkippingIndexType::BloomFilter, options.index_type);
        assert_eq!(0.001, options.false_positive_rate());

        let options_str = serde_json::to_string(&options).unwrap();
        assert_eq!(options_str, original_options);
    }

    // JSON without the false-positive-rate key (the v0.14 layout, per the test name)
    // still deserializes, and the default rate is written back on serialization.
    #[test]
    fn test_skipping_index_options_deserialization_v0_14_to_v0_15() {
        let options = "{\"granularity\":10240,\"index-type\":\"BloomFilter\"}";
        let options = serde_json::from_str::<SkippingIndexOptions>(options).unwrap();
        assert_eq!(10240, options.granularity);
        assert_eq!(SkippingIndexType::BloomFilter, options.index_type);
        assert_eq!(DEFAULT_FALSE_POSITIVE_RATE, options.false_positive_rate());

        let options_str = serde_json::to_string(&options).unwrap();
        assert_eq!(
            options_str,
            "{\"granularity\":10240,\"false-positive-rate-in-10000\":100,\"index-type\":\"BloomFilter\"}"
        );
    }

    // The JSON form of `FulltextOptions` round-trips byte-for-byte.
    #[test]
    fn test_fulltext_options_deserialization() {
        let original_options = "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":1024,\"false-positive-rate-in-10000\":10}";
        let options = serde_json::from_str::<FulltextOptions>(original_options).unwrap();
        assert!(!options.case_sensitive);
        assert!(options.enable);
        assert_eq!(FulltextBackend::Bloom, options.backend);
        assert_eq!(FulltextAnalyzer::default(), options.analyzer);
        assert_eq!(1024, options.granularity);
        assert_eq!(0.001, options.false_positive_rate());

        let options_str = serde_json::to_string(&options).unwrap();
        assert_eq!(options_str, original_options);
    }

    // JSON without granularity and false positive rate deserializes with the defaults,
    // which are then written back on serialization.
    #[test]
    fn test_fulltext_options_deserialization_v0_14_to_v0_15() {
        // 0.14 to 0.15
        let options = "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}";
        let options = serde_json::from_str::<FulltextOptions>(options).unwrap();
        assert!(!options.case_sensitive);
        assert!(options.enable);
        assert_eq!(FulltextBackend::Bloom, options.backend);
        assert_eq!(FulltextAnalyzer::default(), options.analyzer);
        assert_eq!(DEFAULT_GRANULARITY, options.granularity);
        assert_eq!(DEFAULT_FALSE_POSITIVE_RATE, options.false_positive_rate());

        let options_str = serde_json::to_string(&options).unwrap();
        assert_eq!(
            options_str,
            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
        );
    }
}