datatypes/schema/
column_schema.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::fmt;
17use std::str::FromStr;
18
19use arrow::datatypes::Field;
20use serde::{Deserialize, Serialize};
21use snafu::{ensure, ResultExt};
22use sqlparser_derive::{Visit, VisitMut};
23
24use crate::data_type::{ConcreteDataType, DataType};
25use crate::error::{self, Error, InvalidFulltextOptionSnafu, ParseExtendedTypeSnafu, Result};
26use crate::schema::constraint::ColumnDefaultConstraint;
27use crate::schema::TYPE_KEY;
28use crate::value::Value;
29use crate::vectors::VectorRef;
30
/// Column-level metadata: string key/value pairs attached to a [`ColumnSchema`].
pub type Metadata = HashMap<String, String>;

/// Key used to store whether the column is time index in arrow field's metadata.
pub const TIME_INDEX_KEY: &str = "greptime:time_index";
/// Key under which the column comment is stored in the column metadata.
pub const COMMENT_KEY: &str = "greptime:storage:comment";
/// Key used to store default constraint in arrow field's metadata.
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
/// Key used to store fulltext options in arrow field's metadata.
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
/// Key used to store whether the column has inverted index in arrow field's metadata.
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
/// Key used to store skip options in arrow field's metadata.
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";

// Keys used in fulltext options.
/// Fulltext option key that toggles whether the fulltext index is enabled.
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
/// Fulltext option key that selects the analyzer.
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
/// Fulltext option key that toggles case sensitivity.
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
/// Fulltext option key that selects the backend.
pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend";

// Keys used in SKIPPING index options.
/// Skipping index option key that sets the granularity.
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
/// Skipping index option key that selects the index type.
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";

/// Granularity used for a skipping index when the options do not specify one.
pub const DEFAULT_GRANULARITY: u32 = 10240;
56
/// Schema of a column, used as an immutable struct.
///
/// Only `name` and `data_type` are public fields; the remaining state is
/// private to this module.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
    /// Name of the column.
    pub name: String,
    /// Data type of the column.
    pub data_type: ConcreteDataType,
    /// Whether the column is nullable.
    is_nullable: bool,
    /// Whether the column is the time index.
    is_time_index: bool,
    /// Default constraint of the column, if any.
    default_constraint: Option<ColumnDefaultConstraint>,
    /// Extra key/value metadata (e.g. comment and index options).
    metadata: Metadata,
}
67
68impl fmt::Debug for ColumnSchema {
69    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70        write!(
71            f,
72            "{} {} {}",
73            self.name,
74            self.data_type,
75            if self.is_nullable { "null" } else { "not null" },
76        )?;
77
78        if self.is_time_index {
79            write!(f, " time_index")?;
80        }
81
82        // Add default constraint if present
83        if let Some(default_constraint) = &self.default_constraint {
84            write!(f, " default={:?}", default_constraint)?;
85        }
86
87        // Add metadata if present
88        if !self.metadata.is_empty() {
89            write!(f, " metadata={:?}", self.metadata)?;
90        }
91
92        Ok(())
93    }
94}
95
96impl ColumnSchema {
97    pub fn new<T: Into<String>>(
98        name: T,
99        data_type: ConcreteDataType,
100        is_nullable: bool,
101    ) -> ColumnSchema {
102        ColumnSchema {
103            name: name.into(),
104            data_type,
105            is_nullable,
106            is_time_index: false,
107            default_constraint: None,
108            metadata: Metadata::new(),
109        }
110    }
111
    /// Returns whether the column is marked as the time index.
    #[inline]
    pub fn is_time_index(&self) -> bool {
        self.is_time_index
    }
116
    /// Returns whether the column is nullable.
    #[inline]
    pub fn is_nullable(&self) -> bool {
        self.is_nullable
    }
121
    /// Returns the default constraint of the column, or `None` if it has none.
    #[inline]
    pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
        self.default_constraint.as_ref()
    }
126
127    /// Check if the default constraint is a impure function.
128    pub fn is_default_impure(&self) -> bool {
129        self.default_constraint
130            .as_ref()
131            .map(|c| c.is_function())
132            .unwrap_or(false)
133    }
134
    /// Returns a shared reference to the column metadata.
    #[inline]
    pub fn metadata(&self) -> &Metadata {
        &self.metadata
    }
139
    /// Returns a mutable reference to the column metadata.
    #[inline]
    pub fn mut_metadata(&mut self) -> &mut Metadata {
        &mut self.metadata
    }
144
    /// Retrieves the column comment.
    ///
    /// The comment is read from the metadata entry stored under [`COMMENT_KEY`];
    /// `None` is returned when that entry is absent.
    pub fn column_comment(&self) -> Option<&String> {
        self.metadata.get(COMMENT_KEY)
    }
149
150    pub fn with_time_index(mut self, is_time_index: bool) -> Self {
151        self.is_time_index = is_time_index;
152        if is_time_index {
153            let _ = self
154                .metadata
155                .insert(TIME_INDEX_KEY.to_string(), "true".to_string());
156        } else {
157            let _ = self.metadata.remove(TIME_INDEX_KEY);
158        }
159        self
160    }
161
162    /// Set the inverted index for the column.
163    /// Similar to [with_inverted_index] but don't take the ownership.
164    ///
165    /// [with_inverted_index]: Self::with_inverted_index
166    pub fn set_inverted_index(&mut self, value: bool) {
167        match value {
168            true => {
169                self.metadata
170                    .insert(INVERTED_INDEX_KEY.to_string(), value.to_string());
171            }
172            false => {
173                self.metadata.remove(INVERTED_INDEX_KEY);
174            }
175        }
176    }
177
    /// Sets the inverted index for the column.
    /// Similar to [set_inverted_index] but takes the ownership and returns an owned value.
    ///
    /// [set_inverted_index]: Self::set_inverted_index
    pub fn with_inverted_index(mut self, value: bool) -> Self {
        // Delegate so both entry points update the metadata the same way.
        self.set_inverted_index(value);
        self
    }
186
187    pub fn is_inverted_indexed(&self) -> bool {
188        self.metadata
189            .get(INVERTED_INDEX_KEY)
190            .map(|v| v.eq_ignore_ascii_case("true"))
191            .unwrap_or(false)
192    }
193
194    pub fn is_fulltext_indexed(&self) -> bool {
195        self.fulltext_options()
196            .unwrap_or_default()
197            .map(|option| option.enable)
198            .unwrap_or_default()
199    }
200
201    pub fn is_skipping_indexed(&self) -> bool {
202        self.skipping_index_options().unwrap_or_default().is_some()
203    }
204
    /// Returns whether the metadata contains [`INVERTED_INDEX_KEY`], regardless
    /// of the value stored under it.
    pub fn has_inverted_index_key(&self) -> bool {
        self.metadata.contains_key(INVERTED_INDEX_KEY)
    }
208
209    /// Set default constraint.
210    ///
211    /// If a default constraint exists for the column, this method will
212    /// validate it against the column's data type and nullability.
213    pub fn with_default_constraint(
214        mut self,
215        default_constraint: Option<ColumnDefaultConstraint>,
216    ) -> Result<Self> {
217        if let Some(constraint) = &default_constraint {
218            constraint.validate(&self.data_type, self.is_nullable)?;
219        }
220
221        self.default_constraint = default_constraint;
222        Ok(self)
223    }
224
    /// Sets the nullability of the column to `true`.
    /// Similar to [set_nullable] but takes the ownership and returns an owned value.
    ///
    /// [set_nullable]: Self::set_nullable
    pub fn with_nullable_set(mut self) -> Self {
        self.is_nullable = true;
        self
    }
233
    /// Sets the nullability of the column to `true`.
    /// Similar to [with_nullable_set] but doesn't take the ownership.
    ///
    /// [with_nullable_set]: Self::with_nullable_set
    pub fn set_nullable(&mut self) {
        self.is_nullable = true;
    }
241
242    /// Set the `is_time_index` to `true` of the column.
243    /// Similar to [with_time_index] but don't take the ownership.
244    ///
245    /// [with_time_index]: Self::with_time_index
246    pub fn set_time_index(&mut self) {
247        self.is_time_index = true;
248    }
249
    /// Creates a new [`ColumnSchema`] with given metadata.
    ///
    /// The given map replaces the existing metadata entirely; it is not merged
    /// with entries that were already present.
    pub fn with_metadata(mut self, metadata: Metadata) -> Self {
        self.metadata = metadata;
        self
    }
255
256    /// Creates a vector with default value for this column.
257    ///
258    /// If the column is `NOT NULL` but doesn't has `DEFAULT` value supplied, returns `Ok(None)`.
259    pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
260        match &self.default_constraint {
261            Some(c) => c
262                .create_default_vector(&self.data_type, self.is_nullable, num_rows)
263                .map(Some),
264            None => {
265                if self.is_nullable {
266                    // No default constraint, use null as default value.
267                    // TODO(yingwen): Use NullVector once it supports setting logical type.
268                    ColumnDefaultConstraint::null_value()
269                        .create_default_vector(&self.data_type, self.is_nullable, num_rows)
270                        .map(Some)
271                } else {
272                    Ok(None)
273                }
274            }
275        }
276    }
277
278    /// Creates a vector for padding.
279    ///
280    /// This method always returns a vector since it uses [DataType::default_value]
281    /// to fill the vector. Callers should only use the created vector for padding
282    /// and never read its content.
283    pub fn create_default_vector_for_padding(&self, num_rows: usize) -> VectorRef {
284        let padding_value = if self.is_nullable {
285            Value::Null
286        } else {
287            // If the column is not null, use the data type's default value as it is
288            // more efficient to acquire.
289            self.data_type.default_value()
290        };
291        let value_ref = padding_value.as_value_ref();
292        let mut mutable_vector = self.data_type.create_mutable_vector(num_rows);
293        for _ in 0..num_rows {
294            mutable_vector.push_value_ref(value_ref);
295        }
296        mutable_vector.to_vector()
297    }
298
299    /// Creates a default value for this column.
300    ///
301    /// If the column is `NOT NULL` but doesn't has `DEFAULT` value supplied, returns `Ok(None)`.
302    pub fn create_default(&self) -> Result<Option<Value>> {
303        match &self.default_constraint {
304            Some(c) => c
305                .create_default(&self.data_type, self.is_nullable)
306                .map(Some),
307            None => {
308                if self.is_nullable {
309                    // No default constraint, use null as default value.
310                    ColumnDefaultConstraint::null_value()
311                        .create_default(&self.data_type, self.is_nullable)
312                        .map(Some)
313                } else {
314                    Ok(None)
315                }
316            }
317        }
318    }
319
320    /// Creates an impure default value for this column, only if it have a impure default constraint.
321    /// Otherwise, returns `Ok(None)`.
322    pub fn create_impure_default(&self) -> Result<Option<Value>> {
323        match &self.default_constraint {
324            Some(c) => c.create_impure_default(&self.data_type),
325            None => Ok(None),
326        }
327    }
328
329    /// Retrieves the fulltext options for the column.
330    pub fn fulltext_options(&self) -> Result<Option<FulltextOptions>> {
331        match self.metadata.get(FULLTEXT_KEY) {
332            None => Ok(None),
333            Some(json) => {
334                let options =
335                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
336                Ok(Some(options))
337            }
338        }
339    }
340
341    pub fn with_fulltext_options(mut self, options: FulltextOptions) -> Result<Self> {
342        self.metadata.insert(
343            FULLTEXT_KEY.to_string(),
344            serde_json::to_string(&options).context(error::SerializeSnafu)?,
345        );
346        Ok(self)
347    }
348
349    pub fn set_fulltext_options(&mut self, options: &FulltextOptions) -> Result<()> {
350        self.metadata.insert(
351            FULLTEXT_KEY.to_string(),
352            serde_json::to_string(options).context(error::SerializeSnafu)?,
353        );
354        Ok(())
355    }
356
357    /// Retrieves the skipping index options for the column.
358    pub fn skipping_index_options(&self) -> Result<Option<SkippingIndexOptions>> {
359        match self.metadata.get(SKIPPING_INDEX_KEY) {
360            None => Ok(None),
361            Some(json) => {
362                let options =
363                    serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
364                Ok(Some(options))
365            }
366        }
367    }
368
369    pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result<Self> {
370        self.metadata.insert(
371            SKIPPING_INDEX_KEY.to_string(),
372            serde_json::to_string(&options).context(error::SerializeSnafu)?,
373        );
374        Ok(self)
375    }
376
377    pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> {
378        self.metadata.insert(
379            SKIPPING_INDEX_KEY.to_string(),
380            serde_json::to_string(options).context(error::SerializeSnafu)?,
381        );
382        Ok(())
383    }
384
    /// Removes the skipping index options from the metadata.
    ///
    /// Removing an absent entry is not an error; as written this method always
    /// returns `Ok(())`.
    pub fn unset_skipping_options(&mut self) -> Result<()> {
        self.metadata.remove(SKIPPING_INDEX_KEY);
        Ok(())
    }
389}
390
/// Column extended type set in column schema's metadata.
///
/// The text produced by the `Display` impl (`Json`, `Vector(<dim>)`) is the
/// same text the `FromStr` impl accepts, so the two round-trip.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnExtType {
    /// Json type.
    Json,

    /// Vector type with dimension.
    Vector(u32),
}

impl fmt::Display for ColumnExtType {
    /// Writes `Json` or `Vector(<dim>)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Self::Json => f.write_str("Json"),
            Self::Vector(dim) => write!(f, "Vector({})", dim),
        }
    }
}

impl FromStr for ColumnExtType {
    type Err = String;

    /// Parses `Json` or `Vector(<dim>)`, where `<dim>` must parse as a `u32`.
    ///
    /// Matching is exact: no trimming and no case folding is performed.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        if s == "Json" {
            return Ok(Self::Json);
        }

        // Text between `Vector(` and the closing `)`, if `s` has that shape.
        let dimension = s
            .strip_prefix("Vector(")
            .and_then(|rest| rest.strip_suffix(')'));
        match dimension {
            Some(text) => text
                .parse::<u32>()
                .map(Self::Vector)
                .map_err(|_| "Invalid dimension for Vector".to_string()),
            None => Err("Unknown variant".to_string()),
        }
    }
}
424
425impl TryFrom<&Field> for ColumnSchema {
426    type Error = Error;
427
428    fn try_from(field: &Field) -> Result<ColumnSchema> {
429        let mut data_type = ConcreteDataType::try_from(field.data_type())?;
430        // Override the data type if it is specified in the metadata.
431        if let Some(s) = field.metadata().get(TYPE_KEY) {
432            let extype = ColumnExtType::from_str(s)
433                .map_err(|_| ParseExtendedTypeSnafu { value: s }.build())?;
434            match extype {
435                ColumnExtType::Json => {
436                    data_type = ConcreteDataType::json_datatype();
437                }
438                ColumnExtType::Vector(dim) => {
439                    data_type = ConcreteDataType::vector_datatype(dim);
440                }
441            }
442        }
443        let mut metadata = field.metadata().clone();
444        let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
445            Some(json) => {
446                Some(serde_json::from_str(&json).context(error::DeserializeSnafu { json })?)
447            }
448            None => None,
449        };
450        let mut is_time_index = metadata.contains_key(TIME_INDEX_KEY);
451        if is_time_index && !data_type.is_timestamp() {
452            // If the column is time index but the data type is not timestamp, it is invalid.
453            // We set the time index to false and remove the metadata.
454            // This is possible if we cast the time index column to another type. DataFusion will
455            // keep the metadata:
456            // https://github.com/apache/datafusion/pull/12951
457            is_time_index = false;
458            metadata.remove(TIME_INDEX_KEY);
459            common_telemetry::debug!(
460                "Column {} is not timestamp ({:?}) but has time index metadata",
461                data_type,
462                field.name(),
463            );
464        }
465
466        Ok(ColumnSchema {
467            name: field.name().clone(),
468            data_type,
469            is_nullable: field.is_nullable(),
470            is_time_index,
471            default_constraint,
472            metadata,
473        })
474    }
475}
476
477impl TryFrom<&ColumnSchema> for Field {
478    type Error = Error;
479
480    fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
481        let mut metadata = column_schema.metadata.clone();
482        if let Some(value) = &column_schema.default_constraint {
483            // Adds an additional metadata to store the default constraint.
484            let old = metadata.insert(
485                DEFAULT_CONSTRAINT_KEY.to_string(),
486                serde_json::to_string(&value).context(error::SerializeSnafu)?,
487            );
488
489            ensure!(
490                old.is_none(),
491                error::DuplicateMetaSnafu {
492                    key: DEFAULT_CONSTRAINT_KEY,
493                }
494            );
495        }
496
497        Ok(Field::new(
498            &column_schema.name,
499            column_schema.data_type.as_arrow_type(),
500            column_schema.is_nullable(),
501        )
502        .with_metadata(metadata))
503    }
504}
505
/// Fulltext options for a column.
///
/// Field names are serialized in kebab-case, so `case_sensitive` appears as
/// `case-sensitive` in the serialized form. Every field except `enable` falls
/// back to its default when missing during deserialization.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct FulltextOptions {
    /// Whether the fulltext index is enabled.
    pub enable: bool,
    /// The fulltext analyzer to use.
    #[serde(default)]
    pub analyzer: FulltextAnalyzer,
    /// Whether the fulltext index is case-sensitive.
    #[serde(default)]
    pub case_sensitive: bool,
    /// The fulltext backend to use.
    #[serde(default)]
    pub backend: FulltextBackend,
}
522
523impl fmt::Display for FulltextOptions {
524    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
525        write!(f, "enable={}", self.enable)?;
526        if self.enable {
527            write!(f, ", analyzer={}", self.analyzer)?;
528            write!(f, ", case_sensitive={}", self.case_sensitive)?;
529            write!(f, ", backend={}", self.backend)?;
530        }
531        Ok(())
532    }
533}
534
/// The backend of the fulltext index.
///
/// Variant names are serialized in kebab-case (`bloom`, `tantivy`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub enum FulltextBackend {
    /// Bloom backend; this is the default variant.
    #[default]
    Bloom,
    /// Tantivy backend.
    Tantivy,
}
543
544impl fmt::Display for FulltextBackend {
545    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
546        match self {
547            FulltextBackend::Tantivy => write!(f, "tantivy"),
548            FulltextBackend::Bloom => write!(f, "bloom"),
549        }
550    }
551}
552
553impl TryFrom<HashMap<String, String>> for FulltextOptions {
554    type Error = Error;
555
556    fn try_from(options: HashMap<String, String>) -> Result<Self> {
557        let mut fulltext_options = FulltextOptions {
558            enable: true,
559            ..Default::default()
560        };
561
562        if let Some(enable) = options.get(COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE) {
563            match enable.to_ascii_lowercase().as_str() {
564                "true" => fulltext_options.enable = true,
565                "false" => fulltext_options.enable = false,
566                _ => {
567                    return InvalidFulltextOptionSnafu {
568                        msg: format!("{enable}, expected: 'true' | 'false'"),
569                    }
570                    .fail();
571                }
572            }
573        };
574
575        if let Some(analyzer) = options.get(COLUMN_FULLTEXT_OPT_KEY_ANALYZER) {
576            match analyzer.to_ascii_lowercase().as_str() {
577                "english" => fulltext_options.analyzer = FulltextAnalyzer::English,
578                "chinese" => fulltext_options.analyzer = FulltextAnalyzer::Chinese,
579                _ => {
580                    return InvalidFulltextOptionSnafu {
581                        msg: format!("{analyzer}, expected: 'English' | 'Chinese'"),
582                    }
583                    .fail();
584                }
585            }
586        };
587
588        if let Some(case_sensitive) = options.get(COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE) {
589            match case_sensitive.to_ascii_lowercase().as_str() {
590                "true" => fulltext_options.case_sensitive = true,
591                "false" => fulltext_options.case_sensitive = false,
592                _ => {
593                    return InvalidFulltextOptionSnafu {
594                        msg: format!("{case_sensitive}, expected: 'true' | 'false'"),
595                    }
596                    .fail();
597                }
598            }
599        }
600
601        if let Some(backend) = options.get(COLUMN_FULLTEXT_OPT_KEY_BACKEND) {
602            match backend.to_ascii_lowercase().as_str() {
603                "bloom" => fulltext_options.backend = FulltextBackend::Bloom,
604                "tantivy" => fulltext_options.backend = FulltextBackend::Tantivy,
605                _ => {
606                    return InvalidFulltextOptionSnafu {
607                        msg: format!("{backend}, expected: 'bloom' | 'tantivy'"),
608                    }
609                    .fail();
610                }
611            }
612        }
613
614        Ok(fulltext_options)
615    }
616}
617
/// Fulltext analyzer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
pub enum FulltextAnalyzer {
    /// English analyzer; this is the default variant.
    #[default]
    English,
    /// Chinese analyzer.
    Chinese,
}
625
626impl fmt::Display for FulltextAnalyzer {
627    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
628        match self {
629            FulltextAnalyzer::English => write!(f, "English"),
630            FulltextAnalyzer::Chinese => write!(f, "Chinese"),
631        }
632    }
633}
634
/// Skipping options for a column.
///
/// Field names are serialized in kebab-case, so `index_type` appears as
/// `index-type` in the serialized form.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct SkippingIndexOptions {
    /// The granularity of the skip index.
    pub granularity: u32,
    /// The type of the skip index.
    // Falls back to the default index type when missing during deserialization.
    #[serde(default)]
    pub index_type: SkippingIndexType,
}
645
646impl Default for SkippingIndexOptions {
647    fn default() -> Self {
648        Self {
649            granularity: DEFAULT_GRANULARITY,
650            index_type: SkippingIndexType::default(),
651        }
652    }
653}
654
655impl fmt::Display for SkippingIndexOptions {
656    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
657        write!(f, "granularity={}", self.granularity)?;
658        write!(f, ", index_type={}", self.index_type)?;
659        Ok(())
660    }
661}
662
/// Skip index types.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
pub enum SkippingIndexType {
    /// Bloom filter index; this is the default (and currently only) variant.
    #[default]
    BloomFilter,
}
669
670impl fmt::Display for SkippingIndexType {
671    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
672        match self {
673            SkippingIndexType::BloomFilter => write!(f, "BLOOM"),
674        }
675    }
676}
677
678impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
679    type Error = Error;
680
681    fn try_from(options: HashMap<String, String>) -> Result<Self> {
682        // Parse granularity with default value 1
683        let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
684            Some(value) => value.parse::<u32>().map_err(|_| {
685                error::InvalidSkippingIndexOptionSnafu {
686                    msg: format!("Invalid granularity: {value}, expected: positive integer"),
687                }
688                .build()
689            })?,
690            None => DEFAULT_GRANULARITY,
691        };
692
693        // Parse index type with default value BloomFilter
694        let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
695            Some(typ) => match typ.to_ascii_uppercase().as_str() {
696                "BLOOM" => SkippingIndexType::BloomFilter,
697                _ => {
698                    return error::InvalidSkippingIndexOptionSnafu {
699                        msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"),
700                    }
701                    .fail();
702                }
703            },
704            None => SkippingIndexType::default(),
705        };
706
707        Ok(SkippingIndexOptions {
708            granularity,
709            index_type,
710        })
711    }
712}
713
714#[cfg(test)]
715mod tests {
716    use std::sync::Arc;
717
718    use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
719
720    use super::*;
721    use crate::value::Value;
722    use crate::vectors::Int32Vector;
723
724    #[test]
725    fn test_column_schema() {
726        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
727        let field = Field::try_from(&column_schema).unwrap();
728        assert_eq!("test", field.name());
729        assert_eq!(ArrowDataType::Int32, *field.data_type());
730        assert!(field.is_nullable());
731
732        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
733        assert_eq!(column_schema, new_column_schema);
734    }
735
736    #[test]
737    fn test_column_schema_with_default_constraint() {
738        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
739            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::from(99))))
740            .unwrap();
741        assert!(column_schema
742            .metadata()
743            .get(DEFAULT_CONSTRAINT_KEY)
744            .is_none());
745
746        let field = Field::try_from(&column_schema).unwrap();
747        assert_eq!("test", field.name());
748        assert_eq!(ArrowDataType::Int32, *field.data_type());
749        assert!(field.is_nullable());
750        assert_eq!(
751            "{\"Value\":{\"Int32\":99}}",
752            field.metadata().get(DEFAULT_CONSTRAINT_KEY).unwrap()
753        );
754
755        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
756        assert_eq!(column_schema, new_column_schema);
757    }
758
759    #[test]
760    fn test_column_schema_with_metadata() {
761        let metadata = Metadata::from([
762            ("k1".to_string(), "v1".to_string()),
763            (COMMENT_KEY.to_string(), "test comment".to_string()),
764        ]);
765        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
766            .with_metadata(metadata)
767            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
768            .unwrap();
769        assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
770        assert_eq!("test comment", column_schema.column_comment().unwrap());
771        assert!(column_schema
772            .metadata()
773            .get(DEFAULT_CONSTRAINT_KEY)
774            .is_none());
775
776        let field = Field::try_from(&column_schema).unwrap();
777        assert_eq!("v1", field.metadata().get("k1").unwrap());
778        let _ = field.metadata().get(DEFAULT_CONSTRAINT_KEY).unwrap();
779
780        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
781        assert_eq!(column_schema, new_column_schema);
782    }
783
784    #[test]
785    fn test_column_schema_with_duplicate_metadata() {
786        let metadata = Metadata::from([(DEFAULT_CONSTRAINT_KEY.to_string(), "v1".to_string())]);
787        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
788            .with_metadata(metadata)
789            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
790            .unwrap();
791        assert!(Field::try_from(&column_schema).is_err());
792    }
793
794    #[test]
795    fn test_column_schema_invalid_default_constraint() {
796        assert!(
797            ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false)
798                .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
799                .is_err()
800        );
801    }
802
803    #[test]
804    fn test_column_default_constraint_try_into_from() {
805        let default_constraint = ColumnDefaultConstraint::Value(Value::from(42i64));
806
807        let bytes: Vec<u8> = default_constraint.clone().try_into().unwrap();
808        let from_value = ColumnDefaultConstraint::try_from(&bytes[..]).unwrap();
809
810        assert_eq!(default_constraint, from_value);
811    }
812
813    #[test]
814    fn test_column_schema_create_default_null() {
815        // Implicit default null.
816        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
817        let v = column_schema.create_default_vector(5).unwrap().unwrap();
818        assert_eq!(5, v.len());
819        assert!(v.only_null());
820
821        // Explicit default null.
822        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
823            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
824            .unwrap();
825        let v = column_schema.create_default_vector(5).unwrap().unwrap();
826        assert_eq!(5, v.len());
827        assert!(v.only_null());
828    }
829
830    #[test]
831    fn test_column_schema_no_default() {
832        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
833        assert!(column_schema.create_default_vector(5).unwrap().is_none());
834    }
835
836    #[test]
837    fn test_create_default_vector_for_padding() {
838        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
839        let vector = column_schema.create_default_vector_for_padding(4);
840        assert!(vector.only_null());
841        assert_eq!(4, vector.len());
842
843        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
844        let vector = column_schema.create_default_vector_for_padding(4);
845        assert_eq!(4, vector.len());
846        let expect: VectorRef = Arc::new(Int32Vector::from_slice([0, 0, 0, 0]));
847        assert_eq!(expect, vector);
848    }
849
850    #[test]
851    fn test_column_schema_single_create_default_null() {
852        // Implicit default null.
853        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
854        let v = column_schema.create_default().unwrap().unwrap();
855        assert!(v.is_null());
856
857        // Explicit default null.
858        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
859            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
860            .unwrap();
861        let v = column_schema.create_default().unwrap().unwrap();
862        assert!(v.is_null());
863    }
864
865    #[test]
866    fn test_column_schema_single_create_default_not_null() {
867        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
868            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::Int32(6))))
869            .unwrap();
870        let v = column_schema.create_default().unwrap().unwrap();
871        assert_eq!(v, Value::Int32(6));
872    }
873
874    #[test]
875    fn test_column_schema_single_no_default() {
876        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
877        assert!(column_schema.create_default().unwrap().is_none());
878    }
879
880    #[test]
881    fn test_debug_for_column_schema() {
882        let column_schema_int8 =
883            ColumnSchema::new("test_column_1", ConcreteDataType::int8_datatype(), true);
884
885        let column_schema_int32 =
886            ColumnSchema::new("test_column_2", ConcreteDataType::int32_datatype(), false);
887
888        let formatted_int8 = format!("{:?}", column_schema_int8);
889        let formatted_int32 = format!("{:?}", column_schema_int32);
890        assert_eq!(formatted_int8, "test_column_1 Int8 null");
891        assert_eq!(formatted_int32, "test_column_2 Int32 not null");
892    }
893
894    #[test]
895    fn test_from_field_to_column_schema() {
896        let field = Field::new("test", ArrowDataType::Int32, true);
897        let column_schema = ColumnSchema::try_from(&field).unwrap();
898        assert_eq!("test", column_schema.name);
899        assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
900        assert!(column_schema.is_nullable);
901        assert!(!column_schema.is_time_index);
902        assert!(column_schema.default_constraint.is_none());
903        assert!(column_schema.metadata.is_empty());
904
905        let field = Field::new("test", ArrowDataType::Binary, true);
906        let field = field.with_metadata(Metadata::from([(
907            TYPE_KEY.to_string(),
908            ConcreteDataType::json_datatype().name(),
909        )]));
910        let column_schema = ColumnSchema::try_from(&field).unwrap();
911        assert_eq!("test", column_schema.name);
912        assert_eq!(ConcreteDataType::json_datatype(), column_schema.data_type);
913        assert!(column_schema.is_nullable);
914        assert!(!column_schema.is_time_index);
915        assert!(column_schema.default_constraint.is_none());
916        assert_eq!(
917            column_schema.metadata.get(TYPE_KEY).unwrap(),
918            &ConcreteDataType::json_datatype().name()
919        );
920
921        let field = Field::new("test", ArrowDataType::Binary, true);
922        let field = field.with_metadata(Metadata::from([(
923            TYPE_KEY.to_string(),
924            ConcreteDataType::vector_datatype(3).name(),
925        )]));
926        let column_schema = ColumnSchema::try_from(&field).unwrap();
927        assert_eq!("test", column_schema.name);
928        assert_eq!(
929            ConcreteDataType::vector_datatype(3),
930            column_schema.data_type
931        );
932        assert!(column_schema.is_nullable);
933        assert!(!column_schema.is_time_index);
934        assert!(column_schema.default_constraint.is_none());
935        assert_eq!(
936            column_schema.metadata.get(TYPE_KEY).unwrap(),
937            &ConcreteDataType::vector_datatype(3).name()
938        );
939    }
940
941    #[test]
942    fn test_column_schema_fix_time_index() {
943        let field = Field::new(
944            "test",
945            ArrowDataType::Timestamp(TimeUnit::Second, None),
946            false,
947        );
948        let field = field.with_metadata(Metadata::from([(
949            TIME_INDEX_KEY.to_string(),
950            "true".to_string(),
951        )]));
952        let column_schema = ColumnSchema::try_from(&field).unwrap();
953        assert_eq!("test", column_schema.name);
954        assert_eq!(
955            ConcreteDataType::timestamp_second_datatype(),
956            column_schema.data_type
957        );
958        assert!(!column_schema.is_nullable);
959        assert!(column_schema.is_time_index);
960        assert!(column_schema.default_constraint.is_none());
961        assert_eq!(1, column_schema.metadata().len());
962
963        let field = Field::new("test", ArrowDataType::Int32, false);
964        let field = field.with_metadata(Metadata::from([(
965            TIME_INDEX_KEY.to_string(),
966            "true".to_string(),
967        )]));
968        let column_schema = ColumnSchema::try_from(&field).unwrap();
969        assert_eq!("test", column_schema.name);
970        assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
971        assert!(!column_schema.is_nullable);
972        assert!(!column_schema.is_time_index);
973        assert!(column_schema.default_constraint.is_none());
974        assert!(column_schema.metadata.is_empty());
975    }
976}