mito2/
sst.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Sorted strings tables.
16
17use std::sync::Arc;
18
19use api::v1::SemanticType;
20use common_base::readable_size::ReadableSize;
21use datatypes::arrow::datatypes::{
22    DataType as ArrowDataType, Field, FieldRef, Fields, Schema, SchemaRef,
23};
24use store_api::metadata::RegionMetadata;
25use store_api::storage::consts::{
26    OP_TYPE_COLUMN_NAME, PRIMARY_KEY_COLUMN_NAME, SEQUENCE_COLUMN_NAME,
27};
28
29pub mod file;
30pub mod file_purger;
31pub mod index;
32pub mod location;
33pub mod parquet;
34pub(crate) mod version;
35
36/// Default write buffer size. It should be greater than the default minimum upload part size of S3 (5 MB).
37pub const DEFAULT_WRITE_BUFFER_SIZE: ReadableSize = ReadableSize::mb(8);
38
39/// Default number of concurrent writes. It only works on object store backends (e.g., S3).
40pub const DEFAULT_WRITE_CONCURRENCY: usize = 8;
41
42/// Gets the arrow schema to store in parquet.
43pub fn to_sst_arrow_schema(metadata: &RegionMetadata) -> SchemaRef {
44    let fields = Fields::from_iter(
45        metadata
46            .schema
47            .arrow_schema()
48            .fields()
49            .iter()
50            .zip(&metadata.column_metadatas)
51            .filter_map(|(field, column_meta)| {
52                if column_meta.semantic_type == SemanticType::Field {
53                    Some(field.clone())
54                } else {
55                    // We have fixed positions for tags (primary key) and time index.
56                    None
57                }
58            })
59            .chain([metadata.time_index_field()])
60            .chain(internal_fields()),
61    );
62
63    Arc::new(Schema::new(fields))
64}
65
66/// Fields for internal columns.
67fn internal_fields() -> [FieldRef; 3] {
68    // Internal columns are always not null.
69    [
70        Arc::new(Field::new_dictionary(
71            PRIMARY_KEY_COLUMN_NAME,
72            ArrowDataType::UInt32,
73            ArrowDataType::Binary,
74            false,
75        )),
76        Arc::new(Field::new(
77            SEQUENCE_COLUMN_NAME,
78            ArrowDataType::UInt64,
79            false,
80        )),
81        Arc::new(Field::new(OP_TYPE_COLUMN_NAME, ArrowDataType::UInt8, false)),
82    ]
83}