datatypes/types/
string_type.rs1use std::sync::Arc;
16
17use arrow::datatypes::DataType as ArrowDataType;
18use common_base::bytes::StringBytes;
19use serde::{Deserialize, Serialize};
20
21use crate::data_type::{DataType, DataTypeRef};
22use crate::type_id::LogicalTypeId;
23use crate::value::Value;
24use crate::vectors::{MutableVector, StringVectorBuilder};
25
26#[derive(
28 Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default,
29)]
30pub enum StringSizeType {
31 #[default]
33 Utf8,
34 LargeUtf8,
36}
37
38#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
39pub struct StringType {
40 #[serde(default)]
41 size_type: StringSizeType,
42}
43
44impl<'de> serde::Deserialize<'de> for StringType {
46 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
47 where
48 D: serde::Deserializer<'de>,
49 {
50 #[derive(serde::Deserialize)]
51 struct Helper {
52 #[serde(default)]
53 size_type: StringSizeType,
54 }
55
56 let opt = Option::<Helper>::deserialize(deserializer)?;
57 Ok(match opt {
58 Some(helper) => Self {
59 size_type: helper.size_type,
60 },
61 None => Self::default(),
62 })
63 }
64}
65
66impl Default for StringType {
67 fn default() -> Self {
68 Self {
69 size_type: StringSizeType::Utf8,
70 }
71 }
72}
73
74impl StringType {
75 pub fn new() -> Self {
77 Self {
78 size_type: StringSizeType::Utf8,
79 }
80 }
81
82 pub fn with_size(size_type: StringSizeType) -> Self {
84 Self { size_type }
85 }
86
87 pub fn utf8() -> Self {
89 Self::with_size(StringSizeType::Utf8)
90 }
91
92 pub fn large_utf8() -> Self {
94 Self::with_size(StringSizeType::LargeUtf8)
95 }
96
97 pub fn size_type(&self) -> StringSizeType {
99 self.size_type
100 }
101
102 pub fn is_large(&self) -> bool {
104 matches!(self.size_type, StringSizeType::LargeUtf8)
105 }
106
107 pub fn arc() -> DataTypeRef {
108 Arc::new(Self::new())
109 }
110
111 pub fn large_arc() -> DataTypeRef {
112 Arc::new(Self::large_utf8())
113 }
114}
115
116impl DataType for StringType {
117 fn name(&self) -> String {
118 "String".to_string()
119 }
120
121 fn logical_type_id(&self) -> LogicalTypeId {
122 LogicalTypeId::String
123 }
124
125 fn default_value(&self) -> Value {
126 StringBytes::default().into()
127 }
128
129 fn as_arrow_type(&self) -> ArrowDataType {
130 match self.size_type {
131 StringSizeType::Utf8 => ArrowDataType::Utf8,
132 StringSizeType::LargeUtf8 => ArrowDataType::LargeUtf8,
133 }
134 }
135
136 fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
137 match self.size_type {
138 StringSizeType::Utf8 => Box::new(StringVectorBuilder::with_string_capacity(capacity)),
139 StringSizeType::LargeUtf8 => {
140 Box::new(StringVectorBuilder::with_large_capacity(capacity))
141 }
142 }
143 }
144
145 fn try_cast(&self, from: Value) -> Option<Value> {
146 if from.logical_type_id() == self.logical_type_id() {
147 return Some(from);
148 }
149
150 match from {
151 Value::Null => Some(Value::String(StringBytes::from("null".to_string()))),
152
153 Value::Boolean(v) => Some(Value::String(StringBytes::from(v.to_string()))),
154 Value::UInt8(v) => Some(Value::String(StringBytes::from(v.to_string()))),
155 Value::UInt16(v) => Some(Value::String(StringBytes::from(v.to_string()))),
156 Value::UInt32(v) => Some(Value::String(StringBytes::from(v.to_string()))),
157 Value::UInt64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
158 Value::Int8(v) => Some(Value::String(StringBytes::from(v.to_string()))),
159 Value::Int16(v) => Some(Value::String(StringBytes::from(v.to_string()))),
160 Value::Int32(v) => Some(Value::String(StringBytes::from(v.to_string()))),
161 Value::Int64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
162 Value::Float32(v) => Some(Value::String(StringBytes::from(v.to_string()))),
163 Value::Float64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
164 Value::String(v) => Some(Value::String(v)),
165 Value::Date(v) => Some(Value::String(StringBytes::from(v.to_string()))),
166 Value::Timestamp(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
167 Value::Time(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
168 Value::IntervalYearMonth(v) => {
169 Some(Value::String(StringBytes::from(v.to_iso8601_string())))
170 }
171 Value::IntervalDayTime(v) => {
172 Some(Value::String(StringBytes::from(v.to_iso8601_string())))
173 }
174 Value::IntervalMonthDayNano(v) => {
175 Some(Value::String(StringBytes::from(v.to_iso8601_string())))
176 }
177 Value::Duration(v) => Some(Value::String(StringBytes::from(v.to_string()))),
178 Value::Decimal128(v) => Some(Value::String(StringBytes::from(v.to_string()))),
179
180 Value::Json(v) => self.try_cast(*v),
181
182 Value::Binary(_) | Value::List(_) | Value::Struct(_) => None,
184 }
185 }
186}