datatypes/types/
string_type.rs1use std::sync::Arc;
16
17use arrow::datatypes::DataType as ArrowDataType;
18use common_base::bytes::StringBytes;
19use serde::{Deserialize, Serialize};
20
21use crate::data_type::{DataType, DataTypeRef};
22use crate::type_id::LogicalTypeId;
23use crate::value::Value;
24use crate::vectors::{MutableVector, StringVectorBuilder};
25
26#[derive(
28 Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default,
29)]
30pub enum StringSizeType {
31 #[default]
33 Utf8,
34 LargeUtf8,
36 Utf8View,
38}
39
40#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
41pub struct StringType {
42 #[serde(default)]
43 size_type: StringSizeType,
44}
45
46impl<'de> serde::Deserialize<'de> for StringType {
48 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
49 where
50 D: serde::Deserializer<'de>,
51 {
52 #[derive(serde::Deserialize)]
53 struct Helper {
54 #[serde(default)]
55 size_type: StringSizeType,
56 }
57
58 let opt = Option::<Helper>::deserialize(deserializer)?;
59 Ok(match opt {
60 Some(helper) => Self {
61 size_type: helper.size_type,
62 },
63 None => Self::default(),
64 })
65 }
66}
67
68impl Default for StringType {
69 fn default() -> Self {
70 Self {
71 size_type: StringSizeType::Utf8,
72 }
73 }
74}
75
76impl StringType {
77 pub fn new() -> Self {
79 Self {
80 size_type: StringSizeType::Utf8,
81 }
82 }
83
84 pub fn with_size(size_type: StringSizeType) -> Self {
86 Self { size_type }
87 }
88
89 pub fn utf8() -> Self {
91 Self::with_size(StringSizeType::Utf8)
92 }
93
94 pub fn large_utf8() -> Self {
96 Self::with_size(StringSizeType::LargeUtf8)
97 }
98
99 pub fn utf8_view() -> Self {
101 Self::with_size(StringSizeType::Utf8View)
102 }
103
104 pub fn size_type(&self) -> StringSizeType {
106 self.size_type
107 }
108
109 pub fn is_large(&self) -> bool {
111 matches!(self.size_type, StringSizeType::LargeUtf8)
112 }
113
114 pub fn is_view(&self) -> bool {
115 matches!(self.size_type, StringSizeType::Utf8View)
116 }
117
118 pub fn arc() -> DataTypeRef {
119 Arc::new(Self::new())
120 }
121
122 pub fn large_arc() -> DataTypeRef {
123 Arc::new(Self::large_utf8())
124 }
125
126 pub fn view_arc() -> DataTypeRef {
127 Arc::new(Self::utf8_view())
128 }
129}
130
131impl DataType for StringType {
132 fn name(&self) -> String {
133 "String".to_string()
134 }
135
136 fn logical_type_id(&self) -> LogicalTypeId {
137 LogicalTypeId::String
138 }
139
140 fn default_value(&self) -> Value {
141 StringBytes::default().into()
142 }
143
144 fn as_arrow_type(&self) -> ArrowDataType {
145 match self.size_type {
146 StringSizeType::Utf8 => ArrowDataType::Utf8,
147 StringSizeType::LargeUtf8 => ArrowDataType::LargeUtf8,
148 StringSizeType::Utf8View => ArrowDataType::Utf8View,
149 }
150 }
151
152 fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
153 match self.size_type {
154 StringSizeType::Utf8 => Box::new(StringVectorBuilder::with_string_capacity(capacity)),
155 StringSizeType::LargeUtf8 => {
156 Box::new(StringVectorBuilder::with_large_capacity(capacity))
157 }
158 StringSizeType::Utf8View => Box::new(StringVectorBuilder::with_view_capacity(capacity)),
159 }
160 }
161
162 fn try_cast(&self, from: Value) -> Option<Value> {
163 if from.logical_type_id() == self.logical_type_id() {
164 return Some(from);
165 }
166
167 match from {
168 Value::Null => Some(Value::String(StringBytes::from("null".to_string()))),
169
170 Value::Boolean(v) => Some(Value::String(StringBytes::from(v.to_string()))),
171 Value::UInt8(v) => Some(Value::String(StringBytes::from(v.to_string()))),
172 Value::UInt16(v) => Some(Value::String(StringBytes::from(v.to_string()))),
173 Value::UInt32(v) => Some(Value::String(StringBytes::from(v.to_string()))),
174 Value::UInt64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
175 Value::Int8(v) => Some(Value::String(StringBytes::from(v.to_string()))),
176 Value::Int16(v) => Some(Value::String(StringBytes::from(v.to_string()))),
177 Value::Int32(v) => Some(Value::String(StringBytes::from(v.to_string()))),
178 Value::Int64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
179 Value::Float32(v) => Some(Value::String(StringBytes::from(v.to_string()))),
180 Value::Float64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
181 Value::String(v) => Some(Value::String(v)),
182 Value::Date(v) => Some(Value::String(StringBytes::from(v.to_string()))),
183 Value::Timestamp(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
184 Value::Time(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
185 Value::IntervalYearMonth(v) => {
186 Some(Value::String(StringBytes::from(v.to_iso8601_string())))
187 }
188 Value::IntervalDayTime(v) => {
189 Some(Value::String(StringBytes::from(v.to_iso8601_string())))
190 }
191 Value::IntervalMonthDayNano(v) => {
192 Some(Value::String(StringBytes::from(v.to_iso8601_string())))
193 }
194 Value::Duration(v) => Some(Value::String(StringBytes::from(v.to_string()))),
195 Value::Decimal128(v) => Some(Value::String(StringBytes::from(v.to_string()))),
196
197 Value::Json(v) => serde_json::to_string(v.as_ref()).ok().map(|s| s.into()),
198
199 Value::Binary(_) | Value::List(_) | Value::Struct(_) => None,
201 }
202 }
203}