1pub(crate) mod alter_expr;
18pub(crate) mod create_expr;
19pub(crate) mod insert_expr;
20pub(crate) mod partition_expr;
21pub(crate) mod repartition_expr;
22pub(crate) mod select_expr;
23
24use core::fmt;
25use std::collections::HashMap;
26use std::sync::{Arc, Mutex};
27use std::time::Duration;
28
29pub use alter_expr::{AlterTableExpr, AlterTableOption};
30use common_time::timestamp::TimeUnit;
31use common_time::{Date, Timestamp};
32pub use create_expr::{CreateDatabaseExpr, CreateTableExpr};
33use datatypes::data_type::ConcreteDataType;
34use datatypes::types::TimestampType;
35use datatypes::value::Value;
36use derive_builder::Builder;
37pub use insert_expr::InsertIntoExpr;
38use lazy_static::lazy_static;
39use rand::Rng;
40use rand::seq::{IndexedRandom, SliceRandom};
41pub use repartition_expr::RepartitionExpr;
42use serde::{Deserialize, Serialize};
43
44use self::insert_expr::{RowValue, RowValues};
45use crate::context::TableContextRef;
46use crate::fake::WordGenerator;
47use crate::generator::{Random, TsValueGenerator};
48use crate::impl_random;
49use crate::ir::create_expr::ColumnOption;
50
51lazy_static! {
52 pub static ref DATA_TYPES: Vec<ConcreteDataType> = vec![
53 ConcreteDataType::boolean_datatype(),
54 ConcreteDataType::int16_datatype(),
55 ConcreteDataType::int32_datatype(),
56 ConcreteDataType::int64_datatype(),
57 ConcreteDataType::float32_datatype(),
58 ConcreteDataType::float64_datatype(),
59 ];
60 pub static ref TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
61 ConcreteDataType::timestamp_nanosecond_datatype(),
62 ConcreteDataType::timestamp_microsecond_datatype(),
63 ConcreteDataType::timestamp_millisecond_datatype(),
64 ConcreteDataType::timestamp_second_datatype(),
65 ];
66 pub static ref PARTIBLE_DATA_TYPES: Vec<ConcreteDataType> = vec![
67 ConcreteDataType::int16_datatype(),
68 ConcreteDataType::int32_datatype(),
69 ConcreteDataType::int64_datatype(),
70 ConcreteDataType::float32_datatype(),
71 ConcreteDataType::float64_datatype(),
72 ConcreteDataType::string_datatype(),
73 ];
74 pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
75 vec![ConcreteDataType::string_datatype()];
76 pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
77 ConcreteDataType::timestamp_microsecond_datatype(),
79 ConcreteDataType::timestamp_millisecond_datatype(),
80 ConcreteDataType::timestamp_second_datatype(),
81 ];
82}
83
84impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
85impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
86impl_random!(
87 ConcreteDataType,
88 MySQLTsColumnTypeGenerator,
89 MYSQL_TS_DATA_TYPES
90);
91impl_random!(
92 ConcreteDataType,
93 PartibleColumnTypeGenerator,
94 PARTIBLE_DATA_TYPES
95);
96impl_random!(
97 ConcreteDataType,
98 StringColumnTypeGenerator,
99 STRING_DATA_TYPES
100);
101
102pub struct ColumnTypeGenerator;
103pub struct TsColumnTypeGenerator;
104pub struct MySQLTsColumnTypeGenerator;
105pub struct PartibleColumnTypeGenerator;
106pub struct StringColumnTypeGenerator;
107
/// Expands to a `Vec<Value>` holding `$bounds` evenly spaced values of the
/// numeric type `$data_type`.
///
/// The spacing is `MAX / ($bounds + 1)` and the values are
/// `1 * spacing, 2 * spacing, ..., $bounds * spacing`, so all of them lie
/// between zero and the type's maximum. `$bounds` is evaluated twice.
macro_rules! generate_values {
    ($data_type:ty, $bounds:expr) => {{
        let origin = 0 as $data_type;
        let stride = <$data_type>::MAX / ($bounds as $data_type + 1 as $data_type);
        (1..=$bounds)
            .map(|k| Value::from(origin + stride * (k as $data_type)))
            .collect::<Vec<Value>>()
    }};
}
118
119pub fn generate_partition_bounds(datatype: &ConcreteDataType, bounds: usize) -> Vec<Value> {
121 match datatype {
122 ConcreteDataType::Int16(_) => generate_values!(i16, bounds),
123 ConcreteDataType::Int32(_) => generate_values!(i32, bounds),
124 ConcreteDataType::Int64(_) => generate_values!(i64, bounds),
125 ConcreteDataType::Float32(_) => generate_values!(f32, bounds),
126 ConcreteDataType::Float64(_) => generate_values!(f64, bounds),
127 ConcreteDataType::String(_) => {
128 let base = b'A';
129 let range = b'z' - b'A';
130 let step = range / (bounds as u8 + 1);
131 (1..=bounds)
132 .map(|i| {
133 Value::from(
134 char::from(base + step * i as u8)
135 .escape_default()
136 .to_string(),
137 )
138 })
139 .collect()
140 }
141 _ => unimplemented!("unsupported type: {datatype}"),
142 }
143}
144
145pub fn generate_random_value<R: Rng>(
147 rng: &mut R,
148 datatype: &ConcreteDataType,
149 random_str: Option<&dyn Random<Ident, R>>,
150) -> Value {
151 match datatype {
152 &ConcreteDataType::Boolean(_) => Value::from(rng.random::<bool>()),
153 ConcreteDataType::Int16(_) => Value::from(rng.random::<i16>()),
154 ConcreteDataType::Int32(_) => Value::from(rng.random::<i32>()),
155 ConcreteDataType::Int64(_) => Value::from(rng.random::<i64>()),
156 ConcreteDataType::Float32(_) => Value::from(rng.random::<f32>()),
157 ConcreteDataType::Float64(_) => Value::from(rng.random::<f64>()),
158 ConcreteDataType::String(_) => match random_str {
159 Some(random) => Value::from(random.generate(rng).value),
160 None => Value::from(rng.random::<char>().to_string()),
161 },
162 ConcreteDataType::Date(_) => generate_random_date(rng),
163
164 _ => unimplemented!("unsupported type: {datatype}"),
165 }
166}
167
168pub fn generate_unique_timestamp_for_mysql<R: Rng>(base: i64) -> TsValueGenerator<R> {
170 let base = Timestamp::new_millisecond(base);
171 let clock = Arc::new(Mutex::new(base));
172
173 Box::new(move |_rng, ts_type| -> Value {
174 let mut clock = clock.lock().unwrap();
175 let ts = clock.add_duration(Duration::from_secs(1)).unwrap();
176 *clock = ts;
177
178 let v = match ts_type {
179 TimestampType::Second(_) => ts.convert_to(TimeUnit::Second).unwrap(),
180 TimestampType::Millisecond(_) => ts.convert_to(TimeUnit::Millisecond).unwrap(),
181 TimestampType::Microsecond(_) => ts.convert_to(TimeUnit::Microsecond).unwrap(),
182 TimestampType::Nanosecond(_) => ts.convert_to(TimeUnit::Nanosecond).unwrap(),
183 };
184 Value::from(v)
185 })
186}
187
188pub fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
190 let v = match ts_type {
191 TimestampType::Second(_) => {
192 let min = i64::from(Timestamp::MIN_SECOND);
193 let max = i64::from(Timestamp::MAX_SECOND);
194 let value = rng.random_range(min..=max);
195 Timestamp::new_second(value)
196 }
197 TimestampType::Millisecond(_) => {
198 let min = i64::from(Timestamp::MIN_MILLISECOND);
199 let max = i64::from(Timestamp::MAX_MILLISECOND);
200 let value = rng.random_range(min..=max);
201 Timestamp::new_millisecond(value)
202 }
203 TimestampType::Microsecond(_) => {
204 let min = i64::from(Timestamp::MIN_MICROSECOND);
205 let max = i64::from(Timestamp::MAX_MICROSECOND);
206 let value = rng.random_range(min..=max);
207 Timestamp::new_microsecond(value)
208 }
209 TimestampType::Nanosecond(_) => {
210 let min = i64::from(Timestamp::MIN_NANOSECOND);
211 let max = i64::from(Timestamp::MAX_NANOSECOND);
212 let value = rng.random_range(min..=max);
213 Timestamp::new_nanosecond(value)
214 }
215 };
216 Value::from(v)
217}
218
219pub fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
221 let v = match ts_type {
222 TimestampType::Second(_) => {
223 let min = 1;
224 let max = 2_147_483_647;
225 let value = rng.random_range(min..=max);
226 Timestamp::new_second(value)
227 }
228 TimestampType::Millisecond(_) => {
229 let min = 1000;
230 let max = 2_147_483_647_499;
231 let value = rng.random_range(min..=max);
232 Timestamp::new_millisecond(value)
233 }
234 TimestampType::Microsecond(_) => {
235 let min = 1_000_000;
236 let max = 2_147_483_647_499_999;
237 let value = rng.random_range(min..=max);
238 Timestamp::new_microsecond(value)
239 }
240 TimestampType::Nanosecond(_) => {
241 let min = 1_000_000_000;
242 let max = 2_147_483_647_499_999_000;
243 let value = rng.random_range(min..=max);
244 Timestamp::new_nanosecond(value)
245 }
246 };
247 Value::from(v)
248}
249
250fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
251 let min = i64::from(Timestamp::MIN_MILLISECOND);
252 let max = i64::from(Timestamp::MAX_MILLISECOND);
253 let value = rng.random_range(min..=max);
254 let date = Timestamp::new_millisecond(value).to_chrono_date().unwrap();
255 Value::from(Date::from(date))
256}
257
258#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
260pub struct Ident {
261 pub value: String,
262 pub quote_style: Option<char>,
263}
264
265impl Ident {
266 pub fn new<S>(value: S) -> Self
268 where
269 S: Into<String>,
270 {
271 Ident {
272 value: value.into(),
273 quote_style: None,
274 }
275 }
276
277 pub fn with_quote<S>(quote: char, value: S) -> Self
279 where
280 S: Into<String>,
281 {
282 Ident {
283 value: value.into(),
284 quote_style: Some(quote),
285 }
286 }
287
288 pub fn is_empty(&self) -> bool {
289 self.value.is_empty()
290 }
291}
292
293impl From<&str> for Ident {
294 fn from(value: &str) -> Self {
295 Ident {
296 value: value.to_string(),
297 quote_style: None,
298 }
299 }
300}
301
302impl From<String> for Ident {
303 fn from(value: String) -> Self {
304 Ident {
305 value,
306 quote_style: None,
307 }
308 }
309}
310
311impl fmt::Display for Ident {
312 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
313 match self.quote_style {
314 Some(q) => write!(f, "{q}{}{q}", self.value),
315 None => f.write_str(&self.value),
316 }
317 }
318}
319
320#[derive(Debug, Builder, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
322pub struct Column {
323 #[builder(setter(into))]
324 pub name: Ident,
325 pub column_type: ConcreteDataType,
326 #[builder(default, setter(into))]
327 pub options: Vec<ColumnOption>,
328}
329
330impl Column {
331 pub fn timestamp_type(&self) -> Option<TimestampType> {
333 if let ConcreteDataType::Timestamp(ts_type) = self.column_type {
334 Some(ts_type)
335 } else {
336 None
337 }
338 }
339
340 pub fn is_time_index(&self) -> bool {
342 self.options
343 .iter()
344 .any(|opt| opt == &ColumnOption::TimeIndex)
345 }
346
347 pub fn is_primary_key(&self) -> bool {
349 self.options
350 .iter()
351 .any(|opt| opt == &ColumnOption::PrimaryKey)
352 }
353
354 pub fn is_nullable(&self) -> bool {
356 !self
357 .options
358 .iter()
359 .any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
360 }
361
362 pub fn has_default_value(&self) -> bool {
364 self.options.iter().any(|opt| {
365 matches!(
366 opt,
367 ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
368 )
369 })
370 }
371
372 pub fn default_value(&self) -> Option<&Value> {
374 self.options.iter().find_map(|opt| match opt {
375 ColumnOption::DefaultValue(value) => Some(value),
376 _ => None,
377 })
378 }
379}
380
381pub fn droppable_columns(columns: &[Column]) -> Vec<&Column> {
383 columns
384 .iter()
385 .filter(|column| {
386 !column.options.iter().any(|option| {
387 option == &ColumnOption::PrimaryKey || option == &ColumnOption::TimeIndex
388 })
389 })
390 .collect::<Vec<_>>()
391}
392
393pub fn modifiable_columns(columns: &[Column]) -> Vec<&Column> {
395 columns
396 .iter()
397 .filter(|column| {
398 !column.options.iter().any(|option| {
399 option == &ColumnOption::PrimaryKey
400 || option == &ColumnOption::TimeIndex
401 || option == &ColumnOption::NotNull
402 })
403 })
404 .collect::<Vec<_>>()
405}
406
407pub fn column_options_generator<R: Rng>(
409 rng: &mut R,
410 column_type: &ConcreteDataType,
411) -> Vec<ColumnOption> {
412 let option_idx = rng.random_range(0..5);
418 match option_idx {
419 0 => vec![ColumnOption::Null],
420 1 => vec![ColumnOption::NotNull],
421 2 => vec![ColumnOption::DefaultValue(generate_random_value(
422 rng,
423 column_type,
424 None,
425 ))],
426 3 => vec![ColumnOption::PrimaryKey],
427 _ => vec![],
428 }
429}
430
431pub fn partible_column_options_generator<R: Rng + 'static>(
433 rng: &mut R,
434 column_type: &ConcreteDataType,
435) -> Vec<ColumnOption> {
436 let option_idx = rng.random_range(0..4);
441 match option_idx {
442 0 => vec![ColumnOption::PrimaryKey, ColumnOption::Null],
443 1 => vec![ColumnOption::PrimaryKey, ColumnOption::NotNull],
444 2 => vec![
445 ColumnOption::PrimaryKey,
446 ColumnOption::DefaultValue(generate_random_value(
447 rng,
448 column_type,
449 Some(&WordGenerator),
450 )),
451 ],
452 3 => vec![ColumnOption::PrimaryKey],
453 _ => unreachable!(),
454 }
455}
456
457pub fn ts_column_options_generator<R: Rng + 'static>(
459 _: &mut R,
460 _: &ConcreteDataType,
461) -> Vec<ColumnOption> {
462 vec![ColumnOption::TimeIndex]
463}
464
465pub fn primary_key_and_not_null_column_options_generator<R: Rng + 'static>(
466 _: &mut R,
467 _: &ConcreteDataType,
468) -> Vec<ColumnOption> {
469 vec![ColumnOption::PrimaryKey, ColumnOption::NotNull]
470}
471
472pub fn primary_key_options_generator<R: Rng + 'static>(
473 _: &mut R,
474 _: &ConcreteDataType,
475) -> Vec<ColumnOption> {
476 vec![ColumnOption::PrimaryKey]
477}
478
479pub fn generate_columns<R: Rng + 'static>(
481 rng: &mut R,
482 names: impl IntoIterator<Item = Ident>,
483 types: &(impl Random<ConcreteDataType, R> + ?Sized),
484 options: impl Fn(&mut R, &ConcreteDataType) -> Vec<ColumnOption>,
485) -> Vec<Column> {
486 names
487 .into_iter()
488 .map(|name| {
489 let column_type = types.generate(rng);
490 let options = options(rng, &column_type);
491 Column {
492 name,
493 options,
494 column_type,
495 }
496 })
497 .collect()
498}
499
500pub fn replace_default(
502 rows: &[RowValues],
503 table_ctx_ref: &TableContextRef,
504 insert_expr: &InsertIntoExpr,
505) -> Vec<RowValues> {
506 let index_map: HashMap<usize, usize> = insert_expr
507 .columns
508 .iter()
509 .enumerate()
510 .map(|(insert_idx, insert_column)| {
511 let create_idx = table_ctx_ref
512 .columns
513 .iter()
514 .position(|create_column| create_column.name == insert_column.name)
515 .expect("Column not found in create_expr");
516 (insert_idx, create_idx)
517 })
518 .collect();
519
520 let mut new_rows = Vec::new();
521 for row in rows {
522 let mut new_row = Vec::new();
523 for (idx, value) in row.iter().enumerate() {
524 if let RowValue::Default = value {
525 let column = &table_ctx_ref.columns[index_map[&idx]];
526 new_row.push(RowValue::Value(column.default_value().unwrap().clone()));
527 } else {
528 new_row.push(value.clone());
529 }
530 }
531 new_rows.push(new_row);
532 }
533 new_rows
534}
535
536pub fn sort_by_primary_keys(rows: &mut [RowValues], primary_keys_idx: Vec<usize>) {
538 rows.sort_by(|a, b| {
539 let a_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &a[i]).collect();
540 let b_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &b[i]).collect();
541 for (a_key, b_key) in a_keys.iter().zip(b_keys.iter()) {
542 match a_key.cmp(b_key) {
543 Some(std::cmp::Ordering::Equal) => continue,
544 non_eq => return non_eq.unwrap(),
545 }
546 }
547 std::cmp::Ordering::Equal
548 });
549}
550
551pub fn format_columns(columns: &[Column]) -> String {
553 columns
554 .iter()
555 .map(|c| c.name.to_string())
556 .collect::<Vec<_>>()
557 .join(", ")
558}
559
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `uint64` column with the given name and options.
    fn uint64_column(name: &str, options: Vec<ColumnOption>) -> Column {
        Column {
            name: name.into(),
            column_type: ConcreteDataType::uint64_datatype(),
            options,
        }
    }

    #[test]
    fn test_droppable_columns() {
        // Primary-key and time-index columns must never be offered for dropping.
        let protected = vec![
            uint64_column("hi", vec![ColumnOption::PrimaryKey]),
            uint64_column("foo", vec![ColumnOption::TimeIndex]),
        ];
        assert!(droppable_columns(&protected).is_empty());

        // Columns without options are all droppable.
        let plain = vec![
            uint64_column("hi", vec![]),
            uint64_column("foo", vec![]),
        ];
        assert_eq!(droppable_columns(&plain).len(), 2);
    }
}