1pub(crate) mod alter_expr;
18pub(crate) mod create_expr;
19pub(crate) mod insert_expr;
20pub(crate) mod partition_expr;
21pub(crate) mod repartition_expr;
22pub(crate) mod select_expr;
23pub(crate) mod string_value;
24
25use core::fmt;
26use std::collections::HashMap;
27use std::sync::{Arc, Mutex};
28use std::time::Duration;
29
30pub use alter_expr::{AlterTableExpr, AlterTableOption};
31use common_time::timestamp::TimeUnit;
32use common_time::{Date, Timestamp};
33pub use create_expr::{CreateDatabaseExpr, CreateTableExpr};
34use datatypes::data_type::ConcreteDataType;
35use datatypes::types::TimestampType;
36use datatypes::value::Value;
37use derive_builder::Builder;
38pub use insert_expr::InsertIntoExpr;
39use lazy_static::lazy_static;
40pub use partition_expr::SimplePartitions;
41use rand::Rng;
42use rand::seq::{IndexedRandom, SliceRandom};
43pub use repartition_expr::RepartitionExpr;
44use serde::{Deserialize, Serialize};
45
46use self::insert_expr::RowValues;
47use crate::context::TableContextRef;
48use crate::fake::WordGenerator;
49use crate::generator::{Random, TsValueGenerator};
50use crate::impl_random;
51use crate::ir::create_expr::ColumnOption;
52pub use crate::ir::insert_expr::RowValue;
53
lazy_static! {
    /// General-purpose column types: boolean, the signed integers and the floats.
    pub static ref DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::boolean_datatype(),
        ConcreteDataType::int16_datatype(),
        ConcreteDataType::int32_datatype(),
        ConcreteDataType::int64_datatype(),
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
    ];
    /// Timestamp column types at every precision, from nanosecond down to second.
    pub static ref TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::timestamp_nanosecond_datatype(),
        ConcreteDataType::timestamp_microsecond_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        ConcreteDataType::timestamp_second_datatype(),
    ];
    /// Column types for partition columns: the same integer, float and string types that
    /// `generate_partition_bounds` knows how to produce bounds for.
    pub static ref PARTIBLE_DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::int16_datatype(),
        ConcreteDataType::int32_datatype(),
        ConcreteDataType::int64_datatype(),
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
        ConcreteDataType::string_datatype(),
    ];
    /// The string column type on its own.
    pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
        vec![ConcreteDataType::string_datatype()];
    /// Timestamp column types for MySQL; unlike `TS_DATA_TYPES` there is no nanosecond entry.
    pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
        // NOTE(review): nanosecond is left out, presumably because MySQL keeps at most
        // microsecond (6 digit) fractional seconds - confirm.
        ConcreteDataType::timestamp_microsecond_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        ConcreteDataType::timestamp_second_datatype(),
    ];
}
86
// Each `impl_random!` invocation pairs one generator struct (declared further down) with one
// of the `lazy_static!` type lists.
// NOTE(review): presumably the macro implements `Random<ConcreteDataType, _>` for the struct
// by choosing from that list - confirm against the `impl_random!` definition.
impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
impl_random!(
    ConcreteDataType,
    MySQLTsColumnTypeGenerator,
    MYSQL_TS_DATA_TYPES
);
impl_random!(
    ConcreteDataType,
    PartibleColumnTypeGenerator,
    PARTIBLE_DATA_TYPES
);
impl_random!(
    ConcreteDataType,
    StringColumnTypeGenerator,
    STRING_DATA_TYPES
);

/// Generator tied to `DATA_TYPES`.
pub struct ColumnTypeGenerator;
/// Generator tied to `TS_DATA_TYPES`.
pub struct TsColumnTypeGenerator;
/// Generator tied to `MYSQL_TS_DATA_TYPES`.
pub struct MySQLTsColumnTypeGenerator;
/// Generator tied to `PARTIBLE_DATA_TYPES`.
pub struct PartibleColumnTypeGenerator;
/// Generator tied to `STRING_DATA_TYPES`.
pub struct StringColumnTypeGenerator;
110
/// Expands to a `Vec<Value>` of `$bounds` evenly spaced values of the numeric `$data_type`.
///
/// The spacing is `<$data_type>::MAX / ($bounds + 1)`, and the values are
/// `1 * spacing, 2 * spacing, ..., $bounds * spacing`. `$bounds` is cast to `$data_type`
/// with `as`, and it is evaluated more than once, so pass a plain variable or literal.
macro_rules! generate_values {
    ($data_type:ty, $bounds:expr) => {{
        let origin = 0 as $data_type;
        let spacing = <$data_type>::MAX / ($bounds as $data_type + 1 as $data_type);
        let mut values: Vec<Value> = Vec::new();
        for i in 1..=$bounds {
            values.push(Value::from(origin + spacing * i as $data_type));
        }
        values
    }};
}
121
122pub fn generate_partition_bounds(datatype: &ConcreteDataType, bounds: usize) -> Vec<Value> {
124 match datatype {
125 ConcreteDataType::Int16(_) => generate_values!(i16, bounds),
126 ConcreteDataType::Int32(_) => generate_values!(i32, bounds),
127 ConcreteDataType::Int64(_) => generate_values!(i64, bounds),
128 ConcreteDataType::Float32(_) => generate_values!(f32, bounds),
129 ConcreteDataType::Float64(_) => generate_values!(f64, bounds),
130 ConcreteDataType::String(_) => string_value::generate_partition_bounds(bounds),
131 _ => unimplemented!("unsupported type: {datatype}"),
132 }
133}
134
135pub fn generate_random_value<R: Rng>(
137 rng: &mut R,
138 datatype: &ConcreteDataType,
139 random_str: Option<&dyn Random<Ident, R>>,
140) -> Value {
141 match datatype {
142 &ConcreteDataType::Boolean(_) => Value::from(rng.random::<bool>()),
143 ConcreteDataType::Int16(_) => Value::from(rng.random::<i16>()),
144 ConcreteDataType::Int32(_) => Value::from(rng.random::<i32>()),
145 ConcreteDataType::Int64(_) => Value::from(rng.random::<i64>()),
146 ConcreteDataType::Float32(_) => Value::from(rng.random::<f32>()),
147 ConcreteDataType::Float64(_) => Value::from(rng.random::<f64>()),
148 ConcreteDataType::String(_) => string_value::generate_data_string_value(rng, random_str),
149 ConcreteDataType::Date(_) => generate_random_date(rng),
150
151 _ => unimplemented!("unsupported type: {datatype}"),
152 }
153}
154
155pub fn generate_unique_timestamp_for_mysql<R: Rng>(base: i64) -> TsValueGenerator<R> {
157 let base = Timestamp::new_millisecond(base);
158 generate_unique_timestamp_for_mysql_with_clock(Arc::new(Mutex::new(base)))
159}
160
161pub fn generate_unique_timestamp_for_mysql_with_clock<R: Rng>(
163 clock: Arc<Mutex<Timestamp>>,
164) -> TsValueGenerator<R> {
165 Box::new(move |_rng, ts_type| -> Value {
166 let mut clock = clock.lock().unwrap();
167 let ts = clock.add_duration(Duration::from_secs(1)).unwrap();
168 *clock = ts;
169
170 let v = match ts_type {
171 TimestampType::Second(_) => ts.convert_to(TimeUnit::Second).unwrap(),
172 TimestampType::Millisecond(_) => ts.convert_to(TimeUnit::Millisecond).unwrap(),
173 TimestampType::Microsecond(_) => ts.convert_to(TimeUnit::Microsecond).unwrap(),
174 TimestampType::Nanosecond(_) => ts.convert_to(TimeUnit::Nanosecond).unwrap(),
175 };
176 Value::from(v)
177 })
178}
179
180pub fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
182 let v = match ts_type {
183 TimestampType::Second(_) => {
184 let min = i64::from(Timestamp::MIN_SECOND);
185 let max = i64::from(Timestamp::MAX_SECOND);
186 let value = rng.random_range(min..=max);
187 Timestamp::new_second(value)
188 }
189 TimestampType::Millisecond(_) => {
190 let min = i64::from(Timestamp::MIN_MILLISECOND);
191 let max = i64::from(Timestamp::MAX_MILLISECOND);
192 let value = rng.random_range(min..=max);
193 Timestamp::new_millisecond(value)
194 }
195 TimestampType::Microsecond(_) => {
196 let min = i64::from(Timestamp::MIN_MICROSECOND);
197 let max = i64::from(Timestamp::MAX_MICROSECOND);
198 let value = rng.random_range(min..=max);
199 Timestamp::new_microsecond(value)
200 }
201 TimestampType::Nanosecond(_) => {
202 let min = i64::from(Timestamp::MIN_NANOSECOND);
203 let max = i64::from(Timestamp::MAX_NANOSECOND);
204 let value = rng.random_range(min..=max);
205 Timestamp::new_nanosecond(value)
206 }
207 };
208 Value::from(v)
209}
210
211pub fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
213 let v = match ts_type {
214 TimestampType::Second(_) => {
215 let min = 1;
216 let max = 2_147_483_647;
217 let value = rng.random_range(min..=max);
218 Timestamp::new_second(value)
219 }
220 TimestampType::Millisecond(_) => {
221 let min = 1000;
222 let max = 2_147_483_647_499;
223 let value = rng.random_range(min..=max);
224 Timestamp::new_millisecond(value)
225 }
226 TimestampType::Microsecond(_) => {
227 let min = 1_000_000;
228 let max = 2_147_483_647_499_999;
229 let value = rng.random_range(min..=max);
230 Timestamp::new_microsecond(value)
231 }
232 TimestampType::Nanosecond(_) => {
233 let min = 1_000_000_000;
234 let max = 2_147_483_647_499_999_000;
235 let value = rng.random_range(min..=max);
236 Timestamp::new_nanosecond(value)
237 }
238 };
239 Value::from(v)
240}
241
242fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
243 let min = i64::from(Timestamp::MIN_MILLISECOND);
244 let max = i64::from(Timestamp::MAX_MILLISECOND);
245 let value = rng.random_range(min..=max);
246 let date = Timestamp::new_millisecond(value).to_chrono_date().unwrap();
247 Value::from(Date::from(date))
248}
249
250pub fn generate_partition_value<R: Rng + 'static>(
252 rng: &mut R,
253 column_type: &ConcreteDataType,
254 bounds: &[Value],
255 bound_idx: usize,
256) -> Value {
257 if bounds.is_empty() {
258 return generate_random_value(rng, column_type, None);
259 }
260 let first = bounds.first().unwrap();
261 let last = bounds.last().unwrap();
262 match column_type {
263 datatypes::data_type::ConcreteDataType::Int16(_) => {
264 let first_value = match first {
265 datatypes::value::Value::Int16(v) => *v,
266 _ => 0,
267 };
268 if bound_idx == 0 {
269 datatypes::value::Value::from(first_value.saturating_sub(1))
270 } else if bound_idx < bounds.len() {
271 bounds[bound_idx - 1].clone()
272 } else {
273 last.clone()
274 }
275 }
276 datatypes::data_type::ConcreteDataType::Int32(_) => {
277 let first_value = match first {
278 datatypes::value::Value::Int32(v) => *v,
279 _ => 0,
280 };
281 if bound_idx == 0 {
282 datatypes::value::Value::from(first_value.saturating_sub(1))
283 } else if bound_idx < bounds.len() {
284 bounds[bound_idx - 1].clone()
285 } else {
286 last.clone()
287 }
288 }
289 datatypes::data_type::ConcreteDataType::Int64(_) => {
290 let first_value = match first {
291 datatypes::value::Value::Int64(v) => *v,
292 _ => 0,
293 };
294 if bound_idx == 0 {
295 datatypes::value::Value::from(first_value.saturating_sub(1))
296 } else if bound_idx < bounds.len() {
297 bounds[bound_idx - 1].clone()
298 } else {
299 last.clone()
300 }
301 }
302 datatypes::data_type::ConcreteDataType::Float32(_) => {
303 let first_value = match first {
304 datatypes::value::Value::Float32(v) => v.0,
305 _ => 0.0,
306 };
307 if bound_idx == 0 {
308 datatypes::value::Value::from(first_value - 1.0)
309 } else if bound_idx < bounds.len() {
310 bounds[bound_idx - 1].clone()
311 } else {
312 last.clone()
313 }
314 }
315 datatypes::data_type::ConcreteDataType::Float64(_) => {
316 let first_value = match first {
317 datatypes::value::Value::Float64(v) => v.0,
318 _ => 0.0,
319 };
320 if bound_idx == 0 {
321 datatypes::value::Value::from(first_value - 1.0)
322 } else if bound_idx < bounds.len() {
323 bounds[bound_idx - 1].clone()
324 } else {
325 last.clone()
326 }
327 }
328 datatypes::data_type::ConcreteDataType::String(_) => {
329 string_value::generate_partition_value(bounds, bound_idx)
330 }
331 _ => unimplemented!("unsupported partition column type: {column_type}"),
332 }
333}
334
/// An identifier: its text plus an optional quote character.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct Ident {
    /// The identifier text as given.
    pub value: String,
    /// Quote character that the `Display` impl writes on both sides of `value`; with `None`
    /// the value is written as-is.
    pub quote_style: Option<char>,
}
341
342impl Ident {
343 pub fn new<S>(value: S) -> Self
345 where
346 S: Into<String>,
347 {
348 Ident {
349 value: value.into(),
350 quote_style: None,
351 }
352 }
353
354 pub fn with_quote<S>(quote: char, value: S) -> Self
356 where
357 S: Into<String>,
358 {
359 Ident {
360 value: value.into(),
361 quote_style: Some(quote),
362 }
363 }
364
365 pub fn is_empty(&self) -> bool {
366 self.value.is_empty()
367 }
368}
369
370impl From<&str> for Ident {
371 fn from(value: &str) -> Self {
372 Ident {
373 value: value.to_string(),
374 quote_style: None,
375 }
376 }
377}
378
379impl From<String> for Ident {
380 fn from(value: String) -> Self {
381 Ident {
382 value,
383 quote_style: None,
384 }
385 }
386}
387
388impl fmt::Display for Ident {
389 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
390 match self.quote_style {
391 Some(q) => write!(f, "{q}{}{q}", self.value),
392 None => f.write_str(&self.value),
393 }
394 }
395}
396
/// A table column: its name, data type and column options.
///
/// The `Builder` derive generates a builder for this struct; the `#[builder(...)]`
/// attributes below configure its setters.
#[derive(Debug, Builder, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Column {
    /// Column name. The builder setter accepts anything convertible into [`Ident`].
    #[builder(setter(into))]
    pub name: Ident,
    /// Data type of the column.
    pub column_type: ConcreteDataType,
    /// Options attached to the column. Optional in the builder, where it falls back to the
    /// default (empty) vector.
    #[builder(default, setter(into))]
    pub options: Vec<ColumnOption>,
}
406
407impl Column {
408 pub fn timestamp_type(&self) -> Option<TimestampType> {
410 if let ConcreteDataType::Timestamp(ts_type) = self.column_type {
411 Some(ts_type)
412 } else {
413 None
414 }
415 }
416
417 pub fn is_time_index(&self) -> bool {
419 self.options
420 .iter()
421 .any(|opt| opt == &ColumnOption::TimeIndex)
422 }
423
424 pub fn is_primary_key(&self) -> bool {
426 self.options
427 .iter()
428 .any(|opt| opt == &ColumnOption::PrimaryKey)
429 }
430
431 pub fn is_nullable(&self) -> bool {
433 !self
434 .options
435 .iter()
436 .any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
437 }
438
439 pub fn has_default_value(&self) -> bool {
441 self.options.iter().any(|opt| {
442 matches!(
443 opt,
444 ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
445 )
446 })
447 }
448
449 pub fn default_value(&self) -> Option<&Value> {
451 self.options.iter().find_map(|opt| match opt {
452 ColumnOption::DefaultValue(value) => Some(value),
453 _ => None,
454 })
455 }
456}
457
458pub fn droppable_columns(columns: &[Column]) -> Vec<&Column> {
460 columns
461 .iter()
462 .filter(|column| {
463 !column.options.iter().any(|option| {
464 option == &ColumnOption::PrimaryKey || option == &ColumnOption::TimeIndex
465 })
466 })
467 .collect::<Vec<_>>()
468}
469
470pub fn modifiable_columns(columns: &[Column]) -> Vec<&Column> {
472 columns
473 .iter()
474 .filter(|column| {
475 !column.options.iter().any(|option| {
476 option == &ColumnOption::PrimaryKey
477 || option == &ColumnOption::TimeIndex
478 || option == &ColumnOption::NotNull
479 })
480 })
481 .collect::<Vec<_>>()
482}
483
484pub fn column_options_generator<R: Rng>(
486 rng: &mut R,
487 column_type: &ConcreteDataType,
488) -> Vec<ColumnOption> {
489 let option_idx = rng.random_range(0..5);
495 match option_idx {
496 0 => vec![ColumnOption::Null],
497 1 => vec![ColumnOption::NotNull],
498 2 => vec![ColumnOption::DefaultValue(generate_random_value(
499 rng,
500 column_type,
501 None,
502 ))],
503 3 => vec![ColumnOption::PrimaryKey],
504 _ => vec![],
505 }
506}
507
508pub fn partible_column_options_generator<R: Rng + 'static>(
510 rng: &mut R,
511 column_type: &ConcreteDataType,
512) -> Vec<ColumnOption> {
513 let option_idx = rng.random_range(0..4);
518 match option_idx {
519 0 => vec![ColumnOption::PrimaryKey, ColumnOption::Null],
520 1 => vec![ColumnOption::PrimaryKey, ColumnOption::NotNull],
521 2 => vec![
522 ColumnOption::PrimaryKey,
523 ColumnOption::DefaultValue(generate_random_value(
524 rng,
525 column_type,
526 Some(&WordGenerator),
527 )),
528 ],
529 3 => vec![ColumnOption::PrimaryKey],
530 _ => unreachable!(),
531 }
532}
533
534pub fn ts_column_options_generator<R: Rng + 'static>(
536 _: &mut R,
537 _: &ConcreteDataType,
538) -> Vec<ColumnOption> {
539 vec![ColumnOption::TimeIndex]
540}
541
542pub fn primary_key_and_not_null_column_options_generator<R: Rng + 'static>(
543 _: &mut R,
544 _: &ConcreteDataType,
545) -> Vec<ColumnOption> {
546 vec![ColumnOption::PrimaryKey, ColumnOption::NotNull]
547}
548
549pub fn primary_key_options_generator<R: Rng + 'static>(
550 _: &mut R,
551 _: &ConcreteDataType,
552) -> Vec<ColumnOption> {
553 vec![ColumnOption::PrimaryKey]
554}
555
556pub fn generate_columns<R: Rng + 'static>(
558 rng: &mut R,
559 names: impl IntoIterator<Item = Ident>,
560 types: &(impl Random<ConcreteDataType, R> + ?Sized),
561 options: impl Fn(&mut R, &ConcreteDataType) -> Vec<ColumnOption>,
562) -> Vec<Column> {
563 names
564 .into_iter()
565 .map(|name| {
566 let column_type = types.generate(rng);
567 let options = options(rng, &column_type);
568 Column {
569 name,
570 options,
571 column_type,
572 }
573 })
574 .collect()
575}
576
577pub fn replace_default(
579 rows: &[RowValues],
580 table_ctx_ref: &TableContextRef,
581 insert_expr: &InsertIntoExpr,
582) -> Vec<RowValues> {
583 let index_map: HashMap<usize, usize> = insert_expr
584 .columns
585 .iter()
586 .enumerate()
587 .map(|(insert_idx, insert_column)| {
588 let create_idx = table_ctx_ref
589 .columns
590 .iter()
591 .position(|create_column| create_column.name == insert_column.name)
592 .expect("Column not found in create_expr");
593 (insert_idx, create_idx)
594 })
595 .collect();
596
597 let mut new_rows = Vec::new();
598 for row in rows {
599 let mut new_row = Vec::new();
600 for (idx, value) in row.iter().enumerate() {
601 if let RowValue::Default = value {
602 let column = &table_ctx_ref.columns[index_map[&idx]];
603 new_row.push(RowValue::Value(column.default_value().unwrap().clone()));
604 } else {
605 new_row.push(value.clone());
606 }
607 }
608 new_rows.push(new_row);
609 }
610 new_rows
611}
612
613pub fn sort_by_primary_keys(rows: &mut [RowValues], primary_keys_idx: Vec<usize>) {
615 rows.sort_by(|a, b| {
616 let a_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &a[i]).collect();
617 let b_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &b[i]).collect();
618 for (a_key, b_key) in a_keys.iter().zip(b_keys.iter()) {
619 match a_key.cmp(b_key) {
620 Some(std::cmp::Ordering::Equal) => continue,
621 non_eq => return non_eq.unwrap(),
622 }
623 }
624 std::cmp::Ordering::Equal
625 });
626}
627
628pub fn format_columns(columns: &[Column]) -> String {
630 columns
631 .iter()
632 .map(|c| c.name.to_string())
633 .collect::<Vec<_>>()
634 .join(", ")
635}
636
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `uint64` column with the given name and options.
    fn column(name: &str, options: Vec<ColumnOption>) -> Column {
        Column {
            name: name.into(),
            column_type: ConcreteDataType::uint64_datatype(),
            options,
        }
    }

    #[test]
    fn test_droppable_columns() {
        // Primary key and time index columns are never droppable.
        let protected = vec![
            column("hi", vec![ColumnOption::PrimaryKey]),
            column("foo", vec![ColumnOption::TimeIndex]),
        ];
        assert!(droppable_columns(&protected).is_empty());

        // Columns without options are all droppable.
        let plain = vec![column("hi", vec![]), column("foo", vec![])];
        assert_eq!(droppable_columns(&plain).len(), 2);
    }
}