pub(crate) mod alter_expr;
pub(crate) mod create_expr;
pub(crate) mod insert_expr;
pub(crate) mod select_expr;
use core::fmt;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Duration;
pub use alter_expr::{AlterTableExpr, AlterTableOption};
use common_time::timestamp::TimeUnit;
use common_time::{Date, Timestamp};
pub use create_expr::{CreateDatabaseExpr, CreateTableExpr};
use datatypes::data_type::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use derive_builder::Builder;
pub use insert_expr::InsertIntoExpr;
use lazy_static::lazy_static;
use rand::seq::{IndexedRandom, SliceRandom};
use rand::Rng;
use serde::{Deserialize, Serialize};
use self::insert_expr::{RowValue, RowValues};
use crate::context::TableContextRef;
use crate::fake::WordGenerator;
use crate::generator::{Random, TsValueGenerator};
use crate::impl_random;
use crate::ir::create_expr::ColumnOption;
lazy_static! {
/// Boolean, integer, and float column types; registered with
/// `ColumnTypeGenerator` through `impl_random!`.
pub static ref DATA_TYPES: Vec<ConcreteDataType> = vec![
ConcreteDataType::boolean_datatype(),
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
];
/// Timestamp column types at all four precisions (ns, us, ms, s);
/// registered with `TsColumnTypeGenerator`.
pub static ref TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
ConcreteDataType::timestamp_nanosecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
];
/// Column types registered with `PartibleColumnTypeGenerator`. The list
/// matches the types handled by `generate_partition_bounds`.
pub static ref PARTIBLE_DATA_TYPES: Vec<ConcreteDataType> = vec![
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
ConcreteDataType::string_datatype(),
];
/// The string column type only; registered with `StringColumnTypeGenerator`.
pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
vec![ConcreteDataType::string_datatype()];
/// Same as `TS_DATA_TYPES` but without the nanosecond precision;
/// registered with `MySQLTsColumnTypeGenerator`.
pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
];
}
// Wire each generator marker type to its list of candidate column types.
// NOTE(review): `impl_random!` is defined elsewhere in the crate; presumably it
// implements `Random<ConcreteDataType, R>` by choosing from the given list —
// confirm against the macro definition.
impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
impl_random!(
ConcreteDataType,
MySQLTsColumnTypeGenerator,
MYSQL_TS_DATA_TYPES
);
impl_random!(
ConcreteDataType,
PartibleColumnTypeGenerator,
PARTIBLE_DATA_TYPES
);
impl_random!(
ConcreteDataType,
StringColumnTypeGenerator,
STRING_DATA_TYPES
);
/// Marker type registered with `impl_random!` against `DATA_TYPES`.
pub struct ColumnTypeGenerator;
/// Marker type registered with `impl_random!` against `TS_DATA_TYPES`.
pub struct TsColumnTypeGenerator;
/// Marker type registered with `impl_random!` against `MYSQL_TS_DATA_TYPES`.
pub struct MySQLTsColumnTypeGenerator;
/// Marker type registered with `impl_random!` against `PARTIBLE_DATA_TYPES`.
pub struct PartibleColumnTypeGenerator;
/// Marker type registered with `impl_random!` against `STRING_DATA_TYPES`.
pub struct StringColumnTypeGenerator;
/// Expands to a `Vec<Value>` holding `$bounds` evenly spaced values of the
/// numeric type `$data_type`.
///
/// The spacing is `MAX / ($bounds + 1)` computed in `$data_type`; the i-th
/// value (1-based) is `i * spacing`.
macro_rules! generate_values {
    ($data_type:ty, $bounds:expr) => {{
        let origin = 0 as $data_type;
        // Dividing by `bounds + 1` keeps the last value strictly below `MAX`.
        let spacing = <$data_type>::MAX / ($bounds as $data_type + 1 as $data_type);
        (1..=$bounds)
            .map(|i| i as $data_type)
            .map(|multiplier| Value::from(origin + spacing * multiplier))
            .collect::<Vec<Value>>()
    }};
}
/// Generates `bounds` ascending partition bound values for `datatype`.
///
/// Numeric types delegate to `generate_values!`. Strings yield single
/// characters stepped through the byte range `b'A'..=b'z'`, each rendered
/// with `char::escape_default`.
///
/// NOTE: the byte range spans 57 values, so the integer step becomes 0 once
/// `bounds >= 57` and every string bound collapses to `"A"`.
///
/// # Panics
///
/// Panics with `unimplemented!` for any data type other than `Int16`,
/// `Int32`, `Int64`, `Float32`, `Float64`, or `String`.
pub fn generate_partition_bounds(datatype: &ConcreteDataType, bounds: usize) -> Vec<Value> {
    match datatype {
        ConcreteDataType::Int16(_) => generate_values!(i16, bounds),
        ConcreteDataType::Int32(_) => generate_values!(i32, bounds),
        ConcreteDataType::Int64(_) => generate_values!(i64, bounds),
        ConcreteDataType::Float32(_) => generate_values!(f32, bounds),
        ConcreteDataType::Float64(_) => generate_values!(f64, bounds),
        ConcreteDataType::String(_) => {
            let first = b'A';
            let span = b'z' - b'A';
            let stride = span / (bounds as u8 + 1);
            let mut values = Vec::new();
            for i in 1..=bounds {
                let boundary = char::from(first + stride * i as u8);
                values.push(Value::from(boundary.escape_default().to_string()));
            }
            values
        }
        _ => unimplemented!("unsupported type: {datatype}"),
    }
}
/// Generates a random [`Value`] of the given `datatype`.
///
/// For string columns, `random_str` (when provided) supplies the text via its
/// generated `Ident`; otherwise a single random `char` is used.
///
/// # Panics
///
/// Panics with `unimplemented!` for data types other than boolean, the
/// 16/32/64-bit integers, the two float types, string, and date.
pub fn generate_random_value<R: Rng>(
    rng: &mut R,
    datatype: &ConcreteDataType,
    random_str: Option<&dyn Random<Ident, R>>,
) -> Value {
    match datatype {
        ConcreteDataType::Boolean(_) => Value::from(rng.random::<bool>()),
        ConcreteDataType::Int16(_) => Value::from(rng.random::<i16>()),
        ConcreteDataType::Int32(_) => Value::from(rng.random::<i32>()),
        ConcreteDataType::Int64(_) => Value::from(rng.random::<i64>()),
        ConcreteDataType::Float32(_) => Value::from(rng.random::<f32>()),
        ConcreteDataType::Float64(_) => Value::from(rng.random::<f64>()),
        ConcreteDataType::String(_) => {
            let text = if let Some(generator) = random_str {
                generator.gen(rng).value
            } else {
                rng.random::<char>().to_string()
            };
            Value::from(text)
        }
        ConcreteDataType::Date(_) => generate_random_date(rng),
        _ => unimplemented!("unsupported type: {datatype}"),
    }
}
/// Builds a timestamp generator whose values advance by one second per call.
///
/// The internal clock starts at `base` (interpreted as milliseconds) and is
/// bumped by one second *before* each value is produced, so the first value
/// is `base + 1s`. The result is converted to the unit of the requested
/// timestamp type. The random generator argument is ignored.
///
/// # Panics
///
/// The returned closure panics if the mutex is poisoned, if adding the
/// duration fails, or if the unit conversion fails.
pub fn generate_unique_timestamp_for_mysql<R: Rng>(base: i64) -> TsValueGenerator<R> {
    let shared_clock = Arc::new(Mutex::new(Timestamp::new_millisecond(base)));
    Box::new(move |_rng, ts_type| -> Value {
        let mut current = shared_clock.lock().unwrap();
        let next = current.add_duration(Duration::from_secs(1)).unwrap();
        *current = next;
        let unit = match ts_type {
            TimestampType::Second(_) => TimeUnit::Second,
            TimestampType::Millisecond(_) => TimeUnit::Millisecond,
            TimestampType::Microsecond(_) => TimeUnit::Microsecond,
            TimestampType::Nanosecond(_) => TimeUnit::Nanosecond,
        };
        Value::from(next.convert_to(unit).unwrap())
    })
}
/// Generates a random timestamp [`Value`] in the unit of `ts_type`.
///
/// The raw value is drawn from the inclusive range between the `Timestamp`
/// `MIN_*` and `MAX_*` constants of the matching unit.
pub fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
    let timestamp = match ts_type {
        TimestampType::Second(_) => Timestamp::new_second(rng.random_range(
            i64::from(Timestamp::MIN_SECOND)..=i64::from(Timestamp::MAX_SECOND),
        )),
        TimestampType::Millisecond(_) => Timestamp::new_millisecond(rng.random_range(
            i64::from(Timestamp::MIN_MILLISECOND)..=i64::from(Timestamp::MAX_MILLISECOND),
        )),
        TimestampType::Microsecond(_) => Timestamp::new_microsecond(rng.random_range(
            i64::from(Timestamp::MIN_MICROSECOND)..=i64::from(Timestamp::MAX_MICROSECOND),
        )),
        TimestampType::Nanosecond(_) => Timestamp::new_nanosecond(rng.random_range(
            i64::from(Timestamp::MIN_NANOSECOND)..=i64::from(Timestamp::MAX_NANOSECOND),
        )),
    };
    Value::from(timestamp)
}
/// Generates a random timestamp [`Value`] in the unit of `ts_type`, limited
/// to a fixed per-unit range.
///
/// Each range starts at one second past the epoch and ends at
/// `2_147_483_647` seconds (plus `.499`/`.499999`/`.499999000` in the
/// sub-second units), expressed in the requested unit.
pub fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
    let timestamp = match ts_type {
        TimestampType::Second(_) => {
            Timestamp::new_second(rng.random_range(1_i64..=2_147_483_647))
        }
        TimestampType::Millisecond(_) => {
            Timestamp::new_millisecond(rng.random_range(1000_i64..=2_147_483_647_499))
        }
        TimestampType::Microsecond(_) => {
            Timestamp::new_microsecond(rng.random_range(1_000_000_i64..=2_147_483_647_499_999))
        }
        TimestampType::Nanosecond(_) => Timestamp::new_nanosecond(
            rng.random_range(1_000_000_000_i64..=2_147_483_647_499_999_000),
        ),
    };
    Value::from(timestamp)
}
/// Generates a random date [`Value`].
///
/// A millisecond timestamp is drawn between `Timestamp::MIN_MILLISECOND` and
/// `Timestamp::MAX_MILLISECOND` (inclusive) and reduced to its calendar date.
///
/// # Panics
///
/// Panics if the timestamp cannot be converted to a chrono date.
fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
    let millis = rng.random_range(
        i64::from(Timestamp::MIN_MILLISECOND)..=i64::from(Timestamp::MAX_MILLISECOND),
    );
    let chrono_date = Timestamp::new_millisecond(millis)
        .to_chrono_date()
        .unwrap();
    Value::from(Date::from(chrono_date))
}
/// An identifier with an optional quote character.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct Ident {
/// The identifier text, without any quoting.
pub value: String,
/// When set, `Display` writes this character before and after `value`.
pub quote_style: Option<char>,
}
impl Ident {
pub fn new<S>(value: S) -> Self
where
S: Into<String>,
{
Ident {
value: value.into(),
quote_style: None,
}
}
pub fn with_quote<S>(quote: char, value: S) -> Self
where
S: Into<String>,
{
Ident {
value: value.into(),
quote_style: Some(quote),
}
}
pub fn is_empty(&self) -> bool {
self.value.is_empty()
}
}
impl From<&str> for Ident {
fn from(value: &str) -> Self {
Ident {
value: value.to_string(),
quote_style: None,
}
}
}
impl From<String> for Ident {
fn from(value: String) -> Self {
Ident {
value,
quote_style: None,
}
}
}
/// Writes the identifier, surrounded by its quote character when one is set.
impl fmt::Display for Ident {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(q) = self.quote_style {
            write!(f, "{q}{}{q}", self.value)
        } else {
            f.write_str(&self.value)
        }
    }
}
/// A table column: its name, data type, and column options.
#[derive(Debug, Builder, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Column {
/// Column name; the builder setter accepts anything convertible into `Ident`.
#[builder(setter(into))]
pub name: Ident,
/// Data type of the column.
pub column_type: ConcreteDataType,
/// Options attached to the column (e.g. `PrimaryKey`, `TimeIndex`,
/// `NotNull`, default value). Optional in the builder.
#[builder(default, setter(into))]
pub options: Vec<ColumnOption>,
}
impl Column {
pub fn timestamp_type(&self) -> Option<TimestampType> {
if let ConcreteDataType::Timestamp(ts_type) = self.column_type {
Some(ts_type)
} else {
None
}
}
pub fn is_time_index(&self) -> bool {
self.options
.iter()
.any(|opt| opt == &ColumnOption::TimeIndex)
}
pub fn is_primary_key(&self) -> bool {
self.options
.iter()
.any(|opt| opt == &ColumnOption::PrimaryKey)
}
pub fn is_nullable(&self) -> bool {
!self
.options
.iter()
.any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
}
pub fn has_default_value(&self) -> bool {
self.options.iter().any(|opt| {
matches!(
opt,
ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
)
})
}
pub fn default_value(&self) -> Option<&Value> {
self.options.iter().find_map(|opt| match opt {
ColumnOption::DefaultValue(value) => Some(value),
_ => None,
})
}
}
/// Returns the columns that can be dropped: those that are neither a primary
/// key nor the time index. Input order is preserved.
pub fn droppable_columns(columns: &[Column]) -> Vec<&Column> {
    let mut droppable = Vec::new();
    for column in columns {
        let is_key_column = column.options.iter().any(|option| {
            option == &ColumnOption::PrimaryKey || option == &ColumnOption::TimeIndex
        });
        if !is_key_column {
            droppable.push(column);
        }
    }
    droppable
}
/// Returns the columns that can be modified: those carrying none of the
/// `PrimaryKey`, `TimeIndex`, or `NotNull` options. Input order is preserved.
pub fn modifiable_columns(columns: &[Column]) -> Vec<&Column> {
    let mut modifiable = Vec::new();
    for column in columns {
        let is_locked = column.options.iter().any(|option| {
            option == &ColumnOption::PrimaryKey
                || option == &ColumnOption::TimeIndex
                || option == &ColumnOption::NotNull
        });
        if !is_locked {
            modifiable.push(column);
        }
    }
    modifiable
}
/// Picks a random option set for an ordinary column.
///
/// An index is drawn from `0..5` and mapped to one of: `Null`, `NotNull`, a
/// random `DefaultValue` of `column_type`, `PrimaryKey`, or no options.
///
/// # Panics
///
/// The `DefaultValue` case calls `generate_random_value`, which panics for
/// column types it does not support.
pub fn column_options_generator<R: Rng>(
    rng: &mut R,
    column_type: &ConcreteDataType,
) -> Vec<ColumnOption> {
    match rng.random_range(0..5) {
        0 => vec![ColumnOption::Null],
        1 => vec![ColumnOption::NotNull],
        2 => {
            let default = generate_random_value(rng, column_type, None);
            vec![ColumnOption::DefaultValue(default)]
        }
        3 => vec![ColumnOption::PrimaryKey],
        _ => Vec::new(),
    }
}
/// Picks a random option set for a partition column.
///
/// The result always starts with `PrimaryKey`. An index drawn from `0..4`
/// decides what follows: `Null`, `NotNull`, a random `DefaultValue` (strings
/// come from `WordGenerator`), or nothing.
pub fn partible_column_options_generator<R: Rng + 'static>(
    rng: &mut R,
    column_type: &ConcreteDataType,
) -> Vec<ColumnOption> {
    let mut options = vec![ColumnOption::PrimaryKey];
    match rng.random_range(0..4) {
        0 => options.push(ColumnOption::Null),
        1 => options.push(ColumnOption::NotNull),
        2 => {
            let default = generate_random_value(rng, column_type, Some(&WordGenerator));
            options.push(ColumnOption::DefaultValue(default));
        }
        3 => {}
        _ => unreachable!(),
    }
    options
}
/// Option generator for the time index column: always returns `[TimeIndex]`.
/// Both arguments are ignored.
pub fn ts_column_options_generator<R: Rng + 'static>(
    _rng: &mut R,
    _column_type: &ConcreteDataType,
) -> Vec<ColumnOption> {
    Vec::from([ColumnOption::TimeIndex])
}
/// Option generator that always returns `[PrimaryKey, NotNull]`.
/// Both arguments are ignored.
pub fn primary_key_and_not_null_column_options_generator<R: Rng + 'static>(
    _rng: &mut R,
    _column_type: &ConcreteDataType,
) -> Vec<ColumnOption> {
    Vec::from([ColumnOption::PrimaryKey, ColumnOption::NotNull])
}
/// Option generator that always returns `[PrimaryKey]`.
/// Both arguments are ignored.
pub fn primary_key_options_generator<R: Rng + 'static>(
    _rng: &mut R,
    _column_type: &ConcreteDataType,
) -> Vec<ColumnOption> {
    Vec::from([ColumnOption::PrimaryKey])
}
/// Builds one [`Column`] per name.
///
/// For each name, in order, a column type is drawn from `types` and then
/// passed to `options` to produce that column's options.
pub fn generate_columns<R: Rng + 'static>(
    rng: &mut R,
    names: impl IntoIterator<Item = Ident>,
    types: &(impl Random<ConcreteDataType, R> + ?Sized),
    options: impl Fn(&mut R, &ConcreteDataType) -> Vec<ColumnOption>,
) -> Vec<Column> {
    let mut columns = Vec::new();
    for name in names {
        // The type is drawn before the options so the options can depend on it.
        let column_type = types.gen(rng);
        let column_options = options(rng, &column_type);
        columns.push(Column {
            name,
            column_type,
            options: column_options,
        });
    }
    columns
}
/// Returns a copy of `rows` in which every `RowValue::Default` is replaced by
/// the literal default value of the corresponding table column.
///
/// Insert columns are matched to table columns by name.
///
/// # Panics
///
/// Panics if an insert column is missing from the table context, if a row has
/// more values than the insert has columns, or if a defaulted column has no
/// literal default value.
pub fn replace_default(
    rows: &[RowValues],
    table_ctx_ref: &TableContextRef,
    insert_expr: &InsertIntoExpr,
) -> Vec<RowValues> {
    // Position in the insert column list -> position in the table's columns.
    let mut index_map: HashMap<usize, usize> = HashMap::new();
    for (insert_idx, insert_column) in insert_expr.columns.iter().enumerate() {
        let create_idx = table_ctx_ref
            .columns
            .iter()
            .position(|create_column| create_column.name == insert_column.name)
            .expect("Column not found in create_expr");
        index_map.insert(insert_idx, create_idx);
    }

    rows.iter()
        .map(|row| {
            row.iter()
                .enumerate()
                .map(|(idx, value)| match value {
                    RowValue::Default => {
                        let column = &table_ctx_ref.columns[index_map[&idx]];
                        RowValue::Value(column.default_value().unwrap().clone())
                    }
                    other => other.clone(),
                })
                .collect::<RowValues>()
        })
        .collect()
}
/// Sorts `rows` in place by the values at `primary_keys_idx`, compared in the
/// given order (the first index is the most significant key).
///
/// The sort is stable: rows whose key values are all equal keep their
/// relative order.
///
/// # Panics
///
/// Panics if an index is out of bounds for a row being compared, or if
/// `RowValue::cmp` returns `None` for a pair of key values.
pub fn sort_by_primary_keys(rows: &mut [RowValues], primary_keys_idx: Vec<usize>) {
    rows.sort_by(|a, b| {
        // Compare key by key without building temporary vectors on every
        // comparison; the first differing key decides the order.
        for &i in &primary_keys_idx {
            match a[i].cmp(&b[i]) {
                Some(std::cmp::Ordering::Equal) => continue,
                Some(ordering) => return ordering,
                None => panic!("primary key values at column index {i} are not comparable"),
            }
        }
        std::cmp::Ordering::Equal
    });
}
/// Renders the column names as a single string separated by `", "`.
///
/// Names are formatted through `Ident`'s `Display`, so quoted identifiers
/// keep their quote characters.
pub fn format_columns(columns: &[Column]) -> String {
    let names: Vec<String> = columns
        .iter()
        .map(|column| column.name.to_string())
        .collect();
    names.join(", ")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `uint64` column with the given name and options.
    fn uint64_column(name: &str, options: Vec<ColumnOption>) -> Column {
        Column {
            name: name.into(),
            column_type: ConcreteDataType::uint64_datatype(),
            options,
        }
    }

    #[test]
    fn test_droppable_columns() {
        // Primary key and time index columns must never be droppable.
        let columns = vec![
            uint64_column("hi", vec![ColumnOption::PrimaryKey]),
            uint64_column("foo", vec![ColumnOption::TimeIndex]),
        ];
        assert!(droppable_columns(&columns).is_empty());

        // Columns without options are all droppable.
        let columns = vec![uint64_column("hi", vec![]), uint64_column("foo", vec![])];
        assert_eq!(droppable_columns(&columns).len(), 2);
    }
}