// tests_fuzz/ir.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The intermediate representation

17pub(crate) mod alter_expr;
18pub(crate) mod create_expr;
19pub(crate) mod insert_expr;
20pub(crate) mod partition_expr;
21pub(crate) mod repartition_expr;
22pub(crate) mod select_expr;
23
use core::fmt;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Duration;

pub use alter_expr::{AlterTableExpr, AlterTableOption};
use common_time::timestamp::TimeUnit;
use common_time::{Date, Timestamp};
pub use create_expr::{CreateDatabaseExpr, CreateTableExpr};
use datatypes::data_type::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use derive_builder::Builder;
pub use insert_expr::InsertIntoExpr;
use lazy_static::lazy_static;
use rand::Rng;
use rand::seq::{IndexedRandom, SliceRandom};
pub use repartition_expr::RepartitionExpr;
use serde::{Deserialize, Serialize};

use self::insert_expr::{RowValue, RowValues};
use crate::context::TableContextRef;
use crate::fake::WordGenerator;
use crate::generator::{Random, TsValueGenerator};
use crate::impl_random;
use crate::ir::create_expr::ColumnOption;

51lazy_static! {
52    pub static ref DATA_TYPES: Vec<ConcreteDataType> = vec![
53        ConcreteDataType::boolean_datatype(),
54        ConcreteDataType::int16_datatype(),
55        ConcreteDataType::int32_datatype(),
56        ConcreteDataType::int64_datatype(),
57        ConcreteDataType::float32_datatype(),
58        ConcreteDataType::float64_datatype(),
59    ];
60    pub static ref TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
61        ConcreteDataType::timestamp_nanosecond_datatype(),
62        ConcreteDataType::timestamp_microsecond_datatype(),
63        ConcreteDataType::timestamp_millisecond_datatype(),
64        ConcreteDataType::timestamp_second_datatype(),
65    ];
66    pub static ref PARTIBLE_DATA_TYPES: Vec<ConcreteDataType> = vec![
67        ConcreteDataType::int16_datatype(),
68        ConcreteDataType::int32_datatype(),
69        ConcreteDataType::int64_datatype(),
70        ConcreteDataType::float32_datatype(),
71        ConcreteDataType::float64_datatype(),
72        ConcreteDataType::string_datatype(),
73    ];
74    pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
75        vec![ConcreteDataType::string_datatype()];
76    pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
77        // MySQL only permits fractional seconds with up to microseconds (6 digits) precision.
78        ConcreteDataType::timestamp_microsecond_datatype(),
79        ConcreteDataType::timestamp_millisecond_datatype(),
80        ConcreteDataType::timestamp_second_datatype(),
81    ];
82}
83
84impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
85impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
86impl_random!(
87    ConcreteDataType,
88    MySQLTsColumnTypeGenerator,
89    MYSQL_TS_DATA_TYPES
90);
91impl_random!(
92    ConcreteDataType,
93    PartibleColumnTypeGenerator,
94    PARTIBLE_DATA_TYPES
95);
96impl_random!(
97    ConcreteDataType,
98    StringColumnTypeGenerator,
99    STRING_DATA_TYPES
100);
101
102pub struct ColumnTypeGenerator;
103pub struct TsColumnTypeGenerator;
104pub struct MySQLTsColumnTypeGenerator;
105pub struct PartibleColumnTypeGenerator;
106pub struct StringColumnTypeGenerator;
107
/// FIXME(weny): Waits for https://github.com/GreptimeTeam/greptimedb/issues/4247
///
/// Expands to a `Vec<Value>` of `$bounds` evenly spaced values of `$data_type`: the `n`-th
/// value (1-based) is `n * (MAX / ($bounds + 1))`.
macro_rules! generate_values {
    ($data_type:ty, $bounds:expr) => {{
        // Distance between two consecutive bounds.
        let stride = <$data_type>::MAX / ($bounds as $data_type + 1 as $data_type);
        let mut values: Vec<Value> = Vec::new();
        for nth in 1..=$bounds {
            values.push(Value::from(stride * nth as $data_type));
        }
        values
    }};
}

119/// Generates partition bounds.
120pub fn generate_partition_bounds(datatype: &ConcreteDataType, bounds: usize) -> Vec<Value> {
121    match datatype {
122        ConcreteDataType::Int16(_) => generate_values!(i16, bounds),
123        ConcreteDataType::Int32(_) => generate_values!(i32, bounds),
124        ConcreteDataType::Int64(_) => generate_values!(i64, bounds),
125        ConcreteDataType::Float32(_) => generate_values!(f32, bounds),
126        ConcreteDataType::Float64(_) => generate_values!(f64, bounds),
127        ConcreteDataType::String(_) => {
128            let base = b'A';
129            let range = b'z' - b'A';
130            let step = range / (bounds as u8 + 1);
131            (1..=bounds)
132                .map(|i| {
133                    Value::from(
134                        char::from(base + step * i as u8)
135                            .escape_default()
136                            .to_string(),
137                    )
138                })
139                .collect()
140        }
141        _ => unimplemented!("unsupported type: {datatype}"),
142    }
143}
144
145/// Generates a random [Value].
146pub fn generate_random_value<R: Rng>(
147    rng: &mut R,
148    datatype: &ConcreteDataType,
149    random_str: Option<&dyn Random<Ident, R>>,
150) -> Value {
151    match datatype {
152        &ConcreteDataType::Boolean(_) => Value::from(rng.random::<bool>()),
153        ConcreteDataType::Int16(_) => Value::from(rng.random::<i16>()),
154        ConcreteDataType::Int32(_) => Value::from(rng.random::<i32>()),
155        ConcreteDataType::Int64(_) => Value::from(rng.random::<i64>()),
156        ConcreteDataType::Float32(_) => Value::from(rng.random::<f32>()),
157        ConcreteDataType::Float64(_) => Value::from(rng.random::<f64>()),
158        ConcreteDataType::String(_) => match random_str {
159            Some(random) => Value::from(random.generate(rng).value),
160            None => Value::from(rng.random::<char>().to_string()),
161        },
162        ConcreteDataType::Date(_) => generate_random_date(rng),
163
164        _ => unimplemented!("unsupported type: {datatype}"),
165    }
166}
167
168/// Generate monotonically increasing timestamps for MySQL.
169pub fn generate_unique_timestamp_for_mysql<R: Rng>(base: i64) -> TsValueGenerator<R> {
170    let base = Timestamp::new_millisecond(base);
171    let clock = Arc::new(Mutex::new(base));
172
173    Box::new(move |_rng, ts_type| -> Value {
174        let mut clock = clock.lock().unwrap();
175        let ts = clock.add_duration(Duration::from_secs(1)).unwrap();
176        *clock = ts;
177
178        let v = match ts_type {
179            TimestampType::Second(_) => ts.convert_to(TimeUnit::Second).unwrap(),
180            TimestampType::Millisecond(_) => ts.convert_to(TimeUnit::Millisecond).unwrap(),
181            TimestampType::Microsecond(_) => ts.convert_to(TimeUnit::Microsecond).unwrap(),
182            TimestampType::Nanosecond(_) => ts.convert_to(TimeUnit::Nanosecond).unwrap(),
183        };
184        Value::from(v)
185    })
186}
187
188/// Generate random timestamps.
189pub fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
190    let v = match ts_type {
191        TimestampType::Second(_) => {
192            let min = i64::from(Timestamp::MIN_SECOND);
193            let max = i64::from(Timestamp::MAX_SECOND);
194            let value = rng.random_range(min..=max);
195            Timestamp::new_second(value)
196        }
197        TimestampType::Millisecond(_) => {
198            let min = i64::from(Timestamp::MIN_MILLISECOND);
199            let max = i64::from(Timestamp::MAX_MILLISECOND);
200            let value = rng.random_range(min..=max);
201            Timestamp::new_millisecond(value)
202        }
203        TimestampType::Microsecond(_) => {
204            let min = i64::from(Timestamp::MIN_MICROSECOND);
205            let max = i64::from(Timestamp::MAX_MICROSECOND);
206            let value = rng.random_range(min..=max);
207            Timestamp::new_microsecond(value)
208        }
209        TimestampType::Nanosecond(_) => {
210            let min = i64::from(Timestamp::MIN_NANOSECOND);
211            let max = i64::from(Timestamp::MAX_NANOSECOND);
212            let value = rng.random_range(min..=max);
213            Timestamp::new_nanosecond(value)
214        }
215    };
216    Value::from(v)
217}
218
219// MySQL supports timestamp from '1970-01-01 00:00:01.000000' to '2038-01-19 03:14:07.499999'
220pub fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
221    let v = match ts_type {
222        TimestampType::Second(_) => {
223            let min = 1;
224            let max = 2_147_483_647;
225            let value = rng.random_range(min..=max);
226            Timestamp::new_second(value)
227        }
228        TimestampType::Millisecond(_) => {
229            let min = 1000;
230            let max = 2_147_483_647_499;
231            let value = rng.random_range(min..=max);
232            Timestamp::new_millisecond(value)
233        }
234        TimestampType::Microsecond(_) => {
235            let min = 1_000_000;
236            let max = 2_147_483_647_499_999;
237            let value = rng.random_range(min..=max);
238            Timestamp::new_microsecond(value)
239        }
240        TimestampType::Nanosecond(_) => {
241            let min = 1_000_000_000;
242            let max = 2_147_483_647_499_999_000;
243            let value = rng.random_range(min..=max);
244            Timestamp::new_nanosecond(value)
245        }
246    };
247    Value::from(v)
248}
249
250fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
251    let min = i64::from(Timestamp::MIN_MILLISECOND);
252    let max = i64::from(Timestamp::MAX_MILLISECOND);
253    let value = rng.random_range(min..=max);
254    let date = Timestamp::new_millisecond(value).to_chrono_date().unwrap();
255    Value::from(Date::from(date))
256}
257
258/// An identifier.
259#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
260pub struct Ident {
261    pub value: String,
262    pub quote_style: Option<char>,
263}
264
265impl Ident {
266    /// Creates a new identifier with the given value and no quotes.
267    pub fn new<S>(value: S) -> Self
268    where
269        S: Into<String>,
270    {
271        Ident {
272            value: value.into(),
273            quote_style: None,
274        }
275    }
276
277    /// Creates a new quoted identifier with the given quote and value.
278    pub fn with_quote<S>(quote: char, value: S) -> Self
279    where
280        S: Into<String>,
281    {
282        Ident {
283            value: value.into(),
284            quote_style: Some(quote),
285        }
286    }
287
288    pub fn is_empty(&self) -> bool {
289        self.value.is_empty()
290    }
291}
292
293impl From<&str> for Ident {
294    fn from(value: &str) -> Self {
295        Ident {
296            value: value.to_string(),
297            quote_style: None,
298        }
299    }
300}
301
302impl From<String> for Ident {
303    fn from(value: String) -> Self {
304        Ident {
305            value,
306            quote_style: None,
307        }
308    }
309}
310
311impl fmt::Display for Ident {
312    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
313        match self.quote_style {
314            Some(q) => write!(f, "{q}{}{q}", self.value),
315            None => f.write_str(&self.value),
316        }
317    }
318}
319
320/// The IR column.
321#[derive(Debug, Builder, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
322pub struct Column {
323    #[builder(setter(into))]
324    pub name: Ident,
325    pub column_type: ConcreteDataType,
326    #[builder(default, setter(into))]
327    pub options: Vec<ColumnOption>,
328}
329
330impl Column {
331    /// Returns [TimestampType] if it's [ColumnOption::TimeIndex] [Column].
332    pub fn timestamp_type(&self) -> Option<TimestampType> {
333        if let ConcreteDataType::Timestamp(ts_type) = self.column_type {
334            Some(ts_type)
335        } else {
336            None
337        }
338    }
339
340    /// Returns true if it's [ColumnOption::TimeIndex] [Column].
341    pub fn is_time_index(&self) -> bool {
342        self.options
343            .iter()
344            .any(|opt| opt == &ColumnOption::TimeIndex)
345    }
346
347    /// Returns true if it's the [ColumnOption::PrimaryKey] [Column].
348    pub fn is_primary_key(&self) -> bool {
349        self.options
350            .iter()
351            .any(|opt| opt == &ColumnOption::PrimaryKey)
352    }
353
354    /// Returns true if it's nullable.
355    pub fn is_nullable(&self) -> bool {
356        !self
357            .options
358            .iter()
359            .any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
360    }
361
362    // Returns true if it has default value.
363    pub fn has_default_value(&self) -> bool {
364        self.options.iter().any(|opt| {
365            matches!(
366                opt,
367                ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
368            )
369        })
370    }
371
372    // Returns default value if it has.
373    pub fn default_value(&self) -> Option<&Value> {
374        self.options.iter().find_map(|opt| match opt {
375            ColumnOption::DefaultValue(value) => Some(value),
376            _ => None,
377        })
378    }
379}
380
381/// Returns droppable columns. i.e., non-primary key columns, non-ts columns.
382pub fn droppable_columns(columns: &[Column]) -> Vec<&Column> {
383    columns
384        .iter()
385        .filter(|column| {
386            !column.options.iter().any(|option| {
387                option == &ColumnOption::PrimaryKey || option == &ColumnOption::TimeIndex
388            })
389        })
390        .collect::<Vec<_>>()
391}
392
393/// Returns columns that can use the alter table modify command
394pub fn modifiable_columns(columns: &[Column]) -> Vec<&Column> {
395    columns
396        .iter()
397        .filter(|column| {
398            !column.options.iter().any(|option| {
399                option == &ColumnOption::PrimaryKey
400                    || option == &ColumnOption::TimeIndex
401                    || option == &ColumnOption::NotNull
402            })
403        })
404        .collect::<Vec<_>>()
405}
406
407/// Generates [ColumnOption] for [Column].
408pub fn column_options_generator<R: Rng>(
409    rng: &mut R,
410    column_type: &ConcreteDataType,
411) -> Vec<ColumnOption> {
412    // 0 -> NULL
413    // 1 -> NOT NULL
414    // 2 -> DEFAULT VALUE
415    // 3 -> PRIMARY KEY
416    // 4 -> EMPTY
417    let option_idx = rng.random_range(0..5);
418    match option_idx {
419        0 => vec![ColumnOption::Null],
420        1 => vec![ColumnOption::NotNull],
421        2 => vec![ColumnOption::DefaultValue(generate_random_value(
422            rng,
423            column_type,
424            None,
425        ))],
426        3 => vec![ColumnOption::PrimaryKey],
427        _ => vec![],
428    }
429}
430
431/// Generates [ColumnOption] for Partible [Column].
432pub fn partible_column_options_generator<R: Rng + 'static>(
433    rng: &mut R,
434    column_type: &ConcreteDataType,
435) -> Vec<ColumnOption> {
436    // 0 -> NULL
437    // 1 -> NOT NULL
438    // 2 -> DEFAULT VALUE
439    // 3 -> PRIMARY KEY
440    let option_idx = rng.random_range(0..4);
441    match option_idx {
442        0 => vec![ColumnOption::PrimaryKey, ColumnOption::Null],
443        1 => vec![ColumnOption::PrimaryKey, ColumnOption::NotNull],
444        2 => vec![
445            ColumnOption::PrimaryKey,
446            ColumnOption::DefaultValue(generate_random_value(
447                rng,
448                column_type,
449                Some(&WordGenerator),
450            )),
451        ],
452        3 => vec![ColumnOption::PrimaryKey],
453        _ => unreachable!(),
454    }
455}
456
457/// Generates [ColumnOption] for ts [Column].
458pub fn ts_column_options_generator<R: Rng + 'static>(
459    _: &mut R,
460    _: &ConcreteDataType,
461) -> Vec<ColumnOption> {
462    vec![ColumnOption::TimeIndex]
463}
464
465pub fn primary_key_and_not_null_column_options_generator<R: Rng + 'static>(
466    _: &mut R,
467    _: &ConcreteDataType,
468) -> Vec<ColumnOption> {
469    vec![ColumnOption::PrimaryKey, ColumnOption::NotNull]
470}
471
472pub fn primary_key_options_generator<R: Rng + 'static>(
473    _: &mut R,
474    _: &ConcreteDataType,
475) -> Vec<ColumnOption> {
476    vec![ColumnOption::PrimaryKey]
477}
478
479/// Generates columns with given `names`.
480pub fn generate_columns<R: Rng + 'static>(
481    rng: &mut R,
482    names: impl IntoIterator<Item = Ident>,
483    types: &(impl Random<ConcreteDataType, R> + ?Sized),
484    options: impl Fn(&mut R, &ConcreteDataType) -> Vec<ColumnOption>,
485) -> Vec<Column> {
486    names
487        .into_iter()
488        .map(|name| {
489            let column_type = types.generate(rng);
490            let options = options(rng, &column_type);
491            Column {
492                name,
493                options,
494                column_type,
495            }
496        })
497        .collect()
498}
499
500/// Replace Value::Default with the corresponding default value in the rows for comparison.
501pub fn replace_default(
502    rows: &[RowValues],
503    table_ctx_ref: &TableContextRef,
504    insert_expr: &InsertIntoExpr,
505) -> Vec<RowValues> {
506    let index_map: HashMap<usize, usize> = insert_expr
507        .columns
508        .iter()
509        .enumerate()
510        .map(|(insert_idx, insert_column)| {
511            let create_idx = table_ctx_ref
512                .columns
513                .iter()
514                .position(|create_column| create_column.name == insert_column.name)
515                .expect("Column not found in create_expr");
516            (insert_idx, create_idx)
517        })
518        .collect();
519
520    let mut new_rows = Vec::new();
521    for row in rows {
522        let mut new_row = Vec::new();
523        for (idx, value) in row.iter().enumerate() {
524            if let RowValue::Default = value {
525                let column = &table_ctx_ref.columns[index_map[&idx]];
526                new_row.push(RowValue::Value(column.default_value().unwrap().clone()));
527            } else {
528                new_row.push(value.clone());
529            }
530        }
531        new_rows.push(new_row);
532    }
533    new_rows
534}
535
536/// Sorts a vector of rows based on the values in the specified primary key columns.
537pub fn sort_by_primary_keys(rows: &mut [RowValues], primary_keys_idx: Vec<usize>) {
538    rows.sort_by(|a, b| {
539        let a_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &a[i]).collect();
540        let b_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &b[i]).collect();
541        for (a_key, b_key) in a_keys.iter().zip(b_keys.iter()) {
542            match a_key.cmp(b_key) {
543                Some(std::cmp::Ordering::Equal) => continue,
544                non_eq => return non_eq.unwrap(),
545            }
546        }
547        std::cmp::Ordering::Equal
548    });
549}
550
551/// Formats a slice of columns into a comma-separated string of column names.
552pub fn format_columns(columns: &[Column]) -> String {
553    columns
554        .iter()
555        .map(|c| c.name.to_string())
556        .collect::<Vec<_>>()
557        .join(", ")
558}
559
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `uint64` column with the given name and options.
    fn uint64_column(name: &str, options: Vec<ColumnOption>) -> Column {
        Column {
            name: name.into(),
            column_type: ConcreteDataType::uint64_datatype(),
            options,
        }
    }

    #[test]
    fn test_droppable_columns() {
        // Primary key and time index columns are never droppable.
        let columns = vec![
            uint64_column("hi", vec![ColumnOption::PrimaryKey]),
            uint64_column("foo", vec![ColumnOption::TimeIndex]),
        ];
        let droppable = droppable_columns(&columns);
        assert!(droppable.is_empty());

        // Columns without any option are all droppable.
        let columns = vec![uint64_column("hi", vec![]), uint64_column("foo", vec![])];
        let droppable = droppable_columns(&columns);
        assert_eq!(droppable.len(), 2);
    }
}
596}