tests_fuzz/
ir.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The intermediate representation.

17pub(crate) mod alter_expr;
18pub(crate) mod create_expr;
19pub(crate) mod insert_expr;
20pub(crate) mod select_expr;
21
22use core::fmt;
23use std::collections::HashMap;
24use std::sync::{Arc, Mutex};
25use std::time::Duration;
26
27pub use alter_expr::{AlterTableExpr, AlterTableOption};
28use common_time::timestamp::TimeUnit;
29use common_time::{Date, Timestamp};
30pub use create_expr::{CreateDatabaseExpr, CreateTableExpr};
31use datatypes::data_type::ConcreteDataType;
32use datatypes::types::TimestampType;
33use datatypes::value::Value;
34use derive_builder::Builder;
35pub use insert_expr::InsertIntoExpr;
36use lazy_static::lazy_static;
37use rand::seq::{IndexedRandom, SliceRandom};
38use rand::Rng;
39use serde::{Deserialize, Serialize};
40
41use self::insert_expr::{RowValue, RowValues};
42use crate::context::TableContextRef;
43use crate::fake::WordGenerator;
44use crate::generator::{Random, TsValueGenerator};
45use crate::impl_random;
46use crate::ir::create_expr::ColumnOption;
47
lazy_static! {
    /// Data types registered for `ColumnTypeGenerator`: boolean, signed integers and floats.
    pub static ref DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::boolean_datatype(),
        ConcreteDataType::int16_datatype(),
        ConcreteDataType::int32_datatype(),
        ConcreteDataType::int64_datatype(),
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
    ];
    /// Timestamp data types registered for `TsColumnTypeGenerator`, covering every precision
    /// from nanoseconds to seconds.
    pub static ref TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::timestamp_nanosecond_datatype(),
        ConcreteDataType::timestamp_microsecond_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        ConcreteDataType::timestamp_second_datatype(),
    ];
    /// Data types registered for `PartibleColumnTypeGenerator`. These are exactly the types
    /// that `generate_partition_bounds` knows how to produce bounds for.
    pub static ref PARTIBLE_DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::int16_datatype(),
        ConcreteDataType::int32_datatype(),
        ConcreteDataType::int64_datatype(),
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
        ConcreteDataType::string_datatype(),
    ];
    /// Data types registered for `StringColumnTypeGenerator`: the string type only.
    pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
        vec![ConcreteDataType::string_datatype()];
    /// Timestamp data types registered for `MySQLTsColumnTypeGenerator`; same as
    /// `TS_DATA_TYPES` minus the nanosecond precision.
    pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
        // MySQL only permits fractional seconds with up to microseconds (6 digits) precision.
        ConcreteDataType::timestamp_microsecond_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        ConcreteDataType::timestamp_second_datatype(),
    ];
}
80
81impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
82impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
83impl_random!(
84    ConcreteDataType,
85    MySQLTsColumnTypeGenerator,
86    MYSQL_TS_DATA_TYPES
87);
88impl_random!(
89    ConcreteDataType,
90    PartibleColumnTypeGenerator,
91    PARTIBLE_DATA_TYPES
92);
93impl_random!(
94    ConcreteDataType,
95    StringColumnTypeGenerator,
96    STRING_DATA_TYPES
97);
98
99pub struct ColumnTypeGenerator;
100pub struct TsColumnTypeGenerator;
101pub struct MySQLTsColumnTypeGenerator;
102pub struct PartibleColumnTypeGenerator;
103pub struct StringColumnTypeGenerator;
104
/// FIXME(weny): Waits for https://github.com/GreptimeTeam/greptimedb/issues/4247
///
/// Expands to a `Vec<Value>` of `$bounds` evenly spaced values of the numeric type
/// `$data_type`: the `i`-th entry (for `i` in `1..=$bounds`) is
/// `i * (<$data_type>::MAX / ($bounds + 1))`, so all entries lie strictly between zero
/// and the type's maximum.
macro_rules! generate_values {
    ($data_type:ty, $bounds:expr) => {{
        let origin = 0 as $data_type;
        // Width of one slot when `0..MAX` is cut into `$bounds + 1` equal parts.
        let stride = <$data_type>::MAX / ($bounds as $data_type + 1 as $data_type);
        (1..=$bounds)
            .map(|nth| Value::from(origin + stride * nth as $data_type))
            .collect::<Vec<Value>>()
    }};
}
115
116/// Generates partition bounds.
117pub fn generate_partition_bounds(datatype: &ConcreteDataType, bounds: usize) -> Vec<Value> {
118    match datatype {
119        ConcreteDataType::Int16(_) => generate_values!(i16, bounds),
120        ConcreteDataType::Int32(_) => generate_values!(i32, bounds),
121        ConcreteDataType::Int64(_) => generate_values!(i64, bounds),
122        ConcreteDataType::Float32(_) => generate_values!(f32, bounds),
123        ConcreteDataType::Float64(_) => generate_values!(f64, bounds),
124        ConcreteDataType::String(_) => {
125            let base = b'A';
126            let range = b'z' - b'A';
127            let step = range / (bounds as u8 + 1);
128            (1..=bounds)
129                .map(|i| {
130                    Value::from(
131                        char::from(base + step * i as u8)
132                            .escape_default()
133                            .to_string(),
134                    )
135                })
136                .collect()
137        }
138        _ => unimplemented!("unsupported type: {datatype}"),
139    }
140}
141
142/// Generates a random [Value].
143pub fn generate_random_value<R: Rng>(
144    rng: &mut R,
145    datatype: &ConcreteDataType,
146    random_str: Option<&dyn Random<Ident, R>>,
147) -> Value {
148    match datatype {
149        &ConcreteDataType::Boolean(_) => Value::from(rng.random::<bool>()),
150        ConcreteDataType::Int16(_) => Value::from(rng.random::<i16>()),
151        ConcreteDataType::Int32(_) => Value::from(rng.random::<i32>()),
152        ConcreteDataType::Int64(_) => Value::from(rng.random::<i64>()),
153        ConcreteDataType::Float32(_) => Value::from(rng.random::<f32>()),
154        ConcreteDataType::Float64(_) => Value::from(rng.random::<f64>()),
155        ConcreteDataType::String(_) => match random_str {
156            Some(random) => Value::from(random.gen(rng).value),
157            None => Value::from(rng.random::<char>().to_string()),
158        },
159        ConcreteDataType::Date(_) => generate_random_date(rng),
160
161        _ => unimplemented!("unsupported type: {datatype}"),
162    }
163}
164
165/// Generate monotonically increasing timestamps for MySQL.
166pub fn generate_unique_timestamp_for_mysql<R: Rng>(base: i64) -> TsValueGenerator<R> {
167    let base = Timestamp::new_millisecond(base);
168    let clock = Arc::new(Mutex::new(base));
169
170    Box::new(move |_rng, ts_type| -> Value {
171        let mut clock = clock.lock().unwrap();
172        let ts = clock.add_duration(Duration::from_secs(1)).unwrap();
173        *clock = ts;
174
175        let v = match ts_type {
176            TimestampType::Second(_) => ts.convert_to(TimeUnit::Second).unwrap(),
177            TimestampType::Millisecond(_) => ts.convert_to(TimeUnit::Millisecond).unwrap(),
178            TimestampType::Microsecond(_) => ts.convert_to(TimeUnit::Microsecond).unwrap(),
179            TimestampType::Nanosecond(_) => ts.convert_to(TimeUnit::Nanosecond).unwrap(),
180        };
181        Value::from(v)
182    })
183}
184
185/// Generate random timestamps.
186pub fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
187    let v = match ts_type {
188        TimestampType::Second(_) => {
189            let min = i64::from(Timestamp::MIN_SECOND);
190            let max = i64::from(Timestamp::MAX_SECOND);
191            let value = rng.random_range(min..=max);
192            Timestamp::new_second(value)
193        }
194        TimestampType::Millisecond(_) => {
195            let min = i64::from(Timestamp::MIN_MILLISECOND);
196            let max = i64::from(Timestamp::MAX_MILLISECOND);
197            let value = rng.random_range(min..=max);
198            Timestamp::new_millisecond(value)
199        }
200        TimestampType::Microsecond(_) => {
201            let min = i64::from(Timestamp::MIN_MICROSECOND);
202            let max = i64::from(Timestamp::MAX_MICROSECOND);
203            let value = rng.random_range(min..=max);
204            Timestamp::new_microsecond(value)
205        }
206        TimestampType::Nanosecond(_) => {
207            let min = i64::from(Timestamp::MIN_NANOSECOND);
208            let max = i64::from(Timestamp::MAX_NANOSECOND);
209            let value = rng.random_range(min..=max);
210            Timestamp::new_nanosecond(value)
211        }
212    };
213    Value::from(v)
214}
215
216// MySQL supports timestamp from '1970-01-01 00:00:01.000000' to '2038-01-19 03:14:07.499999'
217pub fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
218    let v = match ts_type {
219        TimestampType::Second(_) => {
220            let min = 1;
221            let max = 2_147_483_647;
222            let value = rng.random_range(min..=max);
223            Timestamp::new_second(value)
224        }
225        TimestampType::Millisecond(_) => {
226            let min = 1000;
227            let max = 2_147_483_647_499;
228            let value = rng.random_range(min..=max);
229            Timestamp::new_millisecond(value)
230        }
231        TimestampType::Microsecond(_) => {
232            let min = 1_000_000;
233            let max = 2_147_483_647_499_999;
234            let value = rng.random_range(min..=max);
235            Timestamp::new_microsecond(value)
236        }
237        TimestampType::Nanosecond(_) => {
238            let min = 1_000_000_000;
239            let max = 2_147_483_647_499_999_000;
240            let value = rng.random_range(min..=max);
241            Timestamp::new_nanosecond(value)
242        }
243    };
244    Value::from(v)
245}
246
247fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
248    let min = i64::from(Timestamp::MIN_MILLISECOND);
249    let max = i64::from(Timestamp::MAX_MILLISECOND);
250    let value = rng.random_range(min..=max);
251    let date = Timestamp::new_millisecond(value).to_chrono_date().unwrap();
252    Value::from(Date::from(date))
253}
254
255/// An identifier.
256#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
257pub struct Ident {
258    pub value: String,
259    pub quote_style: Option<char>,
260}
261
262impl Ident {
263    /// Creates a new identifier with the given value and no quotes.
264    pub fn new<S>(value: S) -> Self
265    where
266        S: Into<String>,
267    {
268        Ident {
269            value: value.into(),
270            quote_style: None,
271        }
272    }
273
274    /// Creates a new quoted identifier with the given quote and value.
275    pub fn with_quote<S>(quote: char, value: S) -> Self
276    where
277        S: Into<String>,
278    {
279        Ident {
280            value: value.into(),
281            quote_style: Some(quote),
282        }
283    }
284
285    pub fn is_empty(&self) -> bool {
286        self.value.is_empty()
287    }
288}
289
290impl From<&str> for Ident {
291    fn from(value: &str) -> Self {
292        Ident {
293            value: value.to_string(),
294            quote_style: None,
295        }
296    }
297}
298
299impl From<String> for Ident {
300    fn from(value: String) -> Self {
301        Ident {
302            value,
303            quote_style: None,
304        }
305    }
306}
307
308impl fmt::Display for Ident {
309    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310        match self.quote_style {
311            Some(q) => write!(f, "{q}{}{q}", self.value),
312            None => f.write_str(&self.value),
313        }
314    }
315}
316
317/// The IR column.
318#[derive(Debug, Builder, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
319pub struct Column {
320    #[builder(setter(into))]
321    pub name: Ident,
322    pub column_type: ConcreteDataType,
323    #[builder(default, setter(into))]
324    pub options: Vec<ColumnOption>,
325}
326
327impl Column {
328    /// Returns [TimestampType] if it's [ColumnOption::TimeIndex] [Column].
329    pub fn timestamp_type(&self) -> Option<TimestampType> {
330        if let ConcreteDataType::Timestamp(ts_type) = self.column_type {
331            Some(ts_type)
332        } else {
333            None
334        }
335    }
336
337    /// Returns true if it's [ColumnOption::TimeIndex] [Column].
338    pub fn is_time_index(&self) -> bool {
339        self.options
340            .iter()
341            .any(|opt| opt == &ColumnOption::TimeIndex)
342    }
343
344    /// Returns true if it's the [ColumnOption::PrimaryKey] [Column].
345    pub fn is_primary_key(&self) -> bool {
346        self.options
347            .iter()
348            .any(|opt| opt == &ColumnOption::PrimaryKey)
349    }
350
351    /// Returns true if it's nullable.
352    pub fn is_nullable(&self) -> bool {
353        !self
354            .options
355            .iter()
356            .any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
357    }
358
359    // Returns true if it has default value.
360    pub fn has_default_value(&self) -> bool {
361        self.options.iter().any(|opt| {
362            matches!(
363                opt,
364                ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
365            )
366        })
367    }
368
369    // Returns default value if it has.
370    pub fn default_value(&self) -> Option<&Value> {
371        self.options.iter().find_map(|opt| match opt {
372            ColumnOption::DefaultValue(value) => Some(value),
373            _ => None,
374        })
375    }
376}
377
378/// Returns droppable columns. i.e., non-primary key columns, non-ts columns.
379pub fn droppable_columns(columns: &[Column]) -> Vec<&Column> {
380    columns
381        .iter()
382        .filter(|column| {
383            !column.options.iter().any(|option| {
384                option == &ColumnOption::PrimaryKey || option == &ColumnOption::TimeIndex
385            })
386        })
387        .collect::<Vec<_>>()
388}
389
390/// Returns columns that can use the alter table modify command
391pub fn modifiable_columns(columns: &[Column]) -> Vec<&Column> {
392    columns
393        .iter()
394        .filter(|column| {
395            !column.options.iter().any(|option| {
396                option == &ColumnOption::PrimaryKey
397                    || option == &ColumnOption::TimeIndex
398                    || option == &ColumnOption::NotNull
399            })
400        })
401        .collect::<Vec<_>>()
402}
403
404/// Generates [ColumnOption] for [Column].
405pub fn column_options_generator<R: Rng>(
406    rng: &mut R,
407    column_type: &ConcreteDataType,
408) -> Vec<ColumnOption> {
409    // 0 -> NULL
410    // 1 -> NOT NULL
411    // 2 -> DEFAULT VALUE
412    // 3 -> PRIMARY KEY
413    // 4 -> EMPTY
414    let option_idx = rng.random_range(0..5);
415    match option_idx {
416        0 => vec![ColumnOption::Null],
417        1 => vec![ColumnOption::NotNull],
418        2 => vec![ColumnOption::DefaultValue(generate_random_value(
419            rng,
420            column_type,
421            None,
422        ))],
423        3 => vec![ColumnOption::PrimaryKey],
424        _ => vec![],
425    }
426}
427
428/// Generates [ColumnOption] for Partible [Column].
429pub fn partible_column_options_generator<R: Rng + 'static>(
430    rng: &mut R,
431    column_type: &ConcreteDataType,
432) -> Vec<ColumnOption> {
433    // 0 -> NULL
434    // 1 -> NOT NULL
435    // 2 -> DEFAULT VALUE
436    // 3 -> PRIMARY KEY
437    let option_idx = rng.random_range(0..4);
438    match option_idx {
439        0 => vec![ColumnOption::PrimaryKey, ColumnOption::Null],
440        1 => vec![ColumnOption::PrimaryKey, ColumnOption::NotNull],
441        2 => vec![
442            ColumnOption::PrimaryKey,
443            ColumnOption::DefaultValue(generate_random_value(
444                rng,
445                column_type,
446                Some(&WordGenerator),
447            )),
448        ],
449        3 => vec![ColumnOption::PrimaryKey],
450        _ => unreachable!(),
451    }
452}
453
454/// Generates [ColumnOption] for ts [Column].
455pub fn ts_column_options_generator<R: Rng + 'static>(
456    _: &mut R,
457    _: &ConcreteDataType,
458) -> Vec<ColumnOption> {
459    vec![ColumnOption::TimeIndex]
460}
461
462pub fn primary_key_and_not_null_column_options_generator<R: Rng + 'static>(
463    _: &mut R,
464    _: &ConcreteDataType,
465) -> Vec<ColumnOption> {
466    vec![ColumnOption::PrimaryKey, ColumnOption::NotNull]
467}
468
469pub fn primary_key_options_generator<R: Rng + 'static>(
470    _: &mut R,
471    _: &ConcreteDataType,
472) -> Vec<ColumnOption> {
473    vec![ColumnOption::PrimaryKey]
474}
475
476/// Generates columns with given `names`.
477pub fn generate_columns<R: Rng + 'static>(
478    rng: &mut R,
479    names: impl IntoIterator<Item = Ident>,
480    types: &(impl Random<ConcreteDataType, R> + ?Sized),
481    options: impl Fn(&mut R, &ConcreteDataType) -> Vec<ColumnOption>,
482) -> Vec<Column> {
483    names
484        .into_iter()
485        .map(|name| {
486            let column_type = types.gen(rng);
487            let options = options(rng, &column_type);
488            Column {
489                name,
490                options,
491                column_type,
492            }
493        })
494        .collect()
495}
496
497/// Replace Value::Default with the corresponding default value in the rows for comparison.
498pub fn replace_default(
499    rows: &[RowValues],
500    table_ctx_ref: &TableContextRef,
501    insert_expr: &InsertIntoExpr,
502) -> Vec<RowValues> {
503    let index_map: HashMap<usize, usize> = insert_expr
504        .columns
505        .iter()
506        .enumerate()
507        .map(|(insert_idx, insert_column)| {
508            let create_idx = table_ctx_ref
509                .columns
510                .iter()
511                .position(|create_column| create_column.name == insert_column.name)
512                .expect("Column not found in create_expr");
513            (insert_idx, create_idx)
514        })
515        .collect();
516
517    let mut new_rows = Vec::new();
518    for row in rows {
519        let mut new_row = Vec::new();
520        for (idx, value) in row.iter().enumerate() {
521            if let RowValue::Default = value {
522                let column = &table_ctx_ref.columns[index_map[&idx]];
523                new_row.push(RowValue::Value(column.default_value().unwrap().clone()));
524            } else {
525                new_row.push(value.clone());
526            }
527        }
528        new_rows.push(new_row);
529    }
530    new_rows
531}
532
533/// Sorts a vector of rows based on the values in the specified primary key columns.
534pub fn sort_by_primary_keys(rows: &mut [RowValues], primary_keys_idx: Vec<usize>) {
535    rows.sort_by(|a, b| {
536        let a_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &a[i]).collect();
537        let b_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &b[i]).collect();
538        for (a_key, b_key) in a_keys.iter().zip(b_keys.iter()) {
539            match a_key.cmp(b_key) {
540                Some(std::cmp::Ordering::Equal) => continue,
541                non_eq => return non_eq.unwrap(),
542            }
543        }
544        std::cmp::Ordering::Equal
545    });
546}
547
548/// Formats a slice of columns into a comma-separated string of column names.
549pub fn format_columns(columns: &[Column]) -> String {
550    columns
551        .iter()
552        .map(|c| c.name.to_string())
553        .collect::<Vec<_>>()
554        .join(", ")
555        .to_string()
556}
557
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `uint64` column with the given name and options.
    fn column_with(name: &str, options: Vec<ColumnOption>) -> Column {
        Column {
            name: name.into(),
            column_type: ConcreteDataType::uint64_datatype(),
            options,
        }
    }

    #[test]
    fn test_droppable_columns() {
        // Primary-key and time-index columns must not be reported as droppable.
        let protected = vec![
            column_with("hi", vec![ColumnOption::PrimaryKey]),
            column_with("foo", vec![ColumnOption::TimeIndex]),
        ];
        assert!(droppable_columns(&protected).is_empty());

        // Columns without any option are all droppable.
        let plain = vec![column_with("hi", vec![]), column_with("foo", vec![])];
        assert_eq!(droppable_columns(&plain).len(), 2);
    }
}