tests_fuzz/
ir.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! The intermediate representation
16
17pub(crate) mod alter_expr;
18pub(crate) mod create_expr;
19pub(crate) mod insert_expr;
20pub(crate) mod partition_expr;
21pub(crate) mod repartition_expr;
22pub(crate) mod select_expr;
23pub(crate) mod string_value;
24
25use core::fmt;
26use std::collections::HashMap;
27use std::sync::{Arc, Mutex};
28use std::time::Duration;
29
30pub use alter_expr::{AlterTableExpr, AlterTableOption};
31use common_time::timestamp::TimeUnit;
32use common_time::{Date, Timestamp};
33pub use create_expr::{CreateDatabaseExpr, CreateTableExpr};
34use datatypes::data_type::ConcreteDataType;
35use datatypes::types::TimestampType;
36use datatypes::value::Value;
37use derive_builder::Builder;
38pub use insert_expr::InsertIntoExpr;
39use lazy_static::lazy_static;
40pub use partition_expr::SimplePartitions;
41use rand::Rng;
42use rand::seq::{IndexedRandom, SliceRandom};
43pub use repartition_expr::RepartitionExpr;
44use serde::{Deserialize, Serialize};
45
46use self::insert_expr::RowValues;
47use crate::context::TableContextRef;
48use crate::fake::WordGenerator;
49use crate::generator::{Random, TsValueGenerator};
50use crate::impl_random;
51use crate::ir::create_expr::ColumnOption;
52pub use crate::ir::insert_expr::RowValue;
53
lazy_static! {
    /// Data types registered with `impl_random!` for [`ColumnTypeGenerator`]:
    /// boolean, signed integers and floats.
    pub static ref DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::boolean_datatype(),
        ConcreteDataType::int16_datatype(),
        ConcreteDataType::int32_datatype(),
        ConcreteDataType::int64_datatype(),
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
    ];
    /// Timestamp data types registered for [`TsColumnTypeGenerator`], covering every
    /// precision from nanoseconds down to seconds.
    pub static ref TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::timestamp_nanosecond_datatype(),
        ConcreteDataType::timestamp_microsecond_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        ConcreteDataType::timestamp_second_datatype(),
    ];
    /// Data types registered for [`PartibleColumnTypeGenerator`]; these are exactly the
    /// types handled by `generate_partition_bounds` and `generate_partition_value`.
    pub static ref PARTIBLE_DATA_TYPES: Vec<ConcreteDataType> = vec![
        ConcreteDataType::int16_datatype(),
        ConcreteDataType::int32_datatype(),
        ConcreteDataType::int64_datatype(),
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
        ConcreteDataType::string_datatype(),
    ];
    /// Data types registered for [`StringColumnTypeGenerator`]: the string type only.
    pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
        vec![ConcreteDataType::string_datatype()];
    /// Timestamp data types registered for [`MySQLTsColumnTypeGenerator`]; the nanosecond
    /// precision is deliberately left out (see the note below).
    pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
        // MySQL only permits fractional seconds with up to microseconds (6 digits) precision.
        ConcreteDataType::timestamp_microsecond_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        ConcreteDataType::timestamp_second_datatype(),
    ];
}
86
// Wire each generator marker type to the static list of data types it draws from.
// NOTE(review): `impl_random!` is defined elsewhere in the crate; presumably it implements
// `Random<ConcreteDataType, R>` for the marker by sampling from the given list — confirm
// against the macro definition.
impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
impl_random!(
    ConcreteDataType,
    MySQLTsColumnTypeGenerator,
    MYSQL_TS_DATA_TYPES
);
impl_random!(
    ConcreteDataType,
    PartibleColumnTypeGenerator,
    PARTIBLE_DATA_TYPES
);
impl_random!(
    ConcreteDataType,
    StringColumnTypeGenerator,
    STRING_DATA_TYPES
);
104
/// Marker type registered with `impl_random!` against [`DATA_TYPES`].
pub struct ColumnTypeGenerator;
/// Marker type registered with `impl_random!` against [`TS_DATA_TYPES`].
pub struct TsColumnTypeGenerator;
/// Marker type registered with `impl_random!` against [`MYSQL_TS_DATA_TYPES`].
pub struct MySQLTsColumnTypeGenerator;
/// Marker type registered with `impl_random!` against [`PARTIBLE_DATA_TYPES`].
pub struct PartibleColumnTypeGenerator;
/// Marker type registered with `impl_random!` against [`STRING_DATA_TYPES`].
pub struct StringColumnTypeGenerator;
110
/// Produces `$bounds` evenly spaced, strictly positive values of the numeric type
/// `$data_type`, wrapped as [Value]s.
///
/// The spacing is `MAX / ($bounds + 1)`, so the i-th value (1-based) is `i * spacing` and
/// every value stays below `<$data_type>::MAX`.
///
/// FIXME(weny): Waits for https://github.com/GreptimeTeam/greptimedb/issues/4247
macro_rules! generate_values {
    ($data_type:ty, $bounds:expr) => {{
        let bound_count = $bounds;
        // `1 as $data_type` keeps the expression valid for both integer and float types.
        let spacing = <$data_type>::MAX / (bound_count as $data_type + 1 as $data_type);
        (1..=bound_count)
            .map(|i| Value::from(spacing * i as $data_type))
            .collect::<Vec<Value>>()
    }};
}
121
122/// Generates partition bounds.
123pub fn generate_partition_bounds(datatype: &ConcreteDataType, bounds: usize) -> Vec<Value> {
124    match datatype {
125        ConcreteDataType::Int16(_) => generate_values!(i16, bounds),
126        ConcreteDataType::Int32(_) => generate_values!(i32, bounds),
127        ConcreteDataType::Int64(_) => generate_values!(i64, bounds),
128        ConcreteDataType::Float32(_) => generate_values!(f32, bounds),
129        ConcreteDataType::Float64(_) => generate_values!(f64, bounds),
130        ConcreteDataType::String(_) => string_value::generate_partition_bounds(bounds),
131        _ => unimplemented!("unsupported type: {datatype}"),
132    }
133}
134
135/// Generates a random [Value].
136pub fn generate_random_value<R: Rng>(
137    rng: &mut R,
138    datatype: &ConcreteDataType,
139    random_str: Option<&dyn Random<Ident, R>>,
140) -> Value {
141    match datatype {
142        &ConcreteDataType::Boolean(_) => Value::from(rng.random::<bool>()),
143        ConcreteDataType::Int16(_) => Value::from(rng.random::<i16>()),
144        ConcreteDataType::Int32(_) => Value::from(rng.random::<i32>()),
145        ConcreteDataType::Int64(_) => Value::from(rng.random::<i64>()),
146        ConcreteDataType::Float32(_) => Value::from(rng.random::<f32>()),
147        ConcreteDataType::Float64(_) => Value::from(rng.random::<f64>()),
148        ConcreteDataType::String(_) => string_value::generate_data_string_value(rng, random_str),
149        ConcreteDataType::Date(_) => generate_random_date(rng),
150
151        _ => unimplemented!("unsupported type: {datatype}"),
152    }
153}
154
155/// Generate monotonically increasing timestamps for MySQL.
156pub fn generate_unique_timestamp_for_mysql<R: Rng>(base: i64) -> TsValueGenerator<R> {
157    let base = Timestamp::new_millisecond(base);
158    generate_unique_timestamp_for_mysql_with_clock(Arc::new(Mutex::new(base)))
159}
160
161/// Generates a unique timestamp for MySQL.
162pub fn generate_unique_timestamp_for_mysql_with_clock<R: Rng>(
163    clock: Arc<Mutex<Timestamp>>,
164) -> TsValueGenerator<R> {
165    Box::new(move |_rng, ts_type| -> Value {
166        let mut clock = clock.lock().unwrap();
167        let ts = clock.add_duration(Duration::from_secs(1)).unwrap();
168        *clock = ts;
169
170        let v = match ts_type {
171            TimestampType::Second(_) => ts.convert_to(TimeUnit::Second).unwrap(),
172            TimestampType::Millisecond(_) => ts.convert_to(TimeUnit::Millisecond).unwrap(),
173            TimestampType::Microsecond(_) => ts.convert_to(TimeUnit::Microsecond).unwrap(),
174            TimestampType::Nanosecond(_) => ts.convert_to(TimeUnit::Nanosecond).unwrap(),
175        };
176        Value::from(v)
177    })
178}
179
180/// Generate random timestamps.
181pub fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
182    let v = match ts_type {
183        TimestampType::Second(_) => {
184            let min = i64::from(Timestamp::MIN_SECOND);
185            let max = i64::from(Timestamp::MAX_SECOND);
186            let value = rng.random_range(min..=max);
187            Timestamp::new_second(value)
188        }
189        TimestampType::Millisecond(_) => {
190            let min = i64::from(Timestamp::MIN_MILLISECOND);
191            let max = i64::from(Timestamp::MAX_MILLISECOND);
192            let value = rng.random_range(min..=max);
193            Timestamp::new_millisecond(value)
194        }
195        TimestampType::Microsecond(_) => {
196            let min = i64::from(Timestamp::MIN_MICROSECOND);
197            let max = i64::from(Timestamp::MAX_MICROSECOND);
198            let value = rng.random_range(min..=max);
199            Timestamp::new_microsecond(value)
200        }
201        TimestampType::Nanosecond(_) => {
202            let min = i64::from(Timestamp::MIN_NANOSECOND);
203            let max = i64::from(Timestamp::MAX_NANOSECOND);
204            let value = rng.random_range(min..=max);
205            Timestamp::new_nanosecond(value)
206        }
207    };
208    Value::from(v)
209}
210
211// MySQL supports timestamp from '1970-01-01 00:00:01.000000' to '2038-01-19 03:14:07.499999'
212pub fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
213    let v = match ts_type {
214        TimestampType::Second(_) => {
215            let min = 1;
216            let max = 2_147_483_647;
217            let value = rng.random_range(min..=max);
218            Timestamp::new_second(value)
219        }
220        TimestampType::Millisecond(_) => {
221            let min = 1000;
222            let max = 2_147_483_647_499;
223            let value = rng.random_range(min..=max);
224            Timestamp::new_millisecond(value)
225        }
226        TimestampType::Microsecond(_) => {
227            let min = 1_000_000;
228            let max = 2_147_483_647_499_999;
229            let value = rng.random_range(min..=max);
230            Timestamp::new_microsecond(value)
231        }
232        TimestampType::Nanosecond(_) => {
233            let min = 1_000_000_000;
234            let max = 2_147_483_647_499_999_000;
235            let value = rng.random_range(min..=max);
236            Timestamp::new_nanosecond(value)
237        }
238    };
239    Value::from(v)
240}
241
242fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
243    let min = i64::from(Timestamp::MIN_MILLISECOND);
244    let max = i64::from(Timestamp::MAX_MILLISECOND);
245    let value = rng.random_range(min..=max);
246    let date = Timestamp::new_millisecond(value).to_chrono_date().unwrap();
247    Value::from(Date::from(date))
248}
249
250/// Generates a partition value for the given column type and bounds.
251pub fn generate_partition_value<R: Rng + 'static>(
252    rng: &mut R,
253    column_type: &ConcreteDataType,
254    bounds: &[Value],
255    bound_idx: usize,
256) -> Value {
257    if bounds.is_empty() {
258        return generate_random_value(rng, column_type, None);
259    }
260    let first = bounds.first().unwrap();
261    let last = bounds.last().unwrap();
262    match column_type {
263        datatypes::data_type::ConcreteDataType::Int16(_) => {
264            let first_value = match first {
265                datatypes::value::Value::Int16(v) => *v,
266                _ => 0,
267            };
268            if bound_idx == 0 {
269                datatypes::value::Value::from(first_value.saturating_sub(1))
270            } else if bound_idx < bounds.len() {
271                bounds[bound_idx - 1].clone()
272            } else {
273                last.clone()
274            }
275        }
276        datatypes::data_type::ConcreteDataType::Int32(_) => {
277            let first_value = match first {
278                datatypes::value::Value::Int32(v) => *v,
279                _ => 0,
280            };
281            if bound_idx == 0 {
282                datatypes::value::Value::from(first_value.saturating_sub(1))
283            } else if bound_idx < bounds.len() {
284                bounds[bound_idx - 1].clone()
285            } else {
286                last.clone()
287            }
288        }
289        datatypes::data_type::ConcreteDataType::Int64(_) => {
290            let first_value = match first {
291                datatypes::value::Value::Int64(v) => *v,
292                _ => 0,
293            };
294            if bound_idx == 0 {
295                datatypes::value::Value::from(first_value.saturating_sub(1))
296            } else if bound_idx < bounds.len() {
297                bounds[bound_idx - 1].clone()
298            } else {
299                last.clone()
300            }
301        }
302        datatypes::data_type::ConcreteDataType::Float32(_) => {
303            let first_value = match first {
304                datatypes::value::Value::Float32(v) => v.0,
305                _ => 0.0,
306            };
307            if bound_idx == 0 {
308                datatypes::value::Value::from(first_value - 1.0)
309            } else if bound_idx < bounds.len() {
310                bounds[bound_idx - 1].clone()
311            } else {
312                last.clone()
313            }
314        }
315        datatypes::data_type::ConcreteDataType::Float64(_) => {
316            let first_value = match first {
317                datatypes::value::Value::Float64(v) => v.0,
318                _ => 0.0,
319            };
320            if bound_idx == 0 {
321                datatypes::value::Value::from(first_value - 1.0)
322            } else if bound_idx < bounds.len() {
323                bounds[bound_idx - 1].clone()
324            } else {
325                last.clone()
326            }
327        }
328        datatypes::data_type::ConcreteDataType::String(_) => {
329            string_value::generate_partition_value(bounds, bound_idx)
330        }
331        _ => unimplemented!("unsupported partition column type: {column_type}"),
332    }
333}
334
/// An identifier.
///
/// The `Display` implementation writes `value` as is, or wrapped in the `quote_style`
/// character on both sides when one is set.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct Ident {
    /// The identifier text, stored without surrounding quotes.
    pub value: String,
    /// The quote character written before and after `value`, or `None` for no quoting.
    pub quote_style: Option<char>,
}
341
342impl Ident {
343    /// Creates a new identifier with the given value and no quotes.
344    pub fn new<S>(value: S) -> Self
345    where
346        S: Into<String>,
347    {
348        Ident {
349            value: value.into(),
350            quote_style: None,
351        }
352    }
353
354    /// Creates a new quoted identifier with the given quote and value.
355    pub fn with_quote<S>(quote: char, value: S) -> Self
356    where
357        S: Into<String>,
358    {
359        Ident {
360            value: value.into(),
361            quote_style: Some(quote),
362        }
363    }
364
365    pub fn is_empty(&self) -> bool {
366        self.value.is_empty()
367    }
368}
369
370impl From<&str> for Ident {
371    fn from(value: &str) -> Self {
372        Ident {
373            value: value.to_string(),
374            quote_style: None,
375        }
376    }
377}
378
379impl From<String> for Ident {
380    fn from(value: String) -> Self {
381        Ident {
382            value,
383            quote_style: None,
384        }
385    }
386}
387
388impl fmt::Display for Ident {
389    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
390        match self.quote_style {
391            Some(q) => write!(f, "{q}{}{q}", self.value),
392            None => f.write_str(&self.value),
393        }
394    }
395}
396
/// The IR column.
///
/// A builder is derived for it; `options` defaults to an empty list when not set on the
/// builder.
#[derive(Debug, Builder, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Column {
    /// The column name.
    #[builder(setter(into))]
    pub name: Ident,
    /// The data type of the column.
    pub column_type: ConcreteDataType,
    /// The options attached to the column (nullability, default, primary key, time index).
    #[builder(default, setter(into))]
    pub options: Vec<ColumnOption>,
}
406
407impl Column {
408    /// Returns [TimestampType] if it's [ColumnOption::TimeIndex] [Column].
409    pub fn timestamp_type(&self) -> Option<TimestampType> {
410        if let ConcreteDataType::Timestamp(ts_type) = self.column_type {
411            Some(ts_type)
412        } else {
413            None
414        }
415    }
416
417    /// Returns true if it's [ColumnOption::TimeIndex] [Column].
418    pub fn is_time_index(&self) -> bool {
419        self.options
420            .iter()
421            .any(|opt| opt == &ColumnOption::TimeIndex)
422    }
423
424    /// Returns true if it's the [ColumnOption::PrimaryKey] [Column].
425    pub fn is_primary_key(&self) -> bool {
426        self.options
427            .iter()
428            .any(|opt| opt == &ColumnOption::PrimaryKey)
429    }
430
431    /// Returns true if it's nullable.
432    pub fn is_nullable(&self) -> bool {
433        !self
434            .options
435            .iter()
436            .any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
437    }
438
439    // Returns true if it has default value.
440    pub fn has_default_value(&self) -> bool {
441        self.options.iter().any(|opt| {
442            matches!(
443                opt,
444                ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
445            )
446        })
447    }
448
449    // Returns default value if it has.
450    pub fn default_value(&self) -> Option<&Value> {
451        self.options.iter().find_map(|opt| match opt {
452            ColumnOption::DefaultValue(value) => Some(value),
453            _ => None,
454        })
455    }
456}
457
458/// Returns droppable columns. i.e., non-primary key columns, non-ts columns.
459pub fn droppable_columns(columns: &[Column]) -> Vec<&Column> {
460    columns
461        .iter()
462        .filter(|column| {
463            !column.options.iter().any(|option| {
464                option == &ColumnOption::PrimaryKey || option == &ColumnOption::TimeIndex
465            })
466        })
467        .collect::<Vec<_>>()
468}
469
470/// Returns columns that can use the alter table modify command
471pub fn modifiable_columns(columns: &[Column]) -> Vec<&Column> {
472    columns
473        .iter()
474        .filter(|column| {
475            !column.options.iter().any(|option| {
476                option == &ColumnOption::PrimaryKey
477                    || option == &ColumnOption::TimeIndex
478                    || option == &ColumnOption::NotNull
479            })
480        })
481        .collect::<Vec<_>>()
482}
483
484/// Generates [ColumnOption] for [Column].
485pub fn column_options_generator<R: Rng>(
486    rng: &mut R,
487    column_type: &ConcreteDataType,
488) -> Vec<ColumnOption> {
489    // 0 -> NULL
490    // 1 -> NOT NULL
491    // 2 -> DEFAULT VALUE
492    // 3 -> PRIMARY KEY
493    // 4 -> EMPTY
494    let option_idx = rng.random_range(0..5);
495    match option_idx {
496        0 => vec![ColumnOption::Null],
497        1 => vec![ColumnOption::NotNull],
498        2 => vec![ColumnOption::DefaultValue(generate_random_value(
499            rng,
500            column_type,
501            None,
502        ))],
503        3 => vec![ColumnOption::PrimaryKey],
504        _ => vec![],
505    }
506}
507
508/// Generates [ColumnOption] for Partible [Column].
509pub fn partible_column_options_generator<R: Rng + 'static>(
510    rng: &mut R,
511    column_type: &ConcreteDataType,
512) -> Vec<ColumnOption> {
513    // 0 -> NULL
514    // 1 -> NOT NULL
515    // 2 -> DEFAULT VALUE
516    // 3 -> PRIMARY KEY
517    let option_idx = rng.random_range(0..4);
518    match option_idx {
519        0 => vec![ColumnOption::PrimaryKey, ColumnOption::Null],
520        1 => vec![ColumnOption::PrimaryKey, ColumnOption::NotNull],
521        2 => vec![
522            ColumnOption::PrimaryKey,
523            ColumnOption::DefaultValue(generate_random_value(
524                rng,
525                column_type,
526                Some(&WordGenerator),
527            )),
528        ],
529        3 => vec![ColumnOption::PrimaryKey],
530        _ => unreachable!(),
531    }
532}
533
534/// Generates [ColumnOption] for ts [Column].
535pub fn ts_column_options_generator<R: Rng + 'static>(
536    _: &mut R,
537    _: &ConcreteDataType,
538) -> Vec<ColumnOption> {
539    vec![ColumnOption::TimeIndex]
540}
541
542pub fn primary_key_and_not_null_column_options_generator<R: Rng + 'static>(
543    _: &mut R,
544    _: &ConcreteDataType,
545) -> Vec<ColumnOption> {
546    vec![ColumnOption::PrimaryKey, ColumnOption::NotNull]
547}
548
549pub fn primary_key_options_generator<R: Rng + 'static>(
550    _: &mut R,
551    _: &ConcreteDataType,
552) -> Vec<ColumnOption> {
553    vec![ColumnOption::PrimaryKey]
554}
555
556/// Generates columns with given `names`.
557pub fn generate_columns<R: Rng + 'static>(
558    rng: &mut R,
559    names: impl IntoIterator<Item = Ident>,
560    types: &(impl Random<ConcreteDataType, R> + ?Sized),
561    options: impl Fn(&mut R, &ConcreteDataType) -> Vec<ColumnOption>,
562) -> Vec<Column> {
563    names
564        .into_iter()
565        .map(|name| {
566            let column_type = types.generate(rng);
567            let options = options(rng, &column_type);
568            Column {
569                name,
570                options,
571                column_type,
572            }
573        })
574        .collect()
575}
576
577/// Replace Value::Default with the corresponding default value in the rows for comparison.
578pub fn replace_default(
579    rows: &[RowValues],
580    table_ctx_ref: &TableContextRef,
581    insert_expr: &InsertIntoExpr,
582) -> Vec<RowValues> {
583    let index_map: HashMap<usize, usize> = insert_expr
584        .columns
585        .iter()
586        .enumerate()
587        .map(|(insert_idx, insert_column)| {
588            let create_idx = table_ctx_ref
589                .columns
590                .iter()
591                .position(|create_column| create_column.name == insert_column.name)
592                .expect("Column not found in create_expr");
593            (insert_idx, create_idx)
594        })
595        .collect();
596
597    let mut new_rows = Vec::new();
598    for row in rows {
599        let mut new_row = Vec::new();
600        for (idx, value) in row.iter().enumerate() {
601            if let RowValue::Default = value {
602                let column = &table_ctx_ref.columns[index_map[&idx]];
603                new_row.push(RowValue::Value(column.default_value().unwrap().clone()));
604            } else {
605                new_row.push(value.clone());
606            }
607        }
608        new_rows.push(new_row);
609    }
610    new_rows
611}
612
613/// Sorts a vector of rows based on the values in the specified primary key columns.
614pub fn sort_by_primary_keys(rows: &mut [RowValues], primary_keys_idx: Vec<usize>) {
615    rows.sort_by(|a, b| {
616        let a_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &a[i]).collect();
617        let b_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &b[i]).collect();
618        for (a_key, b_key) in a_keys.iter().zip(b_keys.iter()) {
619            match a_key.cmp(b_key) {
620                Some(std::cmp::Ordering::Equal) => continue,
621                non_eq => return non_eq.unwrap(),
622            }
623        }
624        std::cmp::Ordering::Equal
625    });
626}
627
628/// Formats a slice of columns into a comma-separated string of column names.
629pub fn format_columns(columns: &[Column]) -> String {
630    columns
631        .iter()
632        .map(|c| c.name.to_string())
633        .collect::<Vec<_>>()
634        .join(", ")
635}
636
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `uint64` column with the given name and options.
    fn uint64_column(name: &str, options: Vec<ColumnOption>) -> Column {
        Column {
            name: name.into(),
            column_type: ConcreteDataType::uint64_datatype(),
            options,
        }
    }

    #[test]
    fn test_droppable_columns() {
        // Primary key and time index columns must never be offered for dropping.
        let protected = vec![
            uint64_column("hi", vec![ColumnOption::PrimaryKey]),
            uint64_column("foo", vec![ColumnOption::TimeIndex]),
        ];
        assert!(droppable_columns(&protected).is_empty());

        // Columns without any option are all droppable.
        let plain = vec![uint64_column("hi", vec![]), uint64_column("foo", vec![])];
        assert_eq!(droppable_columns(&plain).len(), 2);
    }
}