tests_fuzz/generator/
create_expr.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16
17use datatypes::data_type::ConcreteDataType;
18use datatypes::value::Value;
19use derive_builder::Builder;
20use partition::expr::{Operand, PartitionExpr, RestrictedOp};
21use partition::partition::{PartitionBound, PartitionDef};
22use rand::seq::SliceRandom;
23use rand::Rng;
24use snafu::{ensure, ResultExt};
25
26use super::Generator;
27use crate::context::TableContextRef;
28use crate::error::{self, Error, Result};
29use crate::fake::{random_capitalize_map, MappedGenerator, WordGenerator};
30use crate::generator::{ColumnOptionGenerator, ConcreteDataTypeGenerator, Random};
31use crate::ir::create_expr::{ColumnOption, CreateDatabaseExprBuilder, CreateTableExprBuilder};
32use crate::ir::{
33    column_options_generator, generate_columns, generate_partition_bounds, generate_random_value,
34    partible_column_options_generator, primary_key_options_generator, ts_column_options_generator,
35    Column, ColumnTypeGenerator, CreateDatabaseExpr, CreateTableExpr, Ident,
36    PartibleColumnTypeGenerator, StringColumnTypeGenerator, TsColumnTypeGenerator,
37};
38
/// Configurable generator of random [CreateTableExpr]s.
///
/// A builder is derived through `derive_builder` using the owned pattern. Because of the
/// struct-level `default` attribute, fields that are not set on the builder take their
/// values from this type's `Default` implementation.
#[derive(Builder)]
#[builder(default, pattern = "owned")]
pub struct CreateTableExprGenerator<R: Rng + 'static> {
    /// Number of columns to generate; `generate` returns an error when this is zero.
    columns: usize,
    /// Engine name copied into the generated expression.
    #[builder(setter(into))]
    engine: String,
    /// Number of partitions; a partition definition is only produced when this is greater than 1.
    partition: usize,
    /// Value of the `if_not_exists` flag of the generated expression.
    if_not_exists: bool,
    /// Table name; when empty, a name is drawn from `name_generator` instead.
    #[builder(setter(into))]
    name: Ident,
    /// Extra table options; each value is converted into a string `Value` when non-empty.
    #[builder(setter(into))]
    with_clause: HashMap<String, String>,
    /// Source of the column names, and of the table name when `name` is empty.
    name_generator: Box<dyn Random<Ident, R>>,
    /// Picks the data type of the ts column.
    ts_column_type_generator: ConcreteDataTypeGenerator<R>,
    /// Picks the data types of the remaining (non-ts, non-partible) columns.
    column_type_generator: ConcreteDataTypeGenerator<R>,
    /// Picks the data type of the dedicated partible column.
    partible_column_type_generator: ConcreteDataTypeGenerator<R>,
    /// Picks the column options of the dedicated partible column.
    partible_column_options_generator: ColumnOptionGenerator<R>,
    /// Picks the column options of the remaining (non-ts, non-partible) columns.
    column_options_generator: ColumnOptionGenerator<R>,
    /// Picks the column options of the ts column.
    ts_column_options_generator: ColumnOptionGenerator<R>,
}
59
/// Engine name used by `CreateTableExprGenerator::default()`.
const DEFAULT_ENGINE: &str = "mito";
61
62impl<R: Rng + 'static> Default for CreateTableExprGenerator<R> {
63    fn default() -> Self {
64        Self {
65            columns: 0,
66            engine: DEFAULT_ENGINE.to_string(),
67            if_not_exists: false,
68            partition: 0,
69            name: Ident::new(""),
70            with_clause: HashMap::default(),
71            name_generator: Box::new(MappedGenerator::new(WordGenerator, random_capitalize_map)),
72            ts_column_type_generator: Box::new(TsColumnTypeGenerator),
73            column_type_generator: Box::new(ColumnTypeGenerator),
74            partible_column_type_generator: Box::new(PartibleColumnTypeGenerator),
75            partible_column_options_generator: Box::new(partible_column_options_generator),
76            column_options_generator: Box::new(column_options_generator),
77            ts_column_options_generator: Box::new(ts_column_options_generator),
78        }
79    }
80}
81
82impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreateTableExprGenerator<R> {
83    type Error = Error;
84
85    /// Generates the [CreateTableExpr].
86    fn generate(&self, rng: &mut R) -> Result<CreateTableExpr> {
87        ensure!(
88            self.columns != 0,
89            error::UnexpectedSnafu {
90                violated: "The columns must larger than zero"
91            }
92        );
93
94        let mut builder = CreateTableExprBuilder::default();
95        let mut columns = Vec::with_capacity(self.columns);
96        let mut primary_keys = vec![];
97        let need_partible_column = self.partition > 1;
98        let mut column_names = self.name_generator.choose(rng, self.columns);
99
100        if self.columns == 1 {
101            // Generates the ts column.
102            // Safety: columns must large than 0.
103            let name = column_names.pop().unwrap();
104            let column = generate_columns(
105                rng,
106                vec![name.clone()],
107                self.ts_column_type_generator.as_ref(),
108                self.ts_column_options_generator.as_ref(),
109            )
110            .remove(0);
111
112            if need_partible_column {
113                // Generates partition bounds.
114                let mut partition_bounds = Vec::with_capacity(self.partition);
115                for _ in 0..self.partition - 1 {
116                    partition_bounds.push(PartitionBound::Value(generate_random_value(
117                        rng,
118                        &column.column_type,
119                        None,
120                    )));
121                    partition_bounds.sort();
122                }
123                partition_bounds.push(PartitionBound::MaxValue);
124                builder.partition(PartitionDef::new(
125                    vec![name.value.to_string()],
126                    partition_bounds,
127                ));
128            }
129
130            columns.push(column);
131        } else {
132            // Generates the partible column.
133            if need_partible_column {
134                // Safety: columns must large than 0.
135                let name = column_names.pop().unwrap();
136                let column = generate_columns(
137                    rng,
138                    vec![name.clone()],
139                    self.partible_column_type_generator.as_ref(),
140                    self.partible_column_options_generator.as_ref(),
141                )
142                .remove(0);
143
144                // Generates partition bounds.
145                let partition_def = generate_partition_def(
146                    self.partition,
147                    column.column_type.clone(),
148                    name.clone(),
149                );
150                builder.partition(partition_def);
151                columns.push(column);
152            }
153            // Generates the ts column.
154            // Safety: columns must large than 1.
155            let name = column_names.pop().unwrap();
156            columns.extend(generate_columns(
157                rng,
158                vec![name],
159                self.ts_column_type_generator.as_ref(),
160                self.ts_column_options_generator.as_ref(),
161            ));
162            // Generates rest columns
163            columns.extend(generate_columns(
164                rng,
165                column_names,
166                self.column_type_generator.as_ref(),
167                self.column_options_generator.as_ref(),
168            ));
169        }
170
171        for (idx, column) in columns.iter().enumerate() {
172            if column.is_primary_key() {
173                primary_keys.push(idx);
174            }
175        }
176        // Shuffles the primary keys.
177        primary_keys.shuffle(rng);
178
179        builder.columns(columns);
180        builder.primary_keys(primary_keys);
181        builder.engine(self.engine.to_string());
182        builder.if_not_exists(self.if_not_exists);
183        if self.name.is_empty() {
184            builder.table_name(self.name_generator.gen(rng));
185        } else {
186            builder.table_name(self.name.clone());
187        }
188        if !self.with_clause.is_empty() {
189            let mut options = HashMap::new();
190            for (key, value) in &self.with_clause {
191                options.insert(key.to_string(), Value::from(value.to_string()));
192            }
193            builder.options(options);
194        }
195        builder.build().context(error::BuildCreateTableExprSnafu)
196    }
197}
198
199fn generate_partition_def(
200    partitions: usize,
201    column_type: ConcreteDataType,
202    column_name: Ident,
203) -> PartitionDef {
204    let bounds = generate_partition_bounds(&column_type, partitions - 1);
205    let mut partition_bounds = Vec::with_capacity(partitions);
206
207    let first_bound = bounds[0].clone();
208    partition_bounds.push(PartitionBound::Expr(PartitionExpr::new(
209        Operand::Column(column_name.to_string()),
210        RestrictedOp::Lt,
211        Operand::Value(first_bound),
212    )));
213    for bound_idx in 1..bounds.len() {
214        partition_bounds.push(PartitionBound::Expr(PartitionExpr::new(
215            Operand::Expr(PartitionExpr::new(
216                Operand::Column(column_name.to_string()),
217                RestrictedOp::GtEq,
218                Operand::Value(bounds[bound_idx - 1].clone()),
219            )),
220            RestrictedOp::And,
221            Operand::Expr(PartitionExpr::new(
222                Operand::Column(column_name.to_string()),
223                RestrictedOp::Lt,
224                Operand::Value(bounds[bound_idx].clone()),
225            )),
226        )));
227    }
228    let last_bound = bounds.last().unwrap().clone();
229    partition_bounds.push(PartitionBound::Expr(PartitionExpr::new(
230        Operand::Column(column_name.to_string()),
231        RestrictedOp::GtEq,
232        Operand::Value(last_bound),
233    )));
234
235    PartitionDef::new(vec![column_name.to_string()], partition_bounds)
236}
237
/// Generate a physical table with 2 columns: ts of TimestampType::Millisecond as time index and val of Float64Type.
#[derive(Builder)]
#[builder(pattern = "owned")]
pub struct CreatePhysicalTableExprGenerator<R: Rng + 'static> {
    /// Source of the random table name; defaults to `WordGenerator`.
    #[builder(default = "Box::new(WordGenerator)")]
    name_generator: Box<dyn Random<Ident, R>>,
    /// Value of the `if_not_exists` flag of the generated expression; defaults to `false`.
    #[builder(default = "false")]
    if_not_exists: bool,
    /// Extra table options merged into the generated expression's options.
    #[builder(default, setter(into))]
    with_clause: HashMap<String, String>,
}
249
250impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreatePhysicalTableExprGenerator<R> {
251    type Error = Error;
252
253    fn generate(&self, rng: &mut R) -> Result<CreateTableExpr> {
254        let mut options = HashMap::with_capacity(self.with_clause.len() + 1);
255        options.insert("physical_metric_table".to_string(), Value::from(""));
256        for (key, value) in &self.with_clause {
257            options.insert(key.to_string(), Value::from(value.to_string()));
258        }
259
260        Ok(CreateTableExpr {
261            table_name: self.name_generator.gen(rng),
262            columns: vec![
263                Column {
264                    name: Ident::new("ts"),
265                    column_type: ConcreteDataType::timestamp_millisecond_datatype(),
266                    options: vec![ColumnOption::TimeIndex],
267                },
268                Column {
269                    name: Ident::new("val"),
270                    column_type: ConcreteDataType::float64_datatype(),
271                    options: vec![],
272                },
273            ],
274            if_not_exists: self.if_not_exists,
275            partition: None,
276            engine: "metric".to_string(),
277            options,
278            primary_keys: vec![],
279        })
280    }
281}
282
/// Generate a logical table based on an existing physical table.
#[derive(Builder)]
#[builder(pattern = "owned")]
pub struct CreateLogicalTableExprGenerator<R: Rng + 'static> {
    /// Context of the physical table the logical table is created on.
    physical_table_ctx: TableContextRef,
    /// Number of string label columns appended to the physical table's columns.
    labels: usize,
    /// Value of the `if_not_exists` flag of the generated expression.
    if_not_exists: bool,
    /// Source of the table name and label column names; defaults to `WordGenerator`.
    #[builder(default = "Box::new(WordGenerator)")]
    name_generator: Box<dyn Random<Ident, R>>,
}
293
294impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreateLogicalTableExprGenerator<R> {
295    type Error = Error;
296
297    fn generate(&self, rng: &mut R) -> Result<CreateTableExpr> {
298        // Currently we mock the usage of GreptimeDB as Prometheus' backend, the physical table must have two columns.
299        ensure!(
300            self.physical_table_ctx.columns.len() == 2,
301            error::UnexpectedSnafu {
302                violated: "The physical table must have two columns"
303            }
304        );
305
306        // Generates the logical table columns based on the physical table.
307        let logical_table_name = self
308            .physical_table_ctx
309            .generate_unique_table_name(rng, self.name_generator.as_ref());
310        let mut logical_table = CreateTableExpr {
311            table_name: logical_table_name,
312            columns: self.physical_table_ctx.columns.clone(),
313            if_not_exists: self.if_not_exists,
314            partition: None,
315            engine: "metric".to_string(),
316            options: [(
317                "on_physical_table".to_string(),
318                self.physical_table_ctx.name.value.clone().into(),
319            )]
320            .into(),
321            primary_keys: vec![],
322        };
323
324        let column_names = self.name_generator.choose(rng, self.labels);
325        logical_table.columns.extend(generate_columns(
326            rng,
327            column_names,
328            &StringColumnTypeGenerator,
329            Box::new(primary_key_options_generator),
330        ));
331
332        // Currently only the `primary key` option is kept in physical table,
333        // so we only keep the `primary key` option in the logical table for fuzz test.
334        let mut primary_keys = vec![];
335        for (idx, column) in logical_table.columns.iter().enumerate() {
336            if column.is_primary_key() {
337                primary_keys.push(idx);
338            }
339        }
340        primary_keys.shuffle(rng);
341        logical_table.primary_keys = primary_keys;
342
343        Ok(logical_table)
344    }
345}
346
/// Configurable generator of random [CreateDatabaseExpr]s.
///
/// Because of the struct-level `default` attribute, fields that are not set on the
/// derived builder take their values from this type's `Default` implementation.
#[derive(Builder)]
#[builder(default, pattern = "owned")]
pub struct CreateDatabaseExprGenerator<R: Rng + 'static> {
    /// Database name; when empty, a name is drawn from `name_generator` instead.
    #[builder(setter(into))]
    database_name: String,
    /// Source of the random database name.
    name_generator: Box<dyn Random<Ident, R>>,
    /// Value of the `if_not_exists` flag of the generated expression.
    if_not_exists: bool,
}
355
356impl<R: Rng + 'static> Default for CreateDatabaseExprGenerator<R> {
357    fn default() -> Self {
358        Self {
359            database_name: String::new(),
360            name_generator: Box::new(MappedGenerator::new(WordGenerator, random_capitalize_map)),
361            if_not_exists: false,
362        }
363    }
364}
365
366impl<R: Rng + 'static> Generator<CreateDatabaseExpr, R> for CreateDatabaseExprGenerator<R> {
367    type Error = Error;
368
369    fn generate(&self, rng: &mut R) -> Result<CreateDatabaseExpr> {
370        let mut builder = CreateDatabaseExprBuilder::default();
371        builder.if_not_exists(self.if_not_exists);
372        if self.database_name.is_empty() {
373            builder.database_name(self.name_generator.gen(rng));
374        } else {
375            builder.database_name(self.database_name.to_string());
376        }
377        builder.build().context(error::BuildCreateDatabaseExprSnafu)
378    }
379}
380
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::data_type::ConcreteDataType;
    use datatypes::value::Value;
    use rand::SeedableRng;

    use super::*;
    use crate::context::TableContext;

    /// Checks that a float64 keeps its full textual representation, both through
    /// `Value`'s string conversion and through a parse/print round trip of `f64`.
    #[test]
    fn test_float64() {
        let value = Value::from(0.047318541668048164);
        assert_eq!("0.047318541668048164", value.to_string());
        let value: f64 = "0.047318541668048164".parse().unwrap();
        assert_eq!("0.047318541668048164", value.to_string());
    }

    /// Checks the structural properties of generated tables with a non-seeded RNG:
    /// column count, engine, `if_not_exists` and the number of partition bounds.
    #[test]
    fn test_create_table_expr_generator() {
        let mut rng = rand::rng();

        let expr = CreateTableExprGeneratorBuilder::default()
            .columns(10)
            .partition(3)
            .if_not_exists(true)
            .engine("mito2")
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();
        assert_eq!(expr.engine, "mito2");
        assert!(expr.if_not_exists);
        assert_eq!(expr.columns.len(), 10);
        assert_eq!(expr.partition.unwrap().partition_bounds().len(), 3);

        // A single partition must not produce a partition definition.
        let expr = CreateTableExprGeneratorBuilder::default()
            .columns(10)
            .partition(1)
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();
        assert_eq!(expr.columns.len(), 10);
        assert!(expr.partition.is_none());
    }

    /// Pins the exact output of the table generator for a fixed seed; any change to
    /// the order in which the RNG is consumed changes the expected JSON.
    #[test]
    fn test_create_table_expr_generator_deterministic() {
        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0);
        let expr = CreateTableExprGeneratorBuilder::default()
            .columns(10)
            .partition(3)
            .if_not_exists(true)
            .engine("mito2")
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();

        let serialized = serde_json::to_string(&expr).unwrap();
        let expected = r#"{"table_name":{"value":"quasi","quote_style":null},"columns":[{"name":{"value":"mOLEsTIAs","quote_style":null},"column_type":{"Float64":{}},"options":["PrimaryKey","Null"]},{"name":{"value":"CUMQUe","quote_style":null},"column_type":{"Timestamp":{"Second":null}},"options":["TimeIndex"]},{"name":{"value":"NaTus","quote_style":null},"column_type":{"Int64":{}},"options":[]},{"name":{"value":"EXPeDITA","quote_style":null},"column_type":{"Float64":{}},"options":[]},{"name":{"value":"ImPEDiT","quote_style":null},"column_type":{"Float32":{}},"options":[{"DefaultValue":{"Float32":0.56425774}}]},{"name":{"value":"ADIpisci","quote_style":null},"column_type":{"Float32":{}},"options":["PrimaryKey"]},{"name":{"value":"deBITIs","quote_style":null},"column_type":{"Float32":{}},"options":[{"DefaultValue":{"Float32":0.31315368}}]},{"name":{"value":"toTaM","quote_style":null},"column_type":{"Int32":{}},"options":["NotNull"]},{"name":{"value":"QuI","quote_style":null},"column_type":{"Float32":{}},"options":[{"DefaultValue":{"Float32":0.39941502}}]},{"name":{"value":"INVeNtOre","quote_style":null},"column_type":{"Boolean":null},"options":["PrimaryKey"]}],"if_not_exists":true,"partition":{"partition_columns":["mOLEsTIAs"],"partition_bounds":[{"Expr":{"lhs":{"Column":"mOLEsTIAs"},"op":"Lt","rhs":{"Value":{"Float64":5.992310449541053e307}}}},{"Expr":{"lhs":{"Expr":{"lhs":{"Column":"mOLEsTIAs"},"op":"GtEq","rhs":{"Value":{"Float64":5.992310449541053e307}}}},"op":"And","rhs":{"Expr":{"lhs":{"Column":"mOLEsTIAs"},"op":"Lt","rhs":{"Value":{"Float64":1.1984620899082105e308}}}}}},{"Expr":{"lhs":{"Column":"mOLEsTIAs"},"op":"GtEq","rhs":{"Value":{"Float64":1.1984620899082105e308}}}}]},"engine":"mito2","options":{},"primary_keys":[0,5,9]}"#;
        assert_eq!(expected, serialized);
    }

    /// Checks that a logical table generated on top of a physical table keeps the
    /// physical time index column and that every string column is a primary key.
    #[test]
    fn test_create_logical_table_expr_generator() {
        let mut rng = rand::rng();

        let physical_table_expr = CreatePhysicalTableExprGeneratorBuilder::default()
            .if_not_exists(false)
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();
        assert_eq!(physical_table_expr.engine, "metric");
        assert_eq!(physical_table_expr.columns.len(), 2);

        // Locates the time index column of the physical table.
        let physical_ts = physical_table_expr.columns.iter().position(|column| {
            column
                .options
                .iter()
                .any(|option| option == &ColumnOption::TimeIndex)
        });
        let physical_ts_name = physical_table_expr.columns[physical_ts.unwrap()]
            .name
            .value
            .to_string();

        let physical_table_ctx = Arc::new(TableContext::from(&physical_table_expr));

        let logical_table_expr = CreateLogicalTableExprGeneratorBuilder::default()
            .physical_table_ctx(physical_table_ctx)
            .labels(5)
            .if_not_exists(false)
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();
        // Locates the time index column of the logical table.
        let logical_ts = logical_table_expr.columns.iter().position(|column| {
            column
                .options
                .iter()
                .any(|option| option == &ColumnOption::TimeIndex)
        });
        let logical_ts_name = logical_table_expr.columns[logical_ts.unwrap()]
            .name
            .value
            .to_string();

        assert_eq!(logical_table_expr.engine, "metric");
        // 2 physical columns + 5 label columns.
        assert_eq!(logical_table_expr.columns.len(), 7);
        assert_eq!(logical_ts_name, physical_ts_name);
        // Every string column must carry the primary key option.
        assert!(logical_table_expr
            .columns
            .iter()
            .all(
                |column| column.column_type != ConcreteDataType::string_datatype()
                    || column
                        .options
                        .iter()
                        .any(|option| option == &ColumnOption::PrimaryKey)
            ));
    }

    /// Pins the exact output of the physical and logical table generators for a fixed
    /// seed; the same RNG is shared by both generators.
    #[test]
    fn test_create_logical_table_expr_generator_deterministic() {
        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0);
        let physical_table_expr = CreatePhysicalTableExprGeneratorBuilder::default()
            .if_not_exists(false)
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();
        let physical_table_serialized = serde_json::to_string(&physical_table_expr).unwrap();
        let physical_table_expected = r#"{"table_name":{"value":"expedita","quote_style":null},"columns":[{"name":{"value":"ts","quote_style":null},"column_type":{"Timestamp":{"Millisecond":null}},"options":["TimeIndex"]},{"name":{"value":"val","quote_style":null},"column_type":{"Float64":{}},"options":[]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"physical_metric_table":{"String":""}},"primary_keys":[]}"#;
        assert_eq!(physical_table_expected, physical_table_serialized);

        let physical_table_ctx = Arc::new(TableContext::from(&physical_table_expr));

        let logical_table_expr = CreateLogicalTableExprGeneratorBuilder::default()
            .physical_table_ctx(physical_table_ctx)
            .labels(5)
            .if_not_exists(false)
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();

        let logical_table_serialized = serde_json::to_string(&logical_table_expr).unwrap();
        let logical_table_expected = r#"{"table_name":{"value":"impedit","quote_style":null},"columns":[{"name":{"value":"ts","quote_style":null},"column_type":{"Timestamp":{"Millisecond":null}},"options":["TimeIndex"]},{"name":{"value":"val","quote_style":null},"column_type":{"Float64":{}},"options":[]},{"name":{"value":"totam","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"cumque","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"natus","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"molestias","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"qui","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"on_physical_table":{"String":"expedita"}},"primary_keys":[4,2,3,6,5]}"#;
        assert_eq!(logical_table_expected, logical_table_serialized);
    }

    /// Checks that the `if_not_exists` flag is carried over to the generated expression.
    #[test]
    fn test_create_database_expr_generator() {
        let mut rng = rand::rng();

        let expr = CreateDatabaseExprGeneratorBuilder::default()
            .if_not_exists(true)
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();
        assert!(expr.if_not_exists);
    }

    /// Pins the exact output of the database generator for a fixed seed.
    #[test]
    fn test_create_database_expr_generator_deterministic() {
        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0);
        let expr = CreateDatabaseExprGeneratorBuilder::default()
            .if_not_exists(true)
            .build()
            .unwrap()
            .generate(&mut rng)
            .unwrap();

        let serialized = serde_json::to_string(&expr).unwrap();
        let expected =
            r#"{"database_name":{"value":"EXPediTA","quote_style":null},"if_not_exists":true}"#;
        assert_eq!(expected, serialized);
    }
}