flow/plan/
join.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use crate::expr::ScalarExpr;
16use crate::plan::SafeMfpPlan;
17
18/// TODO(discord9): consider impl more join strategies
19#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
20pub enum JoinPlan {
21    Linear(LinearJoinPlan),
22}
23
24/// Determine if a given row should stay in the output. And apply a map filter project before output the row
25#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
26pub struct JoinFilter {
27    /// each element in the outer vector will check if each expr in itself can be eval to same value
28    /// if not, the row will be filtered out. Useful for equi-join(join based on equality of some columns)
29    pub ready_equivalences: Vec<Vec<ScalarExpr>>,
30    /// Apply a map filter project before output the row
31    pub before: SafeMfpPlan,
32}
33
34/// A plan for the execution of a linear join.
35///
36/// A linear join is a sequence of stages, each of which introduces
37/// a new collection. Each stage is represented by a [LinearStagePlan].
38#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
39pub struct LinearJoinPlan {
40    /// The source relation from which we start the join.
41    pub source_relation: usize,
42    /// The arrangement to use for the source relation, if any
43    pub source_key: Option<Vec<ScalarExpr>>,
44    /// An initial closure to apply before any stages.
45    ///
46    /// Values of `None` indicate the identity closure.
47    pub initial_closure: Option<JoinFilter>,
48    /// A *sequence* of stages to apply one after the other.
49    pub stage_plans: Vec<LinearStagePlan>,
50    /// A concluding filter to apply after the last stage.
51    ///
52    /// Values of `None` indicate the identity closure.
53    pub final_closure: Option<JoinFilter>,
54}
55
56/// A plan for the execution of one stage of a linear join.
57///
58/// Each stage is a binary join between the current accumulated
59/// join results, and a new collection. The former is referred to
60/// as the "stream" and the latter the "lookup".
61#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
62pub struct LinearStagePlan {
63    /// The index of the relation into which we will look up.
64    pub lookup_relation: usize,
65    /// The key expressions to use for the stream relation.
66    pub stream_key: Vec<ScalarExpr>,
67    /// Columns to retain from the stream relation.
68    /// These columns are those that are not redundant with `stream_key`,
69    /// and cannot be read out of the key component of an arrangement.
70    pub stream_thinning: Vec<usize>,
71    /// The key expressions to use for the lookup relation.
72    pub lookup_key: Vec<ScalarExpr>,
73    /// The closure to apply to the concatenation of the key columns,
74    /// the stream value columns, and the lookup value colunms.
75    pub closure: JoinFilter,
76}