sql/parsers/
with_tql_parser.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt;
16
17use serde::Serialize;
18use snafu::ResultExt;
19use sqlparser::ast::helpers::attached_token::AttachedToken;
20use sqlparser::ast::{
21    Cte, Ident, ObjectName, Query as SpQuery, TableAlias, TableAliasColumnDef, With,
22};
23use sqlparser::keywords::Keyword;
24use sqlparser::parser::IsOptional;
25use sqlparser::tokenizer::Token;
26use sqlparser_derive::{Visit, VisitMut};
27
28use crate::dialect::GreptimeDbDialect;
29use crate::error::{self, Result};
30use crate::parser::{ParseOptions, ParserContext};
31use crate::parsers::tql_parser;
32use crate::statements::query::Query;
33use crate::statements::statement::Statement;
34use crate::statements::tql::Tql;
35use crate::util::location_to_index;
36
37/// Content of a CTE - either SQL or TQL
38#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
39pub enum CteContent {
40    Sql(Box<SpQuery>),
41    Tql(Tql),
42}
43
44/// A hybrid CTE that can contain either SQL or TQL
45#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
46pub struct HybridCte {
47    pub name: Ident,
48    /// Column aliases for the CTE table. Empty if not specified.
49    pub columns: Vec<ObjectName>,
50    pub content: CteContent,
51}
52
53/// Extended WITH clause that supports hybrid SQL/TQL CTEs
54#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
55pub struct HybridCteWith {
56    pub recursive: bool,
57    pub cte_tables: Vec<HybridCte>,
58}
59
60impl fmt::Display for HybridCteWith {
61    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
62        write!(f, "WITH ")?;
63
64        if self.recursive {
65            write!(f, "RECURSIVE ")?;
66        }
67
68        for (i, cte) in self.cte_tables.iter().enumerate() {
69            if i > 0 {
70                write!(f, ", ")?;
71            }
72            write!(f, "{}", cte.name)?;
73
74            if !cte.columns.is_empty() {
75                write!(f, " (")?;
76                for (j, col) in cte.columns.iter().enumerate() {
77                    if j > 0 {
78                        write!(f, ", ")?;
79                    }
80                    write!(f, "{}", col)?;
81                }
82                write!(f, ")")?;
83            }
84
85            write!(f, " AS (")?;
86            match &cte.content {
87                CteContent::Sql(query) => write!(f, "{}", query)?,
88                CteContent::Tql(tql) => write!(f, "{}", tql)?,
89            }
90            write!(f, ")")?;
91        }
92        Ok(())
93    }
94}
95
96/// Parser implementation for hybrid WITH clauses containing TQL
97impl ParserContext<'_> {
98    /// Parse a WITH clause that may contain TQL CTEs or SQL CTEs.
99    pub(crate) fn parse_with_tql(&mut self) -> Result<Statement> {
100        // Consume the WITH token
101        self.parser
102            .expect_keyword(Keyword::WITH)
103            .context(error::SyntaxSnafu)?;
104
105        // Check for RECURSIVE keyword
106        let recursive = self.parser.parse_keyword(Keyword::RECURSIVE);
107
108        // Parse the CTE list
109        let mut tql_cte_tables = Vec::new();
110        let mut sql_cte_tables = Vec::new();
111
112        loop {
113            let cte = self.parse_hybrid_cte()?;
114            match cte.content {
115                CteContent::Sql(body) => sql_cte_tables.push(Cte {
116                    alias: TableAlias {
117                        name: cte.name,
118                        columns: cte
119                            .columns
120                            .into_iter()
121                            .flat_map(|col| col.0[0].as_ident().cloned())
122                            .map(|name| TableAliasColumnDef {
123                                name,
124                                data_type: None,
125                            })
126                            .collect(),
127                    },
128                    query: body,
129                    from: None,
130                    materialized: None,
131                    closing_paren_token: AttachedToken::empty(),
132                }),
133                CteContent::Tql(_) => tql_cte_tables.push(cte),
134            }
135
136            if !self.parser.consume_token(&Token::Comma) {
137                break;
138            }
139        }
140
141        // Parse the main query
142        let main_query = self.parser.parse_query().context(error::SyntaxSnafu)?;
143
144        // Convert the hybrid CTEs to a standard query with hybrid metadata
145        let hybrid_cte = HybridCteWith {
146            recursive,
147            cte_tables: tql_cte_tables,
148        };
149
150        // Create a Query statement with hybrid CTE metadata
151        let mut query = Query::try_from(*main_query)?;
152        query.hybrid_cte = Some(hybrid_cte);
153        query.inner.with = Some(With {
154            recursive,
155            cte_tables: sql_cte_tables,
156            with_token: AttachedToken::empty(),
157        });
158
159        Ok(Statement::Query(Box::new(query)))
160    }
161
162    /// Parse a single CTE that can be either SQL or TQL
163    fn parse_hybrid_cte(&mut self) -> Result<HybridCte> {
164        // Parse CTE name
165        let name = self.parser.parse_identifier().context(error::SyntaxSnafu)?;
166        let name = Self::canonicalize_identifier(name);
167
168        // Parse optional column list
169        let columns = self
170            .parser
171            .parse_parenthesized_qualified_column_list(IsOptional::Optional, true)
172            .context(error::SyntaxSnafu)?;
173
174        // Expect AS keyword
175        self.parser
176            .expect_keyword(Keyword::AS)
177            .context(error::SyntaxSnafu)?;
178
179        // Parse the CTE content
180        self.parser
181            .expect_token(&Token::LParen)
182            .context(error::SyntaxSnafu)?;
183
184        let content = self.parse_cte_content()?;
185
186        self.parser
187            .expect_token(&Token::RParen)
188            .context(error::SyntaxSnafu)?;
189
190        Ok(HybridCte {
191            name,
192            columns,
193            content,
194        })
195    }
196
197    /// Determine if CTE contains TQL or SQL and parse accordingly
198    fn parse_cte_content(&mut self) -> Result<CteContent> {
199        // Check if the next token is TQL
200        if let Token::Word(w) = &self.parser.peek_token().token {
201            if w.keyword == Keyword::NoKeyword
202                && w.quote_style.is_none()
203                && w.value.to_uppercase() == tql_parser::TQL
204            {
205                let tql = self.parse_tql_content_in_cte()?;
206                return Ok(CteContent::Tql(tql));
207            }
208        }
209
210        // Parse as SQL query
211        let sql_query = self.parser.parse_query().context(error::SyntaxSnafu)?;
212        Ok(CteContent::Sql(sql_query))
213    }
214
215    /// Parse TQL content within a CTE by extracting the raw query string.
216    ///
217    /// This method consumes all tokens that belong to the TQL statement and
218    /// stops right **before** the closing `)` of the CTE so that the caller
219    /// can handle it normally.
220    ///
221    /// Only `TQL EVAL` is supported inside CTEs.
222    fn parse_tql_content_in_cte(&mut self) -> Result<Tql> {
223        // Consume and get the position of the TQL keyword
224        let tql_token = self.parser.next_token();
225        if tql_token.token == Token::EOF {
226            return Err(error::InvalidSqlSnafu {
227                msg: "Unexpected end of input while parsing TQL inside CTE".to_string(),
228            }
229            .build());
230        }
231
232        let start_location = tql_token.span.start;
233
234        // Track parentheses depth to find the end of the CTE
235        let mut paren_depth = 0usize;
236        let end_location;
237
238        loop {
239            let token_with_span = self.parser.peek_token();
240
241            // Guard against unexpected EOF
242            if token_with_span.token == Token::EOF {
243                return Err(error::InvalidSqlSnafu {
244                    msg: "Unexpected end of input while parsing TQL inside CTE".to_string(),
245                }
246                .build());
247            }
248
249            // Stop **before** the closing parenthesis that ends the CTE
250            if token_with_span.token == Token::RParen && paren_depth == 0 {
251                end_location = token_with_span.span.start;
252                break;
253            }
254
255            // Consume the token and track parentheses depth
256            let consumed = self.parser.next_token();
257            match consumed.token {
258                Token::LParen => paren_depth += 1,
259                Token::RParen => {
260                    // This RParen must belong to a nested expression since
261                    // `paren_depth > 0` here. Decrease depth accordingly.
262                    paren_depth = paren_depth.saturating_sub(1);
263                }
264                _ => {}
265            }
266        }
267
268        // Extract the TQL query string directly from the original SQL
269        let start_index = location_to_index(self.sql, &start_location);
270        let end_index = location_to_index(self.sql, &end_location);
271        let tql_string = &self.sql[start_index..end_index];
272        let tql_string = tql_string.trim();
273
274        // Parse the TQL string using the standard TQL parser
275        let mut stmts = ParserContext::create_with_dialect(
276            tql_string,
277            &GreptimeDbDialect {},
278            ParseOptions::default(),
279        )?;
280
281        if stmts.len() != 1 {
282            return Err(error::InvalidSqlSnafu {
283                msg: "Expected a single TQL statement inside CTE".to_string(),
284            }
285            .build());
286        }
287
288        match stmts.remove(0) {
289            Statement::Tql(Tql::Eval(eval)) => Ok(Tql::Eval(eval)),
290            Statement::Tql(_) => Err(error::InvalidSqlSnafu {
291                msg: "Only TQL EVAL is supported in CTEs".to_string(),
292            }
293            .build()),
294            _ => Err(error::InvalidSqlSnafu {
295                msg: "Expected a TQL statement inside CTE".to_string(),
296            }
297            .build()),
298        }
299    }
300}
301
302#[cfg(test)]
303mod tests {
304    use crate::dialect::GreptimeDbDialect;
305    use crate::parser::{ParseOptions, ParserContext};
306    use crate::parsers::with_tql_parser::CteContent;
307    use crate::statements::statement::Statement;
308    use crate::statements::tql::Tql;
309
310    #[test]
311    fn test_parse_hybrid_cte_with_parentheses_in_query() {
312        // Test that parentheses within the TQL query don't interfere with CTE parsing
313        let sql = r#"
314            WITH tql_cte AS (
315                TQL EVAL (0, 100, '5s') 
316                sum(rate(http_requests_total[1m])) + (max(cpu_usage) * (1 + 0.5))
317            ) 
318            SELECT * FROM tql_cte
319        "#;
320
321        let statements =
322            ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
323                .unwrap();
324        assert_eq!(statements.len(), 1);
325
326        let Statement::Query(query) = &statements[0] else {
327            panic!("Expected Query statement");
328        };
329        let hybrid_cte = query.hybrid_cte.as_ref().unwrap();
330        assert_eq!(hybrid_cte.cte_tables.len(), 1);
331
332        // Should be TQL content
333        assert!(matches!(
334            hybrid_cte.cte_tables[0].content,
335            CteContent::Tql(_)
336        ));
337
338        // Check that the query includes the parentheses
339        if let CteContent::Tql(Tql::Eval(eval)) = &hybrid_cte.cte_tables[0].content {
340            // Verify that complex nested parentheses are preserved correctly
341            // The new approach preserves original spacing, so no extra spaces between tokens
342            assert!(eval.query.contains("sum(rate(http_requests_total[1m]))"));
343            assert!(eval.query.contains("(max(cpu_usage) * (1 + 0.5))"));
344            // Most importantly, verify the parentheses counting didn't break the parsing
345            assert!(eval.query.contains("+ (max"));
346        }
347    }
348
349    #[test]
350    fn test_parse_hybrid_cte_sql_and_tql() {
351        let sql = r#"
352            WITH 
353                sql_cte(ts, value, label) AS (SELECT timestamp, val, name FROM metrics),
354                tql_cte(time, metric_value) AS (TQL EVAL (0, 100, '5s') cpu_usage)
355            SELECT s.ts, s.value, t.metric_value 
356            FROM sql_cte s JOIN tql_cte t ON s.ts = t.time
357        "#;
358
359        let statements =
360            ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
361                .unwrap();
362        assert_eq!(statements.len(), 1);
363
364        let Statement::Query(query) = &statements[0] else {
365            panic!("Expected Query statement");
366        };
367        let hybrid_cte = query.hybrid_cte.as_ref().unwrap();
368        assert_eq!(hybrid_cte.cte_tables.len(), 1); // only TQL CTE presents here
369
370        // First CTE should be TQL with column aliases
371        let second_cte = &hybrid_cte.cte_tables[0];
372        assert!(matches!(second_cte.content, CteContent::Tql(_)));
373        assert_eq!(second_cte.columns.len(), 2);
374        assert_eq!(
375            second_cte
376                .columns
377                .iter()
378                .map(|x| x.to_string())
379                .collect::<Vec<_>>()
380                .join(" "),
381            "time metric_value"
382        );
383    }
384}