sql/statements/transform/
expand_interval.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::ops::ControlFlow;
17use std::time::Duration as StdDuration;
18
19use itertools::Itertools;
20use lazy_static::lazy_static;
21use regex::Regex;
22use sqlparser::ast::{DataType, Expr, Interval, Value, ValueWithSpan};
23
24use crate::statements::transform::TransformRule;
25
26lazy_static! {
27    /// Matches either one or more digits `(\d+)` or one or more ASCII characters `[a-zA-Z]` or plus/minus signs
28    static ref INTERVAL_ABBREVIATION_PATTERN: Regex = Regex::new(r"([+-]?\d+|[a-zA-Z]+|\+|-)").unwrap();
29
30    /// Checks if the provided string starts as ISO_8601 format string (case/sign independent)
31    static ref IS_VALID_ISO_8601_PREFIX_PATTERN: Regex = Regex::new(r"^[-]?[Pp]").unwrap();
32
33    static ref INTERVAL_ABBREVIATION_MAPPING: HashMap<&'static str, &'static str> = HashMap::from([
34        ("y","years"),
35        ("mon","months"),
36        ("w","weeks"),
37        ("d","days"),
38        ("h","hours"),
39        ("m","minutes"),
40        ("s","seconds"),
41        ("millis","milliseconds"),
42        ("ms","milliseconds"),
43        ("us","microseconds"),
44        ("ns","nanoseconds"),
45    ]);
46}
47
/// Transform rule that expands abbreviated `INTERVAL` literals.
///
/// Supported abbreviations and the unit each one expands to:
///
/// | abbreviation | unit           |
/// |--------------|----------------|
/// | `y`          | `years`        |
/// | `mon`        | `months`       |
/// | `w`          | `weeks`        |
/// | `d`          | `days`         |
/// | `h`          | `hours`        |
/// | `m`          | `minutes`      |
/// | `s`          | `seconds`      |
/// | `millis`     | `milliseconds` |
/// | `ms`         | `milliseconds` |
/// | `us`         | `microseconds` |
/// | `ns`         | `nanoseconds`  |
///
/// Needed for queries that use the shortened form of `INTERVAL`,
/// e.g. `SELECT INTERVAL '1h'` or `SELECT INTERVAL '3w2d'`.
pub(crate) struct ExpandIntervalTransformRule;
64
65impl TransformRule for ExpandIntervalTransformRule {
66    /// Applies transform rule for `Interval` type by extending the shortened version (e.g. '1h', '2d') or
67    /// converting ISO 8601 format strings (e.g., "P1Y2M3D")
68    /// In case when `Interval` has `BinaryOp` value (e.g. query like `SELECT INTERVAL '2h' - INTERVAL '1h'`)
69    /// it's AST has `left` part of type `Value::SingleQuotedString` which needs to be handled specifically.
70    /// To handle the `right` part which is `Interval` no extra steps are needed.
71    fn visit_expr(&self, expr: &mut Expr) -> ControlFlow<()> {
72        match expr {
73            Expr::Interval(interval) => match &*interval.value {
74                Expr::Value(ValueWithSpan {
75                    value: Value::SingleQuotedString(value),
76                    ..
77                })
78                | Expr::Value(ValueWithSpan {
79                    value: Value::DoubleQuotedString(value),
80                    ..
81                }) => {
82                    if let Some(normalized_name) = normalize_interval_name(value) {
83                        *expr = update_existing_interval_with_value(
84                            interval,
85                            single_quoted_string_expr(normalized_name),
86                        );
87                    }
88                }
89                Expr::BinaryOp { left, op, right } => match &**left {
90                    Expr::Value(ValueWithSpan {
91                        value: Value::SingleQuotedString(value),
92                        ..
93                    })
94                    | Expr::Value(ValueWithSpan {
95                        value: Value::DoubleQuotedString(value),
96                        ..
97                    }) => {
98                        if let Some(normalized_name) = normalize_interval_name(value) {
99                            let new_expr_value = Box::new(Expr::BinaryOp {
100                                left: single_quoted_string_expr(normalized_name),
101                                op: op.clone(),
102                                right: right.clone(),
103                            });
104                            *expr = update_existing_interval_with_value(interval, new_expr_value);
105                        }
106                    }
107                    _ => {}
108                },
109                _ => {}
110            },
111            Expr::Cast {
112                expr: cast_exp,
113                data_type,
114                kind,
115                format,
116            } => {
117                if DataType::Interval == *data_type {
118                    match &**cast_exp {
119                        Expr::Value(ValueWithSpan {
120                            value: Value::SingleQuotedString(value),
121                            ..
122                        })
123                        | Expr::Value(ValueWithSpan {
124                            value: Value::DoubleQuotedString(value),
125                            ..
126                        }) => {
127                            let interval_value =
128                                normalize_interval_name(value).unwrap_or_else(|| value.to_string());
129                            *expr = Expr::Cast {
130                                kind: kind.clone(),
131                                expr: single_quoted_string_expr(interval_value),
132                                data_type: DataType::Interval,
133                                format: std::mem::take(format),
134                            }
135                        }
136                        _ => {}
137                    }
138                }
139            }
140            _ => {}
141        }
142        ControlFlow::<()>::Continue(())
143    }
144}
145
146fn single_quoted_string_expr(string: String) -> Box<Expr> {
147    Box::new(Expr::Value(Value::SingleQuotedString(string).into()))
148}
149
150fn update_existing_interval_with_value(interval: &Interval, value: Box<Expr>) -> Expr {
151    Expr::Interval(Interval {
152        value,
153        leading_field: interval.leading_field.clone(),
154        leading_precision: interval.leading_precision,
155        last_field: interval.last_field.clone(),
156        fractional_seconds_precision: interval.fractional_seconds_precision,
157    })
158}
159
160/// Normalizes an interval expression string into the sql-compatible format.
161/// This function handles 2 types of input:
162/// 1. Abbreviated interval strings (e.g., "1y2mo3d")
163///    Returns an interval's full name (e.g., "years", "hours", "minutes") according to the `INTERVAL_ABBREVIATION_MAPPING`
164///    If the `interval_str` contains whitespaces, the interval name is considered to be in a full form.
165/// 2. ISO 8601 format strings (e.g., "P1Y2M3D"), case/sign independent
166///    Returns a number of milliseconds corresponding to ISO 8601 (e.g., "36525000 milliseconds")
167///
168/// Note: Hybrid format "1y 2 days 3h" is not supported.
169fn normalize_interval_name(interval_str: &str) -> Option<String> {
170    if interval_str.contains(char::is_whitespace) {
171        return None;
172    }
173
174    if IS_VALID_ISO_8601_PREFIX_PATTERN.is_match(interval_str) {
175        return parse_iso8601_interval(interval_str);
176    }
177
178    expand_interval_abbreviation(interval_str)
179}
180
181fn parse_iso8601_interval(signed_iso: &str) -> Option<String> {
182    let (is_negative, unsigned_iso) = if let Some(stripped) = signed_iso.strip_prefix('-') {
183        (true, stripped)
184    } else {
185        (false, signed_iso)
186    };
187
188    match iso8601::duration(&unsigned_iso.to_uppercase()) {
189        Ok(duration) => {
190            let millis = StdDuration::from(duration).as_millis();
191            let sign = if is_negative { "-" } else { "" };
192            Some(format!("{}{} milliseconds", sign, millis))
193        }
194        Err(_) => None,
195    }
196}
197
198fn expand_interval_abbreviation(interval_str: &str) -> Option<String> {
199    Some(
200        INTERVAL_ABBREVIATION_PATTERN
201            .find_iter(interval_str)
202            .map(|mat| {
203                let mat_str = mat.as_str();
204                *INTERVAL_ABBREVIATION_MAPPING
205                    .get(mat_str)
206                    .unwrap_or(&mat_str)
207            })
208            .join(" "),
209    )
210}
211
#[cfg(test)]
mod tests {
    use std::ops::ControlFlow;

    use sqlparser::ast::{BinaryOperator, CastKind, DataType, Expr, Interval, Value};

    use crate::statements::transform::expand_interval::{
        normalize_interval_name, single_quoted_string_expr, ExpandIntervalTransformRule,
    };
    use crate::statements::transform::TransformRule;

    /// Wraps `value` into an `Expr::Interval` without any field or precision qualifiers.
    fn create_interval(value: Box<Expr>) -> Expr {
        let interval = Interval {
            value,
            leading_field: None,
            leading_precision: None,
            last_field: None,
            fractional_seconds_precision: None,
        };
        Expr::Interval(interval)
    }

    /// Builds an expected single-quoted literal directly from the AST types, without going
    /// through the helper of the module under test.
    fn expected_literal(text: &str) -> Box<Expr> {
        Box::new(Expr::Value(
            Value::SingleQuotedString(text.to_string()).into(),
        ))
    }

    /// Applies the transform rule to `expr` and checks that the traversal continues.
    fn apply_rule(expr: &mut Expr) {
        let rule = ExpandIntervalTransformRule {};
        assert_eq!(rule.visit_expr(expr), ControlFlow::Continue(()));
    }

    #[test]
    fn test_transform_interval_basic_conversions() {
        // A single abbreviated unit is expanded to its full name.
        let abbreviated = [
            ("1y", "1 years"),
            ("4mon", "4 months"),
            ("-3w", "-3 weeks"),
            ("55h", "55 hours"),
            ("3d", "3 days"),
            ("5s", "5 seconds"),
            ("2m", "2 minutes"),
            ("100millis", "100 milliseconds"),
            ("200ms", "200 milliseconds"),
            ("350us", "350 microseconds"),
            ("400ns", "400 nanoseconds"),
        ];
        for (input, expected) in abbreviated {
            assert_eq!(normalize_interval_name(input).as_deref(), Some(expected));
        }

        // Strings containing whitespace are considered to be in full form already.
        let full_form = ["1 year 2 months 3 days 4 hours", "-2 months"];
        for input in full_form {
            assert_eq!(normalize_interval_name(input), None);
        }
    }

    #[test]
    fn test_transform_interval_compound_conversions() {
        let compound = [
            ("2y4mon6w", "2 years 4 months 6 weeks"),
            ("5d3h1m", "5 days 3 hours 1 minutes"),
            (
                "10s312ms789ns",
                "10 seconds 312 milliseconds 789 nanoseconds",
            ),
            (
                "23millis987us754ns",
                "23 milliseconds 987 microseconds 754 nanoseconds",
            ),
            ("-1d-5h", "-1 days -5 hours"),
            ("-2y-4mon-6w", "-2 years -4 months -6 weeks"),
            ("-5d-3h-1m", "-5 days -3 hours -1 minutes"),
            (
                "-10s-312ms-789ns",
                "-10 seconds -312 milliseconds -789 nanoseconds",
            ),
            (
                "-23millis-987us-754ns",
                "-23 milliseconds -987 microseconds -754 nanoseconds",
            ),
        ];
        for (input, expected) in compound {
            assert_eq!(normalize_interval_name(input).as_deref(), Some(expected));
        }
    }

    #[test]
    fn test_iso8601_format() {
        let cases = [
            ("P1Y2M3DT4H5M6S", Some("36993906000 milliseconds")),
            (
                "p3y3m700dt133h17m36.789s",
                Some("163343856789 milliseconds"),
            ),
            ("-P1Y2M3DT4H5M6S", Some("-36993906000 milliseconds")),
            ("P1_INVALID_ISO8601", None),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_interval_name(input).as_deref(), expected);
        }
    }

    #[test]
    fn test_visit_expr_when_interval_is_single_quoted_string_abbr_expr() {
        let mut string_expr = create_interval(single_quoted_string_expr("5y".to_string()));

        apply_rule(&mut string_expr);

        assert_eq!(string_expr, create_interval(expected_literal("5 years")));
    }

    #[test]
    fn test_visit_expr_when_interval_is_single_quoted_string_iso8601_expr() {
        let mut string_expr =
            create_interval(single_quoted_string_expr("P1Y2M3DT4H5M6S".to_string()));

        apply_rule(&mut string_expr);

        assert_eq!(
            string_expr,
            create_interval(expected_literal("36993906000 milliseconds"))
        );
    }

    #[test]
    fn test_visit_expr_when_interval_is_binary_op() {
        let mut binary_op_expr = create_interval(Box::new(Expr::BinaryOp {
            left: single_quoted_string_expr("2d".to_string()),
            op: BinaryOperator::Minus,
            right: Box::new(create_interval(single_quoted_string_expr("1d".to_string()))),
        }));

        apply_rule(&mut binary_op_expr);

        // Only the left literal is expanded by this visit; the nested interval on the
        // right keeps its abbreviated value.
        let expected = create_interval(Box::new(Expr::BinaryOp {
            left: expected_literal("2 days"),
            op: BinaryOperator::Minus,
            right: Box::new(create_interval(expected_literal("1d"))),
        }));
        assert_eq!(binary_op_expr, expected);
    }

    #[test]
    fn test_visit_expr_when_cast_expr() {
        // A cast to INTERVAL gets its string operand expanded.
        let mut cast_to_interval_expr = Expr::Cast {
            kind: CastKind::Cast,
            expr: single_quoted_string_expr("3y2mon".to_string()),
            data_type: DataType::Interval,
            format: None,
        };

        apply_rule(&mut cast_to_interval_expr);

        assert_eq!(
            cast_to_interval_expr,
            Expr::Cast {
                kind: CastKind::Cast,
                expr: expected_literal("3 years 2 months"),
                data_type: DataType::Interval,
                format: None,
            }
        );

        // A cast to any other type is left as it is.
        let untouched = Expr::Cast {
            kind: CastKind::Cast,
            expr: single_quoted_string_expr("5".to_string()),
            data_type: DataType::Int64,
            format: None,
        };
        let mut cast_to_i64_expr = untouched.clone();

        apply_rule(&mut cast_to_i64_expr);

        assert_eq!(cast_to_i64_expr, untouched);
    }
}
433}