servers/mysql/
federated.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Use regex to filter out some MySQL federated components' emitted statements.
16//! Inspired by Databend's "[mysql_federated.rs](https://github.com/datafuselabs/databend/blob/ac706bf65845e6895141c96c0a10bad6fdc2d367/src/query/service/src/servers/mysql/mysql_federated.rs)".
17
18use std::collections::HashMap;
19use std::sync::Arc;
20
21use common_query::Output;
22use common_recordbatch::RecordBatches;
23use common_time::timezone::system_timezone_name;
24use datatypes::prelude::ConcreteDataType;
25use datatypes::schema::{ColumnSchema, Schema};
26use datatypes::vectors::StringVector;
27use once_cell::sync::Lazy;
28use regex::bytes::RegexSet;
29use regex::Regex;
30use session::context::QueryContextRef;
31use session::SessionRef;
32
33static SELECT_VAR_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new("(?i)^(SELECT @@(.*))").unwrap());
34static MYSQL_CONN_JAVA_PATTERN: Lazy<Regex> =
35    Lazy::new(|| Regex::new("(?i)^(/\\* mysql-connector-j(.*))").unwrap());
36static SHOW_LOWER_CASE_PATTERN: Lazy<Regex> =
37    Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES LIKE 'lower_case_table_names'(.*))").unwrap());
38static SHOW_VARIABLES_LIKE_PATTERN: Lazy<Regex> =
39    Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES( LIKE (.*))?)").unwrap());
40
41// SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP());
42static SELECT_TIME_DIFF_FUNC_PATTERN: Lazy<Regex> =
43    Lazy::new(|| Regex::new("(?i)^(SELECT TIMEDIFF\\(NOW\\(\\), UTC_TIMESTAMP\\(\\)\\))").unwrap());
44
45// sqlalchemy < 1.4.30
46static SHOW_SQL_MODE_PATTERN: Lazy<Regex> =
47    Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES LIKE 'sql_mode'(.*))").unwrap());
48
49static OTHER_NOT_SUPPORTED_STMT: Lazy<RegexSet> = Lazy::new(|| {
50    RegexSet::new([
51        // Txn.
52        "(?i)^(ROLLBACK(.*))",
53        "(?i)^(COMMIT(.*))",
54        "(?i)^(START(.*))",
55
56        // Set.
57        "(?i)^(SET NAMES(.*))",
58        "(?i)^(SET character_set_results(.*))",
59        "(?i)^(SET net_write_timeout(.*))",
60        "(?i)^(SET FOREIGN_KEY_CHECKS(.*))",
61        "(?i)^(SET AUTOCOMMIT(.*))",
62        "(?i)^(SET SQL_LOG_BIN(.*))",
63        "(?i)^(SET SESSION TRANSACTION(.*))",
64        "(?i)^(SET TRANSACTION(.*))",
65        "(?i)^(SET sql_mode(.*))",
66        "(?i)^(SET SQL_SELECT_LIMIT(.*))",
67        "(?i)^(SET @@(.*))",
68        "(?i)^(SET PROFILING(.*))",
69
70        // mysqlclient.
71        "(?i)^(SELECT \\$\\$)",
72
73        // mysqldump.
74        "(?i)^(SET SQL_QUOTE_SHOW_CREATE(.*))",
75        "(?i)^(LOCK TABLES(.*))",
76        "(?i)^(UNLOCK TABLES(.*))",
77        "(?i)^(SELECT LOGFILE_GROUP_NAME, FILE_NAME, TOTAL_EXTENTS, INITIAL_SIZE, ENGINE, EXTRA FROM INFORMATION_SCHEMA.FILES(.*))",
78
79        // mydumper.
80        "(?i)^(/\\*!80003 SET(.*) \\*/)$",
81        "(?i)^(SHOW MASTER STATUS)",
82        "(?i)^(SHOW ALL SLAVES STATUS)",
83        "(?i)^(LOCK BINLOG FOR BACKUP)",
84        "(?i)^(LOCK TABLES FOR BACKUP)",
85        "(?i)^(UNLOCK BINLOG(.*))",
86        "(?i)^(/\\*!40101 SET(.*) \\*/)$",
87
88        // DBeaver.
89        "(?i)^(SHOW WARNINGS)",
90        "(?i)^(/\\* ApplicationName=(.*)SHOW WARNINGS)",
91        "(?i)^(/\\* ApplicationName=(.*)SHOW PLUGINS)",
92        "(?i)^(/\\* ApplicationName=(.*)SHOW ENGINES)",
93        "(?i)^(/\\* ApplicationName=(.*)SELECT @@(.*))",
94        "(?i)^(/\\* ApplicationName=(.*)SHOW @@(.*))",
95        "(?i)^(/\\* ApplicationName=(.*)SET net_write_timeout(.*))",
96        "(?i)^(/\\* ApplicationName=(.*)SET SQL_SELECT_LIMIT(.*))",
97        "(?i)^(/\\* ApplicationName=(.*)SHOW VARIABLES(.*))",
98
99        // pt-toolkit
100        "(?i)^(/\\*!40101 SET(.*) \\*/)$",
101
102        // mysqldump 5.7.16
103        "(?i)^(/\\*!40100 SET(.*) \\*/)$",
104        "(?i)^(/\\*!40103 SET(.*) \\*/)$",
105        "(?i)^(/\\*!40111 SET(.*) \\*/)$",
106        "(?i)^(/\\*!40101 SET(.*) \\*/)$",
107        "(?i)^(/\\*!40014 SET(.*) \\*/)$",
108        "(?i)^(/\\*!40000 SET(.*) \\*/)$",
109    ]).unwrap()
110});
111
112static VAR_VALUES: Lazy<HashMap<&str, &str>> = Lazy::new(|| {
113    HashMap::from([
114        ("tx_isolation", "REPEATABLE-READ"),
115        ("session.tx_isolation", "REPEATABLE-READ"),
116        ("transaction_isolation", "REPEATABLE-READ"),
117        ("session.transaction_isolation", "REPEATABLE-READ"),
118        ("session.transaction_read_only", "0"),
119        ("max_allowed_packet", "134217728"),
120        ("interactive_timeout", "31536000"),
121        ("wait_timeout", "31536000"),
122        ("net_write_timeout", "31536000"),
123        ("version_comment", "Greptime"),
124    ])
125});
126
127// Recordbatches for select function.
128// Format:
129// |function_name|
130// |value|
131fn select_function(name: &str, value: &str) -> RecordBatches {
132    let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
133        name,
134        ConcreteDataType::string_datatype(),
135        true,
136    )]));
137    let columns = vec![Arc::new(StringVector::from(vec![value])) as _];
138    RecordBatches::try_from_columns(schema, columns)
139        // unwrap is safe because the schema and data are definitely able to form a recordbatch, they are all string type
140        .unwrap()
141}
142
143// Recordbatches for show variable statement.
144// Format is:
145// | Variable_name | Value |
146// | xx            | yy    |
147fn show_variables(name: &str, value: &str) -> RecordBatches {
148    let schema = Arc::new(Schema::new(vec![
149        ColumnSchema::new("Variable_name", ConcreteDataType::string_datatype(), true),
150        ColumnSchema::new("Value", ConcreteDataType::string_datatype(), true),
151    ]));
152    let columns = vec![
153        Arc::new(StringVector::from(vec![name])) as _,
154        Arc::new(StringVector::from(vec![value])) as _,
155    ];
156    RecordBatches::try_from_columns(schema, columns)
157        // unwrap is safe because the schema and data are definitely able to form a recordbatch, they are all string type
158        .unwrap()
159}
160
161fn select_variable(query: &str, query_context: QueryContextRef) -> Option<Output> {
162    let mut fields = vec![];
163    let mut values = vec![];
164
165    // query like "SELECT @@aa, @@bb as cc, @dd..."
166    let query = query.to_lowercase();
167    let vars: Vec<&str> = query.split("@@").collect();
168    if vars.len() <= 1 {
169        return None;
170    }
171
172    // skip the first "select"
173    for var in vars.iter().skip(1) {
174        let var = var.trim_matches(|c| c == ' ' || c == ',');
175        let var_as: Vec<&str> = var
176            .split(" as ")
177            .map(|x| {
178                x.trim_matches(|c| c == ' ')
179                    .split_whitespace()
180                    .next()
181                    .unwrap_or("")
182            })
183            .collect();
184
185        // get value of variables from known sources or fallback to defaults
186        let value = match var_as[0] {
187            "time_zone" => query_context.timezone().to_string(),
188            "system_time_zone" => system_timezone_name(),
189            _ => VAR_VALUES
190                .get(var_as[0])
191                .map(|v| v.to_string())
192                .unwrap_or_else(|| "0".to_owned()),
193        };
194
195        values.push(Arc::new(StringVector::from(vec![value])) as _);
196        match var_as.len() {
197            1 => {
198                // @@aa
199                // field is '@@aa'
200                fields.push(ColumnSchema::new(
201                    format!("@@{}", var_as[0]),
202                    ConcreteDataType::string_datatype(),
203                    true,
204                ));
205            }
206            2 => {
207                // @@bb as cc:
208                // var is 'bb'.
209                // field is 'cc'.
210                fields.push(ColumnSchema::new(
211                    var_as[1],
212                    ConcreteDataType::string_datatype(),
213                    true,
214                ));
215            }
216            _ => return None,
217        }
218    }
219
220    let schema = Arc::new(Schema::new(fields));
221    // unwrap is safe because the schema and data are definitely able to form a recordbatch, they are all string type
222    let batches = RecordBatches::try_from_columns(schema, values).unwrap();
223    Some(Output::new_with_record_batches(batches))
224}
225
226fn check_select_variable(query: &str, query_context: QueryContextRef) -> Option<Output> {
227    if [&SELECT_VAR_PATTERN, &MYSQL_CONN_JAVA_PATTERN]
228        .iter()
229        .any(|r| r.is_match(query))
230    {
231        select_variable(query, query_context)
232    } else {
233        None
234    }
235}
236
237fn check_show_variables(query: &str) -> Option<Output> {
238    let recordbatches = if SHOW_SQL_MODE_PATTERN.is_match(query) {
239        Some(show_variables("sql_mode", "ONLY_FULL_GROUP_BY STRICT_TRANS_TABLES NO_ZERO_IN_DATE NO_ZERO_DATE ERROR_FOR_DIVISION_BY_ZERO NO_ENGINE_SUBSTITUTION"))
240    } else if SHOW_LOWER_CASE_PATTERN.is_match(query) {
241        Some(show_variables("lower_case_table_names", "0"))
242    } else if SHOW_VARIABLES_LIKE_PATTERN.is_match(query) {
243        Some(show_variables("", ""))
244    } else {
245        None
246    };
247    recordbatches.map(Output::new_with_record_batches)
248}
249
250// Check for SET or others query, this is the final check of the federated query.
251fn check_others(query: &str, _query_ctx: QueryContextRef) -> Option<Output> {
252    if OTHER_NOT_SUPPORTED_STMT.is_match(query.as_bytes()) {
253        return Some(Output::new_with_record_batches(RecordBatches::empty()));
254    }
255
256    let recordbatches = if SELECT_TIME_DIFF_FUNC_PATTERN.is_match(query) {
257        Some(select_function(
258            "TIMEDIFF(NOW(), UTC_TIMESTAMP())",
259            "00:00:00",
260        ))
261    } else {
262        None
263    };
264    recordbatches.map(Output::new_with_record_batches)
265}
266
267// Check whether the query is a federated or driver setup command,
268// and return some faked results if there are any.
269pub(crate) fn check(
270    query: &str,
271    query_ctx: QueryContextRef,
272    _session: SessionRef,
273) -> Option<Output> {
274    // INSERT don't need MySQL federated check. We assume the query doesn't contain
275    // federated or driver setup command if it starts with a 'INSERT' statement.
276    let the_6th_index = query.char_indices().nth(6).map(|(i, _)| i);
277    if let Some(index) = the_6th_index {
278        if query[..index].eq_ignore_ascii_case("INSERT") {
279            return None;
280        }
281    }
282
283    // First to check the query is like "select @@variables".
284    check_select_variable(query, query_ctx.clone())
285        // Then to check "show variables like ...".
286        .or_else(|| check_show_variables(query))
287        // Last check
288        .or_else(|| check_others(query, query_ctx))
289}
290
#[cfg(test)]
mod test {

    use common_query::OutputData;
    use common_time::timezone::set_default_timezone;
    use session::context::{Channel, QueryContext};
    use session::Session;

    use super::*;

    /// Runs `check` on `query` with a fresh MySQL-channel session and a context from
    /// `QueryContext::arc()`.
    fn run_check(query: &str) -> Option<Output> {
        let session = Arc::new(Session::new(None, Channel::Mysql, Default::default()));
        check(query, QueryContext::arc(), session)
    }

    /// Asserts that `query` is answered with record batches that pretty-print to `expected`.
    fn assert_record_batches(query: &str, expected: &str) {
        match run_check(query).unwrap().data {
            OutputData::RecordBatches(batches) => {
                assert_eq!(&batches.pretty_print().unwrap(), expected)
            }
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_check_abnormal() {
        // Non-ASCII input must not panic on the INSERT prefix check.
        assert!(run_check("🫣一点不正常的东西🫣").is_none());
    }

    #[test]
    fn test_check() {
        // Ordinary statements are not intercepted.
        assert!(run_check("select 1").is_none());
        assert!(run_check("select version").is_none());

        assert_record_batches(
            "SELECT @@version_comment LIMIT 1",
            "\
+-------------------+
| @@version_comment |
+-------------------+
| Greptime          |
+-------------------+",
        );

        // variables
        assert_record_batches(
            "select @@tx_isolation, @@session.tx_isolation",
            "\
+-----------------+------------------------+
| @@tx_isolation  | @@session.tx_isolation |
+-----------------+------------------------+
| REPEATABLE-READ | REPEATABLE-READ        |
+-----------------+------------------------+",
        );

        // set system timezone
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        // complex variables
        assert_record_batches(
            "/* mysql-connector-java-8.0.17 (Revision: 16a712ddb3f826a1933ab42b0039f7fb9eebc6ec) */SELECT  @@session.auto_increment_increment AS auto_increment_increment, @@character_set_client AS character_set_client, @@character_set_connection AS character_set_connection, @@character_set_results AS character_set_results, @@character_set_server AS character_set_server, @@collation_server AS collation_server, @@collation_connection AS collation_connection, @@init_connect AS init_connect, @@interactive_timeout AS interactive_timeout, @@license AS license, @@lower_case_table_names AS lower_case_table_names, @@max_allowed_packet AS max_allowed_packet, @@net_write_timeout AS net_write_timeout, @@performance_schema AS performance_schema, @@sql_mode AS sql_mode, @@system_time_zone AS system_time_zone, @@time_zone AS time_zone, @@transaction_isolation AS transaction_isolation, @@wait_timeout AS wait_timeout;",
            "\
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+---------------+
| auto_increment_increment | character_set_client | character_set_connection | character_set_results | character_set_server | collation_server | collation_connection | init_connect | interactive_timeout | license | lower_case_table_names | max_allowed_packet | net_write_timeout | performance_schema | sql_mode | system_time_zone | time_zone     | transaction_isolation | wait_timeout; |
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+---------------+
| 0                        | 0                    | 0                        | 0                     | 0                    | 0                | 0                    | 0            | 31536000            | 0       | 0                      | 134217728          | 31536000          | 0                  | 0        | Asia/Shanghai    | Asia/Shanghai | REPEATABLE-READ       | 31536000      |
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+---------------+",
        );

        assert_record_batches(
            "show variables",
            "\
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
|               |       |
+---------------+-------+",
        );

        assert_record_batches(
            "show variables like 'lower_case_table_names'",
            "\
+------------------------+-------+
| Variable_name          | Value |
+------------------------+-------+
| lower_case_table_names | 0     |
+------------------------+-------+",
        );

        assert_record_batches(
            "SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP())",
            "\
+----------------------------------+
| TIMEDIFF(NOW(), UTC_TIMESTAMP()) |
+----------------------------------+
| 00:00:00                         |
+----------------------------------+",
        );
    }
}