common_datasource/
util.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
/// Splits `path` into a directory part and an optional file name.
///
/// The directory part always ends with `/`:
/// - an empty path yields `("/", None)`;
/// - a path ending with `/` is returned unchanged, with no file name;
/// - otherwise the path is cut after its last `/`, the remainder being the file name;
/// - a path without any `/` is treated as a file name under `/`.
pub fn find_dir_and_filename(path: &str) -> (String, Option<String>) {
    match path.rsplit_once('/') {
        // No separator at all: either nothing was given, or only a bare file name.
        None if path.is_empty() => ("/".to_string(), None),
        None => ("/".to_string(), Some(path.to_string())),
        // Nothing follows the last separator, so the whole path is a directory.
        Some((_, "")) => (path.to_string(), None),
        Some((_, name)) => {
            // Keep everything up to and including the last separator.
            let dir = &path[..path.len() - name.len()];
            (dir.to_string(), Some(name.to_string()))
        }
    }
}
31
32/// Normalize the schema inferred from the data.
33/// If the data type is null, set the data type to Utf8.
34pub fn normalize_infer_schema(schema: arrow_schema::Schema) -> arrow_schema::Schema {
35    let fields = schema
36        .fields
37        .iter()
38        .map(|f| {
39            if f.data_type().is_null() {
40                // Set the data type to Utf8 for null fields
41                Arc::new((**f).clone().with_data_type(arrow_schema::DataType::Utf8))
42            } else {
43                f.clone()
44            }
45        })
46        .collect::<Vec<_>>();
47
48    arrow_schema::Schema {
49        fields: arrow_schema::Fields::from(fields),
50        metadata: schema.metadata,
51    }
52}
53
#[cfg(test)]
mod tests {
    use url::Url;

    use super::*;

    /// `Url::parse` should expose the expected scheme and path for each sample URI.
    #[test]
    fn test_parse_uri() {
        // (uri, expected scheme, expected path)
        let cases = [
            ("s3://bucket/to/path/", "s3", "/to/path/"),
            ("fs:///to/path/", "fs", "/to/path/"),
            ("fs:///to/path/file", "fs", "/to/path/file"),
        ];

        for (uri, expected_scheme, expected_path) in cases {
            let url = Url::parse(uri).unwrap();
            assert_eq!(url.path(), expected_path);
            assert_eq!(url.scheme(), expected_scheme);
        }
    }

    /// Unix-style file and directory paths keep their path when converted to a `Url`.
    #[cfg(not(windows))]
    #[test]
    fn test_parse_path_and_dir() {
        let file_url = Url::from_file_path("/to/path/file").unwrap();
        assert_eq!(file_url.path(), "/to/path/file");

        let dir_url = Url::from_directory_path("/to/path/").unwrap();
        assert_eq!(dir_url.path(), "/to/path/");
    }

    /// Windows paths are expected to show up as `/C:/...` with forward slashes.
    #[cfg(windows)]
    #[test]
    fn test_parse_path_and_dir() {
        let file_url = Url::from_file_path("C:\\to\\path\\file").unwrap();
        assert_eq!(file_url.path(), "/C:/to/path/file");

        let dir_url = Url::from_directory_path("C:\\to\\path\\").unwrap();
        assert_eq!(dir_url.path(), "/C:/to/path/");
    }

    /// Covers directories, nested files, bare file names, the root and the empty path.
    #[test]
    fn test_find_dir_and_filename() {
        // (input path, expected directory, expected file name)
        let cases = [
            ("to/path/", "to/path/", None),
            ("to/path/filename", "to/path/", Some("filename")),
            ("/to/path/filename", "/to/path/", Some("filename")),
            ("/", "/", None),
            ("filename", "/", Some("filename")),
            ("", "/", None),
        ];

        for (input, expected_dir, expected_filename) in cases {
            let (dir, filename) = find_dir_and_filename(input);
            assert_eq!(dir, expected_dir);
            assert_eq!(filename.as_deref(), expected_filename);
        }
    }
}