1use crate::ObjectStore;
19#[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
20use crate::local::LocalFileSystem;
21use crate::memory::InMemory;
22use crate::path::Path;
23use url::Url;
24
25#[derive(Debug, thiserror::Error)]
26pub enum Error {
27 #[error("Unable to recognise URL \"{}\"", url)]
28 Unrecognised { url: Url },
29
30 #[error(transparent)]
31 Path {
32 #[from]
33 source: crate::path::Error,
34 },
35}
36
37impl From<Error> for super::Error {
38 fn from(e: Error) -> Self {
39 Self::Generic {
40 store: "URL",
41 source: Box::new(e),
42 }
43 }
44}
45
/// The kind of object store a URL refers to, as determined by
/// `ObjectStoreScheme::parse`.
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions must
/// include a wildcard arm.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum ObjectStoreScheme {
    /// Local filesystem (`file:` URLs); built as a `LocalFileSystem`.
    Local,
    /// In-memory store (`memory:` URLs); built as an `InMemory`.
    Memory,
    /// Amazon S3 and S3-compatible endpoints; built via `AmazonS3Builder`.
    AmazonS3,
    /// Google Cloud Storage (`gs:` URLs); built via `GoogleCloudStorageBuilder`.
    GoogleCloudStorage,
    /// Microsoft Azure storage; built via `MicrosoftAzureBuilder`.
    MicrosoftAzure,
    /// Any other HTTP(S) endpoint; built via `HttpBuilder`.
    Http,
}
80
81impl ObjectStoreScheme {
82 pub fn parse(url: &Url) -> Result<(Self, Path), Error> {
106 let strip_bucket = || Some(url.path().strip_prefix('/')?.split_once('/')?.1);
107
108 let (scheme, path) = match (url.scheme(), url.host_str()) {
109 ("file", None) => (Self::Local, url.path()),
110 ("memory", None) => (Self::Memory, url.path()),
111 ("s3" | "s3a", Some(_)) => (Self::AmazonS3, url.path()),
112 ("gs", Some(_)) => (Self::GoogleCloudStorage, url.path()),
113 ("az" | "adl" | "azure" | "abfs" | "abfss", Some(_)) => {
114 (Self::MicrosoftAzure, url.path())
115 }
116 ("http", Some(_)) => (Self::Http, url.path()),
117 ("https", Some(host)) => {
118 if host.ends_with("dfs.core.windows.net")
119 || host.ends_with("blob.core.windows.net")
120 || host.ends_with("dfs.fabric.microsoft.com")
121 || host.ends_with("blob.fabric.microsoft.com")
122 {
123 (Self::MicrosoftAzure, strip_bucket().unwrap_or_default())
124 } else if host.ends_with("amazonaws.com") {
125 match host.starts_with("s3") {
126 true => (Self::AmazonS3, strip_bucket().unwrap_or_default()),
127 false => (Self::AmazonS3, url.path()),
128 }
129 } else if host.ends_with("r2.cloudflarestorage.com") {
130 (Self::AmazonS3, strip_bucket().unwrap_or_default())
131 } else {
132 (Self::Http, url.path())
133 }
134 }
135 _ => return Err(Error::Unrecognised { url: url.clone() }),
136 };
137
138 Ok((scheme, Path::from_url_path(path)?))
139 }
140}
141
/// Builds a boxed store from `$builder`, seeded with `$url` and configured
/// from the `(key, value)` pairs in `$options`.
///
/// Each key is ASCII-lowercased and parsed into the builder's config key
/// type; pairs whose key does not parse are skipped without error. The
/// expansion uses `?` on `build()`, so it must be used inside a function
/// whose error type can absorb the builder's error.
#[cfg(feature = "cloud")]
macro_rules! builder_opts {
    ($builder:ty, $url:expr, $options:expr) => {{
        let pairs = $options.into_iter();
        let mut builder = <$builder>::new().with_url($url.to_string());
        for (key, value) in pairs {
            // Unrecognised configuration keys are deliberately ignored.
            if let Ok(config_key) = key.as_ref().to_ascii_lowercase().parse() {
                builder = builder.with_config(config_key, value);
            }
        }
        Box::new(builder.build()?) as _
    }};
}
155
156pub fn parse_url(url: &Url) -> Result<(Box<dyn ObjectStore>, Path), super::Error> {
162 parse_url_opts(url, std::iter::empty::<(&str, &str)>())
163}
164
165pub fn parse_url_opts<I, K, V>(
188 url: &Url,
189 options: I,
190) -> Result<(Box<dyn ObjectStore>, Path), super::Error>
191where
192 I: IntoIterator<Item = (K, V)>,
193 K: AsRef<str>,
194 V: Into<String>,
195{
196 let _options = options;
197 let (scheme, path) = ObjectStoreScheme::parse(url)?;
198 let path = Path::parse(path)?;
199
200 let store = match scheme {
201 #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
202 ObjectStoreScheme::Local => Box::new(LocalFileSystem::new()) as _,
203 ObjectStoreScheme::Memory => Box::new(InMemory::new()) as _,
204 #[cfg(feature = "aws")]
205 ObjectStoreScheme::AmazonS3 => {
206 builder_opts!(crate::aws::AmazonS3Builder, url, _options)
207 }
208 #[cfg(feature = "gcp")]
209 ObjectStoreScheme::GoogleCloudStorage => {
210 builder_opts!(crate::gcp::GoogleCloudStorageBuilder, url, _options)
211 }
212 #[cfg(feature = "azure")]
213 ObjectStoreScheme::MicrosoftAzure => {
214 builder_opts!(crate::azure::MicrosoftAzureBuilder, url, _options)
215 }
216 #[cfg(feature = "http")]
217 ObjectStoreScheme::Http => {
218 let url = &url[..url::Position::BeforePath];
219 builder_opts!(crate::http::HttpBuilder, url, _options)
220 }
221 #[cfg(not(all(
222 feature = "fs",
223 feature = "aws",
224 feature = "azure",
225 feature = "gcp",
226 feature = "http",
227 not(target_arch = "wasm32")
228 )))]
229 s => {
230 return Err(super::Error::Generic {
231 store: "parse_url",
232 source: format!("feature for {s:?} not enabled").into(),
233 });
234 }
235 };
236
237 Ok((store, path))
238}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `ObjectStoreScheme::parse`: every URL must map
    /// to the expected scheme and object path, and every URL in `neg_cases`
    /// must be rejected.
    #[test]
    fn test_parse() {
        let cases = [
            // Host-less schemes.
            ("file:/path", (ObjectStoreScheme::Local, "path")),
            ("file:///path", (ObjectStoreScheme::Local, "path")),
            ("memory:/path", (ObjectStoreScheme::Memory, "path")),
            ("memory:///", (ObjectStoreScheme::Memory, "")),
            // Amazon S3 and S3-compatible endpoints.
            ("s3://bucket/path", (ObjectStoreScheme::AmazonS3, "path")),
            ("s3a://bucket/path", (ObjectStoreScheme::AmazonS3, "path")),
            (
                "https://s3.region.amazonaws.com/bucket",
                (ObjectStoreScheme::AmazonS3, ""),
            ),
            (
                "https://s3.region.amazonaws.com/bucket/path",
                (ObjectStoreScheme::AmazonS3, "path"),
            ),
            (
                "https://bucket.s3.region.amazonaws.com",
                (ObjectStoreScheme::AmazonS3, ""),
            ),
            (
                "https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket",
                (ObjectStoreScheme::AmazonS3, ""),
            ),
            (
                "https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket/path",
                (ObjectStoreScheme::AmazonS3, "path"),
            ),
            // Microsoft Azure: fully-qualified hosts.
            (
                "abfs://file_system@account_name.dfs.core.windows.net/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "abfss://file_system@account_name.dfs.core.windows.net/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "https://account.dfs.core.windows.net",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.dfs.core.windows.net/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "https://account.blob.core.windows.net",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.blob.core.windows.net/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            // Microsoft Azure: short forms, one pair per accepted scheme.
            (
                "az://container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "az://container@account/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "azure://container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "azure://container@account/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "abfs://container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "abfs://container@account/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "abfss://container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "abfss://container@account/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "adl://container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "adl://container@account/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            // Google Cloud Storage.
            (
                "gs://bucket/path",
                (ObjectStoreScheme::GoogleCloudStorage, "path"),
            ),
            (
                "gs://test.example.com/path",
                (ObjectStoreScheme::GoogleCloudStorage, "path"),
            ),
            // Generic HTTP(S).
            ("http://mydomain/path", (ObjectStoreScheme::Http, "path")),
            ("https://mydomain/path", (ObjectStoreScheme::Http, "path")),
            // Percent-encoding and non-ASCII handling.
            (
                "s3://bucket/foo%20bar",
                (ObjectStoreScheme::AmazonS3, "foo bar"),
            ),
            (
                "s3://bucket/foo bar",
                (ObjectStoreScheme::AmazonS3, "foo bar"),
            ),
            ("s3://bucket/😀", (ObjectStoreScheme::AmazonS3, "😀")),
            (
                "s3://bucket/%F0%9F%98%80",
                (ObjectStoreScheme::AmazonS3, "😀"),
            ),
            (
                "https://foo/bar%20baz",
                (ObjectStoreScheme::Http, "bar baz"),
            ),
            // Only one level of percent-decoding is applied.
            (
                "file:///bar%252Efoo",
                (ObjectStoreScheme::Local, "bar%2Efoo"),
            ),
            // Microsoft Fabric hosts.
            (
                "abfss://file_system@account.dfs.fabric.microsoft.com/",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.dfs.fabric.microsoft.com/",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.dfs.fabric.microsoft.com/container",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.dfs.fabric.microsoft.com/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "https://account.blob.fabric.microsoft.com/",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.blob.fabric.microsoft.com/container",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.blob.fabric.microsoft.com/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
        ];

        for (s, (expected_scheme, expected_path)) in cases {
            let url = Url::parse(s).unwrap();
            let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();

            assert_eq!(scheme, expected_scheme, "{s}");
            assert_eq!(path, Path::parse(expected_path).unwrap(), "{s}");
        }

        // Unknown scheme, and host-less schemes used with a host.
        let neg_cases = [
            "unix:/run/foo.socket",
            "file://remote/path",
            "memory://remote/",
        ];
        for s in neg_cases {
            let url = Url::parse(s).unwrap();
            assert!(ObjectStoreScheme::parse(&url).is_err(), "{s}");
        }
    }

    /// Spaces in a `file:` URL are percent-encoded by `Url` and decoded
    /// again in the returned path.
    ///
    /// Gated like the `Local` arm of `parse_url_opts`: without the local
    /// filesystem store compiled in, `parse_url` returns an error for
    /// `file:` URLs and the `unwrap` below would panic.
    #[test]
    #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
    fn test_url_spaces() {
        let url = Url::parse("file:///my file with spaces").unwrap();
        assert_eq!(url.path(), "/my%20file%20with%20spaces");
        let (_, path) = parse_url(&url).unwrap();
        assert_eq!(path.as_ref(), "my file with spaces");
    }

    /// End-to-end check against a mock HTTP server: options are applied to
    /// the HTTP builder (the upper-case `USER_AGENT` key exercises the
    /// case-insensitive key handling) and the request hits the expected path.
    #[tokio::test]
    #[cfg(all(feature = "http", not(target_arch = "wasm32")))]
    async fn test_url_http() {
        use crate::{ObjectStoreExt, client::mock_server::MockServer};
        use http::{Response, header::USER_AGENT};

        let server = MockServer::new().await;

        server.push_fn(|r| {
            assert_eq!(r.uri().path(), "/foo/bar");
            assert_eq!(r.headers().get(USER_AGENT).unwrap(), "test_url");
            Response::new(String::from("result"))
        });

        let test = format!("{}/foo/bar", server.url());
        let opts = [("USER_AGENT", "test_url"), ("allow_http", "true")];
        let url = test.parse().unwrap();
        let (store, path) = parse_url_opts(&url, opts).unwrap();
        assert_eq!(path.as_ref(), "foo/bar");

        let res = store.get(&path).await.unwrap();
        let body = res.bytes().await.unwrap();
        let body = str::from_utf8(&body).unwrap();
        assert_eq!(body, "result");

        server.shutdown().await;
    }
}