object_store/path/
parts.rs1use percent_encoding::{AsciiSet, CONTROLS, percent_encode};
19use std::{
20 borrow::Cow,
21 iter::{self, FusedIterator},
22 str::SplitTerminator,
23};
24
25use crate::path::DELIMITER_BYTE;
26
27#[derive(Debug, thiserror::Error)]
29#[error(
30 "Encountered illegal character sequence \"{}\" whilst parsing path segment \"{}\"",
31 illegal,
32 segment
33)]
34#[allow(missing_copy_implementations)]
35pub struct InvalidPart {
36 segment: String,
37 illegal: String,
38}
39
40#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
48pub struct PathPart<'a> {
49 pub(super) raw: Cow<'a, str>,
50}
51
52impl<'a> PathPart<'a> {
53 pub fn parse(segment: &'a str) -> Result<Self, InvalidPart> {
55 if segment == "." || segment == ".." {
56 return Err(InvalidPart {
57 segment: segment.to_string(),
58 illegal: segment.to_string(),
59 });
60 }
61
62 for c in segment.chars() {
63 if c.is_ascii_control() || c == '/' {
64 return Err(InvalidPart {
65 segment: segment.to_string(),
66 illegal: c.to_string(),
68 });
69 }
70 }
71
72 Ok(Self {
73 raw: segment.into(),
74 })
75 }
76}
77
78const INVALID: &AsciiSet = &CONTROLS
80 .add(DELIMITER_BYTE)
82 .add(b'\\')
85 .add(b'{')
86 .add(b'^')
87 .add(b'}')
88 .add(b'%')
89 .add(b'`')
90 .add(b']')
91 .add(b'"') .add(b'>')
93 .add(b'[')
94 .add(b'~')
95 .add(b'<')
96 .add(b'#')
97 .add(b'|')
98 .add(b'\r')
101 .add(b'\n')
102 .add(b'*')
103 .add(b'?');
104
105impl<'a> From<&'a [u8]> for PathPart<'a> {
106 fn from(v: &'a [u8]) -> Self {
107 let inner = match v {
108 b"." => "%2E".into(),
111 b".." => "%2E%2E".into(),
112 other => percent_encode(other, INVALID).into(),
113 };
114 Self { raw: inner }
115 }
116}
117
118impl<'a> From<&'a str> for PathPart<'a> {
119 fn from(v: &'a str) -> Self {
120 Self::from(v.as_bytes())
121 }
122}
123
124impl From<String> for PathPart<'static> {
125 fn from(s: String) -> Self {
126 Self {
127 raw: Cow::Owned(PathPart::from(s.as_str()).raw.into_owned()),
128 }
129 }
130}
131
132impl AsRef<str> for PathPart<'_> {
133 fn as_ref(&self) -> &str {
134 self.raw.as_ref()
135 }
136}
137
138#[derive(Debug, Clone)]
140pub struct PathParts<'a>(iter::Map<SplitTerminator<'a, char>, fn(&str) -> PathPart<'_>>);
141
142impl<'a> PathParts<'a> {
143 pub(super) fn new(raw: &'a str) -> Self {
145 Self(
146 raw.split_terminator(super::DELIMITER_CHAR)
147 .map(|s| PathPart { raw: s.into() }),
148 )
149 }
150}
151
152impl<'a> Iterator for PathParts<'a> {
153 type Item = PathPart<'a>;
154
155 fn next(&mut self) -> Option<Self::Item> {
156 self.0.next()
157 }
158}
159
160impl<'a> FusedIterator for PathParts<'a> {}
161
162impl<'a> DoubleEndedIterator for PathParts<'a> {
163 fn next_back(&mut self) -> Option<Self::Item> {
164 self.0.next_back()
165 }
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// A delimiter inside a part is escaped by the `From` conversion.
    #[test]
    fn path_part_delimiter_gets_encoded() {
        let encoded = PathPart::from("foo/bar");
        assert_eq!(encoded.raw, "foo%2Fbar");
    }

    /// Input that already contains an escape sequence has its `%` escaped again.
    #[test]
    fn path_part_given_already_encoded_string() {
        let encoded = PathPart::from("foo%2Fbar");
        assert_eq!(encoded.raw, "foo%252Fbar");
    }

    /// A lone `.` is converted to its escaped form.
    #[test]
    fn path_part_cant_be_one_dot() {
        let encoded = PathPart::from(".");
        assert_eq!(encoded.raw, "%2E");
    }

    /// A lone `..` is converted to its escaped form.
    #[test]
    fn path_part_cant_be_two_dots() {
        let encoded = PathPart::from("..");
        assert_eq!(encoded.raw, "%2E%2E");
    }

    /// `parse` rejects an embedded delimiter and leaves percent signs alone.
    #[test]
    fn path_part_parse() {
        assert!(PathPart::parse("foo").is_ok());
        assert!(PathPart::parse("foo/bar").is_err());

        // Percent signs are not interpreted by `parse`, so both well-formed
        // and malformed escape sequences are accepted.
        for segment in ["foo%2Fbar", "L%3ABC.parquet", "%Z", "%%"] {
            assert!(
                PathPart::parse(segment).is_ok(),
                "expected {:?} to parse",
                segment
            );
        }
    }
}