Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix URL encoding of request paths #95

Merged
merged 1 commit into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion s3-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ lazy_static = "1.4.0"
libc = "0.2.126"
libc-stdhandle = "0.1.0"
metrics = "0.20.1"
percent-encoding = "2.2.0"
pin-project = "1.0.12"
regex = "1.7.1"
static_assertions = "1.1.0"
thiserror = "1.0.34"
time = "0.3.14"
tracing = { version = "0.1.35", default-features = false, features = ["std", "log"] }
urlencoding = "2.1.2"
xmltree = "0.10.3"

[dev-dependencies]
Expand Down
59 changes: 54 additions & 5 deletions s3-client/src/s3_crt_client.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::ffi::{OsStr, OsString};
use std::future::Future;
use std::ops::Range;
use std::os::unix::prelude::OsStrExt;
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
Expand All @@ -22,6 +23,7 @@ use aws_crt_s3::s3::client::{

use async_trait::async_trait;
use futures::channel::oneshot;
use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
use pin_project::pin_project;
use thiserror::Error;
use tracing::{debug, error, trace, warn, Span};
Expand Down Expand Up @@ -358,14 +360,61 @@ impl<'a> S3Message<'a> {
self.inner.add_header(header)
}

/// Set the request path for this message.
fn set_request_path(&mut self, path: impl AsRef<OsStr>) -> Result<(), aws_crt_s3::common::error::Error> {
let mut full_path = OsString::with_capacity(self.path_prefix.len() + path.as_ref().len());
full_path.push(&self.path_prefix);
full_path.push(path);
/// Set the request path and query for this message. The components should not be URL-encoded;
/// this method will handle that.
fn set_request_path_and_query<P: AsRef<OsStr>>(
&mut self,
path: impl AsRef<OsStr>,
query: impl AsRef<[(P, P)]>,
) -> Result<(), aws_crt_s3::common::error::Error> {
// This is RFC 3986 but with '/' also considered a safe character for path fragments.
const URLENCODE_QUERY_FRAGMENT: &AsciiSet =
&NON_ALPHANUMERIC.remove(b'-').remove(b'.').remove(b'_').remove(b'~');
const URLENCODE_PATH_FRAGMENT: &AsciiSet = &URLENCODE_QUERY_FRAGMENT.remove(b'/');

fn write_encoded_fragment(s: &mut OsString, piece: impl AsRef<OsStr>, encoding: &'static AsciiSet) {
let iter = percent_encode(piece.as_ref().as_bytes(), encoding);
s.extend(iter.map(|s| OsStr::from_bytes(s.as_bytes())));
}

// This estimate is exact if no characters need encoding, otherwise we'll end up
// reallocating a couple of times. The '?' for the query is counted in the first key-value
// pair.
let space_needed = self.path_prefix.len()
+ path.as_ref().len()
+ query
.as_ref()
.iter()
.map(|(key, value)| key.as_ref().len() + value.as_ref().len() + 2) // +2 for & and =
.sum::<usize>();

let mut full_path = OsString::with_capacity(space_needed);

write_encoded_fragment(&mut full_path, &self.path_prefix, URLENCODE_PATH_FRAGMENT);
write_encoded_fragment(&mut full_path, &path, URLENCODE_PATH_FRAGMENT);

// Build the query string
if !query.as_ref().is_empty() {
full_path.push("?");
for (i, (key, value)) in query.as_ref().iter().enumerate() {
if i != 0 {
full_path.push("&");
}
write_encoded_fragment(&mut full_path, key, URLENCODE_QUERY_FRAGMENT);
full_path.push("=");
write_encoded_fragment(&mut full_path, value, URLENCODE_QUERY_FRAGMENT);
}
}

self.inner.set_request_path(full_path)
}

/// Set the request path for this message. The path should not be URL-encoded; this method will
/// handle that.
fn set_request_path(&mut self, path: impl AsRef<OsStr>) -> Result<(), aws_crt_s3::common::error::Error> {
self.set_request_path_and_query::<&str>(path, &[])
}

/// Sets the body input stream for this message, and returns any previously set input stream.
/// If input_stream is None, unsets the body.
fn set_body_stream(&mut self, input_stream: Option<InputStream<'a>>) -> Option<InputStream<'a>> {
Expand Down
1 change: 0 additions & 1 deletion s3-client/src/s3_crt_client/head_object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ impl S3CrtClient {
.new_request_template("HEAD", bucket)
.map_err(S3RequestError::construction_failure)?;

// Don't URI encode the key, since "/" needs to be preserved
let key = key.to_string();
message
.set_request_path(format!("/{key}"))
Expand Down
16 changes: 9 additions & 7 deletions s3-client/src/s3_crt_client/list_objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,17 +143,19 @@ impl S3CrtClient {
.new_request_template("GET", bucket)
.map_err(S3RequestError::construction_failure)?;

// Don't URI encode delimiter or prefix, since "/" in those needs to be a real "/".
let mut request = format!("/?list-type=2&delimiter={delimiter}&max-keys={max_keys}&prefix={prefix}");

let max_keys = format!("{max_keys}");
let mut query = vec![
("list-type", "2"),
("delimiter", delimiter),
("max-keys", &max_keys),
("prefix", prefix),
];
if let Some(continuation_token) = continuation_token {
// DO URI encode the continuation token, since "/" in it needs to become "%2F"
let continuation_token = urlencoding::encode(continuation_token);
request = format!("{request}&continuation-token={continuation_token}");
query.push(("continuation-token", continuation_token));
}

message
.set_request_path(request)
.set_request_path_and_query("/", query)
.map_err(S3RequestError::construction_failure)?;

let span = request_span!(self, "list_objects");
Expand Down
46 changes: 46 additions & 0 deletions s3-client/tests/list_objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,49 @@ async fn test_list_objects_404_bucket() {
Err(ObjectClientError::ServiceError(ListObjectsError::NoSuchBucket))
));
}

// Test list with keys and arguments that poke at URL encoding
#[tokio::test]
async fn test_interesting_keys() {
let keys = &[
"the first one@@@",
"the first one@@@/the 1st one!$%#@?_.-=&+^",
"the first one@@@/the 2nd+one!",
"the first one@@@/the 3rd one&",
];
let sdk_client = get_test_sdk_client().await;
let (bucket, prefix) = get_test_bucket_and_prefix("test_list_objects");
create_objects_for_test(&sdk_client, &bucket, &prefix, keys).await;

let client: S3CrtClient = get_test_client();

let result = client
.list_objects(&bucket, None, "/", 2, &prefix)
.await
.expect("ListObjects failed");
assert_eq!(result.common_prefixes[0], format!("{prefix}{}/", keys[0]));
assert_eq!(result.objects[0].key, format!("{prefix}{}", keys[0]));

let result = client
.list_objects(&bucket, None, "/", 1, &format!("{prefix}{}/", keys[0]))
.await
.expect("ListObjects failed");
assert_eq!(result.objects.len(), 1);
assert_eq!(result.objects[0].key, format!("{prefix}{}", keys[1]));
jamesbornholt marked this conversation as resolved.
Show resolved Hide resolved
assert!(result.next_continuation_token.is_some());

let result = client
.list_objects(
&bucket,
result.next_continuation_token.as_deref(),
"/",
1000,
&format!("{prefix}{}/", keys[0]),
)
.await
.expect("ListObjects failed");
assert_eq!(result.objects[0].key, format!("{prefix}{}", keys[2]));
assert_eq!(result.objects[1].key, format!("{prefix}{}", keys[3]));
assert_eq!(result.objects.len(), 2);
assert!(result.next_continuation_token.is_none());
}
63 changes: 62 additions & 1 deletion s3-file-connector/tests/fuse_tests/lookup_test.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::fs::{metadata, read_dir};
use std::fs::{metadata, read_dir, read_to_string};

use fuser::BackgroundSession;
use s3_file_connector::S3FilesystemConfig;
Expand Down Expand Up @@ -47,3 +47,64 @@ fn lookup_directory_overlap_test_s3(subdir: &str) {
fn lookup_directory_overlap_test_mock(prefix: &str, subdir: &str) {
lookup_directory_overlap_test(crate::fuse_tests::mock_session::new, prefix, subdir);
}

fn lookup_weird_characters_test<F>(creator_fn: F, prefix: &str)
where
F: FnOnce(&str, S3FilesystemConfig) -> (TempDir, BackgroundSession, PutObjectFn),
{
let (mount_point, _session, mut put_object_fn) = creator_fn(prefix, Default::default());

let keys = &[
"weird$dir name",
"weird$dir name/my 1st file~.jpg",
"weird$dir name/my 2nd file: the better one.jpg",
"weirder_.-@dir +name",
"weirder_.-@dir +name/",
];

for (i, key) in keys.iter().enumerate() {
put_object_fn(key, format!("hello world {i}").as_bytes()).unwrap();
}

let test_dir = read_dir(mount_point.path()).unwrap();
let dirs: Vec<_> = test_dir.map(|f| f.unwrap()).collect();

assert_eq!(
dirs.iter()
.map(|f| f.path().file_name().unwrap().to_str().unwrap().to_owned())
.collect::<Vec<_>>(),
vec!["weird$dir name", "weirder_.-@dir +name"]
);

let m = metadata(mount_point.path().join(keys[0])).unwrap();
assert!(m.file_type().is_dir());
let m = metadata(mount_point.path().join(keys[3])).unwrap();
assert!(m.file_type().is_dir());

let test_dir = read_dir(mount_point.path().join(keys[0])).unwrap();
let dirs: Vec<_> = test_dir.map(|f| f.unwrap()).collect();

assert_eq!(
dirs.iter()
.map(|f| f.path().file_name().unwrap().to_str().unwrap().to_owned())
.collect::<Vec<_>>(),
vec!["my 1st file~.jpg", "my 2nd file: the better one.jpg"]
);

let f = read_to_string(mount_point.path().join(keys[1])).unwrap();
assert_eq!(f, "hello world 1");

let f = read_to_string(mount_point.path().join(keys[2])).unwrap();
assert_eq!(f, "hello world 2");
}

#[cfg(feature = "s3_tests")]
#[test]
fn lookup_directory_weird_characters_s3() {
lookup_weird_characters_test(crate::fuse_tests::s3_session::new, "lookup_weird_characters_test");
}

#[test]
fn lookup_directory_weird_characters_mock() {
lookup_weird_characters_test(crate::fuse_tests::mock_session::new, "lookup_weird_characters_test");
}