Skip to content

Commit

Permalink
Handle raw UTF-8 bytes in redirect headers (#317)
Browse files Browse the repository at this point in the history
Try to parse the `Location` header as UTF-8 bytes as a fallback if the header value is not valid US-ASCII. This is technically against the URI spec which requires all literal characters in the URI to be US-ASCII (see [RFC 3986, Section 4.1](https://tools.ietf.org/html/rfc3986#section-4.1)).

This is also more or less against the HTTP spec, which historically allowed for ISO-8859-1 text in header values but since was restricted to US-ASCII plus opaque bytes. Never has UTF-8 been encouraged or allowed as-such. See [RFC 7230, Section 3.2.4](https://tools.ietf.org/html/rfc7230#section-3.2.4) for more info.

However, some bad or misconfigured web servers will do this anyway, and most web browsers recover from this by allowing and interpreting UTF-8 characters as themselves even though they _should_ have been percent-encoded. The third-party URI parsers that we use have no such leniency, so we percent-encode such bytes (if legal UTF-8) ahead of time before handing them off to the URI parser.

This is in the spirit of being generous with what we accept (within reason) while being strict in what we produce. Since real websites exhibit this out-of-spec behavior it is worth handling it.

Note that the underlying `tiny_http` library that our HTTP test mocking is based on does not allow UTF-8 header values right now, so we can't really test this efficiently. We already have a couple tests out there doing some raw TCP munging for one reason or another, so in the future we need to make sure to rewrite `testserver` to allow such headers and then enable the test. For now I've manually verified that this works.

Fixes #315.
  • Loading branch information
sagebind authored Apr 6, 2021
1 parent 3522593 commit 70100a4
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 55 deletions.
58 changes: 29 additions & 29 deletions src/agent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,15 @@ use crossbeam_utils::{atomic::AtomicCell, sync::WaitGroup};
use curl::multi::{Events, Multi, Socket, SocketEvents};
use flume::{Receiver, Sender};
use slab::Slab;
use std::{io, sync::{Arc, Mutex}, task::Waker, thread, time::{Duration, Instant}};
use std::{
io,
sync::{Arc, Mutex},
task::Waker,
thread,
time::{Duration, Instant},
};

use self::selector::Selector;
use self::timer::Timer;
use self::{selector::Selector, timer::Timer};

mod selector;
mod timer;
Expand Down Expand Up @@ -112,12 +117,7 @@ impl AgentBuilder {
.map_err(Error::from_any)?;
}

let agent = AgentContext::new(
multi,
selector,
message_tx_clone,
message_rx,
)?;
let agent = AgentContext::new(multi, selector, message_tx_clone, message_rx)?;

drop(wait_group_thread);

Expand Down Expand Up @@ -297,20 +297,22 @@ impl AgentContext {
})
.map_err(Error::from_any)?;

multi.timer_function({
let timer = timer.clone();
multi
.timer_function({
let timer = timer.clone();

move |timeout| match timeout {
Some(timeout) => {
timer.start(timeout);
true
}
None => {
timer.stop();
true
move |timeout| match timeout {
Some(timeout) => {
timer.start(timeout);
true
}
None => {
timer.stop();
true
}
}
}
}).map_err(Error::from_any)?;
})
.map_err(Error::from_any)?;

Ok(Self {
multi,
Expand Down Expand Up @@ -342,10 +344,9 @@ impl AgentContext {
self.waker
.chain(move |inner| match tx.send(Message::UnpauseRead(id)) {
Ok(()) => inner.wake_by_ref(),
Err(_) => tracing::warn!(
id,
"agent went away while resuming read for request"
),
Err(_) => {
tracing::warn!(id, "agent went away while resuming read for request")
}
})
},
{
Expand All @@ -354,10 +355,9 @@ impl AgentContext {
self.waker
.chain(move |inner| match tx.send(Message::UnpauseWrite(id)) {
Ok(()) => inner.wake_by_ref(),
Err(_) => tracing::warn!(
id,
"agent went away while resuming write for request"
),
Err(_) => {
tracing::warn!(id, "agent went away while resuming write for request")
}
})
},
);
Expand Down
59 changes: 44 additions & 15 deletions src/agent/selector.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
use curl::multi::Socket;
use polling::{Event, Poller};
use std::{collections::{HashMap, HashSet}, hash::{BuildHasherDefault, Hasher}, io, sync::Arc, task::Waker, time::Duration};
use std::{
collections::{HashMap, HashSet},
hash::{BuildHasherDefault, Hasher},
io,
sync::Arc,
task::Waker,
time::Duration,
};

/// Asynchronous I/O selector for sockets. Used by the agent to wait for network
/// activity asynchronously, as directed by curl.
Expand Down Expand Up @@ -65,7 +72,12 @@ impl Selector {
///
/// This method can also be used to update/modify the readiness events you
/// are interested in for a previously registered socket.
pub(crate) fn register(&mut self, socket: Socket, readable: bool, writable: bool) -> io::Result<()> {
pub(crate) fn register(
&mut self,
socket: Socket,
readable: bool,
writable: bool,
) -> io::Result<()> {
let previous = self.sockets.insert(socket, Registration {
readable,
writable,
Expand Down Expand Up @@ -138,7 +150,12 @@ impl Selector {
// If the socket was already re-registered this tick, then we
// don't need to do this.
if registration.tick != self.tick {
poller_modify(&self.poller, socket, registration.readable, registration.writable)?;
poller_modify(
&self.poller,
socket,
registration.readable,
registration.writable,
)?;
registration.tick = self.tick;
}
}
Expand All @@ -155,7 +172,8 @@ impl Selector {
if let Some(registration) = sockets.get_mut(&socket) {
if registration.tick != tick {
registration.tick = tick;
poller_add(poller, socket, registration.readable, registration.writable).is_err()
poller_add(poller, socket, registration.readable, registration.writable)
.is_err()
} else {
true
}
Expand All @@ -179,12 +197,10 @@ impl Selector {

/// Get an iterator over the socket events that occurred during the most
/// recent call to `poll`.
pub(crate) fn events(&self) -> impl Iterator<Item = (Socket, bool, bool)> + '_ {
self.events.iter().map(|event| (
event.key as Socket,
event.readable,
event.writable,
))
pub(crate) fn events(&self) -> impl Iterator<Item = (Socket, bool, bool)> + '_ {
self.events
.iter()
.map(|event| (event.key as Socket, event.readable, event.writable))
}
}

Expand All @@ -200,7 +216,11 @@ fn poller_add(poller: &Poller, socket: Socket, readable: bool, writable: bool) -
readable,
writable,
}) {
tracing::debug!("failed to add interest for socket {}, retrying as a modify: {}", socket, e);
tracing::debug!(
"failed to add interest for socket {}, retrying as a modify: {}",
socket,
e
);
poller.modify(socket, Event {
key: socket as usize,
readable,
Expand All @@ -211,15 +231,24 @@ fn poller_add(poller: &Poller, socket: Socket, readable: bool, writable: bool) -
Ok(())
}

fn poller_modify(poller: &Poller, socket: Socket, readable: bool, writable: bool) -> io::Result<()> {
fn poller_modify(
poller: &Poller,
socket: Socket,
readable: bool,
writable: bool,
) -> io::Result<()> {
// If this errors, we retry the operation as an add instead. This is done
// because epoll is weird.
if let Err(e) = poller.modify(socket, Event {
key: socket as usize,
readable,
writable,
}) {
tracing::debug!("failed to modify interest for socket {}, retrying as an add: {}", socket, e);
tracing::debug!(
"failed to modify interest for socket {}, retrying as an add: {}",
socket,
e
);
poller.add(socket, Event {
key: socket as usize,
readable,
Expand Down Expand Up @@ -251,8 +280,8 @@ fn is_bad_socket_error(error: &io::Error) -> bool {
// completion port or was already closed.
Some(ERROR_INVALID_HANDLE) | Some(ERROR_NOT_FOUND) if cfg!(windows) => true,

_ => false
}
_ => false,
},
}
}

Expand Down
7 changes: 5 additions & 2 deletions src/agent/timer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@ impl Timer {
}

pub(crate) fn is_expired(&self, now: Instant) -> bool {
self.timeout.load()
self.timeout
.load()
.map(|timeout| now >= timeout)
.unwrap_or(false)
}

pub(crate) fn get_remaining(&self, now: Instant) -> Option<Duration> {
self.timeout.load().map(|timeout| timeout.saturating_duration_since(now))
self.timeout
.load()
.map(|timeout| timeout.saturating_duration_since(now))
}

pub(crate) fn start(&self, timeout: Duration) {
Expand Down
3 changes: 1 addition & 2 deletions src/cookies/psl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
//! list. If we can't, then we log a warning and use the stale list anyway,
//! since a stale list is better than no list at all.
use crate::request::RequestExt;
use crate::ReadResponseExt;
use crate::{request::RequestExt, ReadResponseExt};
use chrono::{prelude::*, Duration};
use once_cell::sync::Lazy;
use parking_lot::{RwLock, RwLockUpgradableReadGuard};
Expand Down
5 changes: 4 additions & 1 deletion src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,10 @@ impl fmt::Debug for Error {
.field("kind", &self.kind())
.field("context", &self.0.context)
.field("source", &self.source())
.field("source_type", &self.0.source.as_ref().map(|e| e.type_name()))
.field(
"source_type",
&self.0.source.as_ref().map(|e| e.type_name()),
)
.finish()
}
}
Expand Down
59 changes: 53 additions & 6 deletions src/redirect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use crate::{
interceptor::{Context, Interceptor, InterceptorFuture},
request::RequestExt,
};
use http::{Request, Response, Uri};
use std::convert::TryFrom;
use http::{header::ToStrError, HeaderValue, Request, Response, Uri};
use std::{borrow::Cow, convert::TryFrom, str};
use url::Url;

/// How many redirects to follow by default if a limit is not specified. We
Expand Down Expand Up @@ -141,22 +141,69 @@ fn get_redirect_location<T>(request_uri: &Uri, response: &Response<T>) -> Option
if response.status().is_redirection() {
let location = response.headers().get(http::header::LOCATION)?;

match location.to_str() {
Ok(location) => match resolve(request_uri, location) {
match parse_location(location) {
Ok(location) => match resolve(request_uri, location.as_ref()) {
Ok(uri) => return Some(uri),
Err(e) => {
tracing::debug!("bad redirect location: {}", e);
tracing::debug!("invalid redirect location: {}", e);
}
},
Err(e) => {
tracing::debug!("bad redirect location: {}", e);
tracing::debug!("invalid redirect location: {}", e);
}
}
}

None
}

/// Parse the given `Location` header value into a string.
fn parse_location(location: &HeaderValue) -> Result<Cow<'_, str>, ToStrError> {
match location.to_str() {
// This will return a `str` if the header value contained only legal
// US-ASCII characters.
Ok(s) => Ok(Cow::Borrowed(s)),

// Try to parse the location as UTF-8 bytes instead of US-ASCII. This is
// technically against the URI spec which requires all literal
// characters in the URI to be US-ASCII (see [RFC 3986, Section
// 4.1](https://tools.ietf.org/html/rfc3986#section-4.1)).
//
// This is also more or less against the HTTP spec, which historically
// allowed for ISO-8859-1 text in header values but since was restricted
// to US-ASCII plus opaque bytes. Never has UTF-8 been encouraged or
// allowed as-such. See [RFC 7230, Section
// 3.2.4](https://tools.ietf.org/html/rfc7230#section-3.2.4) for more
// info.
//
// However, some bad or misconfigured web servers will do this anyway,
// and most web browsers recover from this by allowing and interpreting
// UTF-8 characters as themselves even though they _should_ have been
// percent-encoded. The third-party URI parsers that we use have no such
// leniency, so we percent-encode such bytes (if legal UTF-8) ahead of
// time before handing them off to the URI parser.
Err(e) => {
if str::from_utf8(location.as_bytes()).is_ok() {
let mut s = String::with_capacity(location.len());

for &byte in location.as_bytes() {
if byte.is_ascii() {
s.push(byte as char);
} else {
s.push_str(&format!("%{:02x}", byte));
}
}

Ok(Cow::Owned(s))
} else {
// Header value isn't legal UTF-8 either, not much we can
// reasonably do.
Err(e)
}
}
}
}

/// Resolve one URI in terms of another.
fn resolve(base: &Uri, target: &str) -> Result<Uri, Box<dyn std::error::Error>> {
// Optimistically check if this is an absolute URI.
Expand Down
33 changes: 33 additions & 0 deletions tests/redirects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,3 +283,36 @@ fn auto_referer_sets_expected_header() {
m2.request().expect_header("Referer", m1.url());
m3.request().expect_header("Referer", m2.url());
}

#[test]
#[ignore = "testserver does not support non-ASCII headers yet"]
fn redirect_with_unencoded_utf8_bytes_in_location() {
let m2 = mock! {
status: 200,
body: "ok",
};

// Put literal non-ASCII UTF-8 characters into the location!
let location = m2.url() + "?bad=résumé";

let m1 = mock! {
status: 301,
headers {
"Location": location,
}
};

let mut response = Request::get(m1.url())
.redirect_policy(RedirectPolicy::Follow)
.body(())
.unwrap()
.send()
.unwrap();

assert_eq!(response.status(), 200);
assert_eq!(response.text().unwrap(), "ok");
assert_eq!(response.effective_uri().unwrap().to_string(), m2.url());

assert!(!m1.requests().is_empty());
assert!(!m2.requests().is_empty());
}

0 comments on commit 70100a4

Please sign in to comment.