Skip to content

Commit

Permalink
Be lenient if a character set is 'single-quoted'.
Browse files Browse the repository at this point in the history
It's what Chrome does. Firefox and Safari ignore it. I'd love to ignore
it, but in our API that really just means the caller needs to handle an
IllegalCharsetNameException and nobody is expecting that.

Closes: #2677
  • Loading branch information
squarejesse committed Jul 11, 2016
1 parent 9c67901 commit f2c6d8a
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 3 deletions.
37 changes: 37 additions & 0 deletions okhttp-tests/src/test/java/okhttp3/MediaTypeTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,16 @@ public class MediaTypeTest {
assertInvalid("text/plain ; a=1");
}

/**
 * A {@code charset=utf-8} that sits inside a "double-quoted" parameter value is part of that
 * value, so the parsed media type is expected to report no charset.
 */
@Test public void testDoubleQuotesAreSpecial() throws Exception {
  assertNull(MediaType.parse("text/plain;a=\";charset=utf-8;b=\"").charset());
}

/**
 * Single quotes do not delimit a parameter value, so the {@code charset=utf-8} that appears
 * between them is expected to be recognized as the charset parameter.
 */
@Test public void testSingleQuotesAreNotSpecial() throws Exception {
  assertEquals("UTF-8", MediaType.parse("text/plain;a=';charset=utf-8;b='").charset().name());
}

@Test public void testParseWithSpecialCharacters() throws Exception {
MediaType mediaType = MediaType.parse(
"!#$%&'*+-.{|}~/!#$%&'*+-.{|}~; !#$%&'*+-.{|}~=!#$%&'*+-.{|}~");
Expand Down Expand Up @@ -138,6 +148,33 @@ public class MediaTypeTest {
}
}

/**
 * A 'single-quoted' charset value is invalid according to RFC 822, but Chrome accepts it.
 * Being lenient here spares callers a potentially unpleasant IllegalCharsetNameException, so
 * the quotes are expected to be stripped and the charset resolved normally.
 */
@Test public void testCharsetNameIsSingleQuoted() throws Exception {
  assertEquals("UTF-8", MediaType.parse("text/plain;charset='utf-8'").charset().name());
}

/**
 * When the double-quoted charset value is a lone single quote, resolving the charset is
 * expected to fail with an IllegalCharsetNameException.
 */
@Test public void testCharsetNameIsDoubleQuotedAndSingleQuote() throws Exception {
  MediaType loneQuote = MediaType.parse("text/plain;charset=\"'\"");
  try {
    loneQuote.charset();
    fail();
  } catch (IllegalCharsetNameException expected) {
    // Expected: the test passes only if charset() rejects the name.
  }
}

/**
 * Single quotes nested inside a double-quoted charset value are not given the lenient
 * treatment: resolving the charset is expected to fail with an IllegalCharsetNameException.
 */
@Test public void testCharsetNameIsDoubleQuotedAndSingleQuoted() throws Exception {
  MediaType nestedQuotes = MediaType.parse("text/plain;charset=\"'utf-8'\"");
  try {
    nestedQuotes.charset();
    fail();
  } catch (IllegalCharsetNameException expected) {
    // Expected: the test passes only if charset() rejects the name.
  }
}

@Test public void testDefaultCharset() throws Exception {
MediaType noCharset = MediaType.parse("text/plain");
assertEquals("UTF-8", noCharset.charset(Util.UTF_8).name());
Expand Down
14 changes: 11 additions & 3 deletions okhttp/src/main/java/okhttp3/MediaType.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,17 @@ public static MediaType parse(String string) {

String name = parameter.group(1);
if (name == null || !name.equalsIgnoreCase("charset")) continue;
String charsetParameter = parameter.group(2) != null
? parameter.group(2) // Value is a token.
: parameter.group(3); // Value is a quoted string.
String charsetParameter;
String token = parameter.group(2);
if (token != null) {
// If the token is 'single-quoted' it's invalid! But we're lenient and strip the quotes.
charsetParameter = (token.startsWith("'") && token.endsWith("'") && token.length() > 2)
? token.substring(1, token.length() - 1)
: token;
} else {
// Value is "double-quoted". That's valid and our regex group already strips the quotes.
charsetParameter = parameter.group(3);
}
if (charset != null && !charsetParameter.equalsIgnoreCase(charset)) {
throw new IllegalArgumentException("Multiple different charsets: " + string);
}
Expand Down

0 comments on commit f2c6d8a

Please sign in to comment.