From 6610ae816f78ee8fc1080b93a55bf19e4ce48d3e Mon Sep 17 00:00:00 2001 From: Georgios Kalpakas Date: Tue, 15 Dec 2015 12:16:14 +0200 Subject: [PATCH] fix(input): fix URL validation being too strict Background: Prior to ffb6b2f, there was a bug in `URL_REGEXP`, trying to match the hostname as `\S+` (meaning any non-space character). This resulted in never actually validating the structure of the URL (e.g. segments such as port, path, query, fragment). Then ffb6b2f and subsequently e4bb838 fixed that bug, but revealed `URL_REGEXP`'s "strictness" wrt certain parts of the URL. Since browsers are too lenient when it comes to URL validation anyway, it doesn't make sense for Angular to be much stricter, so this commit relaxes the "strictness" of `URL_REGEXP`, focusing more on the general structure, than on the specific characters allowed in each segment. Note 1: `URL_REGEXP` still seems to be stricter than browsers in some cases. Note 2: Browsers don't always agree on what is a valid URL and what isn't. Fixes #13528 Closes #13544 --- src/ng/directive/input.js | 13 +++- test/ng/directive/inputSpec.js | 125 ++++++++++++++++++++++++++++----- 2 files changed, 121 insertions(+), 17 deletions(-) diff --git a/src/ng/directive/input.js b/src/ng/directive/input.js index bcf34b26747e..65d26ad979cf 100644 --- a/src/ng/directive/input.js +++ b/src/ng/directive/input.js @@ -12,7 +12,18 @@ // Regex code is obtained from SO: https://stackoverflow.com/questions/3143070/javascript-regex-iso-datetime#answer-3143231 var ISO_DATE_REGEXP = /\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z)/; // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) -var URL_REGEXP = /^[A-Za-z][A-Za-z\d.+-]*:\/*(?:\w+(?::\w+)?@)?[^\s/]+(?::\d+)?(?:\/[\w#!:.?+=&%@\-/[\]$'()*,;~]*)?$/; +// Note: We are being more lenient, because browsers are too. +// 1. Scheme +// 2. Slashes +// 3. Username +// 4. Password +// 5. Hostname +// 6. Port +// 7. Path +// 8. Query +// 9. Fragment +// 1111111111111111 222 333333 44444 555555555555555555555555 666 77777777 8888888 999 +var URL_REGEXP = /^[a-z][a-z\d.+-]*:\/*(?:[^:@]+(?::[^@]+)?@)?(?:[^\s:/?#]+|\[[a-f\d:]+\])(?::\d+)?(?:\/[^?#]*)?(?:\?[^#]*)?(?:#.*)?$/i; var EMAIL_REGEXP = /^[a-z0-9!#$%&'*+\/=?^_`{|}~.-]+@[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$/i; var NUMBER_REGEXP = /^\s*(\-|\+)?(\d+|(\d*(\.\d*)))([eE][+-]?\d+)?\s*$/; var DATE_REGEXP = /^(\d{4})-(\d{2})-(\d{2})$/; diff --git a/test/ng/directive/inputSpec.js b/test/ng/directive/inputSpec.js index 98f45decab50..58689c0eea5f 100644 --- a/test/ng/directive/inputSpec.js +++ b/test/ng/directive/inputSpec.js @@ -2535,22 +2535,115 @@ describe('input', function() { describe('URL_REGEXP', function() { - /* global URL_REGEXP: false */ - it('should validate url', function() { - // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) - expect(URL_REGEXP.test('http://server:123/path')).toBe(true); - expect(URL_REGEXP.test('https://server:123/path')).toBe(true); - expect(URL_REGEXP.test('file:///home/user')).toBe(true); - expect(URL_REGEXP.test('mailto:user@example.com?subject=Foo')).toBe(true); - expect(URL_REGEXP.test('r2-d2.c3-p0://localhost/foo')).toBe(true); - expect(URL_REGEXP.test('abc:/foo')).toBe(true); - expect(URL_REGEXP.test('http://example.com/path;path')).toBe(true); - expect(URL_REGEXP.test('http://example.com/[]$\'()*,~)')).toBe(true); - expect(URL_REGEXP.test('http:')).toBe(false); - expect(URL_REGEXP.test('a@B.c')).toBe(false); - expect(URL_REGEXP.test('a_B.c')).toBe(false); - expect(URL_REGEXP.test('0scheme://example.com')).toBe(false); - expect(URL_REGEXP.test('http://example.com:9999/``')).toBe(false); + // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) + // Note: We are being more lenient, because browsers are too. + var urls = [ + ['scheme://hostname', true], + ['scheme://username:password@host.name:7678/pa/t.h?q=u&e=r&y#fragment', true], + + // Validating `scheme` + ['://example.com', false], + ['0scheme://example.com', false], + ['.scheme://example.com', false], + ['+scheme://example.com', false], + ['-scheme://example.com', false], + ['_scheme://example.com', false], + ['scheme0://example.com', true], + ['scheme.://example.com', true], + ['scheme+://example.com', true], + ['scheme-://example.com', true], + ['scheme_://example.com', false], + + // Vaidating `:` and `/` after `scheme` + ['scheme//example.com', false], + ['scheme:example.com', true], + ['scheme:/example.com', true], + ['scheme:///example.com', true], + + // Validating `username` and `password` + ['scheme://@example.com', true], + ['scheme://username@example.com', true], + ['scheme://u0s.e+r-n_a~m!e@example.com', true], + ['scheme://u#s$e%r^n&a*m;e@example.com', true], + ['scheme://:password@example.com', true], + ['scheme://username:password@example.com', true], + ['scheme://username:pass:word@example.com', true], + ['scheme://username:p0a.s+s-w_o~r!d@example.com', true], + ['scheme://username:p#a$s%s^w&o*r;d@example.com', true], + + // Validating `hostname` + ['scheme:', false], // Chrome, FF: true + ['scheme://', false], // Chrome, FF: true + ['scheme:// example.com:', false], // Chrome, FF: true + ['scheme://example com:', false], // Chrome, FF: true + ['scheme://:', false], // Chrome, FF: true + ['scheme://?', false], // Chrome, FF: true + ['scheme://#', false], // Chrome, FF: true + ['scheme://username:password@:', false], // Chrome, FF: true + ['scheme://username:password@/', false], // Chrome, FF: true + ['scheme://username:password@?', false], // Chrome, FF: true + ['scheme://username:password@#', false], // Chrome, FF: true + ['scheme://host.name', true], + ['scheme://123.456.789.10', true], + ['scheme://[1234:0000:0000:5678:9abc:0000:0000:def]', true], + ['scheme://[1234:0000:0000:5678:9abc:0000:0000:def]:7678', true], + ['scheme://[1234:0:0:5678:9abc:0:0:def]', true], + ['scheme://[1234::5678:9abc::def]', true], + ['scheme://~`!@$%^&*-_=+|\\;\'",.()[]{}<>', true], + + // Validating `port` + ['scheme://example.com/no-port', true], + ['scheme://example.com:7678', true], + ['scheme://example.com:76T8', false], // Chrome, FF: true + ['scheme://example.com:port', false], // Chrome, FF: true + + // Validating `path` + ['scheme://example.com/', true], + ['scheme://example.com/path', true], + ['scheme://example.com/path/~`!@$%^&*-_=+|\\;:\'",./()[]{}<>', true], + + // Validating `query` + ['scheme://example.com?query', true], + ['scheme://example.com/?query', true], + ['scheme://example.com/path?query', true], + ['scheme://example.com/path?~`!@$%^&*-_=+|\\;:\'",.?/()[]{}<>', true], + + // Validating `fragment` + ['scheme://example.com#fragment', true], + ['scheme://example.com/#fragment', true], + ['scheme://example.com/path#fragment', true], + ['scheme://example.com/path/#fragment', true], + ['scheme://example.com/path?query#fragment', true], + ['scheme://example.com/path?query#~`!@#$%^&*-_=+|\\;:\'",.?/()[]{}<>', true], + + // Validating miscellaneous + ['scheme://☺.✪.⌘.➡/䨹', true], + ['scheme://مثال.إختبار', true], + ['scheme://例子.测试', true], + ['scheme://उदाहरण.परीक्षा', true], + + // Legacy tests + ['http://server:123/path', true], + ['https://server:123/path', true], + ['file:///home/user', true], + ['mailto:user@example.com?subject=Foo', true], + ['r2-d2.c3-p0://localhost/foo', true], + ['abc:/foo', true], + ['http://example.com/path;path', true], + ['http://example.com/[]$\'()*,~)', true], + ['http:', false], // FF: true + ['a@B.c', false], + ['a_B.c', false], + ['0scheme://example.com', false], + ['http://example.com:9999/``', true] + ]; + + they('should validate url: $prop', urls, function(item) { + var url = item[0]; + var valid = item[1]; + + /* global URL_REGEXP: false */ + expect(URL_REGEXP.test(url)).toBe(valid); }); }); });