From 94d99e9cdef3eeb5a93dec2b61b11cc9a8dc231f Mon Sep 17 00:00:00 2001
From: Francois Marier
Date: Tue, 20 Aug 2019 14:36:55 -0700
Subject: [PATCH] Filter user-tracking parameters from query string (fixes brave/brave-browser#4239)

If a URL's query string includes one of the parameter names known
to track individual users, we remove them.

https://support.google.com/analytics/answer/7519794
https://stackoverflow.com/questions/52847475/what-is-fbclid-the-new-facebook-parameter
https://about.ads.microsoft.com/en-us/blog/post/january-2018/conversion-tracking-update-on-bing-ads
https://developer.mailchimp.com/documentation/mailchimp/guides/getting-started-with-ecommerce/#e-commerce-tracking-and-reports
---
 ...rave_site_hacks_network_delegate_helper.cc | 44 +++++++++++
 ..._hacks_network_delegate_helper_unittest.cc | 77 +++++++++++++++++++
 2 files changed, 121 insertions(+)

diff --git a/browser/net/brave_site_hacks_network_delegate_helper.cc b/browser/net/brave_site_hacks_network_delegate_helper.cc
index 6140f5710eec..b363376c71dd 100644
--- a/browser/net/brave_site_hacks_network_delegate_helper.cc
+++ b/browser/net/brave_site_hacks_network_delegate_helper.cc
@@ -50,12 +50,56 @@ bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
   return false;
 }
 
+bool ApplyPotentialQueryStringFilter(std::shared_ptr<BraveRequestInfo> ctx) {
+  DCHECK_CURRENTLY_ON(BrowserThread::IO);
+  if (ctx->request_url.has_query()) {
+    bool modified = false;
+    std::vector<std::string> new_query_parts;
+
+    const std::string query = ctx->request_url.query();
+    url::Component cursor(0, query.size());
+    url::Component key_range, value_range;
+    while (url::ExtractQueryKeyValue(query.data(), &cursor, &key_range,
+                                     &value_range)) {
+      const base::StringPiece key(query.data() + key_range.begin,
+                                  key_range.len);
+      const base::StringPiece value(query.data() + value_range.begin,
+                                    value_range.len);
+      if (!value.empty() && (key == "fbclid" || key == "gclid" ||
+                             key == "msclkid" || key == "mc_eid")) {
+        modified = true;  // We'll have to rewrite the query string.
+      } else if (!key.empty() || !value.empty()) {
+        // Add the current key=value to the new query string.
+        new_query_parts.push_back(base::StringPrintf("%s=%s",
+            key.as_string().c_str(), value.as_string().c_str()));
+      }
+    }
+
+    if (modified) {
+      url::Replacements<char> replacements;
+      if (new_query_parts.empty()) {
+        replacements.ClearQuery();
+      } else {
+        std::string new_query_string = base::JoinString(new_query_parts, "&");
+        url::Component new_query(0, new_query_string.size());
+        replacements.SetQuery(new_query_string.c_str(), new_query);
+      }
+      ctx->new_url_spec =
+          ctx->request_url.ReplaceComponents(replacements).spec();
+      return true;
+    }
+  }
+
+  return false;
+}
+
 }  // namespace
 
 int OnBeforeURLRequest_SiteHacksWork(
     const ResponseCallback& next_callback,
     std::shared_ptr<BraveRequestInfo> ctx) {
   ApplyPotentialReferrerBlock(ctx);
+  ApplyPotentialQueryStringFilter(ctx);
   return net::OK;
 }
 
diff --git a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc
index 2e1d62bbcb7b..e0fdd06eb1aa 100644
--- a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc
+++ b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc
@@ -267,4 +267,81 @@ TEST_F(BraveSiteHacksNetworkDelegateHelperTest,
   });
 }
 
+TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringUntouched) {
+  std::vector<GURL> urls({
+    GURL("https://example.com/"),
+    GURL("https://example.com/?"),
+    GURL("https://example.com/?+%20"),
+    GURL("https://user:pass@example.com/path/file.html?foo=1#fragment"),
+    GURL("http://user:pass@example.com/path/file.html?foo=1&bar=2#fragment"),
+    GURL("https://example.com/?file=https%3A%2F%2Fexample.com%2Ftest.pdf"),
+    GURL("https://example.com/?title=1+2&caption=1%202"),
+    GURL("https://example.com/?foo=1&&bar=2#fragment"),
+    GURL("https://example.com/?foo&bar=&#fragment"),
+    GURL("https://example.com/?foo=1&fbcid=no&gcid=no&mc_cid=no&bar=&#frag"),
+    GURL("https://example.com/?fbclid=&gclid&=mc_eid&msclkid="),
+    GURL("https://example.com/?value=fbclid=1&not-gclid=2&foo+mc_eid=3"),
+  });
+  std::for_each(urls.begin(), urls.end(), [this](GURL url){
+    net::TestDelegate test_delegate;
+    std::unique_ptr<net::URLRequest> request =
+        context()->CreateRequest(url, net::IDLE, &test_delegate,
+                                 TRAFFIC_ANNOTATION_FOR_TESTS);
+
+    std::shared_ptr<brave::BraveRequestInfo>
+        brave_request_info(new brave::BraveRequestInfo());
+    brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
+                                                brave_request_info);
+    brave::ResponseCallback callback;
+    int ret = brave::OnBeforeURLRequest_SiteHacksWork(callback,
+                                                      brave_request_info);
+    EXPECT_EQ(ret, net::OK);
+    // new_url should not be set
+    EXPECT_TRUE(brave_request_info->new_url_spec.empty());
+    EXPECT_EQ(request->url(), url);
+  });
+}
+
+TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) {
+  std::vector<std::pair<GURL, GURL> > urls({
+    // { original url, expected url after filtering }
+    { GURL("https://example.com/?fbclid=1234"), GURL("https://example.com/") },
+    { GURL("https://example.com/?gclid=1234"), GURL("https://example.com/") },
+    { GURL("https://example.com/?fbclid=0&gclid=1&msclkid=a&mc_eid=a1"),
+      GURL("https://example.com/") },
+    { GURL("https://example.com/?fbclid=&foo=1&gclid=1234&bar=2"),
+      GURL("https://example.com/?fbclid=&foo=1&bar=2") },
+    { GURL("http://u:p@example.com/path/file.html?foo=1&fbclid=abcd#fragment"),
+      GURL("http://u:p@example.com/path/file.html?foo=1#fragment") },
+    // Obscure edge cases that break most parsers
+    // { GURL("https://example.com/?fbclid&foo&&gclid=2&bar=&%20"),
+    //   GURL("https://example.com/?fbclid&foo&&bar=&%20") },
+    // { GURL("https://example.com/?fbclid=1&1==2&=msclkid&foo=bar&&a=b=c&"),
+    //   GURL("https://example.com/?1==2&=msclkid&foo=bar&&a=b=c&") },
+    // { GURL("https://example.com/?fbclid;foo=1;glcid=yes"),
+    //   GURL("https://example.com/?fbclid;foo=1;") },
+    // { GURL("https://example.com/?fbclid=1&=2&?foo=yes&bar=2+"),
+    //   GURL("https://example.com/?=2&?foo=yes&bar=2+") },
+    // { GURL("https://example.com/?fbclid=1&a+b+c=some%20thing&1%202=3+4"),
+    //   GURL("https://example.com/?a+b+c=some%20thing&1%202=3+4") },
+  });
+  std::for_each(urls.begin(), urls.end(), [this](std::pair<GURL, GURL> url){
+    net::TestDelegate test_delegate;
+    std::unique_ptr<net::URLRequest> request =
+        context()->CreateRequest(url.first, net::IDLE, &test_delegate,
+                                 TRAFFIC_ANNOTATION_FOR_TESTS);
+
+    std::shared_ptr<brave::BraveRequestInfo>
+        brave_request_info(new brave::BraveRequestInfo());
+    brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
+                                                brave_request_info);
+    brave::ResponseCallback callback;
+    int ret = brave::OnBeforeURLRequest_SiteHacksWork(callback,
+                                                      brave_request_info);
+    EXPECT_EQ(ret, net::OK);
+    EXPECT_STREQ(brave_request_info->new_url_spec.c_str(),
+                 url.second.spec().c_str());
+  });
+}
+
 }  // namespace