Skip to content

Commit

Permalink
Filter user-tracking parameters from query string (fixes brave/brave-…
Browse files Browse the repository at this point in the history
…browser#4239)

If a URL's query string includes any of the parameters known to
track individual users, we remove those parameters.

We essentially apply the following to the query string:

    s/&(fbclid|gclid|msclkid|mc_eid)=[^&]+//g
    s/^(fbclid|gclid|msclkid|mc_eid)=[^&]+&//g
    s/^(fbclid|gclid|msclkid|mc_eid)=[^&]+$//g

https://support.google.com/analytics/answer/7519794
https://stackoverflow.com/questions/52847475/what-is-fbclid-the-new-facebook-parameter
https://about.ads.microsoft.com/en-us/blog/post/january-2018/conversion-tracking-update-on-bing-ads
https://developer.mailchimp.com/documentation/mailchimp/guides/getting-started-with-ecommerce/#e-commerce-tracking-and-reports
  • Loading branch information
fmarier committed Sep 10, 2019
1 parent e867c3d commit 10a9663
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 0 deletions.
1 change: 1 addition & 0 deletions browser/net/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ source_set("net") {
"//net",
"//services/network/public/cpp",
"//services/network/public/mojom",
"//third_party/re2",
"//url",
]

Expand Down
64 changes: 64 additions & 0 deletions browser/net/brave_site_hacks_network_delegate_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@

#include <memory>
#include <string>
#include <vector>

#include "base/lazy_instance.h"
#include "base/no_destructor.h"
#include "base/sequenced_task_runner.h"
#include "base/strings/string_util.h"
#include "brave/common/network_constants.h"
Expand All @@ -21,6 +24,7 @@
#include "content/public/common/referrer.h"
#include "extensions/common/url_pattern.h"
#include "net/url_request/url_request.h"
#include "third_party/re2/src/re2/re2.h"

using content::BrowserThread;
using content::Referrer;
Expand All @@ -29,6 +33,42 @@ namespace brave {

namespace {

const std::string& GetQueryStringTrackers() {
static const base::NoDestructor<std::string> trackers(base::JoinString(
std::vector<std::string>({"fbclid", "gclid", "msclkid", "mc_eid"}), "|"));
return *trackers;
}

// Declares a lazily-created, case-insensitive re2::RE2 matcher.
//
// DECLARE_LAZY_MATCHER(NAME, PATTERN) expands to:
//  - a traits struct LabelPatternLazyInstanceTraits_<NAME> whose New()
//    placement-constructs an RE2 for PATTERN into the storage it is handed
//    and DCHECKs that the pattern compiled, and
//  - a base::LazyInstance<re2::RE2, ...> variable called NAME using those
//    traits.
// PATTERN is evaluated inside New(), so it may be a runtime expression (the
// uses in this file pass std::string concatenations).
// NOTE(review): presumably New() only runs on the first NAME.Get() call --
// confirm against base::LazyInstance.
//
// From src/components/autofill/content/renderer/page_passwords_analyser.cc
// and password_form_conversion_utils.cc:
#define DECLARE_LAZY_MATCHER(NAME, PATTERN) \
struct LabelPatternLazyInstanceTraits_##NAME \
: public base::internal::DestructorAtExitLazyInstanceTraits<re2::RE2> { \
static re2::RE2* New(void* instance) { \
re2::RE2::Options options; \
options.set_case_sensitive(false); \
re2::RE2* matcher = new (instance) re2::RE2(PATTERN, options); \
DCHECK(matcher->ok()); \
return matcher; \
} \
}; \
base::LazyInstance<re2::RE2, LabelPatternLazyInstanceTraits_##NAME> NAME = \
LAZY_INSTANCE_INITIALIZER

// The three matchers below are applied to the query component only (the text
// after "?"), which is why they can anchor on ^ and $. Each one requires a
// non-empty value ("[^&]+"), so "fbclid=" and a bare "fbclid" never match.

// The whole query is a single tracker, e.g. "?fbclid=1234"
DECLARE_LAZY_MATCHER(tracker_only_matcher,
"^(" + GetQueryStringTrackers() + ")=[^&]+$");

// The query starts with a tracker that is followed by more parameters,
// e.g. "?fbclid=1234&foo=1". The trailing "&" is part of the match.
DECLARE_LAZY_MATCHER(tracker_first_matcher,
"^(" + GetQueryStringTrackers() + ")=[^&]+&");

// A tracker preceded by "&", anywhere after the first parameter,
// e.g. "?foo=1&fbclid=1234" or "?foo=1&fbclid=1234&bar=2". The leading "&" is
// part of the match.
DECLARE_LAZY_MATCHER(tracker_appended_matcher,
"&(" + GetQueryStringTrackers() + ")=[^&]+");

// The macro is only needed for the declarations above.
#undef DECLARE_LAZY_MATCHER

bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
GURL target_origin = ctx->request_url.GetOrigin();
Expand All @@ -50,12 +90,36 @@ bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
return false;
}

// Strips known user-tracking parameters from the query of |request_url|.
//
// |request_url|  - URL whose query component is inspected.
// |new_url_spec| - out-parameter; only written when at least one parameter
//                  was removed, in which case it receives the spec of
//                  |request_url| with the filtered query (or with no query at
//                  all if nothing is left). Otherwise it is left untouched.
void ApplyPotentialQueryStringFilter(const GURL& request_url,
                                     std::string* new_url_spec) {
  std::string new_query = request_url.query();

  // Every pass rewrites |new_query| in place and the later passes depend on
  // what the earlier ones left behind, so each call is its own statement.
  // Summing the three calls in a single expression would leave their relative
  // order unspecified (operands of '+' are not sequenced), and a different
  // order can leave a tracker in the query.
  //
  // 1. Trackers that follow another parameter ("&name=value").
  int replacement_count =
      re2::RE2::GlobalReplace(&new_query, tracker_appended_matcher.Get(), "");
  // 2. A leading tracker that still has parameters after it.
  replacement_count +=
      re2::RE2::GlobalReplace(&new_query, tracker_first_matcher.Get(), "");
  // 3. A tracker that is now the only thing left in the query.
  replacement_count +=
      re2::RE2::GlobalReplace(&new_query, tracker_only_matcher.Get(), "");

  if (replacement_count > 0) {
    url::Replacements<char> replacements;
    if (new_query.empty()) {
      // Drop the "?" as well rather than leaving an empty query behind.
      replacements.ClearQuery();
    } else {
      replacements.SetQuery(new_query.c_str(),
                            url::Component(0, new_query.size()));
    }
    *new_url_spec = request_url.ReplaceComponents(replacements).spec();
  }
}

} // namespace

// Runs the site-hack request rewrites on |ctx|: first the referrer block,
// then, for URLs that carry a query, the tracking-parameter filter. Always
// returns net::OK; |next_callback| is not used here.
int OnBeforeURLRequest_SiteHacksWork(
    const ResponseCallback& next_callback,
    std::shared_ptr<BraveRequestInfo> ctx) {
  ApplyPotentialReferrerBlock(ctx);

  const auto& url = ctx->request_url;
  if (!url.has_query()) {
    // Nothing to filter.
    return net::OK;
  }

  ApplyPotentialQueryStringFilter(url, &ctx->new_url_spec);
  return net::OK;
}

Expand Down
82 changes: 82 additions & 0 deletions browser/net/brave_site_hacks_network_delegate_helper_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "brave/browser/net/url_context.h"
Expand Down Expand Up @@ -267,4 +268,85 @@ TEST_F(BraveSiteHacksNetworkDelegateHelperTest,
});
}

// URLs that contain no removable tracking parameter must pass through
// OnBeforeURLRequest_SiteHacksWork without a replacement URL being produced.
TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringUntouched) {
  // The element type is deliberately non-const: standard containers require a
  // cv-unqualified value type, so constness goes on the vector itself.
  const std::vector<std::string> urls({
      "https://example.com/",
      "https://example.com/?",
      "https://example.com/?+%20",
      "https://user:pass@example.com/path/file.html?foo=1#fragment",
      "http://user:pass@example.com/path/file.html?foo=1&bar=2#fragment",
      "https://example.com/?file=https%3A%2F%2Fexample.com%2Ftest.pdf",
      "https://example.com/?title=1+2&caption=1%202",
      "https://example.com/?foo=1&&bar=2#fragment",
      "https://example.com/?foo&bar=&#fragment",
      "https://example.com/?foo=1&fbcid=no&gcid=no&mc_cid=no&bar=&#frag",
      "https://example.com/?fbclid=&gclid&=mc_eid&msclkid=",
      "https://example.com/?value=fbclid=1&not-gclid=2&foo+mc_eid=3",
      "https://example.com/?+fbclid=1",
      "https://example.com/?%20fbclid=1",
      "https://example.com/#fbclid=1",
  });
  for (const auto& url : urls) {
    net::TestDelegate test_delegate;
    std::unique_ptr<net::URLRequest> request = context()->CreateRequest(
        GURL(url), net::IDLE, &test_delegate, TRAFFIC_ANNOTATION_FOR_TESTS);

    std::shared_ptr<brave::BraveRequestInfo> brave_request_info =
        std::make_shared<brave::BraveRequestInfo>();
    brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
                                                brave_request_info);
    brave::ResponseCallback callback;
    int ret =
        brave::OnBeforeURLRequest_SiteHacksWork(callback, brave_request_info);
    EXPECT_EQ(ret, net::OK);
    // new_url should not be set
    EXPECT_TRUE(brave_request_info->new_url_spec.empty());
    EXPECT_EQ(request->url(), GURL(url));
  }
}

// URLs that carry at least one tracking parameter with a non-empty value must
// come out of OnBeforeURLRequest_SiteHacksWork with new_url_spec set to the
// same URL minus those parameters.
TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) {
  // The element type is deliberately non-const: standard containers require a
  // cv-unqualified value type, so constness goes on the vector itself.
  const std::vector<std::pair<std::string, std::string>> urls = {
      // { original url, expected url after filtering }
      {"https://example.com/?fbclid=1234", "https://example.com/"},
      {"https://example.com/?fbclid=1234&", "https://example.com/"},
      {"https://example.com/?&fbclid=1234", "https://example.com/"},
      {"https://example.com/?gclid=1234", "https://example.com/"},
      {"https://example.com/?fbclid=0&gclid=1&msclkid=a&mc_eid=a1",
       "https://example.com/"},
      {"https://example.com/?fbclid=&foo=1&bar=2&gclid=abc",
       "https://example.com/?fbclid=&foo=1&bar=2"},
      {"https://example.com/?fbclid=&foo=1&gclid=1234&bar=2",
       "https://example.com/?fbclid=&foo=1&bar=2"},
      {"http://u:p@example.com/path/file.html?foo=1&fbclid=abcd#fragment",
       "http://u:p@example.com/path/file.html?foo=1#fragment"},
      // Obscure edge cases that break most parsers:
      {"https://example.com/?fbclid&foo&&gclid=2&bar=&%20",
       "https://example.com/?fbclid&foo&&bar=&%20"},
      {"https://example.com/?fbclid=1&1==2&=msclkid&foo=bar&&a=b=c&",
       "https://example.com/?1==2&=msclkid&foo=bar&&a=b=c&"},
      {"https://example.com/?fbclid=1&=2&?foo=yes&bar=2+",
       "https://example.com/?=2&?foo=yes&bar=2+"},
      {"https://example.com/?fbclid=1&a+b+c=some%20thing&1%202=3+4",
       "https://example.com/?a+b+c=some%20thing&1%202=3+4"},
  };
  for (const auto& pair : urls) {
    net::TestDelegate test_delegate;
    std::unique_ptr<net::URLRequest> request =
        context()->CreateRequest(GURL(pair.first), net::IDLE, &test_delegate,
                                 TRAFFIC_ANNOTATION_FOR_TESTS);

    std::shared_ptr<brave::BraveRequestInfo> brave_request_info =
        std::make_shared<brave::BraveRequestInfo>();
    brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
                                                brave_request_info);
    brave::ResponseCallback callback;
    int ret =
        brave::OnBeforeURLRequest_SiteHacksWork(callback, brave_request_info);
    EXPECT_EQ(ret, net::OK);
    EXPECT_EQ(brave_request_info->new_url_spec, pair.second);
  }
}

} // namespace

0 comments on commit 10a9663

Please sign in to comment.