-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
build: import manually minified Chrome URL lib. (#3)
This is a manually minified variant of https://chromium.googlesource.com/chromium/src.git/+archive/74.0.3729.15/url.tar.gz, providing just the parts needed for url::CanonicalizePath(). This is intended to support a security release fix for CVE-2019-9901. Long term we need this to be moved to absl or QUICHE for upgrades and long-term support. Some specific transforms of interest: * url_parse.h is minified to just Component and flattened back into the URL directory. It does not contain any non-Chromium authored code any longer and so does not have a separate LICENSE. * envoy_shim.h adapts various macros to the Envoy context. * Anything not reachable from url::CanonicalizePath() has been dropped. * Header include paths have changed as needed. * BUILD was manually written. * Various clang-tidy and format fixes. Risk level: Low Testing: Validated with WiP PR for CVE-2019-9901. Signed-off-by: Harvey Tuch <htuch@google.com>
- Loading branch information
Showing
15 changed files
with
1,428 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
licenses(["notice"]) # Apache 2 | ||
|
||
load( | ||
"//bazel:envoy_build_system.bzl", | ||
"envoy_cc_library", | ||
"envoy_package", | ||
) | ||
|
||
envoy_package() | ||
|
||
# Library target for the minified Chromium URL code kept for url::CanonicalizePath() (see README.md). | ||
envoy_cc_library( | ||
name = "chromium_url", | ||
srcs = [ | ||
"url_canon.cc", | ||
"url_canon_internal.cc", | ||
"url_canon_path.cc", | ||
"url_canon_stdstring.cc", | ||
], | ||
hdrs = [ | ||
"envoy_shim.h", | ||
"url_canon.h", | ||
"url_canon_internal.h", | ||
"url_canon_stdstring.h", | ||
"url_parse.h", | ||
"url_parse_internal.h", | ||
], | ||
# envoy_shim.h includes common/common/assert.h, hence the assert_lib dependency. | ||
deps = ["//source/common/common:assert_lib"], | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// Copyright 2015 The Chromium Authors. All rights reserved. | ||
// | ||
// Redistribution and use in source and binary forms, with or without | ||
// modification, are permitted provided that the following conditions are | ||
// met: | ||
// | ||
// * Redistributions of source code must retain the above copyright | ||
// notice, this list of conditions and the following disclaimer. | ||
// * Redistributions in binary form must reproduce the above | ||
// copyright notice, this list of conditions and the following disclaimer | ||
// in the documentation and/or other materials provided with the | ||
// distribution. | ||
// * Neither the name of Google Inc. nor the names of its | ||
// contributors may be used to endorse or promote products derived from | ||
// this software without specific prior written permission. | ||
// | ||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
This is a manually minified variant of | ||
https://chromium.googlesource.com/chromium/src.git/+archive/74.0.3729.15/url.tar.gz, | ||
providing just the parts needed for `url::CanonicalizePath()`. This is intended | ||
to support a security release fix for CVE-2019-9901. Long term we need this to | ||
be moved to absl or QUICHE for upgrades and long-term support. | ||
|
||
Some specific transforms of interest: | ||
* `url_parse.h` is minified to just `Component` and flattened back into the URL | ||
directory. It does not contain any non-Chromium authored code any longer and | ||
so does not have a separate LICENSE. | ||
* `envoy_shim.h` adapts various macros to the Envoy context. | ||
* Anything not reachable from `url::CanonicalizePath()` has been dropped. | ||
* Header include paths have changed as needed. | ||
* BUILD was manually written. | ||
* Various clang-tidy and format fixes. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#pragma once | ||
|
||
#include "common/common/assert.h" | ||
|
||
// This is a minimal Envoy adaptation layer for the Chromium URL library. | ||
// NOLINT(namespace-envoy) | ||
|
||
// Declares TypeName's copy constructor and copy-assignment operator as deleted. | ||
// The expansion has no trailing semicolon, so the use site supplies it. | ||
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ | ||
TypeName(const TypeName&) = delete; \ | ||
TypeName& operator=(const TypeName&) = delete | ||
|
||
// Export/visibility annotations used by the imported sources; they expand to | ||
// nothing here. | ||
#define EXPORT_TEMPLATE_DECLARE(x) | ||
#define EXPORT_TEMPLATE_DEFINE(x) | ||
#define COMPONENT_EXPORT(x) | ||
|
||
// Map the imported code's check macros onto ASSERT and | ||
// NOT_REACHED_GCOVR_EXCL_LINE (presumably provided by common/common/assert.h, | ||
// the only header included here; confirm). | ||
#define DCHECK(x) ASSERT(x) | ||
#define NOTREACHED() NOT_REACHED_GCOVR_EXCL_LINE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Envoy snapshot of Chromium URL path normalization, see README.md. | ||
// NOLINT(namespace-envoy) | ||
|
||
// Copyright 2017 The Chromium Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
|
||
#include "common/chromium_url/url_canon.h" | ||
|
||
#include "common/chromium_url/envoy_shim.h" | ||
|
||
namespace url { | ||
|
||
// Explicit instantiation definition of the narrow-character output buffer. | ||
// url_canon.h declares the same specialization with `extern template`, so this | ||
// translation unit is the one that emits it. Both EXPORT_TEMPLATE_DEFINE and | ||
// COMPONENT_EXPORT expand to nothing via envoy_shim.h. | ||
template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) CanonOutputT<char>; | ||
|
||
} // namespace url |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
// Envoy snapshot of Chromium URL path normalization, see README.md. | ||
// NOLINT(namespace-envoy) | ||
|
||
// Copyright 2013 The Chromium Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
|
||
#ifndef URL_URL_CANON_H_ | ||
#define URL_URL_CANON_H_ | ||
|
||
#include <stdlib.h> | ||
#include <string.h> | ||
|
||
#include "common/chromium_url/envoy_shim.h" | ||
#include "common/chromium_url/url_parse.h" | ||
|
||
namespace url { | ||
|
||
// Canonicalizer output ------------------------------------------------------- | ||
|
||
// Base class for the canonicalizer output, this maintains a buffer and | ||
// supports simple resizing and append operations on it. | ||
// | ||
// It is VERY IMPORTANT that no virtual function calls be made on the common | ||
// code path. We only have two virtual function calls, the destructor and a | ||
// resize function that is called when the existing buffer is not big enough. | ||
// The derived class is then in charge of setting up our buffer which we will | ||
// manage. | ||
template <typename T> class CanonOutputT {
public:
  // Starts with no buffer at all; a derived class either installs one in its
  // own constructor or lets the first write trigger Resize().
  CanonOutputT() : buffer_(nullptr), buffer_len_(0), cur_len_(0) {}
  virtual ~CanonOutputT() = default;

  // Implemented by the derived class to resize the buffer. The implementation
  // must point |buffer_| at the new storage, update |buffer_len_|, and copy
  // over any old data up to |cur_len_|.
  //
  // The new size |sz| must be larger than buffer_len_.
  virtual void Resize(int sz) = 0;

  // Returns the character at |offset|. The offset must be in the valid range;
  // no bounds check is performed.
  inline T at(int offset) const { return buffer_[offset]; }

  // Overwrites the character at |offset|. The offset MUST be less than
  // length(); no bounds check is performed.
  inline void set(int offset, T ch) { buffer_[offset] = ch; }

  // Returns the number of characters currently in the buffer.
  inline int length() const { return cur_len_; }

  // Returns the current capacity of the buffer. The length() is the number of
  // characters that have been declared to be written, but the capacity() is
  // the number that can be written without reallocation. If the caller must
  // write many characters at once, it can make sure there is enough capacity,
  // write the data, then use set_length() to declare the new length().
  int capacity() const { return buffer_len_; }

  // Gives access to the output. The output is NOT NULL-terminated; call
  // length() to get the number of valid characters.
  const T* data() const { return buffer_; }
  T* data() { return buffer_; }

  // Shortens the output to |new_len|. Used for "backing up" when processing
  // relative paths. It can also declare the new length after an external
  // function has written directly into the buffer past the current end.
  //
  // This MUST NOT be used to expand the size of the buffer beyond capacity().
  void set_length(int new_len) { cur_len_ = new_len; }

  // Appends a single character. This is the most performance critical
  // function, since it is called for every character. If the buffer is full
  // and cannot be grown, the character is silently dropped.
  void push_back(T ch) {
    // Keep the common case first so the fast path is a single predictable
    // branch.
    if (cur_len_ < buffer_len_) {
      buffer_[cur_len_] = ch;
      cur_len_++;
      return;
    }

    // Grow the buffer to hold at least one more item. Hopefully we won't have
    // to do this very often.
    if (!Grow(1))
      return;

    // Actually do the insertion.
    buffer_[cur_len_] = ch;
    cur_len_++;
  }

  // Appends |str_len| characters starting at |str|. A non-positive |str_len|
  // is a no-op. If the buffer cannot be grown far enough, nothing is appended.
  void Append(const T* str, int str_len) {
    // A negative length used to shrink cur_len_ silently; reject it up front.
    if (str_len <= 0)
      return;

    // Compare against the free space rather than forming cur_len_ + str_len,
    // which overflows int (undefined behavior) for very large |str_len| and
    // could bypass the capacity check entirely.
    const int available = buffer_len_ - cur_len_;
    if (str_len > available) {
      if (!Grow(str_len - available))
        return;
    }
    for (int i = 0; i < str_len; i++)
      buffer_[cur_len_ + i] = str[i];
    cur_len_ += str_len;
  }

  // Ensures the capacity covers |estimated_size|, resizing if it does not.
  void ReserveSizeIfNeeded(int estimated_size) {
    // Reserve a bit extra to account for escaped chars.
    if (estimated_size > buffer_len_)
      Resize(estimated_size + 8);
  }

protected:
  // Grows the buffer so that it can fit at least |min_additional| more
  // characters than the current capacity. The capacity is doubled until it is
  // large enough. Returns true if Resize() was invoked, false if the required
  // size would exceed the 2^30 cap.
  bool Grow(int min_additional) {
    static const int kMinBufferLen = 16;
    int new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
    do {
      if (new_len >= (1 << 30)) // Prevent overflow below.
        return false;
      new_len *= 2;
      // new_len >= buffer_len_ here, so the subtraction cannot overflow,
      // unlike buffer_len_ + min_additional for a huge |min_additional|.
    } while (new_len - buffer_len_ < min_additional);
    Resize(new_len);
    return true;
  }

  // Storage currently in use and its capacity in characters. Both are
  // maintained by the derived class's Resize().
  T* buffer_;
  int buffer_len_;

  // Used characters in the buffer.
  int cur_len_;
};
|
||
// Simple implementation of the CanonOutput using new[]. This class | ||
// also supports a static buffer so if it is allocated on the stack, most | ||
// URLs can be canonicalized with no heap allocations. | ||
template <typename T, int fixed_capacity = 1024> class RawCanonOutputT : public CanonOutputT<T> {
public:
  // Begins life backed by the inline array, so nothing is heap-allocated until
  // the output outgrows |fixed_capacity| characters.
  RawCanonOutputT() : CanonOutputT<T>() {
    this->buffer_len_ = fixed_capacity;
    this->buffer_ = fixed_buffer_;
  }

  // Releases the heap buffer if Resize() ever replaced the inline array.
  ~RawCanonOutputT() override {
    T* const active = this->buffer_;
    if (active != fixed_buffer_)
      delete[] active;
  }

  // Moves the contents into a freshly allocated array of |sz| characters,
  // keeping at most |sz| of the characters written so far, and frees the
  // previous array unless it was the inline one.
  void Resize(int sz) override {
    T* const previous = this->buffer_;
    T* const replacement = new T[sz];

    // Never copy more than the new array can hold.
    int keep = this->cur_len_;
    if (keep >= sz)
      keep = sz;
    memcpy(replacement, previous, sizeof(T) * keep);

    if (previous != fixed_buffer_)
      delete[] previous;
    this->buffer_ = replacement;
    this->buffer_len_ = sz;
  }

protected:
  // Inline storage used until the first Resize().
  T fixed_buffer_[fixed_capacity];
};
|
||
// Explicitly instantiate commonly used instantiations. | ||
extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL)) CanonOutputT<char>; | ||
|
||
// Normally, all canonicalization output is in narrow characters. We support | ||
// the templates so it can also be used internally if a wide buffer is | ||
// required. | ||
typedef CanonOutputT<char> CanonOutput; | ||
|
||
// Narrow-character form of RawCanonOutputT: fixes the element type to char and | ||
// leaves the inline capacity to the caller. It adds no members of its own. | ||
template <int fixed_capacity> | ||
class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {}; | ||
|
||
// Path. If the input does not begin in a slash (including if the input is | ||
// empty), we'll prepend a slash to the path to make it canonical. | ||
// | ||
// The 8-bit version assumes UTF-8 encoding, but does not verify the validity | ||
// of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid | ||
// characters, etc.). Normally, URLs will come in as UTF-16, so this isn't | ||
// an issue. Somebody giving us an 8-bit path is responsible for generating | ||
// the path that the server expects (we'll escape high-bit characters), so | ||
// if something is invalid, it's their problem. | ||
COMPONENT_EXPORT(URL) | ||
bool CanonicalizePath(const char* spec, const Component& path, CanonOutput* output, | ||
Component* out_path); | ||
|
||
} // namespace url | ||
|
||
#endif // URL_URL_CANON_H_ |
Oops, something went wrong.