Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce safe regex matcher based on re2 engine #7878

Merged
merged 40 commits into from
Aug 23, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ec6e30a
introduce safe regex matcher based on re2 engine
mattklein123 Jul 29, 2019
52fe33b
fix
mattklein123 Aug 8, 2019
e2ce809
fix
mattklein123 Aug 8, 2019
876c6e6
fix
mattklein123 Aug 8, 2019
2da7fc2
more fix
mattklein123 Aug 9, 2019
b5100cb
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 9, 2019
9a5df74
fix
mattklein123 Aug 13, 2019
a191481
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 13, 2019
7646e3e
Merge branch 'master' into safe_regex
mattklein123 Aug 14, 2019
0a7ad69
fix
mattklein123 Aug 14, 2019
befef2b
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 14, 2019
f2a25ea
more
mattklein123 Aug 14, 2019
85d6b06
format
mattklein123 Aug 14, 2019
c8f60b2
fix
mattklein123 Aug 14, 2019
be16355
fix
mattklein123 Aug 14, 2019
c7abb14
fix
mattklein123 Aug 14, 2019
25aa583
fix and merge
mattklein123 Aug 15, 2019
c6a4775
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 15, 2019
1107252
more
mattklein123 Aug 16, 2019
0956d5a
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 16, 2019
718c4f2
checkpoint
mattklein123 Aug 16, 2019
1cec838
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 16, 2019
9237720
more
mattklein123 Aug 16, 2019
c98c32f
more
mattklein123 Aug 16, 2019
f3543b5
more
mattklein123 Aug 16, 2019
8381588
comment
mattklein123 Aug 16, 2019
ed82996
fix
mattklein123 Aug 16, 2019
a038234
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 17, 2019
03544f7
Merge branch 'master' into safe_regex
mattklein123 Aug 20, 2019
462454e
comments
mattklein123 Aug 20, 2019
65102e4
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 21, 2019
17f9579
comments
mattklein123 Aug 22, 2019
24f0fcf
fix
mattklein123 Aug 22, 2019
866d1d5
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 22, 2019
a717ecf
fix
mattklein123 Aug 22, 2019
f2657e5
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 22, 2019
b825b9a
comment
mattklein123 Aug 22, 2019
138f596
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 22, 2019
b82ca63
fix
mattklein123 Aug 23, 2019
83f53ae
Merge remote-tracking branch 'origin/master' into safe_regex
mattklein123 Aug 23, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/docs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ proto_library(
"//envoy/type:range",
"//envoy/type/matcher:metadata",
"//envoy/type/matcher:number",
"//envoy/type/matcher:regex",
"//envoy/type/matcher:string",
],
)
4 changes: 4 additions & 0 deletions api/envoy/api/v2/route/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ api_proto_library_internal(
"//envoy/api/v2/core:base",
"//envoy/type:percent",
"//envoy/type:range",
"//envoy/type/matcher:regex",
"//envoy/type/matcher:string",
],
)

Expand All @@ -20,5 +22,7 @@ api_go_proto_library(
"//envoy/api/v2/core:base_go_proto",
"//envoy/type:percent_go_proto",
"//envoy/type:range_go_proto",
"//envoy/type/matcher:regex_go_proto",
"//envoy/type/matcher:string_go_proto",
],
)
67 changes: 60 additions & 7 deletions api/envoy/api/v2/route/route.proto
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ option go_package = "route";
option java_generic_services = true;

import "envoy/api/v2/core/base.proto";
import "envoy/type/matcher/regex.proto";
import "envoy/type/matcher/string.proto";
import "envoy/type/percent.proto";
import "envoy/type/range.proto";

Expand Down Expand Up @@ -349,7 +351,17 @@ message RouteMatch {
// * The regex */b[io]t* matches the path */bot*
// * The regex */b[io]t* does not match the path */bite*
// * The regex */b[io]t* does not match the path */bit/bot*
string regex = 3 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex` as it is not safe for use with
// untrusted input in all cases.
string regex = 3 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// If specified, the route is a regular expression rule meaning that the
// regex must match the *:path* header once the query string is removed. The entire path
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved
// (without the query string) must match the regex. The rule will not match if only a
// subsequence of the *:path* header matches the regex.
type.matcher.RegexMatcher safe_regex = 10 [(validate.rules).message.required = true];
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved
}

// Indicates that prefix/path matching should be case insensitive. The default
Expand Down Expand Up @@ -409,7 +421,17 @@ message CorsPolicy {
// Specifies regex patterns that match allowed origins.
//
// An origin is allowed if either allow_origin or allow_origin_regex match.
repeated string allow_origin_regex = 8 [(validate.rules).repeated .items.string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `allow_origin_safe_regex` as it is not safe for
// use with untrusted input in all cases.
repeated string allow_origin_regex = 8
[(validate.rules).repeated .items.string.max_bytes = 1024, deprecated = true];

// Specifies regex patterns that match allowed origins.
//
// An origin is allowed if either allow_origin or allow_origin_safe_regex match.
repeated type.matcher.RegexMatcher allow_origin_safe_regex = 11;
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved

// Specifies the content for the *access-control-allow-methods* header.
string allow_methods = 2;
Expand Down Expand Up @@ -1076,9 +1098,17 @@ message VirtualCluster {
// * The regex */rides/\d+* matches the path */rides/0*
// * The regex */rides/\d+* matches the path */rides/123*
// * The regex */rides/\d+* does not match the path */rides/123/456*
string pattern = 1 [(validate.rules).string = {min_bytes: 1, max_bytes: 1024}];
//
// .. attention::
// This field has been deprecated in favor of `regex` as it is not safe for use with
// untrusted input in all cases.
string pattern = 1 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// Specifies the name of the virtual cluster. The virtual cluster name as well
// Specifies a regex pattern to use for matching requests. The entire path of the request
// must match the regex.
type.matcher.RegexMatcher regex = 4;
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved

// Specifies the name of the virtual cluster. The virtual cluster name as well
// as the virtual host name are used when emitting statistics. The statistics are emitted by the
// router filter and are documented :ref:`here <config_http_filters_router_stats>`.
string name = 2 [(validate.rules).string.min_bytes = 1];
Expand Down Expand Up @@ -1272,7 +1302,16 @@ message HeaderMatcher {
// * The regex *\d{3}* matches the value *123*
// * The regex *\d{3}* does not match the value *1234*
// * The regex *\d{3}* does not match the value *123.456*
string regex_match = 5 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex_match` as it is not safe for use
// with untrusted input in all cases.
string regex_match = 5 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// If specified, this regex string is a regular expression rule which implies the entire request
// header value must match the regex. The rule will not match if only a subsequence of the
// request header value matches the regex.
type.matcher.RegexMatcher safe_regex_match = 11;
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved

// If specified, header match will be performed based on range.
// The rule will match if the request header value is within this range.
Expand Down Expand Up @@ -1327,11 +1366,25 @@ message QueryParameterMatcher {
// Specifies the value of the key. If the value is absent, a request
// that contains the key in its query string will match, whether the
// key appears with a value (e.g., "?debug=true") or not (e.g., "?debug")
string value = 3;
//
// .. attention::
// This field is deprecated. Use an `exact` match inside the `string_match` field.
string value = 3 [deprecated = true];

// Specifies whether the query parameter value is a regular expression.
// Defaults to false. The entire query parameter value (i.e., the part to
// the right of the equals sign in "key=value") must match the regex.
// E.g., the regex "\d+$" will match "123" but not "a123" or "123a".
google.protobuf.BoolValue regex = 4;
//
// .. attention::
// This field is deprecated. Use a `safe_regex` match inside the `string_match` field.
google.protobuf.BoolValue regex = 4 [deprecated = true];

oneof query_parameter_match_specifier {
// Specifies whether a query parameter value should match against a string.
type.matcher.StringMatcher string_match = 5;
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved

// Specifies whether a query parameter should be present.
bool present_match = 6;
htuch marked this conversation as resolved.
Show resolved Hide resolved
}
}
17 changes: 17 additions & 0 deletions api/envoy/type/matcher/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@ api_proto_library_internal(
name = "string",
srcs = ["string.proto"],
visibility = ["//visibility:public"],
deps = [
":regex",
],
)

api_go_proto_library(
name = "string",
proto = ":string",
deps = [
":regex_go_proto",
],
)

api_proto_library_internal(
Expand All @@ -65,3 +71,14 @@ api_go_proto_library(
":string_go_proto",
],
)

api_proto_library_internal(
name = "regex",
srcs = ["regex.proto"],
visibility = ["//visibility:public"],
)

api_go_proto_library(
name = "regex",
proto = ":regex",
)
32 changes: 32 additions & 0 deletions api/envoy/type/matcher/regex.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
syntax = "proto3";

package envoy.type.matcher;

// The Java outer classname should be unique among the .proto files that share a java_package.
// This file defines RegexMatcher, so it uses RegexProto; the previous value "StringProto" looks
// copied from string.proto (same java_package) and would presumably clash with it.
option java_outer_classname = "RegexProto";
option java_multiple_files = true;
option java_package = "io.envoyproxy.envoy.type.matcher";
option go_package = "matcher";

import "validate/validate.proto";

// [#protodoc-title: RegexMatcher]

// A regex matcher designed for safety when used with untrusted input.
message RegexMatcher {
// Google's `re2 <https://github.com/google/re2>`_ regex engine. The regex string must adhere to
// the documented `syntax <https://github.com/google/re2/wiki/Syntax>`_. The engine is designed
// to complete execution in linear time as well as limit the amount of memory used. In the future
// different aspects of the engine may be made configurable.
message GoogleReEngine {
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved
}

oneof engine_type {
option (validate.required) = true;

// Google's re2 regex engine.
GoogleReEngine google_re_engine = 1 [(validate.rules).message.required = true];
}

// The regex match string. The string must be supported by the configured engine.
htuch marked this conversation as resolved.
Show resolved Hide resolved
string regex = 2 [(validate.rules).string.min_bytes = 1];
}
11 changes: 10 additions & 1 deletion api/envoy/type/matcher/string.proto
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ option java_multiple_files = true;
option java_package = "io.envoyproxy.envoy.type.matcher";
option go_package = "matcher";

import "envoy/type/matcher/regex.proto";

import "validate/validate.proto";

// [#protodoc-title: StringMatcher]
Expand Down Expand Up @@ -48,7 +50,14 @@ message StringMatcher {
// * The regex *\d{3}* matches the value *123*
// * The regex *\d{3}* does not match the value *1234*
// * The regex *\d{3}* does not match the value *123.456*
string regex = 4 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex` as it is not safe for use with
// untrusted input in all cases.
string regex = 4 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// The input string must match the regular expression specified here.
RegexMatcher safe_regex = 5;
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
4 changes: 4 additions & 0 deletions bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def envoy_dependencies(skip_targets = []):
_io_opentracing_cpp()
_net_zlib()
_repository_impl("bazel_toolchains")
_com_googlesource_code_re2()

_python_deps()
_cc_deps()
Expand Down Expand Up @@ -336,6 +337,9 @@ def _io_opentracing_cpp():
actual = "@io_opentracing_cpp//:opentracing",
)

# Declares the external dependency named "com_googlesource_code_re2" (Google's re2 regex library,
# per its REPOSITORY_LOCATIONS entry). NOTE(review): presumably _repository_impl looks the name up
# in REPOSITORY_LOCATIONS and fetches the pinned archive; confirm against its definition.
def _com_googlesource_code_re2():
_repository_impl("com_googlesource_code_re2")

def _com_lightstep_tracer_cpp():
location = REPOSITORY_LOCATIONS["com_lightstep_tracer_cpp"]
_repository_impl("com_lightstep_tracer_cpp")
Expand Down
5 changes: 5 additions & 0 deletions bazel/repository_locations.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -248,4 +248,9 @@ REPOSITORY_LOCATIONS = dict(
sha256 = "fcdebf54c89d839ffa7eefae166c8e4b551c765559db13ff15bff98047f344fb",
urls = ["https://storage.googleapis.com/quiche-envoy-integration/2a930469533c3b541443488a629fe25cd8ff53d0.tar.gz"],
),
com_googlesource_code_re2 = dict(
sha256 = "de6c3ee49b2cecdfd2936af18d6947db36726590e566b5915db3746784c55745",
strip_prefix = "re2-2019-07-01",
urls = ["https://github.com/google/re2/archive/2019-07-01.tar.gz"],
),
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved
)
1 change: 1 addition & 0 deletions docs/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ PROTO_RST="
/envoy/type/matcher/metadata/envoy/type/matcher/metadata.proto.rst
/envoy/type/matcher/value/envoy/type/matcher/value.proto.rst
/envoy/type/matcher/number/envoy/type/matcher/number.proto.rst
/envoy/type/matcher/regex/envoy/type/matcher/regex.proto.rst
/envoy/type/matcher/string/envoy/type/matcher/string.proto.rst
"

Expand Down
1 change: 1 addition & 0 deletions docs/root/api-v2/types/types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ Types
../type/range.proto
../type/matcher/metadata.proto
../type/matcher/number.proto
../type/matcher/regex.proto
../type/matcher/string.proto
../type/matcher/value.proto
17 changes: 16 additions & 1 deletion docs/root/intro/deprecated.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,22 @@ Deprecated items below are listed in chronological order.

Version 1.12.0 (pending)
========================
* The ORIGINAL_DST_LB :ref:`load balancing policy <envoy_api_field_Cluster.lb_policy>` is deprecated, use CLUSTER_PROVIDED policy instead when configuring an :ref:`original destination cluster <envoy_api_field_Cluster.type>`.
* The ORIGINAL_DST_LB :ref:`load balancing policy <envoy_api_field_Cluster.lb_policy>` is
deprecated, use CLUSTER_PROVIDED policy instead when configuring an :ref:`original destination
cluster <envoy_api_field_Cluster.type>`.
* The `regex` field in :ref:`StringMatcher <envoy_api_msg_type.matcher.StringMatcher>` has been
deprecated in favor of the `safe_regex` field.
* The `regex` field in :ref:`RouteMatch <envoy_api_msg_route.RouteMatch>` has been
deprecated in favor of the `safe_regex` field.
* The `allow_origin_regex` field in :ref:`CorsPolicy <envoy_api_msg_route.CorsPolicy>` has been
deprecated in favor of the `allow_origin_safe_regex` field.
* The `pattern` field in :ref:`VirtualCluster <envoy_api_msg_route.VirtualCluster>` has been
deprecated in favor of the `regex` field.
* The `regex_match` field in :ref:`HeaderMatcher <envoy_api_msg_route.HeaderMatcher>` has been
deprecated in favor of the `safe_regex_match` field.
* The `value` and `regex` fields in :ref:`QueryParameterMatcher
<envoy_api_msg_route.QueryParameterMatcher>` have been deprecated in favor of the `string_match`
and `present_match` fields.

Version 1.11.0 (July 11, 2019)
==============================
Expand Down
4 changes: 4 additions & 0 deletions docs/root/intro/version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Version history
* http: added the ability to reject HTTP/1.1 requests with invalid HTTP header values, using the runtime feature `envoy.reloadable_features.strict_header_validation`.
* http: added the ability to :ref:`merge adjacent slashes<envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.merge_slashes>` in the path.
* listeners: added :ref:`HTTP inspector listener filter <config_listener_filters_http_inspector>`.
* regex: introduced new :ref:`RegexMatcher <envoy_api_msg_type.matcher.RegexMatcher>` type that
provides a safe regex implementation for untrusted user input. This type is now used in all
configuration that processes user provided input. See :ref:`deprecated configuration details
<deprecated>` for more information.
* router: added :ref:`rq_retry_skipped_request_not_complete <config_http_filters_router_stats>` counter stat to router stats.
* router check tool: add coverage reporting & enforcement.
* tls: added verification of IP address SAN fields in certificates against configured SANs in the
Expand Down
5 changes: 5 additions & 0 deletions include/envoy/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ envoy_cc_library(
hdrs = ["time.h"],
)

envoy_cc_library(
name = "regex_interface",
hdrs = ["regex.h"],
)

envoy_cc_library(
name = "token_bucket_interface",
hdrs = ["token_bucket.h"],
Expand Down
28 changes: 28 additions & 0 deletions include/envoy/common/regex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include <memory>

#include "envoy/common/pure.h"

#include "absl/strings/string_view.h"

namespace Envoy {
namespace Regex {

/**
* A compiled regular expression matcher which uses an abstract regex engine.
*/
class CompiledMatcher {
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved
public:
virtual ~CompiledMatcher() = default;

/**
* @param value supplies the input to test against the compiled regular expression.
* @return whether the value matches the compiled regular expression.
*/
virtual bool match(absl::string_view value) const PURE;
};

using CompiledMatcherPtr = std::unique_ptr<const CompiledMatcher>;

} // namespace Regex
} // namespace Envoy
1 change: 1 addition & 0 deletions include/envoy/router/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ envoy_cc_library(
external_deps = ["abseil_optional"],
deps = [
"//include/envoy/access_log:access_log_interface",
"//include/envoy/common:regex_interface",
"//include/envoy/config:typed_metadata_interface",
"//include/envoy/http:codec_interface",
"//include/envoy/http:codes_interface",
Expand Down
7 changes: 4 additions & 3 deletions include/envoy/router/router.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "envoy/access_log/access_log.h"
#include "envoy/api/v2/core/base.pb.h"
#include "envoy/common/regex.h"
#include "envoy/config/typed_metadata.h"
#include "envoy/http/codec.h"
#include "envoy/http/codes.h"
Expand Down Expand Up @@ -105,10 +106,10 @@ class CorsPolicy {
*/
virtual const std::list<std::string>& allowOrigins() const PURE;

/*
* @return std::list<std::regex>& regexes that match allowed origins.
/**
* @return std::list<Regex::CompiledMatcherPtr>& regexes that match allowed origins.
*/
virtual const std::list<std::regex>& allowOriginRegexes() const PURE;
virtual const std::list<Regex::CompiledMatcherPtr>& allowOriginRegexes() const PURE;

/**
* @return std::string access-control-allow-methods value.
Expand Down
1 change: 1 addition & 0 deletions source/common/access_log/access_log_formatter.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "common/access_log/access_log_formatter.h"

#include <cstdint>
#include <regex>
#include <string>
#include <vector>

Expand Down
Loading