Skip to content

Commit

Permalink
introduce safe regex matcher based on re2 engine (#7878)
Browse files Browse the repository at this point in the history
The libstdc++ std::regex implementation is not safe in all cases
for user provided input. This change deprecates the used of std::regex
in all user facing paths and introduces a new safe regex matcher with
an explicitly configurable engine, right now limited to Google's re2
regex engine. This is not a drop in replacement for std::regex as all
language features are not supported. As such we will go through a
deprecation period for the old regex engine.

Fixes envoyproxy/envoy#7728

Signed-off-by: Matt Klein <mklein@lyft.com>

Mirrored from https://github.com/envoyproxy/envoy @ eff020170c6267e6c8dc235473f7fc85c5b1e07d
  • Loading branch information
data-plane-api(CircleCI) committed Aug 23, 2019
1 parent 3337c67 commit 9b2125a
Show file tree
Hide file tree
Showing 6 changed files with 146 additions and 12 deletions.
1 change: 1 addition & 0 deletions docs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ proto_library(
"//envoy/type:range",
"//envoy/type/matcher:metadata",
"//envoy/type/matcher:number",
"//envoy/type/matcher:regex",
"//envoy/type/matcher:string",
],
)
4 changes: 4 additions & 0 deletions envoy/api/v2/route/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ api_proto_library_internal(
"//envoy/api/v2/core:base",
"//envoy/type:percent",
"//envoy/type:range",
"//envoy/type/matcher:regex",
"//envoy/type/matcher:string",
],
)

Expand All @@ -20,5 +22,7 @@ api_go_proto_library(
"//envoy/api/v2/core:base_go_proto",
"//envoy/type:percent_go_proto",
"//envoy/type:range_go_proto",
"//envoy/type/matcher:regex_go_proto",
"//envoy/type/matcher:string_go_proto",
],
)
88 changes: 77 additions & 11 deletions envoy/api/v2/route/route.proto
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ option go_package = "route";
option java_generic_services = true;

import "envoy/api/v2/core/base.proto";
import "envoy/type/matcher/regex.proto";
import "envoy/type/matcher/string.proto";
import "envoy/type/percent.proto";
import "envoy/type/range.proto";

Expand Down Expand Up @@ -349,7 +351,25 @@ message RouteMatch {
// * The regex */b[io]t* matches the path */bot*
// * The regex */b[io]t* does not match the path */bite*
// * The regex */b[io]t* does not match the path */bit/bot*
string regex = 3 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex` as it is not safe for use with
// untrusted input in all cases.
string regex = 3 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// If specified, the route is a regular expression rule meaning that the
// regex must match the *:path* header once the query string is removed. The entire path
// (without the query string) must match the regex. The rule will not match if only a
// subsequence of the *:path* header matches the regex.
//
// [#next-major-version: In the v3 API we should redo how path specification works such
// that we utilize StringMatcher, and additionally have consistent options around whether we
// strip query strings, do a case sensitive match, etc. In the interim it will be too disruptive
// to deprecate the existing options. We should even consider whether we want to do away with
// path_specifier entirely and just rely on a set of header matchers which can already match
// on :path, etc. The issue with that is it is unclear how to generically deal with query string
// stripping. This needs more thought.]
type.matcher.RegexMatcher safe_regex = 10 [(validate.rules).message.required = true];
}

// Indicates that prefix/path matching should be case insensitive. The default
Expand Down Expand Up @@ -404,12 +424,24 @@ message CorsPolicy {
// Specifies the origins that will be allowed to do CORS requests.
//
// An origin is allowed if either allow_origin or allow_origin_regex match.
repeated string allow_origin = 1;
//
// .. attention::
// This field has been deprecated in favor of `allow_origin_string_match`.
repeated string allow_origin = 1 [deprecated = true];

// Specifies regex patterns that match allowed origins.
//
// An origin is allowed if either allow_origin or allow_origin_regex match.
repeated string allow_origin_regex = 8 [(validate.rules).repeated .items.string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `allow_origin_string_match` as it is not safe for
// use with untrusted input in all cases.
repeated string allow_origin_regex = 8
[(validate.rules).repeated .items.string.max_bytes = 1024, deprecated = true];

// Specifies string patterns that match allowed origins. An origin is allowed if any of the
// string matchers match.
repeated type.matcher.StringMatcher allow_origin_string_match = 11;

// Specifies the content for the *access-control-allow-methods* header.
string allow_methods = 2;
Expand Down Expand Up @@ -1077,18 +1109,28 @@ message VirtualCluster {
// * The regex */rides/\d+* matches the path */rides/0*
// * The regex */rides/\d+* matches the path */rides/123*
// * The regex */rides/\d+* does not match the path */rides/123/456*
string pattern = 1 [(validate.rules).string = {min_bytes: 1, max_bytes: 1024}];
//
// .. attention::
// This field has been deprecated in favor of `headers` as it is not safe for use with
// untrusted input in all cases.
string pattern = 1 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// Specifies a list of header matchers to use for matching requests. Each specified header must
// match. The pseudo-headers `:path` and `:method` can be used to match the request path and
// method, respectively.
repeated HeaderMatcher headers = 4;

// Specifies the name of the virtual cluster. The virtual cluster name as well
// Specifies the name of the virtual cluster. The virtual cluster name as well
// as the virtual host name are used when emitting statistics. The statistics are emitted by the
// router filter and are documented :ref:`here <config_http_filters_router_stats>`.
string name = 2 [(validate.rules).string.min_bytes = 1];

// Optionally specifies the HTTP method to match on. For example GET, PUT,
// etc.
// [#comment:TODO(htuch): add (validate.rules).enum.defined_only = true once
// https://github.com/lyft/protoc-gen-validate/issues/42 is resolved.]
core.RequestMethod method = 3;
//
// .. attention::
// This field has been deprecated in favor of `headers`.
core.RequestMethod method = 3 [deprecated = true];
}

// Global rate limiting :ref:`architecture overview <arch_overview_rate_limit>`.
Expand Down Expand Up @@ -1248,6 +1290,7 @@ message RateLimit {
// <envoy_api_field_route.HeaderMatcher.name>` header will match, regardless of the header's
// value.
//
// [#next-major-version: HeaderMatcher should be refactored to use StringMatcher.]
message HeaderMatcher {
// Specifies the name of the header in the request.
string name = 1 [(validate.rules).string.min_bytes = 1];
Expand All @@ -1273,7 +1316,16 @@ message HeaderMatcher {
// * The regex *\d{3}* matches the value *123*
// * The regex *\d{3}* does not match the value *1234*
// * The regex *\d{3}* does not match the value *123.456*
string regex_match = 5 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex_match` as it is not safe for use
// with untrusted input in all cases.
string regex_match = 5 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// If specified, this regex string is a regular expression rule which implies the entire request
// header value must match the regex. The rule will not match if only a subsequence of the
// request header value matches the regex.
type.matcher.RegexMatcher safe_regex_match = 11;

// If specified, header match will be performed based on range.
// The rule will match if the request header value is within this range.
Expand Down Expand Up @@ -1328,11 +1380,25 @@ message QueryParameterMatcher {
// Specifies the value of the key. If the value is absent, a request
// that contains the key in its query string will match, whether the
// key appears with a value (e.g., "?debug=true") or not (e.g., "?debug")
string value = 3;
//
// ..attention::
// This field is deprecated. Use an `exact` match inside the `string_match` field.
string value = 3 [deprecated = true];

// Specifies whether the query parameter value is a regular expression.
// Defaults to false. The entire query parameter value (i.e., the part to
// the right of the equals sign in "key=value") must match the regex.
// E.g., the regex "\d+$" will match "123" but not "a123" or "123a".
google.protobuf.BoolValue regex = 4;
//
// ..attention::
// This field is deprecated. Use a `safe_regex` match inside the `string_match` field.
google.protobuf.BoolValue regex = 4 [deprecated = true];

oneof query_parameter_match_specifier {
// Specifies whether a query parameter value should match against a string.
type.matcher.StringMatcher string_match = 5 [(validate.rules).message.required = true];

// Specifies whether a query parameter should be present.
bool present_match = 6;
}
}
17 changes: 17 additions & 0 deletions envoy/type/matcher/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@ api_proto_library_internal(
name = "string",
srcs = ["string.proto"],
visibility = ["//visibility:public"],
deps = [
":regex",
],
)

api_go_proto_library(
name = "string",
proto = ":string",
deps = [
":regex_go_proto",
],
)

api_proto_library_internal(
Expand All @@ -65,3 +71,14 @@ api_go_proto_library(
":string_go_proto",
],
)

api_proto_library_internal(
name = "regex",
srcs = ["regex.proto"],
visibility = ["//visibility:public"],
)

api_go_proto_library(
name = "regex",
proto = ":regex",
)
37 changes: 37 additions & 0 deletions envoy/type/matcher/regex.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
syntax = "proto3";

package envoy.type.matcher;

option java_outer_classname = "StringProto";
option java_multiple_files = true;
option java_package = "io.envoyproxy.envoy.type.matcher";
option go_package = "matcher";

import "google/protobuf/wrappers.proto";
import "validate/validate.proto";

// [#protodoc-title: RegexMatcher]

// A regex matcher designed for safety when used with untrusted input.
message RegexMatcher {
// Google's `RE2 <https://github.com/google/re2>`_ regex engine. The regex string must adhere to
// the documented `syntax <https://github.com/google/re2/wiki/Syntax>`_. The engine is designed
// to complete execution in linear time as well as limit the amount of memory used.
message GoogleRE2 {
// This field controls the RE2 "program size" which is a rough estimate of how complex a
// compiled regex is to evaluate. A regex that has a program size greater than the configured
// value will fail to compile. In this case, the configured max program size can be increased
// or the regex can be simplified. If not specified, the default is 100.
google.protobuf.UInt32Value max_program_size = 1;
}

oneof engine_type {
option (validate.required) = true;

// Google's RE2 regex engine.
GoogleRE2 google_re2 = 1 [(validate.rules).message.required = true];
}

// The regex match string. The string must be supported by the configured engine.
string regex = 2 [(validate.rules).string.min_bytes = 1];
}
11 changes: 10 additions & 1 deletion envoy/type/matcher/string.proto
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ option java_multiple_files = true;
option java_package = "io.envoyproxy.envoy.type.matcher";
option go_package = "matcher";

import "envoy/type/matcher/regex.proto";

import "validate/validate.proto";

// [#protodoc-title: StringMatcher]
Expand Down Expand Up @@ -48,7 +50,14 @@ message StringMatcher {
// * The regex *\d{3}* matches the value *123*
// * The regex *\d{3}* does not match the value *1234*
// * The regex *\d{3}* does not match the value *123.456*
string regex = 4 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex` as it is not safe for use with
// untrusted input in all cases.
string regex = 4 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// The input string must match the regular expression specified here.
RegexMatcher safe_regex = 5 [(validate.rules).message.required = true];
}
}

Expand Down

0 comments on commit 9b2125a

Please sign in to comment.